4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
56 from ganeti import query
57 from ganeti import qlang
58 from ganeti import opcodes
60 from ganeti import rpc
61 from ganeti import runtime
62 from ganeti import pathutils
63 from ganeti import vcluster
64 from ganeti.masterd import iallocator
66 import ganeti.masterd.instance # pylint: disable=W0611
#: Instance admin states in which an instance is considered down
INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcode.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    # NOTE(review): the constructor body (presumably storing `jobs` and the
    # extra keyword arguments) appears truncated in this copy -- confirm
    # against the original file.
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    # NOTE(review): a `self.op = op` assignment appears truncated here; the
    # attribute is read below (self.op.debug_level, self.op.Validate) --
    # confirm against the original file.
    self.cfg = context.cfg
    self.glm = context.glm
    # Convenience alias for listing the locks this LU currently owns
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # Logging helpers re-exported from the processor
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possible
        other cluster state)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }

      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }

      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    # NOTE(review): the next assignment preceding an unconditional raise looks
    # like it may originally have been an example inside a comment -- confirm.
    self.needed_locks = {} # Exclusive LUs don't need locks.

    raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        # NOTE(review): the per-tasklet `tl.CheckPrereq()` call appears
        # truncated here in this copy.

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        # NOTE(review): the `tl.Exec(feedback_fn)` call appears truncated
        # here; the raise below was presumably in an `else:` branch.

    raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None

    @return: the new Exec result, based on the previous result

    """
    # API must be kept, thus we ignore the unused argument and could
    # be a function warnings
    # pylint: disable=W0613,R0201
    # NOTE(review): the default `return lu_result` appears truncated here.

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    # NOTE(review): an `else:` appears truncated before the next assert in
    # this copy; behaviour is equivalent since the assert trivially holds
    # after the assignment above.
    assert locking.LEVEL_INSTANCE not in self.needed_locks, \
      "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    If should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we're really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    # NOTE(review): a `wanted_nodes = []` initialisation appears truncated
    # here -- the list is appended to below.
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      # NOTE(review): an `if not primary_only:` guard presumably precedes the
      # next line (otherwise the primary_only parameter is unused) -- confirm.
      wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    # NOTE(review): an `else:` appears truncated before the following raise.
    raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  # NOTE(review): the HPATH/HTYPE overrides (presumably both None, matching
  # the @note in LogicalUnit.BuildHooksEnv) appear truncated in this copy.

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    """Initializes this class.

    """
    # NOTE(review): the constructor body (presumably storing `lu` and
    # convenience aliases) appears truncated in this copy.

  def CheckPrereq(self):
    """Check prerequisites for this tasklets.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  # NOTE(review): the FIELDS class attribute (and, judging by the constructor
  # below, a SORT_FIELD attribute) appear truncated in this copy.

  def __init__(self, qfilter, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
                             namefield=self.SORT_FIELD)
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    # NOTE(review): a `self.wanted = None` initialisation presumably belongs
    # here -- the attribute is read in _GetNames; confirm against original.

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    # NOTE(review): an `if self.do_locking:`/`else: names = all_names`
    # branch appears truncated around the next line.
    names = lu.owned_locks(lock_level)

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    # NOTE(review): an `if missing:` guard appears truncated before the raise.
    raise errors.OpExecError("Some items were removed before retrieving"
                             " their data: %s" % missing)

    # Return expanded names
    # NOTE(review): a `return self.wanted` appears truncated here.

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)
  """Returns a dict declaring all lock levels shared.

  """
  # NOTE(review): the enclosing `def` line appears truncated in this copy.
  # Value 1 marks every lock level as shared (see LogicalUnit.share_locks).
  return dict.fromkeys(locking.LEVELS, 1)
def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of its children!)
  @param cfg: The config object
  @returns The annotated disk copies
  @see L{rpc.AnnotateDiskParams}

  """
  return rpc.AnnotateDiskParams(instance.disk_template, devs,
                                cfg.GetInstanceDiskParams(instance))
def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
  # NOTE(review): the signature continuation (a cur_group_uuid parameter,
  # judging by the docstring and body below) appears truncated here.
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  """
  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
  # NOTE(review): the signature continuation (presumably
  # `primary_only=False):`, per the docstring below) appears truncated here.
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  @type primary_only: boolean
  @param primary_only: Whether to check node groups for only the primary node

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " are '%s', owning groups '%s'; retry the"
                               # NOTE(review): the doubled word "are" above,
                               # the rest of the message, the % operator and
                               # the first format argument appear garbled or
                               # truncated in this copy.
                               utils.CommaJoin(inst_groups),
                               utils.CommaJoin(owned_groups)),
                               # NOTE(review): the error-code argument, the
                               # call's closing and a `return inst_groups`
                               # appear truncated here.
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               # NOTE(review): the `(group_uuid,` line of the
                               # format-argument tuple appears truncated here.
                               utils.CommaJoin(wanted_instances),
                               utils.CommaJoin(owned_instances)),
                               # NOTE(review): the error-code argument and the
                               # call's closing appear truncated here.

  return wanted_instances
def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
def _CopyLockList(names):
  """Makes a copy of a list of lock names.

  Handles L{locking.ALL_SET} correctly.

  @param names: List of lock names or C{locking.ALL_SET}
  @return: C{locking.ALL_SET} unchanged, otherwise a shallow copy of the list

  """
  if names == locking.ALL_SET:
    # The ALL_SET sentinel must be passed through unchanged
    return locking.ALL_SET
  else:
    # Copy so callers can modify the result without affecting the original
    return names[:]
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  # Without this guard the expansion below would fail for None (all nodes)
  # and make the all-nodes path unreachable
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())
def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    # No names given: all instances, nicely sorted
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted
def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      # Reset/unset requested; a missing key is not an error
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of a instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  """
  # Group policies may both reset-to-default and unset entries
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
                                 # NOTE(review): the error-code argument and
                                 # closing paren appear truncated here.
    if key in constants.IPOLICY_ISPECS:
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       # NOTE(review): a `use_none=use_none,`
                                       # argument line appears truncated here.
                                       use_default=use_default)
      # NOTE(review): an `else:` branch presumably wraps the remainder of the
      # loop body -- confirm against the original.
      if (not value or value == [constants.VALUE_DEFAULT] or
          value == constants.VALUE_DEFAULT):
        # NOTE(review): an `if group_policy: del ipolicy[key] / else:` branch
        # appears truncated before the raise below.
        raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                   " on the cluster'" % key,
                                   # NOTE(review): error-code argument and
                                   # closing paren appear truncated here.
      if key in constants.IPOLICY_PARAMETERS:
        # FIXME: we assume all such values are float
        # NOTE(review): a `try:` appears truncated before the next line
        # (there is a matching except below).
        ipolicy[key] = float(value)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid value for attribute"
                                     " '%s': '%s', error: %s" %
                                     (key, value, err), errors.ECODE_INVAL)
      # FIXME: we assume all others are lists; this should be redone
      # NOTE(review): an `else:` appears truncated before the next line.
      ipolicy[key] = list(value)
  # NOTE(review): a `try:` appears truncated before the syntax check
  # (there is a matching except below).
  objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
  except errors.ConfigurationError, err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                               # NOTE(review): error-code argument, closing
                               # paren and `return ipolicy` appear truncated.
def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values

  """
  def fn(old, value):
    # Merge one sub-dict and verify the types of the merged result
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret
def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict, or None if no opcode input given

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      # No previous state on the object; start from scratch
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None
def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict, or None if no opcode input given

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      # No previous state on the object; start from scratch
      obj_input = {}
    # Merge/verify each storage type's sub-dict separately
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None
def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
    # NOTE(review): an `elif keep:` appears truncated before the next line.
    should_release = lambda name: name not in keep
    # NOTE(review): an `else:` appears truncated before the next line.
    should_release = None

  owned = lu.owned_locks(level)
  # NOTE(review): an `if not owned:` branch presumably precedes the comment
  # below, with `elif should_release:` guarding the partial-release path.
  # Not owning any lock at this level, do nothing
  # NOTE(review): `retain = []` / `release = []` and the `for name in owned:`
  # loop header appear truncated around the next lines.
  # Determine which locks to release
  if should_release(name):
    # NOTE(review): `release.append(name)` / `else: retain.append(name)`
    # appear truncated here.

  assert len(lu.owned_locks(level)) == (len(retain) + len(release))
  # Release just some locks
  lu.glm.release(level, names=release)

  assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  # NOTE(review): an `else:` (release everything) appears truncated before
  # the next line.
  lu.glm.release(level)

  assert not lu.glm.is_owned(level), "No locks should be owned"
940 def _MapInstanceDisksToNodes(instances):
941 """Creates a map from (node, volume) to instance name.
943 @type instances: list of L{objects.Instance}
944 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
947 return dict(((node, vol), inst.name)
948 for inst in instances
949 for (node, vols) in inst.MapLVsByNode().items()
953 def _RunPostHook(lu, node_name):
954 """Runs the post-hook for an opcode on a single node.
957 hm = lu.proc.BuildHooksManager(lu)
959 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
960 except Exception, err: # pylint: disable=W0703
961 lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set
  @param selected: the list of output fields requested by the caller
  @raise errors.OpPrereqError: if any selected field is unknown

  """
  # Build the set of all known fields from both sources
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  @param params: dictionary of hypervisor parameters to check
  @raise errors.OpPrereqError: if any global parameter is present

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  # Only fall back to the default message when the caller gave none
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)
def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    # Honour the caller's choice of error class
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  # strict=True: fail if the file is empty or has more than one line
  return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)
def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param req_states: admin states the instance must be in
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  # Only fall back to the default message when the caller gave none
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    # The instance must actually be stopped; verify with the primary node
    pnode = instance.primary_node
    if not lu.cfg.GetNodeInfo(pnode).offline:
      ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
      ins_l.Raise("Can't contact node %s for instance information" % pnode,
                  prereq=True, ecode=errors.ECODE_ENVIRON)
      if instance.name in ins_l.payload:
        raise errors.OpPrereqError("Instance %s is running, %s" %
                                   (instance.name, msg), errors.ECODE_STATE)
    else:
      lu.LogWarning("Primary node offline, ignoring check that instance"
                    " is down")
def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
      not just 'disk')
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or an error string if the value is outside [min, max]

  """
  # "auto" and unset values are never range-checked
  if value in [None, constants.VALUE_AUTO]:
    return None
  # fall back to the value itself so a missing spec never triggers an error
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    if qualifier:
      fqn = "%s/%s" % (name, qualifier)
    else:
      fqn = name
    return ("%s value %s is not in range [%s, %s]" %
            (fqn, value, min_v, max_v))
  return None
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list of no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]

  # _ComputeMinMaxSpec returns None for values in range; filter(None, ...)
  # keeps only the actual violation messages
  return filter(None,
                (_compute_fn(name, qualifier, ipolicy, value)
                 for (name, qualifier, value) in test_settings))
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  # extract the instance's actual resource usage from its backend parameters
  # and disk/nic configuration
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)
def _ComputeIPolicyInstanceSpecViolation(
  ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  # missing counts default to 0, missing sizes/values to None/[] so that a
  # partial spec can still be checked
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)
def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  # moving within the same group can never introduce new violations
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)
def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param lu: the LU on behalf of which we make the check
  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}
  @raise errors.OpPrereqError: if the policy is violated and ignore is False

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    # with ignore=True the violation is only logged, not fatal
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violates the new ipolicy but
      did not violate the old one

  """
  # set difference: only report instances made invalid by the policy change
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))
1290 def _ExpandItemName(fn, name, kind):
1291 """Expand an item name.
1293 @param fn: the function to use for expansion
1294 @param name: requested item name
1295 @param kind: text description ('Node' or 'Instance')
1296 @return: the resolved (full) name
1297 @raise errors.OpPrereqError: if the item is not found
1300 full_name = fn(name)
1301 if full_name is None:
1302 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
def _ExpandNodeName(cfg, name):
  """Resolve a (possibly shortened) node name via L{_ExpandItemName}."""
  expander = cfg.ExpandNodeName
  return _ExpandItemName(expander, name, "Node")
def _ExpandInstanceName(cfg, name):
  """Resolve a (possibly shortened) instance name via L{_ExpandItemName}."""
  expander = cfg.ExpandInstanceName
  return _ExpandItemName(expander, name, "Instance")
1317 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1318 minmem, maxmem, vcpus, nics, disk_template, disks,
1319 bep, hvp, hypervisor_name, tags):
1320 """Builds instance related env variables for hooks
1322 This builds the hook environment from individual variables.
1325 @param name: the name of the instance
1326 @type primary_node: string
1327 @param primary_node: the name of the instance's primary node
1328 @type secondary_nodes: list
1329 @param secondary_nodes: list of secondary nodes as strings
1330 @type os_type: string
1331 @param os_type: the name of the instance's OS
1332 @type status: string
1333 @param status: the desired status of the instance
1334 @type minmem: string
1335 @param minmem: the minimum memory size of the instance
1336 @type maxmem: string
1337 @param maxmem: the maximum memory size of the instance
1339 @param vcpus: the count of VCPUs the instance has
1341 @param nics: list of tuples (ip, mac, mode, link) representing
1342 the NICs the instance has
1343 @type disk_template: string
1344 @param disk_template: the disk template of the instance
1346 @param disks: the list of (size, mode) pairs
1348 @param bep: the backend parameters for the instance
1350 @param hvp: the hypervisor parameters for the instance
1351 @type hypervisor_name: string
1352 @param hypervisor_name: the hypervisor for the instance
1354 @param tags: list of instance tags as strings
1356 @return: the hook environment for this instance
1361 "INSTANCE_NAME": name,
1362 "INSTANCE_PRIMARY": primary_node,
1363 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1364 "INSTANCE_OS_TYPE": os_type,
1365 "INSTANCE_STATUS": status,
1366 "INSTANCE_MINMEM": minmem,
1367 "INSTANCE_MAXMEM": maxmem,
1368 # TODO(2.7) remove deprecated "memory" value
1369 "INSTANCE_MEMORY": maxmem,
1370 "INSTANCE_VCPUS": vcpus,
1371 "INSTANCE_DISK_TEMPLATE": disk_template,
1372 "INSTANCE_HYPERVISOR": hypervisor_name,
1375 nic_count = len(nics)
1376 for idx, (ip, mac, mode, link) in enumerate(nics):
1379 env["INSTANCE_NIC%d_IP" % idx] = ip
1380 env["INSTANCE_NIC%d_MAC" % idx] = mac
1381 env["INSTANCE_NIC%d_MODE" % idx] = mode
1382 env["INSTANCE_NIC%d_LINK" % idx] = link
1383 if mode == constants.NIC_MODE_BRIDGED:
1384 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1388 env["INSTANCE_NIC_COUNT"] = nic_count
1391 disk_count = len(disks)
1392 for idx, (size, mode) in enumerate(disks):
1393 env["INSTANCE_DISK%d_SIZE" % idx] = size
1394 env["INSTANCE_DISK%d_MODE" % idx] = mode
1398 env["INSTANCE_DISK_COUNT"] = disk_count
1403 env["INSTANCE_TAGS"] = " ".join(tags)
1405 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1406 for key, value in source.items():
1407 env["INSTANCE_%s_%s" % (kind, key)] = value
1412 def _NICListToTuple(lu, nics):
1413 """Build a list of nic information tuples.
1415 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1416 value in LUInstanceQueryData.
1418 @type lu: L{LogicalUnit}
1419 @param lu: the logical unit on whose behalf we execute
1420 @type nics: list of L{objects.NIC}
1421 @param nics: list of nics to convert to hooks tuples
1425 cluster = lu.cfg.GetClusterInfo()
1429 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1430 mode = filled_params[constants.NIC_MODE]
1431 link = filled_params[constants.NIC_LINK]
1432 hooks_nics.append((ip, mac, mode, link))
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1476 def _AdjustCandidatePool(lu, exceptions):
1477 """Adjust the candidate pool after node operations.
1480 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1482 lu.LogInfo("Promoted nodes to master candidate role: %s",
1483 utils.CommaJoin(node.name for node in mod_list))
1484 for name in mod_list:
1485 lu.context.ReaddNode(name)
1486 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1488 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1492 def _DecideSelfPromotion(lu, exceptions=None):
1493 """Decide whether I should promote myself as a master candidate.
1496 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1497 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1498 # the new node will increase mc_max with one, so:
1499 mc_should = min(mc_should + 1, cp_size)
1500 return mc_now < mc_should
def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances who violate the given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
  @param instances: List of instances to verify
  @rtype: frozenset
  @return: A frozenset of instance names violating the ipolicy

  """
  # generator expression: no need to materialize an intermediate list
  return frozenset(inst.name for inst in instances
                   if _ComputeIPolicyInstanceViolation(ipolicy, inst))
1516 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1517 """Check that the brigdes needed by a list of nics exist.
1520 cluster = lu.cfg.GetClusterInfo()
1521 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1522 brlist = [params[constants.NIC_LINK] for params in paramslist
1523 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1525 result = lu.rpc.call_bridges_exist(target_node, brlist)
1526 result.Raise("Error checking bridges on destination node '%s'" %
1527 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  @param lu: the LU on whose behalf we make the check
  @param instance: the instance whose NIC bridges are checked
  @param node: the node to check; defaults to the instance's primary node

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity
  @raise errors.OpPrereqError: if the variant is missing, unsupported or
      given for an OS without variant support

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    # OS without variants: passing one is an error
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1563 def _GetNodeInstancesInner(cfg, fn):
1564 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  @param cfg: the cluster configuration object
  @param node_name: the node whose instances are returned

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  @param cfg: the cluster configuration object
  @param node_name: the node whose primary instances are returned

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  @param cfg: the cluster configuration object
  @param node_name: the node whose secondary instances are returned

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  @param cfg: the cluster configuration object
  @param storage_type: one of the L{constants} storage type values
  @return: the extra arguments for the storage backend (empty for types
      that need none)

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  # all other storage types take no extra arguments
  return []
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  """Returns the indices of an instance's faulty disks on a node.

  @param cfg: the cluster configuration object
  @param rpc_runner: the RPC runner to use for the status query
  @param instance: the instance whose disks are checked
  @param node_name: the node on which to check the disks
  @param prereq: whether RPC failures raise OpPrereqError or OpExecError
  @return: list of indices of disks reported as faulty

  """
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
                                                                instance))
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
  then the LU's opcode's iallocator slot is filled with the cluster-wide
  default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot
  @raise errors.OpPrereqError: if both are given, or neither is given and no
      cluster-wide default exists

  """
  node = getattr(lu.op, node_slot, None)
  ialloc = getattr(lu.op, iallocator_slot, None)

  if node is not None and ialloc is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif ((node is None and ialloc is None) or
        ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      # fill the opcode slot so later code sees the resolved iallocator
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)
1657 def _GetDefaultIAllocator(cfg, ialloc):
1658 """Decides on which iallocator to use.
1660 @type cfg: L{config.ConfigWriter}
1661 @param cfg: Cluster configuration object
1662 @type ialloc: string or None
1663 @param ialloc: Iallocator specified in opcode
1665 @return: Iallocator name
1669 # Use default iallocator
1670 ialloc = cfg.GetDefaultIAllocator()
1673 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1674 " opcode nor as a cluster-wide default",
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # post-init hooks run only on the master node
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      # best-effort: only warn, the destroy proceeds regardless
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name
1769 def _VerifyCertificate(filename):
1770 """Verifies a certificate for L{LUClusterVerifyConfig}.
1772 @type filename: string
1773 @param filename: Path to PEM file
1777 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1778 utils.ReadFile(filename))
1779 except Exception, err: # pylint: disable=W0703
1780 return (LUClusterVerifyConfig.ETYPE_ERROR,
1781 "Failed to load X509 certificate %s: %s" % (filename, err))
1784 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1785 constants.SSL_CERT_EXPIRATION_ERROR)
1788 fnamemsg = "While verifying %s: %s" % (filename, msg)
1793 return (None, fnamemsg)
1794 elif errcode == utils.CERT_WARNING:
1795 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1796 elif errcode == utils.CERT_ERROR:
1797 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1799 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1802 def _GetAllHypervisorParameters(cluster, instances):
1803 """Compute the set of all hypervisor parameters.
1805 @type cluster: L{objects.Cluster}
1806 @param cluster: the cluster object
1807 @param instances: list of L{objects.Instance}
1808 @param instances: additional instances from which to obtain parameters
1809 @rtype: list of (origin, hypervisor, parameters)
1810 @return: a list with all parameters found, indicating the hypervisor they
1811 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1816 for hv_name in cluster.enabled_hypervisors:
1817 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1819 for os_name, os_hvp in cluster.os_hvp.items():
1820 for hv_name, hv_params in os_hvp.items():
1822 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1823 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1825 # TODO: collapse identical parameter values in a single one
1826 for instance in instances:
1827 if instance.hvparams:
1828 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1829 cluster.FillHV(instance)))
1834 class _VerifyErrors(object):
1835 """Mix-in for cluster/group verify LUs.
1837 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1838 self.op and self._feedback_fn to be available.)
1842 ETYPE_FIELD = "code"
1843 ETYPE_ERROR = "ERROR"
1844 ETYPE_WARNING = "WARNING"
1846 def _Error(self, ecode, item, msg, *args, **kwargs):
1847 """Format an error message.
1849 Based on the opcode's error_codes parameter, either format a
1850 parseable error code, or a simpler error string.
1852 This must be called only from Exec and functions called from Exec.
1855 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1856 itype, etxt, _ = ecode
1857 # first complete the msg
1860 # then format the whole message
1861 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1862 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1868 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1869 # and finally report it via the feedback_fn
1870 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1872 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1873 """Log an error message if the passed condition is True.
1877 or self.op.debug_simulate_errors) # pylint: disable=E1101
1879 # If the error code is in the list of ignored errors, demote the error to a
1881 (_, etxt, _) = ecode
1882 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1883 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1886 self._Error(ecode, *args, **kwargs)
1888 # do not mark the operation as failed for WARN cases only
1889 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1890 self.bad = self.bad or cond
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Submit one config-verify job and one job per node group.

    """
    jobs = []

    if self.op.group_name:
      # verify only the requested group
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend(
      [opcodes.OpClusterVerifyGroup(group_name=group,
                                    ignore_errors=self.op.ignore_errors,
                                    depends=depends_fn())]
      for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        # only the config-verify opcode lacks skip_checks
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
1938 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1939 """Verifies the cluster config.
1944 def _VerifyHVP(self, hvp_data):
1945 """Verifies locally the syntax of the hypervisor parameters.
1948 for item, hv_name, hv_params in hvp_data:
1949 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1952 hv_class = hypervisor.GetHypervisor(hv_name)
1953 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1954 hv_class.CheckParameterSyntax(hv_params)
1955 except errors.GenericError, err:
1956 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1958 def ExpandNames(self):
1959 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1960 self.share_locks = _ShareAll()
1962 def CheckPrereq(self):
1963 """Check prerequisites.
1966 # Retrieve all information
1967 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1968 self.all_node_info = self.cfg.GetAllNodesInfo()
1969 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1971 def Exec(self, feedback_fn):
1972 """Verify integrity of cluster, performing various test on nodes.
1976 self._feedback_fn = feedback_fn
1978 feedback_fn("* Verifying cluster config")
1980 for msg in self.cfg.VerifyConfig():
1981 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1983 feedback_fn("* Verifying cluster certificate files")
1985 for cert_filename in pathutils.ALL_CERT_FILES:
1986 (errcode, msg) = _VerifyCertificate(cert_filename)
1987 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1989 feedback_fn("* Verifying hypervisor parameters")
1991 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1992 self.all_inst_info.values()))
1994 feedback_fn("* Verifying all nodes belong to an existing group")
1996 # We do this verification here because, should this bogus circumstance
1997 # occur, it would never be caught by VerifyGroup, which only acts on
1998 # nodes/instances reachable from existing node groups.
2000 dangling_nodes = set(node.name for node in self.all_node_info.values()
2001 if node.group not in self.all_group_info)
2003 dangling_instances = {}
2004 no_node_instances = []
2006 for inst in self.all_inst_info.values():
2007 if inst.primary_node in dangling_nodes:
2008 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2009 elif inst.primary_node not in self.all_node_info:
2010 no_node_instances.append(inst.name)
2015 utils.CommaJoin(dangling_instances.get(node.name,
2017 for node in dangling_nodes]
2019 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2021 "the following nodes (and their instances) belong to a non"
2022 " existing group: %s", utils.CommaJoin(pretty_dangling))
2024 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2026 "the following instances have a non-existing primary-node:"
2027 " %s", utils.CommaJoin(no_node_instances))
2032 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2033 """Verifies the status of a node group.
2036 HPATH = "cluster-verify"
2037 HTYPE = constants.HTYPE_CLUSTER
2040 _HOOKS_INDENT_RE = re.compile("^", re.M)
2042 class NodeImage(object):
2043 """A class representing the logical and physical status of a node.
2046 @ivar name: the node name to which this object refers
2047 @ivar volumes: a structure as returned from
2048 L{ganeti.backend.GetVolumeList} (runtime)
2049 @ivar instances: a list of running instances (runtime)
2050 @ivar pinst: list of configured primary instances (config)
2051 @ivar sinst: list of configured secondary instances (config)
2052 @ivar sbp: dictionary of {primary-node: list of instances} for all
2053 instances for which this node is secondary (config)
2054 @ivar mfree: free memory, as reported by hypervisor (runtime)
2055 @ivar dfree: free disk, as reported by the node (runtime)
2056 @ivar offline: the offline status (config)
2057 @type rpc_fail: boolean
2058 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2059 not whether the individual keys were correct) (runtime)
2060 @type lvm_fail: boolean
2061 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2062 @type hyp_fail: boolean
2063 @ivar hyp_fail: whether the RPC call didn't return the instance list
2064 @type ghost: boolean
2065 @ivar ghost: whether this is a known node or not (config)
2066 @type os_fail: boolean
2067 @ivar os_fail: whether the RPC call didn't return valid OS data
2069 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2070 @type vm_capable: boolean
2071 @ivar vm_capable: whether the node can host instances
2074 def __init__(self, offline=False, name=None, vm_capable=True):
2083 self.offline = offline
2084 self.vm_capable = vm_capable
2085 self.rpc_fail = False
2086 self.lvm_fail = False
2087 self.hyp_fail = False
2089 self.os_fail = False
2092 def ExpandNames(self):
2093 # This raises errors.OpPrereqError on its own:
2094 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2096 # Get instances in node group; this is unsafe and needs verification later
2098 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2100 self.needed_locks = {
2101 locking.LEVEL_INSTANCE: inst_names,
2102 locking.LEVEL_NODEGROUP: [self.group_uuid],
2103 locking.LEVEL_NODE: [],
2106 self.share_locks = _ShareAll()
2108 def DeclareLocks(self, level):
2109 if level == locking.LEVEL_NODE:
2110 # Get members of node group; this is unsafe and needs verification later
2111 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2113 all_inst_info = self.cfg.GetAllInstancesInfo()
2115 # In Exec(), we warn about mirrored instances that have primary and
2116 # secondary living in separate node groups. To fully verify that
2117 # volumes for these instances are healthy, we will need to do an
2118 # extra call to their secondaries. We ensure here those nodes will
2120 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2121 # Important: access only the instances whose lock is owned
2122 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2123 nodes.update(all_inst_info[inst].secondary_nodes)
2125 self.needed_locks[locking.LEVEL_NODE] = nodes
  def CheckPrereq(self):
    """Check that all nodes and instances of the group are locked.

    Raises OpPrereqError if any node or instance of the verified group
    (or any extra node needed for the split-LV check) is not covered by
    a lock we own.

    """
    # we must already own the node-group lock for the group being verified
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_nodes = set(self.group_info.members)
    self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)

    group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
      group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    raise errors.OpPrereqError("Missing lock for nodes: %s" %
                               utils.CommaJoin(unlocked_nodes),

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances),

    # cache full cluster node/instance maps for later cross-group lookups
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    # group-local views keyed by name
    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        for nname in inst.all_nodes:
          if self.all_node_info[nname].group != self.group_uuid:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
      extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("Missing node locks for LV check: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes),

    self.extra_lv_nodes = list(extra_lv_nodes)
  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        expected fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @return: whether overall this call was successful (and we can expect
      reasonable values in the response)

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, constants.CV_ENODERPC, node,
             "unable to verify node: no data returned")

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    # the node must report a (protocol version, release version) pair
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, constants.CV_ENODERPC, node,
             "connection to node returned invalid data")

    # protocol version mismatch is reported as an error
    test = local_version != remote_version[0]
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])

    # node seems compatible, we can actually try to look into its results

    # full package version; a release mismatch is only a warning
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  constants.CV_ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        # a non-None per-hypervisor value is the error message
        test = hv_result is not None
        _ErrorIf(test, constants.CV_ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      # every entry in NV_HVPARAMS is a reported parameter failure
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, constants.CV_ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    Reports an error if the node clock diverges from the master by more
    than L{constants.NODE_MAX_CLOCK_SKEW} (accounting for the RPC window).

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")

    # only a clock outside [start - skew, end + skew] is a divergence
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)

    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
             "Node time diverges by at least %s from master node time",
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
    # CheckVolumeGroupSize returns an error string, or None when OK
    vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                          constants.MIN_VG_SIZE)
    _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)

    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
    # check that ':' is not present in PV names, since it's a
    # special character for lvcreate (denotes the range of PEs to
    # use on the PV)
    for _, pvname, owner_vg in pvlist:
      test = ":" in pvname
      _ErrorIf(test, constants.CV_ENODELVM, node,
               "Invalid character ':' in PV '%s' of VG '%s'",
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # the node returns the list of bridges it is MISSING
    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, constants.CV_ENODENET, node,
             "did not return valid bridge information")
    _ErrorIf(bool(missing), constants.CV_ENODENET, node,
             "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
  def _VerifyNodeUserScripts(self, ninfo, nresult):
    """Check the results of user scripts presence and executability on the node

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    test = not constants.NV_USERSCRIPTS in nresult
    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
                  "did not return user scripts information")

    # the node returns the list of scripts that are broken (missing or
    # not executable); a non-empty list is an error
    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
    self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
                  "user scripts not present or not executable: %s" %
                  utils.CommaJoin(sorted(broken_scripts)))
  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    Checks SSH reachability, TCP inter-node connectivity and master-IP
    reachability as reported by the node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, constants.CV_ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if nresult[constants.NV_NODELIST]:
      # a non-empty dict maps unreachable peers to their error message
      for a_node, a_msg in nresult[constants.NV_NODELIST].items():
        _ErrorIf(True, constants.CV_ENODESSH, node,
                 "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if nresult[constants.NV_NODENETTEST]:
      nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        _ErrorIf(True, constants.CV_ENODENET, node,
                 "tcp communication with node '%s': %s",
                 anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not nresult[constants.NV_MASTERIP]:
      # distinguish the master itself (config problem) from other nodes
      if node == self.master_node:
        msg = "the master node cannot reach the master IP (not configured?)"
        msg = "cannot reach the master IP"
      _ErrorIf(True, constants.CV_ENODENET, node, msg)
  def _VerifyInstance(self, instance, instanceconfig, node_image,
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    # instance-policy violations are reported as a joined error list
    cluster = self.cfg.GetClusterInfo()
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
    err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
    _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_state == constants.ADMINST_UP:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",

    # flatten the per-node disk status into (node, success, status, idx)
    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node image entry here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
               not success and not bad_snode,
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
                success and bdev_status.ldisk_status == constants.LDS_FAULTY),
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
          self.all_node_info[node].group != self.group_uuid):
        # skip non-healthy nodes
      for volume in n_img.volumes:
        # a volume is orphaned if no instance should have it on this
        # node and it doesn't match a reserved name
        test = ((node not in node_vol_should or
                 volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
                      "volume %s is unknown", volume)
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline or self.all_node_info[node].group != self.group_uuid:
        # we're skipping nodes marked offline and nodes in other groups from
        # the N+1 warning, since most likely we don't have good memory
        # information from them; we already list instances living on such
        # nodes, and that's enough warning
      #TODO(dynmem): also consider ballooning out other instances
      for prinode, instances in n_img.sbp.items():
        # sum the minimum memory of auto-balanced instances that would
        # fail over here from the given primary
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MINMEM]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, constants.CV_ENODEN1, node,
                      "not enough memory to accomodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    # Define functions determining which nodes to consider for a file
      (files_mc, lambda node: (node.master_candidate or
                               node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),

    # Build mapping from filename to list of nodes which should have the file
    for (files, fn) in files2nodefn:
        filenodes = nodeinfo
        filenodes = filter(fn, nodeinfo)
      nodefiles.update((filename,
                        frozenset(map(operator.attrgetter("name"), filenodes)))
                       for filename in files)

    assert set(nodefiles) == (files_all | files_mc | files_vm)

    # filename -> {checksum -> set(node names having that checksum)}
    fileinfo = dict((filename, {}) for filename in nodefiles)
    ignore_nodes = set()

    for node in nodeinfo:
        ignore_nodes.add(node.name)

      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
      # keys are virtualized paths; convert back to local paths
      fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
      node_files = dict((vcluster.LocalizeVirtualPath(key), value)
                        for (key, value) in fingerprints.items())

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
        ignore_nodes.add(node.name)

      # Build per-checksum mapping from filename to nodes having it
      for (filename, checksum) in node_files.items():
        assert filename in nodefiles
        fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes) - ignore_nodes

      expected_nodes = nodefiles[filename] - ignore_nodes

      # Nodes missing file
      missing_file = expected_nodes - with_file

      if filename in files_opt:
        # an optional file must exist on all nodes or on none
        errorif(missing_file and missing_file != expected_nodes,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no"
                " nodes (not found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

      # Warn if a node has a file it shouldn't
      unexpected = with_file - expected_nodes
              constants.CV_ECLUSTERFILECHECK, None,
              "File %s should not exist on node(s) %s",
              filename, utils.CommaJoin(utils.NiceSort(unexpected)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                    enumerate(sorted(checksums.items()))]

      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
      L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    helper_result = nresult.get(constants.NV_DRBDHELPER, None)
    test = (helper_result is None)
    _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
             "no drbd usermode helper returned")
    # helper_result is a (status, payload) pair from the node
    status, payload = helper_result
    _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
             "drbd usermode helper check unsuccessful: %s", payload)
    test = status and (payload != drbd_helper)
    _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
             "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
        node_drbd[minor] = (instance, False)
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name,
                            instance.admin_state == constants.ADMINST_UP)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
      # we cannot check drbd status

    # every minor that must exist should be in use on the node...
    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    # ...and every in-use minor should be allocated in the map
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    # every OS entry must be a 7-element list
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, constants.CV_ENODEOS, node,
             "node hasn't returned valid OS data")

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict
  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      # the first entry is the authoritative one; extras shadow it
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, constants.CV_ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",

      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
        # base OS is invalid, skipping

      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, constants.CV_ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))
  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      # a non-empty path result is the error message from the path check
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # assume failure until the LV list parses cleanly
    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    elif isinstance(lvdata, basestring):
      # a string payload is an error message from the node
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, constants.CV_ENODELVM, node,
               "rpc call to node failed (lvlist)")
      nimg.volumes = lvdata
      nimg.lvm_fail = False
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    # the instance list must come back as a list, anything else is a
    # failed hypervisor query
    test = not isinstance(idata, list)
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
                  "rpc call to node failed (instancelist): %s",
                  utils.SafeEncode(str(idata)))
      nimg.hyp_fail = True
      nimg.instances = idata
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    Fills in the node image's free memory (from the hypervisor) and
    free disk space (from the volume group), reporting errors for
    malformed data.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, constants.CV_ENODEHV, node,
             "rpc call to node failed (hvinfo)")
      nimg.mfree = int(hv_info["memory_free"])
    except (ValueError, TypeError):
      _ErrorIf(True, constants.CV_ENODERPC, node,
               "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, constants.CV_ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
        nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
      except (ValueError, TypeError):
        _ErrorIf(True, constants.CV_ENODERPC, node,
                 "node returned invalid LVM info, check LVM status")
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
      keys and disk information as values; the disk information is a
      list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      # all instances with a presence (primary or secondary) on this node
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

        # No need to collect data

      node_disks[nname] = disks

      # _AnnotateDiskParams makes already copies of the disks
      for (inst, dev) in disks:
        (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
        self.cfg.SetDiskID(anno_disk, nname)
        devonly.append(anno_disk)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),

    assert len(result) == len(node_disks)

    for (nname, nres) in result.items():
      disks = node_disks[nname]
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      _ErrorIf(msg, constants.CV_ENODERPC, nname,
               "while getting disk information: %s", msg)
        # No data from this node
        data = len(disks) * [(False, msg)]
        for idx, i in enumerate(nres.payload):
          # each payload entry must be a (success, status) pair
          if isinstance(i, (tuple, list)) and len(i) == 2:
            logging.warning("Invalid result from node %s, entry %d: %s",
            data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk

    # sanity check the collected structure before returning it
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
  def _SshNodeSelector(group_uuid, all_nodes):
    """Create endless iterators for all potential SSH check hosts.

    Considers only nodes outside the given group; returns one cycling
    iterator of sorted node names per foreign group.

    """
    nodes = [node for node in all_nodes
             if (node.group != group_uuid and
    keyfunc = operator.attrgetter("group")

    # groupby requires its input sorted by the same key
    return map(itertools.cycle,
               [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
    """Choose which nodes should talk to which other nodes.

    We will make nodes contact all nodes in their group, and one node from
    every other group.

    @warning: This algorithm has a known issue if one node group is much
      smaller than others (e.g. just one node). In such a case all other
      nodes will talk to the single node.

    """
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
    sel = cls._SshNodeSelector(group_uuid, all_nodes)

    # each online node gets one peer drawn from every foreign group's
    # cycling iterator
    return (online_nodes,
            dict((name, sorted([i.next() for i in sel]))
                 for name in online_nodes))
  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just ran in the post phase and their failure makes
    the output be logged in the verify output and the verification to fail.

    """
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())

    # one NODE_TAGS_<name> entry per node of the verified group
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())
3042 def BuildHooksNodes(self):
3043 """Build hooks nodes.
3046 return ([], self.my_node_names)
3048 def Exec(self, feedback_fn):
3049 """Verify integrity of the node group, performing various test on nodes.
3052 # This method has too many local variables. pylint: disable=R0914
3053 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3055 if not self.my_node_names:
3057 feedback_fn("* Empty node group, skipping verification")
3061 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3062 verbose = self.op.verbose
3063 self._feedback_fn = feedback_fn
3065 vg_name = self.cfg.GetVGName()
3066 drbd_helper = self.cfg.GetDRBDHelper()
3067 cluster = self.cfg.GetClusterInfo()
3068 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3069 hypervisors = cluster.enabled_hypervisors
3070 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3072 i_non_redundant = [] # Non redundant instances
3073 i_non_a_balanced = [] # Non auto-balanced instances
3074 i_offline = 0 # Count of offline instances
3075 n_offline = 0 # Count of offline nodes
3076 n_drained = 0 # Count of nodes being drained
3077 node_vol_should = {}
3079 # FIXME: verify OS list
3082 filemap = _ComputeAncillaryFiles(cluster, False)
3084 # do local checksums
3085 master_node = self.master_node = self.cfg.GetMasterNode()
3086 master_ip = self.cfg.GetMasterIP()
3088 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3091 if self.cfg.GetUseExternalMipScript():
3092 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3094 node_verify_param = {
3095 constants.NV_FILELIST:
3096 map(vcluster.MakeVirtualPath,
3097 utils.UniqueSequence(filename
3098 for files in filemap
3099 for filename in files)),
3100 constants.NV_NODELIST:
3101 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3102 self.all_node_info.values()),
3103 constants.NV_HYPERVISOR: hypervisors,
3104 constants.NV_HVPARAMS:
3105 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3106 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3107 for node in node_data_list
3108 if not node.offline],
3109 constants.NV_INSTANCELIST: hypervisors,
3110 constants.NV_VERSION: None,
3111 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3112 constants.NV_NODESETUP: None,
3113 constants.NV_TIME: None,
3114 constants.NV_MASTERIP: (master_node, master_ip),
3115 constants.NV_OSLIST: None,
3116 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3117 constants.NV_USERSCRIPTS: user_scripts,
3120 if vg_name is not None:
3121 node_verify_param[constants.NV_VGLIST] = None
3122 node_verify_param[constants.NV_LVLIST] = vg_name
3123 node_verify_param[constants.NV_PVLIST] = [vg_name]
3126 node_verify_param[constants.NV_DRBDLIST] = None
3127 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3130 # FIXME: this needs to be changed per node-group, not cluster-wide
3132 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3133 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3134 bridges.add(default_nicpp[constants.NIC_LINK])
3135 for instance in self.my_inst_info.values():
3136 for nic in instance.nics:
3137 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3138 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3139 bridges.add(full_nic[constants.NIC_LINK])
3142 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3144 # Build our expected cluster state
3145 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3147 vm_capable=node.vm_capable))
3148 for node in node_data_list)
3152 for node in self.all_node_info.values():
3153 path = _SupportsOob(self.cfg, node)
3154 if path and path not in oob_paths:
3155 oob_paths.append(path)
3158 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3160 for instance in self.my_inst_names:
3161 inst_config = self.my_inst_info[instance]
3162 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3165 for nname in inst_config.all_nodes:
3166 if nname not in node_image:
3167 gnode = self.NodeImage(name=nname)
3168 gnode.ghost = (nname not in self.all_node_info)
3169 node_image[nname] = gnode
3171 inst_config.MapLVsByNode(node_vol_should)
3173 pnode = inst_config.primary_node
3174 node_image[pnode].pinst.append(instance)
3176 for snode in inst_config.secondary_nodes:
3177 nimg = node_image[snode]
3178 nimg.sinst.append(instance)
3179 if pnode not in nimg.sbp:
3180 nimg.sbp[pnode] = []
3181 nimg.sbp[pnode].append(instance)
3183 # At this point, we have the in-memory data structures complete,
3184 # except for the runtime information, which we'll gather next
3186 # Due to the way our RPC system works, exact response times cannot be
3187 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3188 # time before and after executing the request, we can at least have a time
3190 nvinfo_starttime = time.time()
3191 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3193 self.cfg.GetClusterName())
3194 nvinfo_endtime = time.time()
3196 if self.extra_lv_nodes and vg_name is not None:
3198 self.rpc.call_node_verify(self.extra_lv_nodes,
3199 {constants.NV_LVLIST: vg_name},
3200 self.cfg.GetClusterName())
3202 extra_lv_nvinfo = {}
3204 all_drbd_map = self.cfg.ComputeDRBDMap()
3206 feedback_fn("* Gathering disk information (%s nodes)" %
3207 len(self.my_node_names))
3208 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3211 feedback_fn("* Verifying configuration file consistency")
3213 # If not all nodes are being checked, we need to make sure the master node
3214 # and a non-checked vm_capable node are in the list.
3215 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3217 vf_nvinfo = all_nvinfo.copy()
3218 vf_node_info = list(self.my_node_info.values())
3219 additional_nodes = []
3220 if master_node not in self.my_node_info:
3221 additional_nodes.append(master_node)
3222 vf_node_info.append(self.all_node_info[master_node])
3223 # Add the first vm_capable node we find which is not included,
3224 # excluding the master node (which we already have)
3225 for node in absent_nodes:
3226 nodeinfo = self.all_node_info[node]
3227 if (nodeinfo.vm_capable and not nodeinfo.offline and
3228 node != master_node):
3229 additional_nodes.append(node)
3230 vf_node_info.append(self.all_node_info[node])
3232 key = constants.NV_FILELIST
3233 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3234 {key: node_verify_param[key]},
3235 self.cfg.GetClusterName()))
3237 vf_nvinfo = all_nvinfo
3238 vf_node_info = self.my_node_info.values()
3240 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3242 feedback_fn("* Verifying node status")
3246 for node_i in node_data_list:
3248 nimg = node_image[node]
3252 feedback_fn("* Skipping offline node %s" % (node,))
3256 if node == master_node:
3258 elif node_i.master_candidate:
3259 ntype = "master candidate"
3260 elif node_i.drained:
3266 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3268 msg = all_nvinfo[node].fail_msg
3269 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3272 nimg.rpc_fail = True
3275 nresult = all_nvinfo[node].payload
3277 nimg.call_ok = self._VerifyNode(node_i, nresult)
3278 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3279 self._VerifyNodeNetwork(node_i, nresult)
3280 self._VerifyNodeUserScripts(node_i, nresult)
3281 self._VerifyOob(node_i, nresult)
3284 self._VerifyNodeLVM(node_i, nresult, vg_name)
3285 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3288 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3289 self._UpdateNodeInstances(node_i, nresult, nimg)
3290 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3291 self._UpdateNodeOS(node_i, nresult, nimg)
3293 if not nimg.os_fail:
3294 if refos_img is None:
3296 self._VerifyNodeOS(node_i, nimg, refos_img)
3297 self._VerifyNodeBridges(node_i, nresult, bridges)
3299 # Check whether all running instancies are primary for the node. (This
3300 # can no longer be done from _VerifyInstance below, since some of the
3301 # wrong instances could be from other node groups.)
3302 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3304 for inst in non_primary_inst:
3305 test = inst in self.all_inst_info
3306 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3307 "instance should not run on node %s", node_i.name)
3308 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3309 "node is running unknown instance %s", inst)
3311 for node, result in extra_lv_nvinfo.items():
3312 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3313 node_image[node], vg_name)
3315 feedback_fn("* Verifying instance status")
3316 for instance in self.my_inst_names:
3318 feedback_fn("* Verifying instance %s" % instance)
3319 inst_config = self.my_inst_info[instance]
3320 self._VerifyInstance(instance, inst_config, node_image,
3322 inst_nodes_offline = []
3324 pnode = inst_config.primary_node
3325 pnode_img = node_image[pnode]
3326 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3327 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3328 " primary node failed", instance)
3330 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3332 constants.CV_EINSTANCEBADNODE, instance,
3333 "instance is marked as running and lives on offline node %s",
3334 inst_config.primary_node)
3336 # If the instance is non-redundant we cannot survive losing its primary
3337 # node, so we are not N+1 compliant. On the other hand we have no disk
3338 # templates with more than one secondary so that situation is not well
3340 # FIXME: does not support file-backed instances
3341 if not inst_config.secondary_nodes:
3342 i_non_redundant.append(instance)
3344 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3345 constants.CV_EINSTANCELAYOUT,
3346 instance, "instance has multiple secondary nodes: %s",
3347 utils.CommaJoin(inst_config.secondary_nodes),
3348 code=self.ETYPE_WARNING)
3350 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3351 pnode = inst_config.primary_node
3352 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3353 instance_groups = {}
3355 for node in instance_nodes:
3356 instance_groups.setdefault(self.all_node_info[node].group,
3360 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3361 # Sort so that we always list the primary node first.
3362 for group, nodes in sorted(instance_groups.items(),
3363 key=lambda (_, nodes): pnode in nodes,
3366 self._ErrorIf(len(instance_groups) > 1,
3367 constants.CV_EINSTANCESPLITGROUPS,
3368 instance, "instance has primary and secondary nodes in"
3369 " different groups: %s", utils.CommaJoin(pretty_list),
3370 code=self.ETYPE_WARNING)
3372 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3373 i_non_a_balanced.append(instance)
3375 for snode in inst_config.secondary_nodes:
3376 s_img = node_image[snode]
3377 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3378 snode, "instance %s, connection to secondary node failed",
3382 inst_nodes_offline.append(snode)
3384 # warn that the instance lives on offline nodes
3385 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3386 "instance has offline secondary node(s) %s",
3387 utils.CommaJoin(inst_nodes_offline))
3388 # ... or ghost/non-vm_capable nodes
3389 for node in inst_config.all_nodes:
3390 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3391 instance, "instance lives on ghost node %s", node)
3392 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3393 instance, "instance lives on non-vm_capable node %s", node)
3395 feedback_fn("* Verifying orphan volumes")
3396 reserved = utils.FieldSet(*cluster.reserved_lvs)
3398 # We will get spurious "unknown volume" warnings if any node of this group
3399 # is secondary for an instance whose primary is in another group. To avoid
3400 # them, we find these instances and add their volumes to node_vol_should.
3401 for inst in self.all_inst_info.values():
3402 for secondary in inst.secondary_nodes:
3403 if (secondary in self.my_node_info
3404 and inst.name not in self.my_inst_info):
3405 inst.MapLVsByNode(node_vol_should)
3408 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3410 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3411 feedback_fn("* Verifying N+1 Memory redundancy")
3412 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3414 feedback_fn("* Other Notes")
3416 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3417 % len(i_non_redundant))
3419 if i_non_a_balanced:
3420 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3421 % len(i_non_a_balanced))
3424 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3427 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3430 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3434 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3435 """Analyze the post-hooks' result
3437 This method analyses the hook result, handles it, and sends some
3438 nicely-formatted feedback back to the user.
3440 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3441 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3442 @param hooks_results: the results of the multi-node hooks rpc call
3443 @param feedback_fn: function used send feedback back to the caller
3444 @param lu_result: previous Exec result
3445 @return: the new Exec result, based on the previous result
3449 # We only really run POST phase hooks, only for non-empty groups,
3450 # and are only interested in their results
3451 if not self.my_node_names:
# (empty node group: nothing was executed, so there is nothing to analyze)
3454 elif phase == constants.HOOKS_PHASE_POST:
3455 # Used to change hooks' output to proper indentation
3456 feedback_fn("* Hooks Results")
3457 assert hooks_results, "invalid result from hooks"
# Iterate over the per-node RPC results of the hook run
3459 for node_name in hooks_results:
3460 res = hooks_results[node_name]
# NOTE(review): "msg" is presumably res.fail_msg, assigned on a line not
# shown here — confirm against the full file. A failed RPC to an online
# node is reported as a CV_ENODEHOOKS error.
3462 test = msg and not res.offline
3463 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3464 "Communication failure in hooks execution: %s", msg)
3465 if res.offline or msg:
3466 # No need to investigate payload if node is offline or gave
# Each payload entry describes one hook script: (script, result-code, output)
3469 for script, hkr, output in res.payload:
3470 test = hkr == constants.HKR_FAIL
3471 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3472 "Script %s failed, output:", script)
# Re-indent the script output so it nests under the per-node feedback
3474 output = self._HOOKS_INDENT_RE.sub(" ", output)
3475 feedback_fn("%s" % output)
3481 class LUClusterVerifyDisks(NoHooksLU):
3482 """Verifies the cluster disks status.
# This LU does no verification work itself: it fans out one
# OpGroupVerifyDisks job per node group (see Exec below).
3487 def ExpandNames(self):
# Shared locks are sufficient; only read access to the group list is needed.
3488 self.share_locks = _ShareAll()
3489 self.needed_locks = {
3490 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3493 def Exec(self, feedback_fn):
# All node groups are locked (shared) at this point.
3494 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3496 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3497 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3498 for group in group_names])
3501 class LUGroupVerifyDisks(NoHooksLU):
3502 """Verifies the status of all disks in a node group.
3507 def ExpandNames(self):
3508 # Raises errors.OpPrereqError on its own if group can't be found
3509 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
# Only shared locks are needed; this LU is read-only.
3511 self.share_locks = _ShareAll()
3512 self.needed_locks = {
3513 locking.LEVEL_INSTANCE: [],
3514 locking.LEVEL_NODEGROUP: [],
3515 locking.LEVEL_NODE: [],
# Lock sets are filled in level by level in DeclareLocks below.
3518 def DeclareLocks(self, level):
3519 if level == locking.LEVEL_INSTANCE:
3520 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3522 # Lock instances optimistically, needs verification once node and group
3523 # locks have been acquired
3524 self.needed_locks[locking.LEVEL_INSTANCE] = \
3525 self.cfg.GetNodeGroupInstances(self.group_uuid)
3527 elif level == locking.LEVEL_NODEGROUP:
3528 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
# Besides the target group itself, also lock every group touched by the
# optimistically-locked instances (re-checked later in CheckPrereq).
3530 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3531 set([self.group_uuid] +
3532 # Lock all groups used by instances optimistically; this requires
3533 # going via the node before it's locked, requiring verification
3536 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3537 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3539 elif level == locking.LEVEL_NODE:
3540 # This will only lock the nodes in the group to be verified which contain
3542 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3543 self._LockInstancesNodes()
3545 # Lock all nodes in group to be verified
3546 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3547 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3548 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3550 def CheckPrereq(self):
# Snapshot what we actually own, then verify the optimistic locking
# assumptions made in DeclareLocks still hold.
3551 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3552 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3553 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3555 assert self.group_uuid in owned_groups
3557 # Check if locked instances are still correct
3558 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3560 # Get instance information
3561 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3563 # Check if node groups for locked instances are still correct
3564 _CheckInstancesNodeGroups(self.cfg, self.instances,
3565 owned_groups, owned_nodes, self.group_uuid)
3567 def Exec(self, feedback_fn):
3568 """Verify integrity of cluster disks.
3570 @rtype: tuple of three items
3571 @return: a tuple of (dict of node-to-node_error, list of instances
3572 which need activate-disks, dict of instance: (node, volume) for
3577 res_instances = set()
# Map (node, LV name) -> instance for all disks of running instances
3580 nv_dict = _MapInstanceDisksToNodes(
3581 [inst for inst in self.instances.values()
3582 if inst.admin_state == constants.ADMINST_UP])
# Only query nodes we hold locks for and which can actually host LVs
3585 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3586 set(self.cfg.GetVmCapableNodeList()))
3588 node_lvs = self.rpc.call_lv_list(nodes, [])
3590 for (node, node_res) in node_lvs.items():
3591 if node_res.offline:
3594 msg = node_res.fail_msg
# RPC failure: record per-node error and move on (best effort)
3596 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3597 res_nodes[node] = msg
# Each payload entry maps an LV name to its attributes; the third
# field is the "online" flag. Pop matched entries out of nv_dict so
# the leftovers are exactly the LVs no node reported.
3600 for lv_name, (_, _, lv_online) in node_res.payload.items():
3601 inst = nv_dict.pop((node, lv_name), None)
3602 if not (lv_online or inst is None):
3603 res_instances.add(inst)
3605 # any leftover items in nv_dict are missing LVs, let's arrange the data
3607 for key, inst in nv_dict.iteritems():
3608 res_missing.setdefault(inst, []).append(list(key))
3610 return (res_nodes, list(res_instances), res_missing)
3613 class LUClusterRepairDiskSizes(NoHooksLU):
3614 """Verifies the cluster disks sizes.
3619 def ExpandNames(self):
# With an explicit instance list only those instances (and, via
# DeclareLocks, their primary nodes) get locked; otherwise everything.
3620 if self.op.instances:
3621 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3622 self.needed_locks = {
3623 locking.LEVEL_NODE_RES: [],
3624 locking.LEVEL_INSTANCE: self.wanted_names,
3626 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3628 self.wanted_names = None
3629 self.needed_locks = {
3630 locking.LEVEL_NODE_RES: locking.ALL_SET,
3631 locking.LEVEL_INSTANCE: locking.ALL_SET,
# Node-resource locks shared, instance locks exclusive (config is updated)
3633 self.share_locks = {
3634 locking.LEVEL_NODE_RES: 1,
3635 locking.LEVEL_INSTANCE: 0,
3638 def DeclareLocks(self, level):
3639 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3640 self._LockInstancesNodes(primary_only=True, level=level)
3642 def CheckPrereq(self):
3643 """Check prerequisites.
3645 This only checks the optional instance list against the existing names.
3648 if self.wanted_names is None:
# No explicit list given: operate on all instances we ended up locking
3649 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3651 self.wanted_instances = \
3652 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3654 def _EnsureChildSizes(self, disk):
3655 """Ensure children of the disk have the needed disk size.
3657 This is valid mainly for DRBD8 and fixes an issue where the
3658 children have smaller disk size.
3660 @param disk: an L{ganeti.objects.Disk} object
# @return: True when a child size was changed (config needs saving)
3663 if disk.dev_type == constants.LD_DRBD8:
3664 assert disk.children, "Empty children for DRBD8?"
3665 fchild = disk.children[0]
3666 mismatch = fchild.size < disk.size
3668 self.LogInfo("Child disk has size %d, parent %d, fixing",
3669 fchild.size, disk.size)
3670 fchild.size = disk.size
3672 # and we recurse on this child only, not on the metadev
3673 return self._EnsureChildSizes(fchild) or mismatch
3677 def Exec(self, feedback_fn):
3678 """Verify the size of cluster disks.
3681 # TODO: check child disks too
3682 # TODO: check differences in size between primary/secondary nodes
# Group the wanted disks by primary node so sizes can be queried with
# one RPC per node.
3684 for instance in self.wanted_instances:
3685 pnode = instance.primary_node
3686 if pnode not in per_node_disks:
3687 per_node_disks[pnode] = []
3688 for idx, disk in enumerate(instance.disks):
3689 per_node_disks[pnode].append((instance, idx, disk))
3691 assert not (frozenset(per_node_disks.keys()) -
3692 self.owned_locks(locking.LEVEL_NODE_RES)), \
3693 "Not owning correct locks"
3694 assert not self.owned_locks(locking.LEVEL_NODE)
3697 for node, dskl in per_node_disks.items():
# Work on copies so the config objects aren't touched by SetDiskID
3698 newl = [v[2].Copy() for v in dskl]
3700 self.cfg.SetDiskID(dsk, node)
3701 result = self.rpc.call_blockdev_getsize(node, newl)
# All node-level failures below are logged and skipped (best effort)
3703 self.LogWarning("Failure in blockdev_getsize call to node"
3704 " %s, ignoring", node)
3706 if len(result.payload) != len(dskl):
3707 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3708 " result.payload=%s", node, len(dskl), result.payload)
3709 self.LogWarning("Invalid result from node %s, ignoring node results",
# Compare each reported size against the recorded one
3712 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3714 self.LogWarning("Disk %d of instance %s did not return size"
3715 " information, ignoring", idx, instance.name)
3717 if not isinstance(size, (int, long)):
3718 self.LogWarning("Disk %d of instance %s did not return valid"
3719 " size information, ignoring", idx, instance.name)
3722 if size != disk.size:
# Mismatch: adopt the actual on-disk size into the configuration
3723 self.LogInfo("Disk %d of instance %s has mismatched size,"
3724 " correcting: recorded %d, actual %d", idx,
3725 instance.name, disk.size, size)
3727 self.cfg.Update(instance, feedback_fn)
3728 changed.append((instance.name, idx, size))
3729 if self._EnsureChildSizes(disk):
3730 self.cfg.Update(instance, feedback_fn)
3731 changed.append((instance.name, idx, disk.size))
3735 class LUClusterRename(LogicalUnit):
3736 """Rename the cluster.
3739 HPATH = "cluster-rename"
3740 HTYPE = constants.HTYPE_CLUSTER
3742 def BuildHooksEnv(self):
# Expose the old and the new cluster name to the hooks environment
3747 "OP_TARGET": self.cfg.GetClusterName(),
3748 "NEW_NAME": self.op.name,
3751 def BuildHooksNodes(self):
3752 """Build hooks nodes.
# Pre-hooks run on the master only; post-hooks on all nodes
3755 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3757 def CheckPrereq(self):
3758 """Verify that the passed name is a valid one.
# Resolve the requested name with the cluster's primary IP family
3761 hostname = netutils.GetHostname(name=self.op.name,
3762 family=self.cfg.GetPrimaryIPFamily())
3764 new_name = hostname.name
3765 self.ip = new_ip = hostname.ip
3766 old_name = self.cfg.GetClusterName()
3767 old_ip = self.cfg.GetMasterIP()
3768 if new_name == old_name and new_ip == old_ip:
3769 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3770 " cluster has changed",
3772 if new_ip != old_ip:
# Refuse a new master IP which already answers on the noded port
3773 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3774 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3775 " reachable on the network" %
3776 new_ip, errors.ECODE_NOTUNIQUE)
# Store the fully-resolved name back into the opcode for Exec
3778 self.op.name = new_name
3780 def Exec(self, feedback_fn):
3781 """Rename the cluster.
3784 clustername = self.op.name
3787 # shutdown the master IP
3788 master_params = self.cfg.GetMasterNetworkParameters()
3789 ems = self.cfg.GetUseExternalMipScript()
3790 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3792 result.Raise("Could not disable the master role")
# Persist the new name/IP in the cluster configuration
3795 cluster = self.cfg.GetClusterInfo()
3796 cluster.cluster_name = clustername
3797 cluster.master_ip = new_ip
3798 self.cfg.Update(cluster, feedback_fn)
3800 # update the known hosts file
3801 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
3802 node_list = self.cfg.GetOnlineNodeList()
# The master already has the new file; distribute to the other nodes
3804 node_list.remove(master_params.name)
3807 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
# Bring the master IP back up on the (possibly new) address
3809 master_params.ip = new_ip
3810 result = self.rpc.call_node_activate_master_ip(master_params.name,
3812 msg = result.fail_msg
# Failure to re-activate is only warned about, not fatal
3814 self.LogWarning("Could not re-enable the master role on"
3815 " the master, please restart manually: %s", msg)
3820 def _ValidateNetmask(cfg, netmask):
3821 """Checks if a netmask is valid.
3823 @type cfg: L{config.ConfigWriter}
3824 @param cfg: The cluster configuration
3826 @param netmask: the netmask to be verified
3827 @raise errors.OpPrereqError: if the validation fails
# Pick the IP address class matching the cluster's primary IP family;
# an unknown family is reported as a prerequisite error, not a crash.
3830 ip_family = cfg.GetPrimaryIPFamily()
3832 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3833 except errors.ProgrammerError:
3834 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3835 ip_family, errors.ECODE_INVAL)
# Delegate the actual check to the family-specific class
3836 if not ipcls.ValidateNetmask(netmask):
3837 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3838 (netmask), errors.ECODE_INVAL)
3841 class LUClusterSetParams(LogicalUnit):
3842 """Change the parameters of the cluster.
3845 HPATH = "cluster-modify"
3846 HTYPE = constants.HTYPE_CLUSTER
3849 def CheckArguments(self):
3853 if self.op.uid_pool:
3854 uidpool.CheckUidPool(self.op.uid_pool)
3856 if self.op.add_uids:
3857 uidpool.CheckUidPool(self.op.add_uids)
3859 if self.op.remove_uids:
3860 uidpool.CheckUidPool(self.op.remove_uids)
3862 if self.op.master_netmask is not None:
3863 _ValidateNetmask(self.cfg, self.op.master_netmask)
3865 if self.op.diskparams:
3866 for dt_params in self.op.diskparams.values():
3867 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3869 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3870 except errors.OpPrereqError, err:
3871 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
3874 def ExpandNames(self):
3875 # FIXME: in the future maybe other cluster params won't require checking on
3876 # all nodes to be modified.
3877 self.needed_locks = {
3878 locking.LEVEL_NODE: locking.ALL_SET,
3879 locking.LEVEL_INSTANCE: locking.ALL_SET,
3880 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3882 self.share_locks = {
3883 locking.LEVEL_NODE: 1,
3884 locking.LEVEL_INSTANCE: 1,
3885 locking.LEVEL_NODEGROUP: 1,
3888 def BuildHooksEnv(self):
3893 "OP_TARGET": self.cfg.GetClusterName(),
3894 "NEW_VG_NAME": self.op.vg_name,
3897 def BuildHooksNodes(self):
3898 """Build hooks nodes.
3901 mn = self.cfg.GetMasterNode()
3904 def CheckPrereq(self):
3905 """Check prerequisites.
3907 This checks whether the given params don't conflict and
3908 if the given volume group is valid.
3911 if self.op.vg_name is not None and not self.op.vg_name:
3912 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3913 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3914 " instances exist", errors.ECODE_INVAL)
3916 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3917 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3918 raise errors.OpPrereqError("Cannot disable drbd helper while"
3919 " drbd-based instances exist",
3922 node_list = self.owned_locks(locking.LEVEL_NODE)
3924 # if vg_name not None, checks given volume group on all nodes
3926 vglist = self.rpc.call_vg_list(node_list)
3927 for node in node_list:
3928 msg = vglist[node].fail_msg
3930 # ignoring down node
3931 self.LogWarning("Error while gathering data on node %s"
3932 " (ignoring node): %s", node, msg)
3934 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3936 constants.MIN_VG_SIZE)
3938 raise errors.OpPrereqError("Error on node '%s': %s" %
3939 (node, vgstatus), errors.ECODE_ENVIRON)
3941 if self.op.drbd_helper:
3942 # checks given drbd helper on all nodes
3943 helpers = self.rpc.call_drbd_helper(node_list)
3944 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3946 self.LogInfo("Not checking drbd helper on offline node %s", node)
3948 msg = helpers[node].fail_msg
3950 raise errors.OpPrereqError("Error checking drbd helper on node"
3951 " '%s': %s" % (node, msg),
3952 errors.ECODE_ENVIRON)
3953 node_helper = helpers[node].payload
3954 if node_helper != self.op.drbd_helper:
3955 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3956 (node, node_helper), errors.ECODE_ENVIRON)
3958 self.cluster = cluster = self.cfg.GetClusterInfo()
3959 # validate params changes
3960 if self.op.beparams:
3961 objects.UpgradeBeParams(self.op.beparams)
3962 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3963 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3965 if self.op.ndparams:
3966 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3967 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3969 # TODO: we need a more general way to handle resetting
3970 # cluster-level parameters to default values
3971 if self.new_ndparams["oob_program"] == "":
3972 self.new_ndparams["oob_program"] = \
3973 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3975 if self.op.hv_state:
3976 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3977 self.cluster.hv_state_static)
3978 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3979 for hv, values in new_hv_state.items())
3981 if self.op.disk_state:
3982 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3983 self.cluster.disk_state_static)
3984 self.new_disk_state = \
3985 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3986 for name, values in svalues.items()))
3987 for storage, svalues in new_disk_state.items())
3990 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3993 all_instances = self.cfg.GetAllInstancesInfo().values()
3995 for group in self.cfg.GetAllNodeGroupsInfo().values():
3996 instances = frozenset([inst for inst in all_instances
3997 if compat.any(node in group.members
3998 for node in inst.all_nodes)])
3999 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4000 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4001 new = _ComputeNewInstanceViolations(ipol,
4002 new_ipolicy, instances)
4004 violations.update(new)
4007 self.LogWarning("After the ipolicy change the following instances"
4008 " violate them: %s",
4009 utils.CommaJoin(utils.NiceSort(violations)))
4011 if self.op.nicparams:
4012 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4013 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4014 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4017 # check all instances for consistency
4018 for instance in self.cfg.GetAllInstancesInfo().values():
4019 for nic_idx, nic in enumerate(instance.nics):
4020 params_copy = copy.deepcopy(nic.nicparams)
4021 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4023 # check parameter syntax
4025 objects.NIC.CheckParameterSyntax(params_filled)
4026 except errors.ConfigurationError, err:
4027 nic_errors.append("Instance %s, nic/%d: %s" %
4028 (instance.name, nic_idx, err))
4030 # if we're moving instances to routed, check that they have an ip
4031 target_mode = params_filled[constants.NIC_MODE]
4032 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4033 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4034 " address" % (instance.name, nic_idx))
4036 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4037 "\n".join(nic_errors), errors.ECODE_INVAL)
4039 # hypervisor list/parameters
4040 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4041 if self.op.hvparams:
4042 for hv_name, hv_dict in self.op.hvparams.items():
4043 if hv_name not in self.new_hvparams:
4044 self.new_hvparams[hv_name] = hv_dict
4046 self.new_hvparams[hv_name].update(hv_dict)
4048 # disk template parameters
4049 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4050 if self.op.diskparams:
4051 for dt_name, dt_params in self.op.diskparams.items():
4052 if dt_name not in self.op.diskparams:
4053 self.new_diskparams[dt_name] = dt_params
4055 self.new_diskparams[dt_name].update(dt_params)
4057 # os hypervisor parameters
4058 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4060 for os_name, hvs in self.op.os_hvp.items():
4061 if os_name not in self.new_os_hvp:
4062 self.new_os_hvp[os_name] = hvs
4064 for hv_name, hv_dict in hvs.items():
4065 if hv_name not in self.new_os_hvp[os_name]:
4066 self.new_os_hvp[os_name][hv_name] = hv_dict
4068 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4071 self.new_osp = objects.FillDict(cluster.osparams, {})
4072 if self.op.osparams:
4073 for os_name, osp in self.op.osparams.items():
4074 if os_name not in self.new_osp:
4075 self.new_osp[os_name] = {}
4077 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4080 if not self.new_osp[os_name]:
4081 # we removed all parameters
4082 del self.new_osp[os_name]
4084 # check the parameter validity (remote check)
4085 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4086 os_name, self.new_osp[os_name])
4088 # changes to the hypervisor list
4089 if self.op.enabled_hypervisors is not None:
4090 self.hv_list = self.op.enabled_hypervisors
4091 for hv in self.hv_list:
4092 # if the hypervisor doesn't already exist in the cluster
4093 # hvparams, we initialize it to empty, and then (in both
4094 # cases) we make sure to fill the defaults, as we might not
4095 # have a complete defaults list if the hypervisor wasn't
4097 if hv not in new_hvp:
4099 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4100 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4102 self.hv_list = cluster.enabled_hypervisors
4104 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4105 # either the enabled list has changed, or the parameters have, validate
4106 for hv_name, hv_params in self.new_hvparams.items():
4107 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4108 (self.op.enabled_hypervisors and
4109 hv_name in self.op.enabled_hypervisors)):
4110 # either this is a new hypervisor, or its parameters have changed
4111 hv_class = hypervisor.GetHypervisor(hv_name)
4112 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4113 hv_class.CheckParameterSyntax(hv_params)
4114 _CheckHVParams(self, node_list, hv_name, hv_params)
4117 # no need to check any newly-enabled hypervisors, since the
4118 # defaults have already been checked in the above code-block
4119 for os_name, os_hvp in self.new_os_hvp.items():
4120 for hv_name, hv_params in os_hvp.items():
4121 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4122 # we need to fill in the new os_hvp on top of the actual hv_p
4123 cluster_defaults = self.new_hvparams.get(hv_name, {})
4124 new_osp = objects.FillDict(cluster_defaults, hv_params)
4125 hv_class = hypervisor.GetHypervisor(hv_name)
4126 hv_class.CheckParameterSyntax(new_osp)
4127 _CheckHVParams(self, node_list, hv_name, new_osp)
4129 if self.op.default_iallocator:
4130 alloc_script = utils.FindFile(self.op.default_iallocator,
4131 constants.IALLOCATOR_SEARCH_PATH,
4133 if alloc_script is None:
4134 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4135 " specified" % self.op.default_iallocator,
# LUClusterSetParams.Exec: apply the already-validated opcode fields to the
# in-memory cluster object (self.cluster), then persist via self.cfg.Update.
# NOTE(review): the embedded original line numbers jump (e.g. 4143->4146,
# 4157->4158, 4244->4246); the intervening lines (else-branches, call-argument
# continuations) appear elided from this copy -- not runnable as-is.
4138 def Exec(self, feedback_fn):
4139 """Change the parameters of the cluster.
# Volume group: only written back if it actually changed; otherwise the
# user is told nothing needs doing.
4142 if self.op.vg_name is not None:
4143 new_volume = self.op.vg_name
4146 if new_volume != self.cfg.GetVGName():
4147 self.cfg.SetVGName(new_volume)
4149 feedback_fn("Cluster LVM configuration already in desired"
4150 " state, not changing")
# DRBD usermode helper: same change-only-if-different pattern as vg_name.
4151 if self.op.drbd_helper is not None:
4152 new_helper = self.op.drbd_helper
4155 if new_helper != self.cfg.GetDRBDHelper():
4156 self.cfg.SetDRBDHelper(new_helper)
4158 feedback_fn("Cluster DRBD helper already in desired state,"
# Parameter dictionaries precomputed in CheckPrereq (self.new_*) are copied
# onto the cluster object only when the corresponding opcode field was set.
4160 if self.op.hvparams:
4161 self.cluster.hvparams = self.new_hvparams
4163 self.cluster.os_hvp = self.new_os_hvp
4164 if self.op.enabled_hypervisors is not None:
4165 self.cluster.hvparams = self.new_hvparams
4166 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4167 if self.op.beparams:
4168 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4169 if self.op.nicparams:
4170 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4172 self.cluster.ipolicy = self.new_ipolicy
4173 if self.op.osparams:
4174 self.cluster.osparams = self.new_osp
4175 if self.op.ndparams:
4176 self.cluster.ndparams = self.new_ndparams
4177 if self.op.diskparams:
4178 self.cluster.diskparams = self.new_diskparams
4179 if self.op.hv_state:
4180 self.cluster.hv_state_static = self.new_hv_state
4181 if self.op.disk_state:
4182 self.cluster.disk_state_static = self.new_disk_state
4184 if self.op.candidate_pool_size is not None:
4185 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4186 # we need to update the pool size here, otherwise the save will fail
4187 _AdjustCandidatePool(self, [])
4189 if self.op.maintain_node_health is not None:
4190 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4191 feedback_fn("Note: CONFD was disabled at build time, node health"
4192 " maintenance is not useful (still enabling it)")
4193 self.cluster.maintain_node_health = self.op.maintain_node_health
4195 if self.op.prealloc_wipe_disks is not None:
4196 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4198 if self.op.add_uids is not None:
4199 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4201 if self.op.remove_uids is not None:
4202 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4204 if self.op.uid_pool is not None:
4205 self.cluster.uid_pool = self.op.uid_pool
4207 if self.op.default_iallocator is not None:
4208 self.cluster.default_iallocator = self.op.default_iallocator
4210 if self.op.reserved_lvs is not None:
4211 self.cluster.reserved_lvs = self.op.reserved_lvs
4213 if self.op.use_external_mip_script is not None:
4214 self.cluster.use_external_mip_script = self.op.use_external_mip_script
# helper_os: applies DDM_ADD / DDM_REMOVE modifications to the cluster OS
# list named by `aname` ("hidden_os" / "blacklisted_os"), warning via
# feedback_fn when the modification is a no-op.
4216 def helper_os(aname, mods, desc):
4218 lst = getattr(self.cluster, aname)
4219 for key, val in mods:
4220 if key == constants.DDM_ADD:
4222 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4225 elif key == constants.DDM_REMOVE:
4229 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4231 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4233 if self.op.hidden_os:
4234 helper_os("hidden_os", self.op.hidden_os, "hidden")
4236 if self.op.blacklisted_os:
4237 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
# master_netdev change: the master IP is taken down on the old netdev before
# the config is updated; it is brought back up further below, after Update.
4239 if self.op.master_netdev:
4240 master_params = self.cfg.GetMasterNetworkParameters()
4241 ems = self.cfg.GetUseExternalMipScript()
4242 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4243 self.cluster.master_netdev)
4244 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4246 result.Raise("Could not disable the master ip")
4247 feedback_fn("Changing master_netdev from %s to %s" %
4248 (master_params.netdev, self.op.master_netdev))
4249 self.cluster.master_netdev = self.op.master_netdev
4251 if self.op.master_netmask:
4252 master_params = self.cfg.GetMasterNetworkParameters()
4253 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4254 result = self.rpc.call_node_change_master_netmask(master_params.name,
4255 master_params.netmask,
4256 self.op.master_netmask,
4258 master_params.netdev)
4260 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4263 self.cluster.master_netmask = self.op.master_netmask
# Persist all accumulated changes to the configuration.
4265 self.cfg.Update(self.cluster, feedback_fn)
4267 if self.op.master_netdev:
4268 master_params = self.cfg.GetMasterNetworkParameters()
4269 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4270 self.op.master_netdev)
4271 ems = self.cfg.GetUseExternalMipScript()
4272 result = self.rpc.call_node_activate_master_ip(master_params.name,
# Failure to re-activate the master IP is deliberately only a warning: the
# config change itself has already been committed at this point.
4275 self.LogWarning("Could not re-enable the master ip on"
4276 " the master, please restart manually: %s",
# _UploadHelper: best-effort copy of a local file to the given nodes via the
# upload_file RPC; per-node failures are logged as warnings, never raised.
# NOTE(review): original line 4288 (presumably "if msg:") is elided here.
4280 def _UploadHelper(lu, nodes, fname):
4281 """Helper for uploading a file and showing warnings.
# Only attempt the upload when the file exists on the master.
4284 if os.path.exists(fname):
4285 result = lu.rpc.call_upload_file(nodes, fname)
4286 for to_node, to_result in result.items():
4287 msg = to_result.fail_msg
4289 msg = ("Copy of file %s to node %s failed: %s" %
4290 (fname, to_node, msg))
4291 lu.proc.LogWarning(msg)
# _ComputeAncillaryFiles: builds the four file sets (all-nodes, optional,
# master-candidate-only, vm-capable-only) that must be kept consistent across
# the cluster, asserting the categories are disjoint before returning.
# NOTE(review): the set-literal openers (e.g. "files_all = ...", "files_opt =
# ...", "files_mc = ...", "files_vm = ...") are elided from this copy, per the
# gaps in the embedded line numbers (4301->4303, 4326->4328, 4331->4335).
4294 def _ComputeAncillaryFiles(cluster, redist):
4295 """Compute files external to Ganeti which need to be consistent.
4297 @type redist: boolean
4298 @param redist: Whether to include files which need to be redistributed
4301 # Compute files for all nodes
4303 pathutils.SSH_KNOWN_HOSTS_FILE,
4304 pathutils.CONFD_HMAC_KEY,
4305 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4306 pathutils.SPICE_CERT_FILE,
4307 pathutils.SPICE_CACERT_FILE,
4308 pathutils.RAPI_USERS_FILE,
4312 # we need to ship at least the RAPI certificate
4313 files_all.add(pathutils.RAPI_CERT_FILE)
4315 files_all.update(pathutils.ALL_CERT_FILES)
4316 files_all.update(ssconf.SimpleStore().GetFileList())
4318 if cluster.modify_etc_hosts:
4319 files_all.add(pathutils.ETC_HOSTS)
4321 if cluster.use_external_mip_script:
4322 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4324 # Files which are optional, these must:
4325 # - be present in one other category as well
4326 # - either exist or not exist on all nodes of that category (mc, vm all)
4328 pathutils.RAPI_USERS_FILE,
4331 # Files which should only be on master candidates
4335 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4339 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4340 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4341 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4343 # Files which should only be on VM-capable nodes
4346 for hv_name in cluster.enabled_hypervisors
4347 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4351 for hv_name in cluster.enabled_hypervisors
4352 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4354 # Filenames in each category must be unique
4355 all_files_set = files_all | files_mc | files_vm
4356 assert (len(all_files_set) ==
4357 sum(map(len, [files_all, files_mc, files_vm]))), \
4358 "Found file listed in more than one file list"
4360 # Optional files must be present in one other category
4361 assert all_files_set.issuperset(files_opt), \
4362 "Optional file not in a different required list"
4364 # This one file should never ever be re-distributed via RPC
4365 assert not (redist and
4366 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4368 return (files_all, files_opt, files_mc, files_vm)
# _RedistributeAncillaryFiles: pushes the ancillary file sets computed by
# _ComputeAncillaryFiles to the online (and optionally extra) nodes via
# _UploadHelper; the master node is always excluded from the targets.
# NOTE(review): the "filemap = [" opener and "if additional_vm:" lines appear
# elided (gaps 4409->4412, 4393->4395 in the embedded numbering).
4371 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4372 """Distribute additional files which are part of the cluster configuration.
4374 ConfigWriter takes care of distributing the config and ssconf files, but
4375 there are more files which should be distributed to all nodes. This function
4376 makes sure those are copied.
4378 @param lu: calling logical unit
4379 @param additional_nodes: list of nodes not in the config to distribute to
4380 @type additional_vm: boolean
4381 @param additional_vm: whether the additional nodes are vm-capable or not
4384 # Gather target nodes
4385 cluster = lu.cfg.GetClusterInfo()
4386 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4388 online_nodes = lu.cfg.GetOnlineNodeList()
4389 online_set = frozenset(online_nodes)
4390 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4392 if additional_nodes is not None:
4393 online_nodes.extend(additional_nodes)
4395 vm_nodes.extend(additional_nodes)
4397 # Never distribute to master node
4398 for nodelist in [online_nodes, vm_nodes]:
4399 if master_info.name in nodelist:
4400 nodelist.remove(master_info.name)
4403 (files_all, _, files_mc, files_vm) = \
4404 _ComputeAncillaryFiles(cluster, True)
4406 # Never re-distribute configuration file from here
4407 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4408 pathutils.CLUSTER_CONF_FILE in files_vm)
4409 assert not files_mc, "Master candidates not handled in this function"
4412 (online_nodes, files_all),
4413 (vm_nodes, files_vm),
4417 for (node_list, files) in filemap:
4419 _UploadHelper(lu, node_list, fname)
# LUClusterRedistConf: forces a config write (which ConfigWriter distributes)
# and then redistributes the ancillary files, under a shared all-nodes lock.
# NOTE(review): the dict closer and class attributes between lines 4433-4435
# of the original appear elided from this copy.
4422 class LUClusterRedistConf(NoHooksLU):
4423 """Force the redistribution of cluster configuration.
4425 This is a very simple LU.
4430 def ExpandNames(self):
4431 self.needed_locks = {
4432 locking.LEVEL_NODE: locking.ALL_SET,
4434 self.share_locks[locking.LEVEL_NODE] = 1
4436 def Exec(self, feedback_fn):
4437 """Redistribute the configuration.
# Updating the (unchanged) cluster object triggers a config write + push.
4440 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4441 _RedistributeAncillaryFiles(self)
# LUClusterActivateMasterIp: brings up the master IP via an RPC to the master
# node, honoring the external master-IP setup script setting.
# NOTE(review): the RPC call's argument continuation (original line 4455) is
# elided from this copy; `ems` is presumably passed there -- confirm upstream.
4444 class LUClusterActivateMasterIp(NoHooksLU):
4445 """Activate the master IP on the master node.
4448 def Exec(self, feedback_fn):
4449 """Activate the master IP.
4452 master_params = self.cfg.GetMasterNetworkParameters()
4453 ems = self.cfg.GetUseExternalMipScript()
4454 result = self.rpc.call_node_activate_master_ip(master_params.name,
4456 result.Raise("Could not activate the master IP")
# LUClusterDeactivateMasterIp: mirror image of LUClusterActivateMasterIp;
# takes the master IP down via RPC, raising on failure.
# NOTE(review): the RPC call's argument continuation (original line 4470) is
# elided from this copy.
4459 class LUClusterDeactivateMasterIp(NoHooksLU):
4460 """Deactivate the master IP on the master node.
4463 def Exec(self, feedback_fn):
4464 """Deactivate the master IP.
4467 master_params = self.cfg.GetMasterNetworkParameters()
4468 ems = self.cfg.GetUseExternalMipScript()
4469 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4471 result.Raise("Could not deactivate the master IP")
# _WaitForSync: polls blockdev_getmirrorstatus on the instance's primary node
# until the given disks are in sync (or, with oneshot=True, reports once).
# Returns True when no disk is left degraded.
# NOTE(review): this copy is missing the loop scaffolding (the "while True:" /
# retry counters / "done" computation between the embedded line numbers
# 4491-4546), so read it as an outline, not as runnable code.
4474 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4475 """Sleep and poll for an instance's disk to sync.
# Fast-path: nothing to wait for when there are no disks to check.
4478 if not instance.disks or disks is not None and not disks:
4481 disks = _ExpandCheckDisks(instance, disks)
4484 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4486 node = instance.primary_node
4489 lu.cfg.SetDiskID(dev, node)
4491 # TODO: Convert to utils.Retry
4494 degr_retries = 10 # in seconds, as we sleep 1 second each time
4498 cumul_degraded = False
4499 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4500 msg = rstats.fail_msg
4502 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
# Persistent RPC failure escalates to a hard error.
4505 raise errors.RemoteError("Can't contact node %s for mirror data,"
4506 " aborting." % node)
4509 rstats = rstats.payload
4511 for i, mstat in enumerate(rstats):
4513 lu.LogWarning("Can't compute data for node %s/%s",
4514 node, disks[i].iv_name)
# A device that is degraded with no sync progress counts as still degraded.
4517 cumul_degraded = (cumul_degraded or
4518 (mstat.is_degraded and mstat.sync_percent is None))
4519 if mstat.sync_percent is not None:
4521 if mstat.estimated_time is not None:
4522 rem_time = ("%s remaining (estimated)" %
4523 utils.FormatSeconds(mstat.estimated_time))
4524 max_time = mstat.estimated_time
4526 rem_time = "no time estimate"
4527 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4528 (disks[i].iv_name, mstat.sync_percent, rem_time))
4530 # if we're done but degraded, let's do a few small retries, to
4531 # make sure we see a stable and not transient situation; therefore
4532 # we force restart of the loop
4533 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4534 logging.info("Degraded disks found, %d retries left", degr_retries)
# Sleep is capped at 60s regardless of the reported estimate.
4542 time.sleep(min(60, max_time))
4545 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4546 return not cumul_degraded
def _BlockdevFind(lu, node, dev, instance):
  """Locate a block device on a node, annotating disk parameters first.

  The raw disk object is enriched with the owning instance's disk
  parameters via L{_AnnotateDiskParams} before the RPC is issued.

  @param lu: A reference to the lu object
  @param node: The node to call out
  @param dev: The device to find
  @param instance: The instance object the device belongs to
  @returns The result of the rpc call

  """
  (annotated_disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
  return lu.rpc.call_blockdev_find(node, annotated_disk)
def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
  """Wrapper around L{_CheckDiskConsistencyInner}.

  Annotates the device with the instance's disk parameters and delegates
  the actual consistency check to the inner helper.

  """
  (annotated,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
  return _CheckDiskConsistencyInner(lu, instance, annotated, node, on_primary,
                                    ldisk=ldisk)
# _CheckDiskConsistencyInner: recursive consistency check of an (already
# annotated) device and its children via the blockdev_find RPC.
# NOTE(review): this copy is missing the signature continuation (presumably
# "ldisk=False):"), the "result = True" initialization and the final
# "return result" (embedded numbering gaps 4572->4574, 4583->4587,
# 4605->4610) -- confirm against the upstream file before relying on it.
4572 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4574 """Check that mirrors are not degraded.
4576 @attention: The device has to be annotated already.
4578 The ldisk parameter, if True, will change the test from the
4579 is_degraded attribute (which represents overall non-ok status for
4580 the device(s)) to the ldisk (representing the local storage status).
4583 lu.cfg.SetDiskID(dev, node)
# Only query devices that are assembled on this node.
4587 if on_primary or dev.AssembleOnSecondary():
4588 rstats = lu.rpc.call_blockdev_find(node, dev)
4589 msg = rstats.fail_msg
4591 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4593 elif not rstats.payload:
4594 lu.LogWarning("Can't find disk on node %s", node)
# ldisk mode checks local storage status; default mode checks is_degraded.
4598 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4600 result = result and not rstats.payload.is_degraded
# Recurse into child devices, AND-ing the results together.
4603 for child in dev.children:
4604 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
# LUOobCommand: runs an out-of-band (OOB) command against a set of nodes,
# optionally skipping the master for power-off/power-cycle, validating the
# per-command RPC payload and updating recorded node power state.
# NOTE(review): the embedded line numbering is gappy throughout this class;
# several code lines (e.g. "REQ_BGL = False", try/except openers, result
# accumulator initializations) appear elided from this copy.
4610 class LUOobCommand(NoHooksLU):
4611 """Logical unit for OOB handling.
# Commands for which the master node must not be a target.
4615 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4617 def ExpandNames(self):
4618 """Gather locks we need.
# Lock exactly the named nodes, or all nodes when none were given.
4621 if self.op.node_names:
4622 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4623 lock_names = self.op.node_names
4625 lock_names = locking.ALL_SET
4627 self.needed_locks = {
4628 locking.LEVEL_NODE: lock_names,
4631 def CheckPrereq(self):
4632 """Check prerequisites.
4635 - the node exists in the configuration
4638 Any errors are signaled by raising errors.OpPrereqError.
4642 self.master_node = self.cfg.GetMasterNode()
4644 assert self.op.power_delay >= 0.0
# Explicit node list: refuse dangerous commands targeting the master,
# suggesting the direct OOB handler invocation when one exists.
4646 if self.op.node_names:
4647 if (self.op.command in self._SKIP_MASTER and
4648 self.master_node in self.op.node_names):
4649 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4650 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4652 if master_oob_handler:
4653 additional_text = ("run '%s %s %s' if you want to operate on the"
4654 " master regardless") % (master_oob_handler,
4658 additional_text = "it does not support out-of-band operations"
4660 raise errors.OpPrereqError(("Operating on the master node %s is not"
4661 " allowed for %s; %s") %
4662 (self.master_node, self.op.command,
4663 additional_text), errors.ECODE_INVAL)
# No node list: operate on all nodes, silently dropping the master for
# commands in _SKIP_MASTER.
4665 self.op.node_names = self.cfg.GetNodeList()
4666 if self.op.command in self._SKIP_MASTER:
4667 self.op.node_names.remove(self.master_node)
4669 if self.op.command in self._SKIP_MASTER:
4670 assert self.master_node not in self.op.node_names
4672 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4674 raise errors.OpPrereqError("Node %s not found" % node_name,
4677 self.nodes.append(node)
# Powering off a node that is not marked offline requires ignore_status.
4679 if (not self.op.ignore_status and
4680 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4681 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4682 " not marked offline") % node_name,
4685 def Exec(self, feedback_fn):
4686 """Execute OOB and return result if we expect any.
4689 master_node = self.master_node
# Nodes are processed in NiceSort order; each node contributes one entry
# of (status, data) tuples to the returned list.
4692 for idx, node in enumerate(utils.NiceSort(self.nodes,
4693 key=lambda node: node.name)):
4694 node_entry = [(constants.RS_NORMAL, node.name)]
4695 ret.append(node_entry)
4697 oob_program = _SupportsOob(self.cfg, node)
4700 node_entry.append((constants.RS_UNAVAIL, None))
4703 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4704 self.op.command, oob_program, node.name)
4705 result = self.rpc.call_run_oob(master_node, oob_program,
4706 self.op.command, node.name,
4710 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4711 node.name, result.fail_msg)
4712 node_entry.append((constants.RS_NODATA, None))
4715 self._CheckPayload(result)
4716 except errors.OpExecError, err:
4717 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4719 node_entry.append((constants.RS_NODATA, None))
4721 if self.op.command == constants.OOB_HEALTH:
4722 # For health we should log important events
4723 for item, status in result.payload:
4724 if status in [constants.OOB_STATUS_WARNING,
4725 constants.OOB_STATUS_CRITICAL]:
4726 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4727 item, node.name, status)
4729 if self.op.command == constants.OOB_POWER_ON:
4731 elif self.op.command == constants.OOB_POWER_OFF:
4732 node.powered = False
4733 elif self.op.command == constants.OOB_POWER_STATUS:
4734 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4735 if powered != node.powered:
4736 logging.warning(("Recorded power state (%s) of node '%s' does not"
4737 " match actual power state (%s)"), node.powered,
4740 # For configuration changing commands we should update the node
4741 if self.op.command in (constants.OOB_POWER_ON,
4742 constants.OOB_POWER_OFF):
4743 self.cfg.Update(node, feedback_fn)
4745 node_entry.append((constants.RS_NORMAL, result.payload))
# Pause between consecutive power-ons to avoid load spikes.
4747 if (self.op.command == constants.OOB_POWER_ON and
4748 idx < len(self.nodes) - 1):
4749 time.sleep(self.op.power_delay)
4753 def _CheckPayload(self, result):
4754 """Checks if the payload is valid.
4756 @param result: RPC result
4757 @raises errors.OpExecError: If payload is not valid
# Per-command payload shape validation; errors accumulate in `errs`.
4761 if self.op.command == constants.OOB_HEALTH:
4762 if not isinstance(result.payload, list):
4763 errs.append("command 'health' is expected to return a list but got %s" %
4764 type(result.payload))
4766 for item, status in result.payload:
4767 if status not in constants.OOB_STATUSES:
4768 errs.append("health item '%s' has invalid status '%s'" %
4771 if self.op.command == constants.OOB_POWER_STATUS:
4772 if not isinstance(result.payload, dict):
4773 errs.append("power-status is expected to return a dict but got %s" %
4774 type(result.payload))
4776 if self.op.command in [
4777 constants.OOB_POWER_ON,
4778 constants.OOB_POWER_OFF,
4779 constants.OOB_POWER_CYCLE,
4781 if result.payload is not None:
4782 errs.append("%s is expected to not return payload but got '%s'" %
4783 (self.op.command, result.payload))
4786 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4787 utils.CommaJoin(errs))
# _OsQuery: query implementation for cluster OS definitions. Diagnoses OSes
# across all online, vm-capable nodes and merges per-node variant/parameter/
# API-version data, keeping only values consistent across nodes.
# NOTE(review): embedded numbering gaps indicate elided lines in this copy
# (e.g. the "all_os = {}" and "data = {}" initializations, "continue"
# statements, and the trailing "if name in data]" of the final return).
4790 class _OsQuery(_QueryBase):
4791 FIELDS = query.OS_FIELDS
4793 def ExpandNames(self, lu):
4794 # Lock all nodes in shared mode
4795 # Temporary removal of locks, should be reverted later
4796 # TODO: reintroduce locks when they are lighter-weight
4797 lu.needed_locks = {}
4798 #self.share_locks[locking.LEVEL_NODE] = 1
4799 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4801 # The following variables interact with _QueryBase._GetNames
4803 self.wanted = self.names
4805 self.wanted = locking.ALL_SET
4807 self.do_locking = self.use_locking
4809 def DeclareLocks(self, lu, level):
4813 def _DiagnoseByOS(rlist):
4814 """Remaps a per-node return list into an a per-os per-node dictionary
4816 @param rlist: a map with node names as keys and OS objects as values
4819 @return: a dictionary with osnames as keys and as value another
4820 map, with nodes as keys and tuples of (path, status, diagnose,
4821 variants, parameters, api_versions) as values, eg::
4823 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4824 (/srv/..., False, "invalid api")],
4825 "node2": [(/srv/..., True, "", [], [])]}
4830 # we build here the list of nodes that didn't fail the RPC (at RPC
4831 # level), so that nodes with a non-responding node daemon don't
4832 # make all OSes invalid
4833 good_nodes = [node_name for node_name in rlist
4834 if not rlist[node_name].fail_msg]
4835 for node_name, nr in rlist.items():
4836 if nr.fail_msg or not nr.payload:
4838 for (name, path, status, diagnose, variants,
4839 params, api_versions) in nr.payload:
4840 if name not in all_os:
4841 # build a list of nodes for this os containing empty lists
4842 # for each node in node_list
4844 for nname in good_nodes:
4845 all_os[name][nname] = []
4846 # convert params from [name, help] to (name, help)
4847 params = [tuple(v) for v in params]
4848 all_os[name][node_name].append((path, status, diagnose,
4849 variants, params, api_versions))
4852 def _GetQueryData(self, lu):
4853 """Computes the list of nodes and their attributes.
4856 # Locking is not used
4857 assert not (compat.any(lu.glm.is_owned(level)
4858 for level in locking.LEVELS
4859 if level != locking.LEVEL_CLUSTER) or
4860 self.do_locking or self.use_locking)
4862 valid_nodes = [node.name
4863 for node in lu.cfg.GetAllNodesInfo().values()
4864 if not node.offline and node.vm_capable]
4865 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4866 cluster = lu.cfg.GetClusterInfo()
4870 for (os_name, os_data) in pol.items():
4871 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4872 hidden=(os_name in cluster.hidden_os),
4873 blacklisted=(os_name in cluster.blacklisted_os))
4877 api_versions = set()
# First node seeds the sets; subsequent nodes intersect, so only values
# common to every node survive.
4879 for idx, osl in enumerate(os_data.values()):
4880 info.valid = bool(info.valid and osl and osl[0][1])
4884 (node_variants, node_params, node_api) = osl[0][3:6]
4887 variants.update(node_variants)
4888 parameters.update(node_params)
4889 api_versions.update(node_api)
4891 # Filter out inconsistent values
4892 variants.intersection_update(node_variants)
4893 parameters.intersection_update(node_params)
4894 api_versions.intersection_update(node_api)
4896 info.variants = list(variants)
4897 info.parameters = list(parameters)
4898 info.api_versions = list(api_versions)
4900 data[os_name] = info
4902 # Prepare data in requested order
4903 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
# LUOsDiagnose: thin LU wrapper around _OsQuery; builds the legacy query
# filter (hiding hidden/blacklisted/invalid OSes unless those fields were
# explicitly requested) and delegates to the query machinery.
# NOTE(review): some branch lines of _BuildFilter (e.g. "if status_filter:",
# "elif name_filter:"/"else:" returns) are elided in this copy.
4907 class LUOsDiagnose(NoHooksLU):
4908 """Logical unit for OS diagnose/query.
4914 def _BuildFilter(fields, names):
4915 """Builds a filter for querying OSes.
4918 name_filter = qlang.MakeSimpleFilter("name", names)
4920 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4921 # respective field is not requested
4922 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4923 for fname in ["hidden", "blacklisted"]
4924 if fname not in fields]
4925 if "valid" not in fields:
4926 status_filter.append([qlang.OP_TRUE, "valid"])
4929 status_filter.insert(0, qlang.OP_AND)
4931 status_filter = None
4933 if name_filter and status_filter:
4934 return [qlang.OP_AND, name_filter, status_filter]
4938 return status_filter
4940 def CheckArguments(self):
4941 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4942 self.op.output_fields, False)
4944 def ExpandNames(self):
4945 self.oq.ExpandNames(self)
4947 def Exec(self, feedback_fn):
4948 return self.oq.OldStyleQuery(self)
# LUNodeRemove: removes a node from the cluster. Refuses to remove the
# master or any node still hosting instances; runs post hooks on the node
# before removal and cleans up /etc/hosts and ancillary files afterwards.
# NOTE(review): embedded numbering gaps indicate elided lines in this copy
# (e.g. BuildHooksEnv's return dict opener, ExpandNames, try/except around
# all_nodes.remove, and the hosts-modify argument continuation at 5040).
4951 class LUNodeRemove(LogicalUnit):
4952 """Logical unit for removing a node.
4955 HPATH = "node-remove"
4956 HTYPE = constants.HTYPE_NODE
4958 def BuildHooksEnv(self):
4963 "OP_TARGET": self.op.node_name,
4964 "NODE_NAME": self.op.node_name,
4967 def BuildHooksNodes(self):
4968 """Build hooks nodes.
4970 This doesn't run on the target node in the pre phase as a failed
4971 node would then be impossible to remove.
4974 all_nodes = self.cfg.GetNodeList()
4976 all_nodes.remove(self.op.node_name)
4979 return (all_nodes, all_nodes)
4981 def CheckPrereq(self):
4982 """Check prerequisites.
4985 - the node exists in the configuration
4986 - it does not have primary or secondary instances
4987 - it's not the master
4989 Any errors are signaled by raising errors.OpPrereqError.
4992 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4993 node = self.cfg.GetNodeInfo(self.op.node_name)
4994 assert node is not None
4996 masternode = self.cfg.GetMasterNode()
4997 if node.name == masternode:
4998 raise errors.OpPrereqError("Node is the master node, failover to another"
4999 " node is required", errors.ECODE_INVAL)
5001 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5002 if node.name in instance.all_nodes:
5003 raise errors.OpPrereqError("Instance %s is still running on the node,"
5004 " please remove first" % instance_name,
5006 self.op.node_name = node.name
5009 def Exec(self, feedback_fn):
5010 """Removes the node from the cluster.
5014 logging.info("Stopping the node daemon and removing configs from node %s",
5017 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5019 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5022 # Promote nodes to master candidate as needed
5023 _AdjustCandidatePool(self, exceptions=[node.name])
5024 self.context.RemoveNode(node.name)
5026 # Run post hooks on the node before it's removed
5027 _RunPostHook(self, node.name)
5029 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5030 msg = result.fail_msg
# Remote cleanup failures are non-fatal: the node is already gone from the
# configuration at this point, so only warn.
5032 self.LogWarning("Errors encountered on the remote node while leaving"
5033 " the cluster: %s", msg)
5035 # Remove node from our /etc/hosts
5036 if self.cfg.GetClusterInfo().modify_etc_hosts:
5037 master_node = self.cfg.GetMasterNode()
5038 result = self.rpc.call_etc_hosts_modify(master_node,
5039 constants.ETC_HOSTS_REMOVE,
5041 result.Raise("Can't update hosts file with new host data")
5042 _RedistributeAncillaryFiles(self)
# _NodeQuery: query implementation for nodes. Gathers live data (node_info
# RPC), instance mappings, OOB support and group info depending on the
# requested field set, then wraps everything in query.NodeQueryData.
# NOTE(review): several else-branches and fallback initializations (e.g.
# "live_data = None", "oob_support = None", "groups = None") appear elided
# from this copy, per the gaps in the embedded line numbers.
5045 class _NodeQuery(_QueryBase):
5046 FIELDS = query.NODE_FIELDS
5048 def ExpandNames(self, lu):
5049 lu.needed_locks = {}
5050 lu.share_locks = _ShareAll()
5053 self.wanted = _GetWantedNodes(lu, self.names)
5055 self.wanted = locking.ALL_SET
# Locking is only needed when live (RPC-backed) data was requested.
5057 self.do_locking = (self.use_locking and
5058 query.NQ_LIVE in self.requested_data)
5061 # If any non-static field is requested we need to lock the nodes
5062 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5064 def DeclareLocks(self, lu, level):
5067 def _GetQueryData(self, lu):
5068 """Computes the list of nodes and their attributes.
5071 all_info = lu.cfg.GetAllNodesInfo()
5073 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5075 # Gather data as requested
5076 if query.NQ_LIVE in self.requested_data:
5077 # filter out non-vm_capable nodes
5078 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5080 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5081 [lu.cfg.GetHypervisorType()])
# Nodes whose RPC failed or returned nothing are simply left out.
5082 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5083 for (name, nresult) in node_data.items()
5084 if not nresult.fail_msg and nresult.payload)
5088 if query.NQ_INST in self.requested_data:
5089 node_to_primary = dict([(name, set()) for name in nodenames])
5090 node_to_secondary = dict([(name, set()) for name in nodenames])
5092 inst_data = lu.cfg.GetAllInstancesInfo()
5094 for inst in inst_data.values():
5095 if inst.primary_node in node_to_primary:
5096 node_to_primary[inst.primary_node].add(inst.name)
5097 for secnode in inst.secondary_nodes:
5098 if secnode in node_to_secondary:
5099 node_to_secondary[secnode].add(inst.name)
5101 node_to_primary = None
5102 node_to_secondary = None
5104 if query.NQ_OOB in self.requested_data:
5105 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5106 for name, node in all_info.iteritems())
5110 if query.NQ_GROUP in self.requested_data:
5111 groups = lu.cfg.GetAllNodeGroupsInfo()
5115 return query.NodeQueryData([all_info[name] for name in nodenames],
5116 live_data, lu.cfg.GetMasterNode(),
5117 node_to_primary, node_to_secondary, groups,
5118 oob_support, lu.cfg.GetClusterInfo())
# LUNodeQuery: thin LU wrapper delegating node queries to _NodeQuery.
# NOTE(review): class attributes between the embedded lines 5125 and 5128
# (presumably "REQ_BGL = False") appear elided from this copy.
5121 class LUNodeQuery(NoHooksLU):
5122 """Logical unit for querying nodes.
5125 # pylint: disable=W0142
5128 def CheckArguments(self):
5129 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5130 self.op.output_fields, self.op.use_locking)
5132 def ExpandNames(self):
5133 self.nq.ExpandNames(self)
5135 def DeclareLocks(self, level):
5136 self.nq.DeclareLocks(self, level)
5138 def Exec(self, feedback_fn):
5139 return self.nq.OldStyleQuery(self)
# LUNodeQueryvols: lists LVM volumes on the selected nodes via the
# node_volumes RPC and formats the requested fields per volume, mapping
# volumes back to owning instances where possible.
# NOTE(review): several lines are elided in this copy (e.g. the "output = []"
# initializer, the "vg"/"phys"/"name" field assignments and the final
# "return output"), per the gaps in the embedded line numbers.
5142 class LUNodeQueryvols(NoHooksLU):
5143 """Logical unit for getting volumes on node(s).
# "node" is the only static field; everything else comes from the RPC.
5147 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5148 _FIELDS_STATIC = utils.FieldSet("node")
5150 def CheckArguments(self):
5151 _CheckOutputFields(static=self._FIELDS_STATIC,
5152 dynamic=self._FIELDS_DYNAMIC,
5153 selected=self.op.output_fields)
5155 def ExpandNames(self):
5156 self.share_locks = _ShareAll()
5157 self.needed_locks = {}
5159 if not self.op.nodes:
5160 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5162 self.needed_locks[locking.LEVEL_NODE] = \
5163 _GetWantedNodes(self, self.op.nodes)
5165 def Exec(self, feedback_fn):
5166 """Computes the list of nodes and their attributes.
5169 nodenames = self.owned_locks(locking.LEVEL_NODE)
5170 volumes = self.rpc.call_node_volumes(nodenames)
5172 ilist = self.cfg.GetAllInstancesInfo()
5173 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5176 for node in nodenames:
5177 nresult = volumes[node]
5180 msg = nresult.fail_msg
# A failing node is skipped with a warning rather than aborting the query.
5182 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5185 node_vols = sorted(nresult.payload,
5186 key=operator.itemgetter("dev"))
5188 for vol in node_vols:
5190 for field in self.op.output_fields:
5193 elif field == "phys":
5197 elif field == "name":
5199 elif field == "size":
5200 val = int(float(vol["size"]))
5201 elif field == "instance":
# "-" marks a volume not attached to any known instance disk.
5202 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5204 raise errors.ParameterError(field)
5205 node_output.append(str(val))
5207 output.append(node_output)
# LUNodeQueryStorage: lists storage units of a given type on the selected
# nodes via the storage_list RPC; the SF_NAME field is always fetched so the
# results can be sorted, while SF_NODE/SF_TYPE are synthesized locally.
# NOTE(review): the tail of Exec (row output append and return) and some
# else/continue lines are elided in this copy, per the gaps in the embedded
# line numbers (5286->5295).
5212 class LUNodeQueryStorage(NoHooksLU):
5213 """Logical unit for getting information on storage units on node(s).
5216 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5219 def CheckArguments(self):
5220 _CheckOutputFields(static=self._FIELDS_STATIC,
5221 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5222 selected=self.op.output_fields)
5224 def ExpandNames(self):
5225 self.share_locks = _ShareAll()
5226 self.needed_locks = {}
5229 self.needed_locks[locking.LEVEL_NODE] = \
5230 _GetWantedNodes(self, self.op.nodes)
5232 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5234 def Exec(self, feedback_fn):
5235 """Computes the list of nodes and their attributes.
5238 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5240 # Always get name to sort by
5241 if constants.SF_NAME in self.op.output_fields:
5242 fields = self.op.output_fields[:]
5244 fields = [constants.SF_NAME] + self.op.output_fields
5246 # Never ask for node or type as it's only known to the LU
5247 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5248 while extra in fields:
5249 fields.remove(extra)
5251 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5252 name_idx = field_idx[constants.SF_NAME]
5254 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5255 data = self.rpc.call_storage_list(self.nodes,
5256 self.op.storage_type, st_args,
5257 self.op.name, fields)
5261 for node in utils.NiceSort(self.nodes):
5262 nresult = data[node]
5266 msg = nresult.fail_msg
# A failing node is skipped with a warning rather than aborting the query.
5268 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5271 rows = dict([(row[name_idx], row) for row in nresult.payload])
5273 for name in utils.NiceSort(rows.keys()):
5278 for field in self.op.output_fields:
5279 if field == constants.SF_NODE:
5281 elif field == constants.SF_TYPE:
5282 val = self.op.storage_type
5283 elif field in field_idx:
5284 val = row[field_idx[field]]
5286 raise errors.ParameterError(field)
5295 class _InstanceQuery(_QueryBase):
# Query implementation for instances; plugs into the generic _QueryBase
# machinery (ExpandNames/DeclareLocks/_GetQueryData protocol).
5296 FIELDS = query.INSTANCE_FIELDS
5298 def ExpandNames(self, lu):
# Declare locks on behalf of the owning LU; all shared (read-only query).
5299 lu.needed_locks = {}
5300 lu.share_locks = _ShareAll()
# NOTE(review): the next two assignments look like the arms of an elided
# "if self.names: ... else: ..." — confirm against the full file.
5303 self.wanted = _GetWantedInstances(lu, self.names)
5305 self.wanted = locking.ALL_SET
# Real locking is only needed when live data is requested from the nodes.
5307 self.do_locking = (self.use_locking and
5308 query.IQ_LIVE in self.requested_data)
5310 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5311 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5312 lu.needed_locks[locking.LEVEL_NODE] = []
5313 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
# Group locks are only needed when per-instance node data is requested.
5315 self.do_grouplocks = (self.do_locking and
5316 query.IQ_NODES in self.requested_data)
5318 def DeclareLocks(self, lu, level):
5320 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5321 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5323 # Lock all groups used by instances optimistically; this requires going
5324 # via the node before it's locked, requiring verification later on
5325 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5327 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5328 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5329 elif level == locking.LEVEL_NODE:
5330 lu._LockInstancesNodes() # pylint: disable=W0212
# Verify that the optimistically-taken group locks still match reality.
5333 def _CheckGroupLocks(lu):
5334 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5335 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5337 # Check if node groups for locked instances are still correct
5338 for instance_name in owned_instances:
5339 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5341 def _GetQueryData(self, lu):
5342 """Computes the list of instances and their attributes.
5345 if self.do_grouplocks:
5346 self._CheckGroupLocks(lu)
5348 cluster = lu.cfg.GetClusterInfo()
5349 all_info = lu.cfg.GetAllInstancesInfo()
5351 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5353 instance_list = [all_info[name] for name in instance_names]
# All nodes touched by any selected instance, and the set of hypervisors in
# use — both needed for the live-data RPC below.
5354 nodes = frozenset(itertools.chain(*(inst.all_nodes
5355 for inst in instance_list)))
5356 hv_list = list(set([inst.hypervisor for inst in instance_list]))
# Instances found running on a node other than their configured primary.
5359 wrongnode_inst = set()
5361 # Gather data as requested
5362 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5364 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5366 result = node_data[name]
5368 # offline nodes will be in both lists
5369 assert result.fail_msg
5370 offline_nodes.append(name)
5372 bad_nodes.append(name)
5373 elif result.payload:
5374 for inst in result.payload:
5375 if inst in all_info:
5376 if all_info[inst].primary_node == name:
5377 live_data.update(result.payload)
5379 wrongnode_inst.add(inst)
5381 # orphan instance; we don't list it here as we don't
5382 # handle this case yet in the output of instance listing
5383 logging.warning("Orphan instance '%s' found on node %s",
5385 # else no instance is alive
# Per-instance disk usage, computed from configured disk sizes (no RPC).
5389 if query.IQ_DISKUSAGE in self.requested_data:
5390 gmi = ganeti.masterd.instance
5391 disk_usage = dict((inst.name,
5392 gmi.ComputeDiskSize(inst.disk_template,
5393 [{constants.IDISK_SIZE: disk.size}
5394 for disk in inst.disks]))
5395 for inst in instance_list)
# Console information is only available for instances seen running above.
5399 if query.IQ_CONSOLE in self.requested_data:
5401 for inst in instance_list:
5402 if inst.name in live_data:
5403 # Instance is running
5404 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5406 consinfo[inst.name] = None
5407 assert set(consinfo.keys()) == set(instance_names)
# Resolve node and group objects for the instances' nodes, if requested.
5411 if query.IQ_NODES in self.requested_data:
5412 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5414 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5415 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5416 for uuid in set(map(operator.attrgetter("group"),
5422 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5423 disk_usage, offline_nodes, bad_nodes,
5424 live_data, wrongnode_inst, consinfo,
5428 class LUQuery(NoHooksLU):
5429 """Query for resources/items of a certain kind.
# Thin dispatcher: resolves the concrete _QueryBase implementation for
# self.op.what once, then delegates every LU phase to it.
5432 # pylint: disable=W0142
5435 def CheckArguments(self):
5436 qcls = _GetQueryImplementation(self.op.what)
5438 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5440 def ExpandNames(self):
# Lock declaration is entirely up to the query implementation.
5441 self.impl.ExpandNames(self)
5443 def DeclareLocks(self, level):
5444 self.impl.DeclareLocks(self, level)
5446 def Exec(self, feedback_fn):
# Returns the new-style query result structure built by the implementation.
5447 return self.impl.NewStyleQuery(self)
5450 class LUQueryFields(NoHooksLU):
5451 """Query for resources/items of a certain kind.
# Returns only the field *definitions* for a resource kind; no cluster
# state is read, hence no locks are needed.
5454 # pylint: disable=W0142
5457 def CheckArguments(self):
5458 self.qcls = _GetQueryImplementation(self.op.what)
5460 def ExpandNames(self):
# Purely static metadata query: no locks required.
5461 self.needed_locks = {}
5463 def Exec(self, feedback_fn):
5464 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5467 class LUNodeModifyStorage(NoHooksLU):
5468 """Logical unit for modifying a storage volume on a node.
5473 def CheckArguments(self):
5474 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5476 storage_type = self.op.storage_type
# Only some storage types expose modifiable fields; reject the rest, and
# reject any requested change outside the modifiable set.
5479 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5481 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5482 " modified" % storage_type,
5485 diff = set(self.op.changes.keys()) - modifiable
5487 raise errors.OpPrereqError("The following fields can not be modified for"
5488 " storage units of type '%s': %r" %
5489 (storage_type, list(diff)),
5492 def ExpandNames(self):
# Exclusive lock on the single target node.
5493 self.needed_locks = {
5494 locking.LEVEL_NODE: self.op.node_name,
5497 def Exec(self, feedback_fn):
5498 """Modifies the storage volume on the target node.
5501 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5502 result = self.rpc.call_storage_modify(self.op.node_name,
5503 self.op.storage_type, st_args,
5504 self.op.name, self.op.changes)
# Any RPC failure aborts the job with an OpExecError via Raise().
5505 result.Raise("Failed to modify storage unit '%s' on %s" %
5506 (self.op.name, self.op.node_name))
5509 class LUNodeAdd(LogicalUnit):
5510 """Logical unit for adding node to the cluster.
5514 HTYPE = constants.HTYPE_NODE
# Node flags that are copied verbatim from the opcode onto the node object.
5515 _NFLAGS = ["master_capable", "vm_capable"]
5517 def CheckArguments(self):
5518 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5519 # validate/normalize the node name
5520 self.hostname = netutils.GetHostname(name=self.op.node_name,
5521 family=self.primary_ip_family)
5522 self.op.node_name = self.hostname.name
# Re-adding the master or re-adding with an explicit group are both invalid.
5524 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5525 raise errors.OpPrereqError("Cannot readd the master node",
5528 if self.op.readd and self.op.group:
5529 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5530 " being readded", errors.ECODE_INVAL)
5532 def BuildHooksEnv(self):
5535 This will run on all nodes before, and on all nodes + the new node after.
5539 "OP_TARGET": self.op.node_name,
5540 "NODE_NAME": self.op.node_name,
5541 "NODE_PIP": self.op.primary_ip,
5542 "NODE_SIP": self.op.secondary_ip,
5543 "MASTER_CAPABLE": str(self.op.master_capable),
5544 "VM_CAPABLE": str(self.op.vm_capable),
5547 def BuildHooksNodes(self):
5548 """Build hooks nodes.
5551 # Exclude added node
5552 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5553 post_nodes = pre_nodes + [self.op.node_name, ]
5555 return (pre_nodes, post_nodes)
5557 def CheckPrereq(self):
5558 """Check prerequisites.
5561 - the new node is not already in the config
5563 - its parameters (single/dual homed) matches the cluster
5565 Any errors are signaled by raising errors.OpPrereqError.
5569 hostname = self.hostname
5570 node = hostname.name
5571 primary_ip = self.op.primary_ip = hostname.ip
# Default the secondary IP to the primary (single-homed setup); this is
# impossible for IPv6 primaries since secondary IPs must be IPv4.
5572 if self.op.secondary_ip is None:
5573 if self.primary_ip_family == netutils.IP6Address.family:
5574 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5575 " IPv4 address must be given as secondary",
5577 self.op.secondary_ip = primary_ip
5579 secondary_ip = self.op.secondary_ip
5580 if not netutils.IP4Address.IsValid(secondary_ip):
5581 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5582 " address" % secondary_ip, errors.ECODE_INVAL)
# Membership check: plain add requires absence, re-add requires presence.
5584 node_list = cfg.GetNodeList()
5585 if not self.op.readd and node in node_list:
5586 raise errors.OpPrereqError("Node %s is already in the configuration" %
5587 node, errors.ECODE_EXISTS)
5588 elif self.op.readd and node not in node_list:
5589 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5592 self.changed_primary_ip = False
# Walk all existing nodes: for a re-add, verify IP config matches the old
# entry (secondary must be identical; a changed primary is remembered and
# applied in Exec); for other nodes, reject any IP collision.
5594 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5595 if self.op.readd and node == existing_node_name:
5596 if existing_node.secondary_ip != secondary_ip:
5597 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5598 " address configuration as before",
5600 if existing_node.primary_ip != primary_ip:
5601 self.changed_primary_ip = True
5605 if (existing_node.primary_ip == primary_ip or
5606 existing_node.secondary_ip == primary_ip or
5607 existing_node.primary_ip == secondary_ip or
5608 existing_node.secondary_ip == secondary_ip):
5609 raise errors.OpPrereqError("New node ip address(es) conflict with"
5610 " existing node %s" % existing_node.name,
5611 errors.ECODE_NOTUNIQUE)
5613 # After this 'if' block, None is no longer a valid value for the
5614 # _capable op attributes
# NOTE(review): the branch structure here is partly elided — the first loop
# presumably runs in the re-add case (inherit unspecified flags from the old
# node object), the second in the fresh-add case (default them to True).
5616 old_node = self.cfg.GetNodeInfo(node)
5617 assert old_node is not None, "Can't retrieve locked node %s" % node
5618 for attr in self._NFLAGS:
5619 if getattr(self.op, attr) is None:
5620 setattr(self.op, attr, getattr(old_node, attr))
5622 for attr in self._NFLAGS:
5623 if getattr(self.op, attr) is None:
5624 setattr(self.op, attr, True)
# A node re-added as non-vm_capable must not still hold instances.
5626 if self.op.readd and not self.op.vm_capable:
5627 pri, sec = cfg.GetNodeInstances(node)
5629 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5630 " flag set to false, but it already holds"
5631 " instances" % node,
5634 # check that the type of the node (single versus dual homed) is the
5635 # same as for the master
5636 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5637 master_singlehomed = myself.secondary_ip == myself.primary_ip
5638 newbie_singlehomed = secondary_ip == primary_ip
5639 if master_singlehomed != newbie_singlehomed:
5640 if master_singlehomed:
5641 raise errors.OpPrereqError("The master has no secondary ip but the"
5642 " new node has one",
5645 raise errors.OpPrereqError("The master has a secondary ip but the"
5646 " new node doesn't have one",
5649 # checks reachability
5650 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5651 raise errors.OpPrereqError("Node not reachable by ping",
5652 errors.ECODE_ENVIRON)
5654 if not newbie_singlehomed:
5655 # check reachability from my secondary ip to newbie's secondary ip
5656 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5657 source=myself.secondary_ip):
5658 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5659 " based ping to node daemon port",
5660 errors.ECODE_ENVIRON)
# Decide master-candidate status: only master-capable nodes can be promoted.
5667 if self.op.master_capable:
5668 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5670 self.master_candidate = False
# NOTE(review): re-add reuses the existing node object; a fresh add builds a
# new objects.Node in the looked-up group (if/else lines elided — confirm).
5673 self.new_node = old_node
5675 node_group = cfg.LookupNodeGroup(self.op.group)
5676 self.new_node = objects.Node(name=node,
5677 primary_ip=primary_ip,
5678 secondary_ip=secondary_ip,
5679 master_candidate=self.master_candidate,
5680 offline=False, drained=False,
# Validate optional per-node parameters and state overrides up front, so
# Exec cannot fail half-way on bad user input.
5683 if self.op.ndparams:
5684 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5686 if self.op.hv_state:
5687 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5689 if self.op.disk_state:
5690 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5692 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5693 # it a property on the base class.
# Protocol handshake: the new node's daemon must speak exactly our version.
5694 result = rpc.DnsOnlyRunner().call_version([node])[node]
5695 result.Raise("Can't get version information from node %s" % node)
5696 if constants.PROTOCOL_VERSION == result.payload:
5697 logging.info("Communication to node %s fine, sw version %s match",
5698 node, result.payload)
5700 raise errors.OpPrereqError("Version mismatch master version %s,"
5701 " node version %s" %
5702 (constants.PROTOCOL_VERSION, result.payload),
5703 errors.ECODE_ENVIRON)
5705 def Exec(self, feedback_fn):
5706 """Adds the new node to the cluster.
5709 new_node = self.new_node
5710 node = new_node.name
5712 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5715 # We adding a new node so we assume it's powered
5716 new_node.powered = True
5718 # for re-adds, reset the offline/drained/master-candidate flags;
5719 # we need to reset here, otherwise offline would prevent RPC calls
5720 # later in the procedure; this also means that if the re-add
5721 # fails, we are left with a non-offlined, broken node
5723 new_node.drained = new_node.offline = False # pylint: disable=W0201
5724 self.LogInfo("Readding a node, the offline/drained flags were reset")
5725 # if we demote the node, we do cleanup later in the procedure
5726 new_node.master_candidate = self.master_candidate
5727 if self.changed_primary_ip:
5728 new_node.primary_ip = self.op.primary_ip
5730 # copy the master/vm_capable flags
5731 for attr in self._NFLAGS:
5732 setattr(new_node, attr, getattr(self.op, attr))
5734 # notify the user about any possible mc promotion
5735 if new_node.master_candidate:
5736 self.LogInfo("Node will be a master candidate")
5738 if self.op.ndparams:
5739 new_node.ndparams = self.op.ndparams
5741 new_node.ndparams = {}
5743 if self.op.hv_state:
5744 new_node.hv_state_static = self.new_hv_state
5746 if self.op.disk_state:
5747 new_node.disk_state_static = self.new_disk_state
5749 # Add node to our /etc/hosts, and add key to known_hosts
5750 if self.cfg.GetClusterInfo().modify_etc_hosts:
5751 master_node = self.cfg.GetMasterNode()
5752 result = self.rpc.call_etc_hosts_modify(master_node,
5753 constants.ETC_HOSTS_ADD,
5756 result.Raise("Can't update hosts file with new host data")
# Multi-homed node: verify the declared secondary IP is really configured.
5758 if new_node.secondary_ip != new_node.primary_ip:
5759 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
# Ask the master's node daemon to verify ssh/hostname access to the newcomer.
5762 node_verify_list = [self.cfg.GetMasterNode()]
5763 node_verify_param = {
5764 constants.NV_NODELIST: ([node], {}),
5765 # TODO: do a node-net-test as well?
5768 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5769 self.cfg.GetClusterName())
5770 for verifier in node_verify_list:
5771 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5772 nl_payload = result[verifier].payload[constants.NV_NODELIST]
# A non-empty payload lists per-target verification failures.
5774 for failed in nl_payload:
5775 feedback_fn("ssh/hostname verification failed"
5776 " (checking from %s): %s" %
5777 (verifier, nl_payload[failed]))
5778 raise errors.OpExecError("ssh/hostname verification failed")
# NOTE(review): re-add vs fresh-add split below is partly elided — re-add
# goes through ReaddNode + cfg.Update (+ optional MC demotion), fresh add
# through AddNode; both redistribute ancillary files.  Confirm branch shape.
5781 _RedistributeAncillaryFiles(self)
5782 self.context.ReaddNode(new_node)
5783 # make sure we redistribute the config
5784 self.cfg.Update(new_node, feedback_fn)
5785 # and make sure the new node will not have old files around
5786 if not new_node.master_candidate:
5787 result = self.rpc.call_node_demote_from_mc(new_node.name)
5788 msg = result.fail_msg
# Demotion failure is non-fatal: warn only, the node stays functional.
5790 self.LogWarning("Node failed to demote itself from master"
5791 " candidate status: %s" % msg)
5793 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5794 additional_vm=self.op.vm_capable)
5795 self.context.AddNode(new_node, self.proc.GetECId())
5798 class LUNodeSetParams(LogicalUnit):
5799 """Modifies the parameters of a node.
5801 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5802 to the node role (as _ROLE_*)
5803 @cvar _R2F: a dictionary from node role to tuples of flags
5804 @cvar _FLAGS: a list of attribute names corresponding to the flags
5807 HPATH = "node-modify"
5808 HTYPE = constants.HTYPE_NODE
# The four mutually-exclusive node roles, encoded as small ints.
5810 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
# Flag-tuple -> role mapping; at most one of (mc, drained, offline) is True.
5812 (True, False, False): _ROLE_CANDIDATE,
5813 (False, True, False): _ROLE_DRAINED,
5814 (False, False, True): _ROLE_OFFLINE,
5815 (False, False, False): _ROLE_REGULAR,
5817 _R2F = dict((v, k) for k, v in _F2R.items())
5818 _FLAGS = ["master_candidate", "drained", "offline"]
5820 def CheckArguments(self):
5821 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
# Collect every modifiable attribute to validate the request as a whole:
# at least one change, and at most one role flag set to True.
5822 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5823 self.op.master_capable, self.op.vm_capable,
5824 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5826 if all_mods.count(None) == len(all_mods):
5827 raise errors.OpPrereqError("Please pass at least one modification",
5829 if all_mods.count(True) > 1:
5830 raise errors.OpPrereqError("Can't set the node into more than one"
5831 " state at the same time",
5834 # Boolean value that tells us whether we might be demoting from MC
5835 self.might_demote = (self.op.master_candidate is False or
5836 self.op.offline is True or
5837 self.op.drained is True or
5838 self.op.master_capable is False)
5840 if self.op.secondary_ip:
5841 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5842 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5843 " address" % self.op.secondary_ip,
# lock_all: auto-promotion after a demotion needs the whole node list;
# lock_instances: changing the secondary IP affects mirrored instances.
5846 self.lock_all = self.op.auto_promote and self.might_demote
5847 self.lock_instances = self.op.secondary_ip is not None
5849 def _InstanceFilter(self, instance):
5850 """Filter for getting affected instances.
# An instance is affected if it uses internal mirroring (e.g. DRBD) and
# has this node among its nodes.
5853 return (instance.disk_template in constants.DTS_INT_MIRROR and
5854 self.op.node_name in instance.all_nodes)
5856 def ExpandNames(self):
# NOTE(review): presumably "if self.lock_all: ... else: ..." — lines elided.
5858 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5860 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5862 # Since modifying a node can have severe effects on currently running
5863 # operations the resource lock is at least acquired in shared mode
5864 self.needed_locks[locking.LEVEL_NODE_RES] = \
5865 self.needed_locks[locking.LEVEL_NODE]
5867 # Get node resource and instance locks in shared mode; they are not used
5868 # for anything but read-only access
5869 self.share_locks[locking.LEVEL_NODE_RES] = 1
5870 self.share_locks[locking.LEVEL_INSTANCE] = 1
5872 if self.lock_instances:
5873 self.needed_locks[locking.LEVEL_INSTANCE] = \
5874 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5876 def BuildHooksEnv(self):
5879 This runs on the master node.
5883 "OP_TARGET": self.op.node_name,
5884 "MASTER_CANDIDATE": str(self.op.master_candidate),
5885 "OFFLINE": str(self.op.offline),
5886 "DRAINED": str(self.op.drained),
5887 "MASTER_CAPABLE": str(self.op.master_capable),
5888 "VM_CAPABLE": str(self.op.vm_capable),
5891 def BuildHooksNodes(self):
5892 """Build hooks nodes.
5895 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5898 def CheckPrereq(self):
5899 """Check prerequisites.
5901 This only checks the instance list against the existing names.
5904 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5906 if self.lock_instances:
5907 affected_instances = \
5908 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5910 # Verify instance locks
# The affected-instance set may have changed between ExpandNames and now;
# if so, force the caller to retry so locks match reality.
5911 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5912 wanted_instances = frozenset(affected_instances.keys())
5913 if wanted_instances - owned_instances:
5914 raise errors.OpPrereqError("Instances affected by changing node %s's"
5915 " secondary IP address have changed since"
5916 " locks were acquired, wanted '%s', have"
5917 " '%s'; retry the operation" %
5919 utils.CommaJoin(wanted_instances),
5920 utils.CommaJoin(owned_instances)),
5923 affected_instances = None
5925 if (self.op.master_candidate is not None or
5926 self.op.drained is not None or
5927 self.op.offline is not None):
5928 # we can't change the master's node flags
5929 if self.op.node_name == self.cfg.GetMasterNode():
5930 raise errors.OpPrereqError("The master role can be changed"
5931 " only via master-failover",
5934 if self.op.master_candidate and not node.master_capable:
5935 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5936 " it a master candidate" % node.name,
5939 if self.op.vm_capable is False:
5940 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5942 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5943 " the vm_capable flag" % node.name,
# Demoting an MC without auto-promote: ensure the candidate pool would
# still be large enough afterwards.
5946 if node.master_candidate and self.might_demote and not self.lock_all:
5947 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5948 # check if after removing the current node, we're missing master
5950 (mc_remaining, mc_should, _) = \
5951 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5952 if mc_remaining < mc_should:
5953 raise errors.OpPrereqError("Not enough master candidates, please"
5954 " pass auto promote option to allow"
5955 " promotion (--auto-promote or RAPI"
5956 " auto_promote=True)", errors.ECODE_STATE)
5958 self.old_flags = old_flags = (node.master_candidate,
5959 node.drained, node.offline)
5960 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5961 self.old_role = old_role = self._F2R[old_flags]
5963 # Check for ineffective changes
5964 for attr in self._FLAGS:
5965 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
5966 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5967 setattr(self.op, attr, None)
5969 # Past this point, any flag change to False means a transition
5970 # away from the respective state, as only real changes are kept
5972 # TODO: We might query the real power state if it supports OOB
5973 if _SupportsOob(self.cfg, node):
5974 if self.op.offline is False and not (node.powered or
5975 self.op.powered is True):
5976 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5977 " offline status can be reset") %
5978 self.op.node_name, errors.ECODE_STATE)
5979 elif self.op.powered is not None:
5980 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5981 " as it does not support out-of-band"
5982 " handling") % self.op.node_name,
5985 # If we're being deofflined/drained, we'll MC ourself if needed
5986 if (self.op.drained is False or self.op.offline is False or
5987 (self.op.master_capable and not node.master_capable)):
5988 if _DecideSelfPromotion(self):
5989 self.op.master_candidate = True
5990 self.LogInfo("Auto-promoting node to master candidate")
5992 # If we're no longer master capable, we'll demote ourselves from MC
5993 if self.op.master_capable is False and node.master_candidate:
5994 self.LogInfo("Demoting from master candidate")
5995 self.op.master_candidate = False
# Compute the new role from the (at most one) True flag; a remaining False
# flag means un-setting the current role back to regular.
5998 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5999 if self.op.master_candidate:
6000 new_role = self._ROLE_CANDIDATE
6001 elif self.op.drained:
6002 new_role = self._ROLE_DRAINED
6003 elif self.op.offline:
6004 new_role = self._ROLE_OFFLINE
6005 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6006 # False is still in new flags, which means we're un-setting (the
6008 new_role = self._ROLE_REGULAR
6009 else: # no new flags, nothing, keep old role
6012 self.new_role = new_role
6014 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6015 # Trying to transition out of offline status
6016 result = self.rpc.call_version([node.name])[node.name]
6018 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6019 " to report its version: %s" %
6020 (node.name, result.fail_msg),
6023 self.LogWarning("Transitioning node from offline to online state"
6024 " without using re-add. Please make sure the node"
6027 # When changing the secondary ip, verify if this is a single-homed to
6028 # multi-homed transition or vice versa, and apply the relevant
6030 if self.op.secondary_ip:
6031 # Ok even without locking, because this can't be changed by any LU
6032 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6033 master_singlehomed = master.secondary_ip == master.primary_ip
# Homing transitions are only allowed with --force and on the master itself.
6034 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6035 if self.op.force and node.name == master.name:
6036 self.LogWarning("Transitioning from single-homed to multi-homed"
6037 " cluster. All nodes will require a secondary ip.")
6039 raise errors.OpPrereqError("Changing the secondary ip on a"
6040 " single-homed cluster requires the"
6041 " --force option to be passed, and the"
6042 " target node to be the master",
6044 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6045 if self.op.force and node.name == master.name:
6046 self.LogWarning("Transitioning from multi-homed to single-homed"
6047 " cluster. Secondary IPs will have to be removed.")
6049 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6050 " same as the primary IP on a multi-homed"
6051 " cluster, unless the --force option is"
6052 " passed, and the target node is the"
6053 " master", errors.ECODE_INVAL)
6055 assert not (frozenset(affected_instances) -
6056 self.owned_locks(locking.LEVEL_INSTANCE))
# NOTE(review): presumably an offline/online split here (elided) — offline
# nodes refuse the change outright when instances depend on the IP.
6059 if affected_instances:
6060 msg = ("Cannot change secondary IP address: offline node has"
6061 " instances (%s) configured to use it" %
6062 utils.CommaJoin(affected_instances.keys()))
6063 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6065 # On online nodes, check that no instances are running, and that
6066 # the node has the new ip and we can reach it.
6067 for instance in affected_instances.values():
6068 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6069 msg="cannot change secondary ip")
6071 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6072 if master.name != node.name:
6073 # check reachability from master secondary ip to new secondary ip
6074 if not netutils.TcpPing(self.op.secondary_ip,
6075 constants.DEFAULT_NODED_PORT,
6076 source=master.secondary_ip):
6077 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6078 " based ping to node daemon port",
6079 errors.ECODE_ENVIRON)
# Validate the remaining optional parameter updates.
6081 if self.op.ndparams:
6082 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6083 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6084 self.new_ndparams = new_ndparams
6086 if self.op.hv_state:
6087 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6088 self.node.hv_state_static)
6090 if self.op.disk_state:
6091 self.new_disk_state = \
6092 _MergeAndVerifyDiskState(self.op.disk_state,
6093 self.node.disk_state_static)
6095 def Exec(self, feedback_fn):
# Apply the changes validated in CheckPrereq; "result" accumulates
# (attribute, value) pairs describing what was modified.
6100 old_role = self.old_role
6101 new_role = self.new_role
6105 if self.op.ndparams:
6106 node.ndparams = self.new_ndparams
6108 if self.op.powered is not None:
6109 node.powered = self.op.powered
6111 if self.op.hv_state:
6112 node.hv_state_static = self.new_hv_state
6114 if self.op.disk_state:
6115 node.disk_state_static = self.new_disk_state
6117 for attr in ["master_capable", "vm_capable"]:
6118 val = getattr(self.op, attr)
6120 setattr(node, attr, val)
6121 result.append((attr, str(val)))
6123 if new_role != old_role:
6124 # Tell the node to demote itself, if no longer MC and not offline
6125 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6126 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
# Demotion failure is non-fatal; log and continue with the role change.
6128 self.LogWarning("Node failed to demote itself: %s", msg)
6130 new_flags = self._R2F[new_role]
6131 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6133 result.append((desc, str(nf)))
6134 (node.master_candidate, node.drained, node.offline) = new_flags
6136 # we locked all nodes, we adjust the CP before updating this node
6138 _AdjustCandidatePool(self, [node.name])
6140 if self.op.secondary_ip:
6141 node.secondary_ip = self.op.secondary_ip
6142 result.append(("secondary_ip", self.op.secondary_ip))
6144 # this will trigger configuration file update, if needed
6145 self.cfg.Update(node, feedback_fn)
6147 # this will trigger job queue propagation or cleanup if the mc
# Exactly one of old/new role is MC: the candidate set changed, so the node
# must be re-registered with the job-queue context.
6149 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6150 self.context.ReaddNode(node)
6155 class LUNodePowercycle(NoHooksLU):
6156 """Powercycles a node.
6161 def CheckArguments(self):
6162 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
# Powercycling the master kills the master daemon itself; require --force.
6163 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6164 raise errors.OpPrereqError("The node is the master and the force"
6165 " parameter was not set",
6168 def ExpandNames(self):
6169 """Locking for PowercycleNode.
6171 This is a last-resort option and shouldn't block on other
6172 jobs. Therefore, we grab no locks.
6175 self.needed_locks = {}
6177 def Exec(self, feedback_fn):
# Fire-and-forget reboot request to the target node's daemon; the payload
# is whatever status message the node returned before rebooting.
6181 result = self.rpc.call_node_powercycle(self.op.node_name,
6182 self.cfg.GetHypervisorType())
6183 result.Raise("Failed to schedule the reboot")
6184 return result.payload
6187 class LUClusterQuery(NoHooksLU):
6188 """Query cluster configuration.
6193 def ExpandNames(self):
# Read-only snapshot of the in-memory config: no locks needed.
6194 self.needed_locks = {}
6196 def Exec(self, feedback_fn):
6197 """Return cluster config.
6200 cluster = self.cfg.GetClusterInfo()
6203 # Filter just for enabled hypervisors
6204 for os_name, hv_dict in cluster.os_hvp.items():
6205 os_hvp[os_name] = {}
6206 for hv_name, hv_params in hv_dict.items():
6207 if hv_name in cluster.enabled_hypervisors:
6208 os_hvp[os_name][hv_name] = hv_params
6210 # Convert ip_family to ip_version
6211 primary_ip_version = constants.IP4_VERSION
6212 if cluster.primary_ip_family == netutils.IP6Address.family:
6213 primary_ip_version = constants.IP6_VERSION
# Flat dict of cluster-wide settings, returned to the client as-is.
6216 "software_version": constants.RELEASE_VERSION,
6217 "protocol_version": constants.PROTOCOL_VERSION,
6218 "config_version": constants.CONFIG_VERSION,
6219 "os_api_version": max(constants.OS_API_VERSIONS),
6220 "export_version": constants.EXPORT_VERSION,
6221 "architecture": runtime.GetArchInfo(),
6222 "name": cluster.cluster_name,
6223 "master": cluster.master_node,
6224 "default_hypervisor": cluster.primary_hypervisor,
6225 "enabled_hypervisors": cluster.enabled_hypervisors,
# Only expose hvparams for hypervisors that are actually enabled.
6226 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6227 for hypervisor_name in cluster.enabled_hypervisors]),
6229 "beparams": cluster.beparams,
6230 "osparams": cluster.osparams,
6231 "ipolicy": cluster.ipolicy,
6232 "nicparams": cluster.nicparams,
6233 "ndparams": cluster.ndparams,
6234 "diskparams": cluster.diskparams,
6235 "candidate_pool_size": cluster.candidate_pool_size,
6236 "master_netdev": cluster.master_netdev,
6237 "master_netmask": cluster.master_netmask,
6238 "use_external_mip_script": cluster.use_external_mip_script,
6239 "volume_group_name": cluster.volume_group_name,
6240 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6241 "file_storage_dir": cluster.file_storage_dir,
6242 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6243 "maintain_node_health": cluster.maintain_node_health,
6244 "ctime": cluster.ctime,
6245 "mtime": cluster.mtime,
6246 "uuid": cluster.uuid,
6247 "tags": list(cluster.GetTags()),
6248 "uid_pool": cluster.uid_pool,
6249 "default_iallocator": cluster.default_iallocator,
6250 "reserved_lvs": cluster.reserved_lvs,
6251 "primary_ip_version": primary_ip_version,
6252 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6253 "hidden_os": cluster.hidden_os,
6254 "blacklisted_os": cluster.blacklisted_os,
6260 class LUClusterConfigQuery(NoHooksLU):
6261 """Return configuration values.
# Old-style (field-list) interface over _ClusterQuery; delegates all phases.
6266 def CheckArguments(self):
6267 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6269 def ExpandNames(self):
6270 self.cq.ExpandNames(self)
6272 def DeclareLocks(self, level):
6273 self.cq.DeclareLocks(self, level)
6275 def Exec(self, feedback_fn):
6276 result = self.cq.OldStyleQuery(self)
# The cluster is a singleton, so the query yields exactly one row.
6278 assert len(result) == 1
6283 class _ClusterQuery(_QueryBase):
6284 FIELDS = query.CLUSTER_FIELDS
6286 #: Do not sort (there is only one item)
6289 def ExpandNames(self, lu):
6290 lu.needed_locks = {}
6292 # The following variables interact with _QueryBase._GetNames
6293 self.wanted = locking.ALL_SET
6294 self.do_locking = self.use_locking
# Cluster queries read a config snapshot only; locking makes no sense here.
6297 raise errors.OpPrereqError("Can not use locking for cluster queries",
6300 def DeclareLocks(self, lu, level):
6303 def _GetQueryData(self, lu):
6304 """Computes the list of nodes and their attributes.
6307 # Locking is not used
6308 assert not (compat.any(lu.glm.is_owned(level)
6309 for level in locking.LEVELS
6310 if level != locking.LEVEL_CLUSTER) or
6311 self.do_locking or self.use_locking)
# Fetch only the pieces of data the requested fields actually need; the
# rest stays NotImplemented so accidental use fails loudly.
6313 if query.CQ_CONFIG in self.requested_data:
6314 cluster = lu.cfg.GetClusterInfo()
6316 cluster = NotImplemented
6318 if query.CQ_QUEUE_DRAINED in self.requested_data:
6319 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6321 drain_flag = NotImplemented
6323 if query.CQ_WATCHER_PAUSE in self.requested_data:
6324 watcher_pause = utils.ReadWatcherPauseFile(pathutils.WATCHER_PAUSEFILE)
6326 watcher_pause = NotImplemented
6328 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    if self.op.wait_for_sync:
      if not _WaitForSync(self, self.instance):
        raise errors.OpExecError("Some disks of the instance are degraded!")

    return disks_info
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occured, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
                                             False, idx)
      msg = result.fail_msg
      if msg:
        is_offline_secondary = (node in instance.secondary_nodes and
                                result.offline)
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not (ignore_secondaries or is_offline_secondary):
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
                                             True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks should be assembled
  @type force: boolean or None
  @param force: whether errors on secondary nodes are ignored
  @raise errors.OpExecError: if the disks cannot be assembled

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  if not disks_ok:
    # make sure nothing is left half-assembled before reporting failure
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")
class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      # forced shutdown skips the "instance must be down" check
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks should be shut down
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to shut down (or all, if None)
  @raise errors.OpPrereqError: if the instance is not down

  """
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)
6528 def _ExpandCheckDisks(instance, disks):
6529 """Return the instance disks selected by the disks list
6531 @type disks: list of L{objects.Disk} or None
6532 @param disks: selected disks
6533 @rtype: list of L{objects.Disk}
6534 @return: selected instance disks to act on
6538 return instance.disks
6540 if not set(disks).issubset(instance.disks):
6541 raise errors.ProgrammerError("Can only act on disks belonging to the"
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If the ignore_primary is false, errors on the primary node are
  ignored.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks should be shut down
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to shut down (or all, if None)
  @type ignore_primary: boolean
  @param ignore_primary: if true, failures on the primary node are
      not treated as errors
  @rtype: boolean
  @return: True if all disks were successfully shut down

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        # failures are fatal on the primary (unless ignored) and on
        # online secondaries
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False

  return all_result
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function check if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @rtype: integer
  @return: node current free memory
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  (_, _, (hv_info, )) = nodeinfo[node].payload

  free_mem = hv_info.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
  return free_mem
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in the all VGs.

  This function check if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function check if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    (_, (vg_info, ), _) = info.payload
    vg_free = vg_info.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Checks if nodes have enough physical CPUs

  This function checks if all given nodes have the needed number of
  physical CPUs. In case any node has less CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for CPU stats
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    (_, _, (hv_info, )) = info.payload
    num_cpus = hv_info.get("cpu_total", None)
    if not isinstance(num_cpus, int):
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
                                 " on node %s, result was '%s'" %
                                 (node, num_cpus), errors.ECODE_ENVIRON)
    if requested > num_cpus:
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
                                 "required" % (node, num_cpus, requested),
                                 errors.ECODE_NORES)
class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      objects.UpgradeBeParams(self.op.beparams)
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE_RES:
      self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)
      bep.update(self.op.beparams)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MINMEM], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = \
        self.rpc.call_instance_start(node_current,
                                     (instance, self.op.hvparams,
                                      self.op.beparams),
                                     self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)
class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      # soft/hard reboots are handled by the node daemon in one call
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      # full reboot: shutdown (if running), then start from scratch
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)
class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)
class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node,
                                             (inst, self.os_inst), True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      # always deactivate the disks, even if the OS scripts failed
      _ShutdownInstanceDisks(self, inst)
class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  _MODIFYABLE = frozenset([
    constants.IDISK_SIZE,
    constants.IDISK_MODE,
    ])

  # New or changed disk parameters may have different semantics
  assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
    constants.IDISK_ADOPT,

    # TODO: Implement support changing VG while recreating
    constants.IDISK_VG,
    constants.IDISK_METAVG,
    ]))

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    be_full = self.cfg.GetClusterInfo().FillBE(self.instance)

    # FIXME
    # The allocator should actually run in "relocate" mode, but current
    # allocators don't support relocating all the nodes of an instance at
    # the same time. As a workaround we use "allocate" mode, but this is
    # suboptimal for two reasons:
    # - The instance name passed to the allocator is present in the list of
    #   existing instances, so there could be a conflict within the
    #   internal structures of the allocator. This doesn't happen with the
    #   current allocators, but it's a liability.
    # - The allocator counts the resources used by the instance twice: once
    #   because the instance exists already, and once because it tries to
    #   allocate a new instance.
    # The allocator could choose some of the nodes on which the instance is
    # running, but that's not a problem. If the instance nodes are broken,
    # they should be already be marked as drained or offline, and hence
    # skipped by the allocator. If instance disks have been lost for other
    # reasons, then recreating the disks on the same nodes should be fine.
    disk_template = self.instance.disk_template
    spindle_use = be_full[constants.BE_SPINDLE_USE]
    req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
                                        disk_template=disk_template,
                                        tags=list(self.instance.GetTags()),
                                        os=self.instance.os,
                                        nics=[{}],
                                        vcpus=be_full[constants.BE_VCPUS],
                                        memory=be_full[constants.BE_MAXMEM],
                                        spindle_use=spindle_use,
                                        disks=[{constants.IDISK_SIZE: d.size,
                                                constants.IDISK_MODE: d.mode}
                                               for d in self.instance.disks],
                                        hypervisor=self.instance.hypervisor)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    assert req.RequiredNodes() == len(self.instance.all_nodes)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    self.op.nodes = ial.result
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))

  def CheckArguments(self):
    if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
      # Normalize and convert deprecated list of disk indices
      self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]

    duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
    if duplicates:
      raise errors.OpPrereqError("Some disks have been specified more than"
                                 " once: %s" % utils.CommaJoin(duplicates),
                                 errors.ECODE_INVAL)

    # We don't want _CheckIAllocatorOrNode selecting the default iallocator
    # when neither iallocator nor nodes are specified
    if self.op.iallocator or self.op.nodes:
      _CheckIAllocatorOrNode(self, "iallocator", "nodes")

    for (idx, params) in self.op.disks:
      utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
      unsupported = frozenset(params.keys()) - self._MODIFYABLE
      if unsupported:
        raise errors.OpPrereqError("Parameters for disk %s try to change"
                                   " unmodifyable parameter(s): %s" %
                                   (idx, utils.CommaJoin(unsupported)),
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      if self.op.iallocator:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.iallocator is not None
      assert not self.op.nodes
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock the primary group used by the instance optimistically; this
      # requires going via the node before it's locked, requiring
      # verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)

    elif level == locking.LEVEL_NODE:
      # If an allocator is used, then we lock all the nodes in the current
      # instance group, as we don't know yet which ones will be selected;
      # if we replace the nodes without using an allocator, locks are
      # already declared in ExpandNames; otherwise, we need to lock all the
      # instance nodes for disk re-creation
      if self.op.iallocator:
        assert not self.op.nodes
        assert not self.needed_locks[locking.LEVEL_NODE]
        assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1

        # Lock member nodes of the group of the primary node
        for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
          self.needed_locks[locking.LEVEL_NODE].extend(
            self.cfg.GetNodeGroup(group_uuid).members)
      elif not self.op.nodes:
        self._LockInstancesNodes(primary_only=False)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
             len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
             len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    if not self.op.iallocator:
      _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      # Node group locks are acquired only for the primary node (and only
      # when the allocator is used)
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
                               primary_only=True)

    # if we replace nodes *and* the old primary is offline, we don't
    # check the instance state
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                          msg="cannot recreate disks")

    if self.op.disks:
      self.disks = dict(self.op.disks)
    else:
      self.disks = dict((idx, {}) for idx in range(len(instance.disks)))

    maxidx = max(self.disks.keys())
    if maxidx >= len(instance.disks):
      raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
                                 errors.ECODE_INVAL)

    if ((self.op.nodes or self.op.iallocator) and
        sorted(self.disks.keys()) != range(len(instance.disks))):
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)

    self.instance = instance

    if self.op.iallocator:
      self._RunAllocator()
      # Release unneeded node and node resource locks
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
      _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    instance = self.instance

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    to_skip = []
    mods = [] # keeps track of needed changes

    for idx, disk in enumerate(instance.disks):
      try:
        changes = self.disks[idx]
      except KeyError:
        # Disk should not be recreated
        to_skip.append(idx)
        continue

      # update secondaries for disks, if needed
      if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
        # need to update the nodes and minors
        assert len(self.op.nodes) == 2
        assert len(disk.logical_id) == 6 # otherwise disk internals
                                         # have changed
        (_, _, old_port, _, _, old_secret) = disk.logical_id
        new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
        new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                  new_minors[0], new_minors[1], old_secret)
        assert len(disk.logical_id) == len(new_id)
      else:
        new_id = None

      mods.append((idx, new_id, changes))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id, changes in mods:
      disk = instance.disks[idx]
      if new_id is not None:
        assert disk.dev_type == constants.LD_DRBD8
        disk.logical_id = new_id
      if changes:
        disk.Update(size=changes.get(constants.IDISK_SIZE, None),
                    mode=changes.get(constants.IDISK_MODE, None))

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    # All touched nodes must be locked
    mylocks = self.owned_locks(locking.LEVEL_NODE)
    assert mylocks.issuperset(frozenset(instance.all_nodes))
    _CreateDisks(self, instance, to_skip=to_skip)
class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                        msg="cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      # resolve the new name and make sure it matches what was given
      hostname = netutils.GetHostname(name=new_name)
      if hostname.name != new_name:
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                     hostname.name)
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                   (hostname.name, self.op.new_name),
                                   errors.ECODE_INVAL)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    # for file-based disks the storage directory embeds the instance name,
    # so it must be renamed along with the instance
    rename_file_storage = False
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        # a failing OS rename script is only a warning: the instance has
        # already been renamed in the configuration
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      # always deactivate the disks again, even if the rename script failed
      _ShutdownInstanceDisks(self, inst)

    return inst.name
class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return (nl, nl_post)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    # the instance's nodes must all be locked at both levels before removal
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))
    assert not (set(instance.all_nodes) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Not owning correct locks"

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Remove an instance's disks, configuration entry and lock.

  Helper shared by the instance-removal code paths: deletes the block
  devices, drops the instance from the cluster configuration and
  schedules the instance lock for removal.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  disks_removed = _RemoveDisks(lu, instance, ignore_failures=ignore_failures)
  if not disks_removed:
    if ignore_failures:
      feedback_fn("Warning: can't remove instance's disks")
    else:
      raise errors.OpExecError("Can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)
  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Schedule the instance lock for removal
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  All of the actual work is delegated to an L{_InstanceQuery} helper
  built from the opcode parameters.

  """
  # pylint: disable=W0142

  def CheckArguments(self):
    # Build the query helper from the opcode parameters
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.iq = _InstanceQuery(name_filter, self.op.output_fields,
                             self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    # Return the results in the old-style (pre-query2) format
    return self.iq.OldStyleQuery(self)
class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    # a failover is a migration tasklet run with failover=True
    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout,
                                       ignore_ipolicy=self.op.ignore_ipolicy)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        # externally-mirrored templates can fail over to any node
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])
class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    # FIX: the second pair used to re-initialize LEVEL_NODE instead of
    # LEVEL_NODE_RES, leaving node-resource locks without a recalculation
    # mode (inconsistent with LUInstanceFailover.ExpandNames)
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    self._migrater = \
      TLMigrateInstance(self, self.op.instance_name,
                        cleanup=self.op.cleanup,
                        failover=False,
                        fallback=self.op.allow_failover,
                        allow_runtime_changes=self.op.allow_runtime_changes,
                        ignore_ipolicy=self.op.ignore_ipolicy)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        # externally-mirrored templates can migrate to any node
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])
class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    # only plain LVM and file-based disks can be moved by copying
    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)
    cluster = self.cfg.GetClusterInfo()
    group_info = self.cfg.GetNodeGroup(node.group)
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
    _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
                            ignore=self.op.ignore_ipolicy)

    if instance.admin_state == constants.ADMINST_UP:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MAXMEM],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existance
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # always give back the DRBD minors reserved for the new disks
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, (disk, instance),
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    pass

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    return {
      "NODE_NAME": self.op.node_name,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    # Prepare jobs for migration instances: one single-opcode job per
    # primary instance of the node
    allow_runtime_changes = self.op.allow_runtime_changes
    jobs = [
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
                                 mode=self.op.mode,
                                 live=self.op.live,
                                 iallocator=self.op.iallocator,
                                 target_node=self.op.target_node,
                                 allow_runtime_changes=allow_runtime_changes,
                                 ignore_ipolicy=self.op.ignore_ipolicy)]
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
      ]

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)
8055 class TLMigrateInstance(Tasklet):
8056 """Tasklet class for instance migration.
8059 @ivar live: whether the migration will be done live or non-live;
8060 this variable is initialized only after CheckPrereq has run
8061 @type cleanup: boolean
8062 @ivar cleanup: Whether we cleanup from a failed migration
8063 @type iallocator: string
8064 @ivar iallocator: The iallocator used to determine target_node
8065 @type target_node: string
8066 @ivar target_node: If given, the target_node to reallocate the instance to
8067 @type failover: boolean
8068 @ivar failover: Whether operation results in failover or migration
8069 @type fallback: boolean
8070 @ivar fallback: Whether fallback to failover is allowed if migration not
8072 @type ignore_consistency: boolean
8073 @ivar ignore_consistency: Whether we should ignore consistency between source
8075 @type shutdown_timeout: int
8076 @ivar shutdown_timeout: In case of failover timeout of the shutdown
8077 @type ignore_ipolicy: bool
8078 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8083 _MIGRATION_POLL_INTERVAL = 1 # seconds
8084 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8086 def __init__(self, lu, instance_name, cleanup=False,
8087 failover=False, fallback=False,
8088 ignore_consistency=False,
8089 allow_runtime_changes=True,
8090 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
8091 ignore_ipolicy=False):
8092 """Initializes this class.
8095 Tasklet.__init__(self, lu)
8098 self.instance_name = instance_name
8099 self.cleanup = cleanup
8100 self.live = False # will be overridden later
8101 self.failover = failover
8102 self.fallback = fallback
8103 self.ignore_consistency = ignore_consistency
8104 self.shutdown_timeout = shutdown_timeout
8105 self.ignore_ipolicy = ignore_ipolicy
8106 self.allow_runtime_changes = allow_runtime_changes
8108 def CheckPrereq(self):
8109 """Check prerequisites.
8111 This checks that the instance is in the cluster.
8114 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8115 instance = self.cfg.GetInstanceInfo(instance_name)
8116 assert instance is not None
8117 self.instance = instance
8118 cluster = self.cfg.GetClusterInfo()
8120 if (not self.cleanup and
8121 not instance.admin_state == constants.ADMINST_UP and
8122 not self.failover and self.fallback):
8123 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8124 " switching to failover")
8125 self.failover = True
8127 if instance.disk_template not in constants.DTS_MIRRORED:
8132 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8133 " %s" % (instance.disk_template, text),
8136 if instance.disk_template in constants.DTS_EXT_MIRROR:
8137 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8139 if self.lu.op.iallocator:
8140 self._RunAllocator()
8142 # We set set self.target_node as it is required by
8144 self.target_node = self.lu.op.target_node
8146 # Check that the target node is correct in terms of instance policy
8147 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8148 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8149 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8151 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8152 ignore=self.ignore_ipolicy)
8154 # self.target_node is already populated, either directly or by the
8156 target_node = self.target_node
8157 if self.target_node == instance.primary_node:
8158 raise errors.OpPrereqError("Cannot migrate instance %s"
8159 " to its primary (%s)" %
8160 (instance.name, instance.primary_node),
8163 if len(self.lu.tasklets) == 1:
8164 # It is safe to release locks only when we're the only tasklet
8166 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8167 keep=[instance.primary_node, self.target_node])
8170 secondary_nodes = instance.secondary_nodes
8171 if not secondary_nodes:
8172 raise errors.ConfigurationError("No secondary node but using"
8173 " %s disk template" %
8174 instance.disk_template)
8175 target_node = secondary_nodes[0]
8176 if self.lu.op.iallocator or (self.lu.op.target_node and
8177 self.lu.op.target_node != target_node):
8179 text = "failed over"
8182 raise errors.OpPrereqError("Instances with disk template %s cannot"
8183 " be %s to arbitrary nodes"
8184 " (neither an iallocator nor a target"
8185 " node can be passed)" %
8186 (instance.disk_template, text),
8188 nodeinfo = self.cfg.GetNodeInfo(target_node)
8189 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8190 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8192 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8193 ignore=self.ignore_ipolicy)
8195 i_be = cluster.FillBE(instance)
8197 # check memory requirements on the secondary node
8198 if (not self.cleanup and
8199 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8200 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8201 "migrating instance %s" %
8203 i_be[constants.BE_MINMEM],
8204 instance.hypervisor)
8206 self.lu.LogInfo("Not checking memory on the secondary node as"
8207 " instance will not be started")
8209 # check if failover must be forced instead of migration
8210 if (not self.cleanup and not self.failover and
8211 i_be[constants.BE_ALWAYS_FAILOVER]):
8212 self.lu.LogInfo("Instance configured to always failover; fallback"
8214 self.failover = True
8216 # check bridge existance
8217 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8219 if not self.cleanup:
8220 _CheckNodeNotDrained(self.lu, target_node)
8221 if not self.failover:
8222 result = self.rpc.call_instance_migratable(instance.primary_node,
8224 if result.fail_msg and self.fallback:
8225 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8227 self.failover = True
8229 result.Raise("Can't migrate, please use failover",
8230 prereq=True, ecode=errors.ECODE_STATE)
8232 assert not (self.failover and self.cleanup)
8234 if not self.failover:
8235 if self.lu.op.live is not None and self.lu.op.mode is not None:
8236 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8237 " parameters are accepted",
8239 if self.lu.op.live is not None:
8241 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8243 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8244 # reset the 'live' parameter to None so that repeated
8245 # invocations of CheckPrereq do not raise an exception
8246 self.lu.op.live = None
8247 elif self.lu.op.mode is None:
8248 # read the default value from the hypervisor
8249 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8250 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8252 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8254 # Failover is never live
8257 if not (self.failover or self.cleanup):
8258 remote_info = self.rpc.call_instance_info(instance.primary_node,
8260 instance.hypervisor)
8261 remote_info.Raise("Error checking instance on node %s" %
8262 instance.primary_node)
8263 instance_running = bool(remote_info.payload)
8264 if instance_running:
8265 self.current_mem = int(remote_info.payload["memory"])
8267 def _RunAllocator(self):
8268 """Run the allocator based on input opcode.
8271 # FIXME: add a self.ignore_ipolicy option
8272 req = iallocator.IAReqRelocate(name=self.instance_name,
8273 relocate_from=[self.instance.primary_node])
8274 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8276 ial.Run(self.lu.op.iallocator)
8279 raise errors.OpPrereqError("Can't compute nodes using"
8280 " iallocator '%s': %s" %
8281 (self.lu.op.iallocator, ial.info),
8283 self.target_node = ial.result[0]
8284 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8285 self.instance_name, self.lu.op.iallocator,
8286 utils.CommaJoin(ial.result))
8288 def _WaitUntilSync(self):
8289 """Poll with custom rpc for disk sync.
8291 This uses our own step-based rpc call.
8294 self.feedback_fn("* wait until resync is done")
8298 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8300 (self.instance.disks,
8303 for node, nres in result.items():
8304 nres.Raise("Cannot resync disks on node %s" % node)
8305 node_done, node_percent = nres.payload
8306 all_done = all_done and node_done
8307 if node_percent is not None:
8308 min_percent = min(min_percent, node_percent)
8310 if min_percent < 100:
8311 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8314 def _EnsureSecondary(self, node):
8315 """Demote a node to secondary.
8318 self.feedback_fn("* switching node %s to secondary mode" % node)
8320 for dev in self.instance.disks:
8321 self.cfg.SetDiskID(dev, node)
8323 result = self.rpc.call_blockdev_close(node, self.instance.name,
8324 self.instance.disks)
8325 result.Raise("Cannot change disk to secondary on node %s" % node)
8327 def _GoStandalone(self):
8328 """Disconnect from the network.
8331 self.feedback_fn("* changing into standalone mode")
8332 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8333 self.instance.disks)
8334 for node, nres in result.items():
8335 nres.Raise("Cannot disconnect disks node %s" % node)
8337 def _GoReconnect(self, multimaster):
8338 """Reconnect to the network.
8344 msg = "single-master"
8345 self.feedback_fn("* changing disks into %s mode" % msg)
8346 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8347 (self.instance.disks, self.instance),
8348 self.instance.name, multimaster)
8349 for node, nres in result.items():
8350 nres.Raise("Cannot change disks config on node %s" % node)
def _ExecCleanup(self):
  """Try to cleanup after a failed migration.

  The cleanup is done by:
    - check that the instance is running only on one node
      (and update the config if needed)
    - change disks on its secondary node to secondary
    - wait until disks are fully synchronized
    - disconnect from the network
    - change disks into single-master mode
    - wait again until disks are fully synchronized

  """
  instance = self.instance
  target_node = self.target_node
  source_node = self.source_node

  # check running on only one node
  self.feedback_fn("* checking where the instance actually runs"
                   " (if this hangs, the hypervisor might be in"
  ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
  for node, result in ins_l.items():
    result.Raise("Can't contact node %s" % node)

  runningon_source = instance.name in ins_l[source_node].payload
  runningon_target = instance.name in ins_l[target_node].payload

  # Running on both nodes is ambiguous and cannot be repaired automatically.
  if runningon_source and runningon_target:
    raise errors.OpExecError("Instance seems to be running on two nodes,"
                             " or the hypervisor is confused; you will have"
                             " to ensure manually that it runs only on one"
                             " and restart this operation")

  if not (runningon_source or runningon_target):
    raise errors.OpExecError("Instance does not seem to be running at all;"
                             " in this case it's safer to repair by"
                             " running 'gnt-instance stop' to ensure disk"
                             " shutdown, and then restarting it")

  if runningon_target:
    # the migration has actually succeeded, we need to update the config
    self.feedback_fn("* instance running on secondary node (%s),"
                     " updating config" % target_node)
    instance.primary_node = target_node
    self.cfg.Update(instance, self.feedback_fn)
    # The old primary is the node to demote back to a secondary role.
    demoted_node = source_node
  self.feedback_fn("* instance confirmed to be running on its"
                   " primary node (%s)" % source_node)
  demoted_node = target_node

  # Internally mirrored (DRBD) templates need the demoted node's disks
  # reset; errors during the sync wait are tolerated because a
  # standalone device cannot sync.
  if instance.disk_template in constants.DTS_INT_MIRROR:
    self._EnsureSecondary(demoted_node)
    self._WaitUntilSync()
  except errors.OpExecError:
    # we ignore here errors, since if the device is standalone, it
    # won't be able to sync

  self._GoStandalone()
  self._GoReconnect(False)
  self._WaitUntilSync()

  self.feedback_fn("* done")
def _RevertDiskStatus(self):
  """Try to revert the disk status after a failed migration.

  Best-effort: a failure to reconnect the drives is only logged as a
  warning, asking the operator to recover manually.

  """
  target_node = self.target_node
  # Externally mirrored templates need no DRBD role/network repair here.
  if self.instance.disk_template in constants.DTS_EXT_MIRROR:

  self._EnsureSecondary(target_node)
  self._GoStandalone()
  self._GoReconnect(False)
  self._WaitUntilSync()
  except errors.OpExecError, err:
    self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
                       " please try to recover the instance manually;"
                       " error '%s'" % str(err))
def _AbortMigration(self):
  """Call the hypervisor code to abort a started migration.

  Finalization is attempted on both the target and the source node;
  failures are logged but never raised, so disk-status reverting can
  still run afterwards.

  """
  instance = self.instance
  target_node = self.target_node
  source_node = self.source_node
  migration_info = self.migration_info

  # First tell the target node to abort/finalize its side of the migration.
  abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
  abort_msg = abort_result.fail_msg
  logging.error("Aborting migration failed on target node %s: %s",
                target_node, abort_msg)
  # Don't raise an exception here, as we still have to try to revert the
  # disk status, even if this step failed.

  # Then do the same on the source node (success=False, live as requested).
  abort_result = self.rpc.call_instance_finalize_migration_src(
    source_node, instance, False, self.live)
  abort_msg = abort_result.fail_msg
  logging.error("Aborting migration failed on source node %s: %s",
                source_node, abort_msg)
def _ExecMigration(self):
  """Migrate an instance.

  The migrate is done by:
    - change the disks into dual-master mode
    - wait until disks are fully synchronized again
    - migrate the instance
    - change disks on the new secondary node (the old primary) to secondary
    - wait until disks are fully synchronized
    - change disks into single-master mode

  """
  instance = self.instance
  target_node = self.target_node
  source_node = self.source_node

  # Check for hypervisor version mismatch and warn the user.
  nodeinfo = self.rpc.call_node_info([source_node, target_node],
                                     None, [self.instance.hypervisor])
  for ninfo in nodeinfo.values():
    ninfo.Raise("Unable to retrieve node information from node '%s'" %
  (_, _, (src_info, )) = nodeinfo[source_node].payload
  (_, _, (dst_info, )) = nodeinfo[target_node].payload

  # A version mismatch only triggers a warning, not an abort.
  if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
      (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
    src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
    dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
    if src_version != dst_version:
      self.feedback_fn("* warning: hypervisor version mismatch between"
                       " source (%s) and target (%s) node" %
                       (src_version, dst_version))

  self.feedback_fn("* checking disk consistency between source and target")
  for (idx, dev) in enumerate(instance.disks):
    if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
      raise errors.OpExecError("Disk %s is degraded or not fully"
                               " synchronized on target node,"
                               " aborting migration" % idx)

  # If the instance does not fit into the target's free memory, try to
  # balloon it down (only permitted when runtime changes are allowed).
  if self.current_mem > self.tgt_free_mem:
    if not self.allow_runtime_changes:
      raise errors.OpExecError("Memory ballooning not allowed and not enough"
                               " free memory to fit instance %s on target"
                               " node %s (have %dMB, need %dMB)" %
                               (instance.name, target_node,
                                self.tgt_free_mem, self.current_mem))
    self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
    rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
    rpcres.Raise("Cannot modify instance runtime memory")

  # First get the migration information from the remote node
  result = self.rpc.call_migration_info(source_node, instance)
  msg = result.fail_msg
  log_err = ("Failed fetching source migration information from %s: %s" %
  logging.error(log_err)
  raise errors.OpExecError(log_err)

  self.migration_info = migration_info = result.payload

  if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

  self.feedback_fn("* preparing %s to accept the instance" % target_node)
  result = self.rpc.call_accept_instance(target_node,
                                         self.nodes_ip[target_node])
  msg = result.fail_msg
  # A pre-migration failure is aborted and the disk status reverted.
  logging.error("Instance pre-migration failed, trying to revert"
                " disk status: %s", msg)
  self.feedback_fn("Pre-migration failed, aborting")
  self._AbortMigration()
  self._RevertDiskStatus()
  raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                           (instance.name, msg))

  self.feedback_fn("* migrating instance to %s" % target_node)
  result = self.rpc.call_instance_migrate(source_node, instance,
                                          self.nodes_ip[target_node],
  msg = result.fail_msg
  logging.error("Instance migration failed, trying to revert"
                " disk status: %s", msg)
  self.feedback_fn("Migration failed, aborting")
  self._AbortMigration()
  self._RevertDiskStatus()
  raise errors.OpExecError("Could not migrate instance %s: %s" %
                           (instance.name, msg))

  self.feedback_fn("* starting memory transfer")
  last_feedback = time.time()
  # Poll the hypervisor's migration status from the source node.
  result = self.rpc.call_instance_get_migration_status(source_node,
  msg = result.fail_msg
  ms = result.payload   # MigrationStatus instance
  if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
    logging.error("Instance migration failed, trying to revert"
                  " disk status: %s", msg)
    self.feedback_fn("Migration failed, aborting")
    self._AbortMigration()
    self._RevertDiskStatus()
    raise errors.OpExecError("Could not migrate instance %s: %s" %
                             (instance.name, msg))

  if result.payload.status != constants.HV_MIGRATION_ACTIVE:
    self.feedback_fn("* memory transfer complete")

  # Emit progress feedback at most once per feedback interval.
  if (utils.TimeoutExpired(last_feedback,
                           self._MIGRATION_FEEDBACK_INTERVAL) and
      ms.transferred_ram is not None):
    mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
    self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
    last_feedback = time.time()

  time.sleep(self._MIGRATION_POLL_INTERVAL)

  # Finalize on the source node; a failure here is fatal (no revert).
  result = self.rpc.call_instance_finalize_migration_src(source_node,
  msg = result.fail_msg
  logging.error("Instance migration succeeded, but finalization failed"
                " on the source node: %s", msg)
  raise errors.OpExecError("Could not finalize instance migration: %s" %

  instance.primary_node = target_node

  # distribute new instance config to the other nodes
  self.cfg.Update(instance, self.feedback_fn)

  result = self.rpc.call_instance_finalize_migration_dst(target_node,
  msg = result.fail_msg
  logging.error("Instance migration succeeded, but finalization failed"
                " on the target node: %s", msg)
  raise errors.OpExecError("Could not finalize instance migration: %s" %

  # For internally mirrored disks, demote the old primary back to
  # secondary and return to single-master mode.
  if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

  # If the instance's disk template is `rbd' and there was a successful
  # migration, unmap the device from the source node.
  if self.instance.disk_template == constants.DT_RBD:
    disks = _ExpandCheckDisks(instance, instance.disks)
    self.feedback_fn("* unmapping instance's disks from %s" % source_node)
    result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
    msg = result.fail_msg
    # Unmap failures are logged only; the migration itself has succeeded.
    logging.error("Migration was successful, but couldn't unmap the"
                  " block device %s on source node %s: %s",
                  disk.iv_name, source_node, msg)
    logging.error("You need to unmap the device %s manually on %s",
                  disk.iv_name, source_node)

  self.feedback_fn("* done")
def _ExecFailover(self):
  """Failover an instance.

  The failover is done by shutting it down on its present node and
  starting it on the secondary.

  """
  instance = self.instance
  primary_node = self.cfg.GetNodeInfo(instance.primary_node)

  source_node = instance.primary_node
  target_node = self.target_node

  # Disk consistency is only checked when the instance is marked up.
  if instance.admin_state == constants.ADMINST_UP:
    self.feedback_fn("* checking disk consistency between source and target")
    for (idx, dev) in enumerate(instance.disks):
      # for drbd, these are drbd over lvm
      if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
        # An offline primary makes degraded disks expected, not fatal.
        if primary_node.offline:
          self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                           (primary_node.name, idx, target_node))
        elif not self.ignore_consistency:
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover" % idx)
  self.feedback_fn("* not checking disk consistency as instance is not"

  self.feedback_fn("* shutting down instance on source node")
  logging.info("Shutting down instance %s on node %s",
               instance.name, source_node)

  result = self.rpc.call_instance_shutdown(source_node, instance,
                                           self.shutdown_timeout)
  msg = result.fail_msg
  # Shutdown failures are tolerated when consistency is ignored or the
  # primary is already offline; otherwise they abort the failover.
  if self.ignore_consistency or primary_node.offline:
    self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                       " proceeding anyway; please make sure node"
                       " %s is down; error details: %s",
                       instance.name, source_node, source_node, msg)
  raise errors.OpExecError("Could not shutdown instance %s on"
                           (instance.name, source_node, msg))

  self.feedback_fn("* deactivating the instance's disks on source node")
  if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
    raise errors.OpExecError("Can't shut down the instance's disks")

  instance.primary_node = target_node
  # distribute new instance config to the other nodes
  self.cfg.Update(instance, self.feedback_fn)

  # Only start the instance if it's marked as up
  if instance.admin_state == constants.ADMINST_UP:
    self.feedback_fn("* activating the instance's disks on target node %s" %
    logging.info("Starting instance %s on node %s",
                 instance.name, target_node)

    disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
                                         ignore_secondaries=True)
    # On failure the just-assembled disks are shut down again.
    _ShutdownInstanceDisks(self.lu, instance)
    raise errors.OpExecError("Can't activate the instance's disks")

    self.feedback_fn("* starting the instance on the target node %s" %
    result = self.rpc.call_instance_start(target_node, (instance, None, None),
    msg = result.fail_msg
    _ShutdownInstanceDisks(self.lu, instance)
    raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                             (instance.name, target_node, msg))
def Exec(self, feedback_fn):
  """Perform the migration.

  Dispatches to failover, cleanup or actual migration depending on the
  requested mode.

  """
  self.feedback_fn = feedback_fn
  self.source_node = self.instance.primary_node

  # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
  if self.instance.disk_template in constants.DTS_INT_MIRROR:
    self.target_node = self.instance.secondary_nodes[0]
    # Otherwise self.target_node has been populated either
    # directly, or through an iallocator.

  self.all_nodes = [self.source_node, self.target_node]
  # Map node name -> secondary IP for the two involved nodes.
  self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
                       in self.cfg.GetMultiNodeInfo(self.all_nodes))

  feedback_fn("Failover instance %s" % self.instance.name)
  self._ExecFailover()
  feedback_fn("Migrating instance %s" % self.instance.name)
  return self._ExecCleanup()
  return self._ExecMigration()
def _CreateBlockDev(lu, node, instance, device, force_create, info,
  """Wrapper around L{_CreateBlockDevInner}.

  This method annotates the root device first.

  """
  # Annotate the device with the instance's disk parameters before
  # delegating the actual creation to _CreateBlockDevInner.
  (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
  return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
def _CreateBlockDevInner(lu, node, instance, device, force_create,
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @attention: The device has to be annotated already.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be change to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passes to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  # Devices that must exist on secondaries force creation down the tree.
  if device.CreateOnSecondary():

  # Recurse into the children first, propagating force_create.
  for child in device.children:
    _CreateBlockDevInner(lu, node, instance, child, force_create,

  if not force_create:

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passes to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  # Resolve node-specific device IDs before issuing the RPC.
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  # Record the backend-assigned physical id on first creation.
  if device.physical_id is None:
    device.physical_id = result.payload
8839 def _GenerateUniqueNames(lu, exts):
8840 """Generate a suitable LV name.
8842 This will generate a logical volume name for the given instance.
8847 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8848 results.append("%s%s" % (new_id, val))
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  Builds the data and metadata LVs and the DRBD8 device on top of them,
  allocating a port and a shared secret from the cluster configuration.

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  # Data LV: full requested size, on the first volume group/name pair.
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]),
  # Metadata LV: fixed DRBD meta size, on the second pair.
  dev_meta = objects.Disk(dev_type=constants.LD_LV,
                          size=constants.DRBD_META_SIZE,
                          logical_id=(vgnames[1], names[1]),
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                          children=[dev_data, dev_meta],
                          iv_name=iv_name, params={})
# Per-template infix used when generating unique disk names in
# _GenerateDiskTemplate; templates without an entry yield None there.
_DISK_TEMPLATE_NAME_PREFIX = {
  constants.DT_PLAIN: "",
  constants.DT_RBD: ".rbd",
# Maps a disk template to the logical-device type used for its disks
# (consumed by _GenerateDiskTemplate when building Disk objects).
_DISK_TEMPLATE_DEVICE_TYPE = {
  constants.DT_PLAIN: constants.LD_LV,
  constants.DT_FILE: constants.LD_FILE,
  constants.DT_SHARED_FILE: constants.LD_FILE,
  constants.DT_BLOCK: constants.LD_BLOCKDEV,
  constants.DT_RBD: constants.LD_RBD,
def _GenerateDiskTemplate(
  lu, template_name, instance_name, primary_node, secondary_nodes,
  disk_info, file_storage_dir, file_driver, base_index,
  feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
  _req_shr_file_storage=opcodes.RequireSharedFileStorage):
  """Generate the entire disk layout for a given template type.

  Returns the list of L{objects.Disk} objects for the instance; DRBD8
  gets special handling (minor allocation, per-disk LV pairs), all
  other templates share a generic logical-id construction path.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  if template_name == constants.DT_DISKLESS:
  elif template_name == constants.DT_DRBD8:
    # DRBD needs exactly one secondary and two minors per disk.
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
    drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
    # Every disk gets a data/meta LV name pair.
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1])
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
    raise errors.ProgrammerError("Wrong template configuration")

    # File-based templates must have the corresponding storage enabled.
    if template_name == constants.DT_FILE:
    elif template_name == constants.DT_SHARED_FILE:
      _req_shr_file_storage()

    name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
    if name_prefix is None:
      names = _GenerateUniqueNames(lu, ["%s.disk%s" %
                                        (name_prefix, base_index + i)
                                        for i in range(disk_count)])

    # Per-template function building each disk's logical_id tuple.
    if template_name == constants.DT_PLAIN:
      def logical_id_fn(idx, _, disk):
        vg = disk.get(constants.IDISK_VG, vgname)
        return (vg, names[idx])
    elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
      lambda _, disk_index, disk: (file_driver,
                                   "%s/disk%d" % (file_storage_dir,
    elif template_name == constants.DT_BLOCK:
      lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
                                     disk[constants.IDISK_ADOPT])
    elif template_name == constants.DT_RBD:
      logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
      raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)

    dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      size = disk[constants.IDISK_SIZE]
      feedback_fn("* disk %s, size %s" %
                  (disk_index, utils.FormatUnit(size, "h")))
      disks.append(objects.Disk(dev_type=dev_type, size=size,
                                logical_id=logical_id_fn(idx, disk_index, disk),
                                iv_name="disk/%d" % disk_index,
                                mode=disk[constants.IDISK_MODE],
8987 def _GetInstanceInfoText(instance):
8988 """Compute that text that should be added to the disk's metadata.
8991 return "originstname+%s" % instance.name
8994 def _CalcEta(time_taken, written, total_size):
8995 """Calculates the ETA based on size written and total size.
8997 @param time_taken: The time taken so far
8998 @param written: amount written so far
8999 @param total_size: The total size of data to be written
9000 @return: The remaining time in seconds
9003 avg_time = time_taken / float(written)
9004 return (total_size - written) * avg_time
def _WipeDisks(lu, instance, disks=None):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @return: the success of the wipe

  """
  node = instance.primary_node

  # Default: wipe all of the instance's disks, starting at offset 0.
  disks = [(idx, disk, 0)
           for (idx, disk) in enumerate(instance.disks)]

  for (_, device, _) in disks:
    lu.cfg.SetDiskID(device, node)

  # Pause mirror synchronization while wiping so writes go through cleanly.
  logging.info("Pausing synchronization of disks of instance '%s'",
  result = lu.rpc.call_blockdev_pause_resume_sync(node,
                                                  (map(compat.snd, disks),
  result.Raise("Failed to pause disk synchronization on node '%s'" % node)

  for idx, success in enumerate(result.payload):
    logging.warn("Pausing synchronization of disk %s of instance '%s'"
                 " failed", idx, instance.name)

  for (idx, device, offset) in disks:
    # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
    # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
    int(min(constants.MAX_WIPE_CHUNK,
            device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))

    start_time = time.time()
    info_text = (" (from %s to %s)" %
                 (utils.FormatUnit(offset, "h"),
                  utils.FormatUnit(size, "h")))

    lu.LogInfo("* Wiping disk %s%s", idx, info_text)

    logging.info("Wiping disk %d for instance %s on node %s using"
                 " chunk size %s", idx, instance.name, node, wipe_chunk_size)

    # Wipe chunk by chunk, emitting an ETA at most once a minute.
    while offset < size:
      wipe_size = min(wipe_chunk_size, size - offset)

      logging.debug("Wiping disk %d, offset %s, chunk %s",
                    idx, offset, wipe_size)

      result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
      result.Raise("Could not wipe disk %d at offset %d for size %d" %
                   (idx, offset, wipe_size))

      if now - last_output >= 60:
        eta = _CalcEta(now - start_time, offset, size)
        lu.LogInfo(" - done: %.1f%% ETA: %s",
                   offset / float(size) * 100, utils.FormatSeconds(eta))

  # Best-effort resume: failures are only warned about.
  logging.info("Resuming synchronization of disks for instance '%s'",

  result = lu.rpc.call_blockdev_pause_resume_sync(node,
                                                  (map(compat.snd, disks),

  lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
                node, result.fail_msg)

  for idx, success in enumerate(result.payload):
    lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
                  " failed", idx, instance.name)
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes

  # File-based templates need their storage directory on the primary node.
  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
    logging.info("Creating disk %s for instance '%s'", idx, instance.name)
    # Creation is forced (and opened) only on the primary node.
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  ports_to_release = set()
  anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
  for (idx, device) in enumerate(anno_disks):
    # Either remove only on the override node, or on the whole node tree.
    edata = [(target_node, device)]
    edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      result = lu.rpc.call_blockdev_remove(node, disk)
      # Removal failures are warnings; the loop continues with other disks.
      lu.LogWarning("Could not remove disk %s on node %s,"
                    " continuing anyway: %s", idx, node, result.fail_msg)
      if not (result.offline and node != instance.primary_node):
    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      ports_to_release.add(device.logical_id[2])

  # TCP/UDP ports go back to the pool only when removal (mostly) succeeded.
  if all_result or ignore_failures:
    for port in ports_to_release:
      lu.cfg.AddTcpUdpPort(port)

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                  file_storage_dir, instance.primary_node, result.fail_msg)
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  @type disk_template: string
  @param disk_template: the instance's disk template
  @type disks: list of dicts
  @param disks: the computed disk definitions; each must contain
      L{constants.IDISK_VG} and L{constants.IDISK_SIZE}
  @rtype: dict
  @return: map of volume-group name to required size (MiB)
  @raise errors.ProgrammerError: if the disk template is unknown

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      # Accumulate per-VG: the lookup key must be the disk's actual VG
      # name, not the IDISK_VG constant (the latter would overwrite the
      # running total instead of adding to it).
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
9236 def _FilterVmNodes(lu, nodenames):
9237 """Filters out non-vm_capable nodes from a list.
9239 @type lu: L{LogicalUnit}
9240 @param lu: the logical unit for which we check
9241 @type nodenames: list
9242 @param nodenames: the list of nodes on which we should check
9244 @return: the list of vm-capable nodes
9247 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9248 return [name for name in nodenames if name not in vm_nodes]
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstract the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  # Only vm-capable nodes can validate hypervisor parameters.
  nodenames = _FilterVmNodes(lu, nodenames)

  cluster = lu.cfg.GetClusterInfo()
  # Merge cluster-level defaults with the supplied overrides.
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)

  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
  for node in nodenames:
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    # A false payload means the OS was not found on that node.
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
def _CreateInstanceAllocRequest(op, disks, nics, beparams):
  """Wrapper around IAReqInstanceAlloc.

  @param op: The instance opcode
  @param disks: The computed disks
  @param nics: The computed nics
  @param beparams: The full filled beparams
  @returns: A filled L{iallocator.IAReqInstanceAlloc}

  """
  spindle_use = beparams[constants.BE_SPINDLE_USE]
  # Translate opcode fields plus computed values into an iallocator request.
  return iallocator.IAReqInstanceAlloc(name=op.instance_name,
                                       disk_template=op.disk_template,
                                       vcpus=beparams[constants.BE_VCPUS],
                                       memory=beparams[constants.BE_MAXMEM],
                                       spindle_use=spindle_use,
                                       nics=[n.ToDict() for n in nics],
                                       hypervisor=op.hypervisor)
def _ComputeNics(op, cluster, default_ip, cfg, proc):
  """Computes the nics.

  @param op: The instance opcode
  @param cluster: Cluster configuration object
  @param default_ip: The default ip to assign
  @param cfg: An instance of the configuration object
  @param proc: The executer instance
  @returns: The build up nics

  """
  for idx, nic in enumerate(op.nics):
    nic_mode_req = nic.get(constants.INIC_MODE, None)
    nic_mode = nic_mode_req
    # "auto"/unset mode falls back to the cluster's default NIC mode.
    if nic_mode is None or nic_mode == constants.VALUE_AUTO:
      nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

    # in routed mode, for the first nic, the default ip is 'auto'
    if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
      default_ip_mode = constants.VALUE_AUTO
      default_ip_mode = constants.VALUE_NONE

    # ip validity checks
    ip = nic.get(constants.INIC_IP, default_ip_mode)
    if ip is None or ip.lower() == constants.VALUE_NONE:
    elif ip.lower() == constants.VALUE_AUTO:
      # "auto" IPs are only valid when name checking resolved a default.
      if not op.name_check:
        raise errors.OpPrereqError("IP address set to auto but name checks"
                                   " have been skipped",
      if not netutils.IPAddress.IsValid(ip):
        raise errors.OpPrereqError("Invalid IP address '%s'" % ip,

    # TODO: check the ip address for uniqueness
    if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
      raise errors.OpPrereqError("Routed nic mode requires an ip address",

    # MAC address verification
    mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
    if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
      mac = utils.NormalizeAndValidateMac(mac)

      # TODO: We need to factor this out
      cfg.ReserveMAC(mac, proc.GetECId())
    except errors.ReservationError:
      raise errors.OpPrereqError("MAC address %s already in use"
                                 " in cluster" % mac,
                                 errors.ECODE_NOTUNIQUE)

    # Build nic parameters
    link = nic.get(constants.INIC_LINK, None)
    if link == constants.VALUE_AUTO:
      link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]

    nicparams[constants.NIC_MODE] = nic_mode

    nicparams[constants.NIC_LINK] = link

    # Validate the fully-filled parameters before building the NIC object.
    check_params = cluster.SimpleFillNIC(nicparams)
    objects.NIC.CheckParameterSyntax(check_params)
    nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
def _ComputeDisks(op, default_vg):
  """Computes the instance disks.

  @param op: The instance opcode
  @param default_vg: The default_vg to assume

  @return: The computed disks

  """
  disks = []
  for disk in op.disks:
    mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
    if mode not in constants.DISK_ACCESS_SET:
      raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                 mode, errors.ECODE_INVAL)
    size = disk.get(constants.IDISK_SIZE, None)
    if size is None:
      raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
    try:
      size = int(size)
    except (TypeError, ValueError):
      raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                 errors.ECODE_INVAL)

    data_vg = disk.get(constants.IDISK_VG, default_vg)
    new_disk = {
      constants.IDISK_SIZE: size,
      constants.IDISK_MODE: mode,
      constants.IDISK_VG: data_vg,
      }
    # optional parameters are only carried over when explicitly given
    if constants.IDISK_METAVG in disk:
      new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
    if constants.IDISK_ADOPT in disk:
      new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
    disks.append(new_disk)

  return disks
def _ComputeFullBeParams(op, cluster):
  """Computes the full beparams.

  Backend parameters whose value is C{constants.VALUE_AUTO} are first
  replaced, in-place on the opcode, by the cluster-level defaults; the
  opcode's beparams are then upgraded and type-checked before being
  filled out with the cluster defaults.

  @param op: The instance opcode
  @param cluster: The cluster config object

  @return: The fully filled beparams

  """
  cluster_defaults = cluster.beparams[constants.PP_DEFAULT]
  auto_names = [name for (name, value) in op.beparams.iteritems()
                if value == constants.VALUE_AUTO]
  for name in auto_names:
    # resolve "auto" against the cluster-wide default value
    op.beparams[name] = cluster_defaults[name]
  objects.UpgradeBeParams(op.beparams)
  utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
  return cluster.SimpleFillBE(op.beparams)
class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  Handles the three creation modes visible below (plain create, local
  import and remote import) and the optional disk adoption path.

  """
  # Hook path and hooked-object type for this logical unit; used by the
  # hook machinery together with BuildHooksEnv/BuildHooksNodes below
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  def CheckArguments(self):
    """Check arguments.

    NOTE(review): this copy of the source appears to have lines elided
    (several branch bodies, ``else:`` lines and raise continuation
    arguments are missing); restore from upstream before relying on it.

    """
    # do not require name_check to ease forward/backward compatibility
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        # NOTE(review): the statements recording the adopt/no-adopt
        # choice appear to be elided here
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
      # NOTE(review): the error-code argument and an enclosing
      # "if has_adopt:" guard appear to be elided here
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
      # some disk templates only work via adoption of existing devices
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
    # remember whether this instance adopts pre-existing disks
    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
      # NOTE(review): the "else:" branch line appears to be elided here
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
        # NOTE(review): the guard and warning continuation for the
        # ignored-secondary case appear to be elided here
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",

        # NOTE(review): the enclosing "try:" line appears to be elided here
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",

      self.source_instance_name = \
        netutils.GetHostname(name=src_instance_name).name

      # NOTE(review): the final "else:" line of the mode dispatch appears
      # to be elided here
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    NOTE(review): some lines (``else:`` branches and continuation
    arguments) appear to be elided in this copy of the source.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
      # specifying a group on instance creation and then selecting nodes from
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
      # NOTE(review): an "else:" line (explicit node placement) appears
      # to be elided here
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidential modification)
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
        # NOTE(review): an "else:" line (explicit source node given)
        # appears to be elided here
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(pathutils.EXPORT_DIR, src_path)
9688 def _RunAllocator(self):
9689 """Run the allocator based on input opcode.
9692 req = _CreateInstanceAllocRequest(self.op, self.disks,
9693 self.nics, self.be_full)
9694 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9696 ial.Run(self.op.iallocator)
9699 raise errors.OpPrereqError("Can't compute nodes using"
9700 " iallocator '%s': %s" %
9701 (self.op.iallocator, ial.info),
9703 self.op.pnode = ial.result[0]
9704 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9705 self.op.instance_name, self.op.iallocator,
9706 utils.CommaJoin(ial.result))
9708 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9710 if req.RequiredNodes() == 2:
9711 self.op.snode = ial.result[1]
9713 def BuildHooksEnv(self):
9716 This runs on master, primary and secondary nodes of the instance.
9720 "ADD_MODE": self.op.mode,
9722 if self.op.mode == constants.INSTANCE_IMPORT:
9723 env["SRC_NODE"] = self.op.src_node
9724 env["SRC_PATH"] = self.op.src_path
9725 env["SRC_IMAGES"] = self.src_images
9727 env.update(_BuildInstanceHookEnv(
9728 name=self.op.instance_name,
9729 primary_node=self.op.pnode,
9730 secondary_nodes=self.secondaries,
9731 status=self.op.start,
9732 os_type=self.op.os_type,
9733 minmem=self.be_full[constants.BE_MINMEM],
9734 maxmem=self.be_full[constants.BE_MAXMEM],
9735 vcpus=self.be_full[constants.BE_VCPUS],
9736 nics=_NICListToTuple(self, self.nics),
9737 disk_template=self.op.disk_template,
9738 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9739 for d in self.disks],
9742 hypervisor_name=self.op.hypervisor,
9748 def BuildHooksNodes(self):
9749 """Build hooks nodes.
9752 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9755 def _ReadExportInfo(self):
9756 """Reads the export information from disk.
9758 It will override the opcode source node and path with the actual
9759 information, if these two were not specified before.
9761 @return: the export information
9764 assert self.op.mode == constants.INSTANCE_IMPORT
9766 src_node = self.op.src_node
9767 src_path = self.op.src_path
9769 if src_node is None:
9770 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9771 exp_list = self.rpc.call_export_list(locked_nodes)
9773 for node in exp_list:
9774 if exp_list[node].fail_msg:
9776 if src_path in exp_list[node].payload:
9778 self.op.src_node = src_node = node
9779 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
9783 raise errors.OpPrereqError("No export found for relative path %s" %
9784 src_path, errors.ECODE_INVAL)
9786 _CheckNodeOnline(self, src_node)
9787 result = self.rpc.call_export_info(src_node, src_path)
9788 result.Raise("No export or invalid export found in dir %s" % src_path)
9790 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9791 if not export_info.has_section(constants.INISECT_EXP):
9792 raise errors.ProgrammerError("Corrupted export config",
9793 errors.ECODE_ENVIRON)
9795 ei_version = export_info.get(constants.INISECT_EXP, "version")
9796 if (int(ei_version) != constants.EXPORT_VERSION):
9797 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9798 (ei_version, constants.EXPORT_VERSION),
9799 errors.ECODE_ENVIRON)
9802 def _ReadExportParams(self, einfo):
9803 """Use export parameters as defaults.
9805 In case the opcode doesn't specify (as in override) some instance
9806 parameters, then try to use them from the export information, if
9810 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9812 if self.op.disk_template is None:
9813 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9814 self.op.disk_template = einfo.get(constants.INISECT_INS,
9816 if self.op.disk_template not in constants.DISK_TEMPLATES:
9817 raise errors.OpPrereqError("Disk template specified in configuration"
9818 " file is not one of the allowed values:"
9820 " ".join(constants.DISK_TEMPLATES),
9823 raise errors.OpPrereqError("No disk template specified and the export"
9824 " is missing the disk_template information",
9827 if not self.op.disks:
9829 # TODO: import the disk iv_name too
9830 for idx in range(constants.MAX_DISKS):
9831 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9832 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9833 disks.append({constants.IDISK_SIZE: disk_sz})
9834 self.op.disks = disks
9835 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9836 raise errors.OpPrereqError("No disk info specified and the export"
9837 " is missing the disk information",
9840 if not self.op.nics:
9842 for idx in range(constants.MAX_NICS):
9843 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9845 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9846 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9853 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9854 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9856 if (self.op.hypervisor is None and
9857 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9858 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9860 if einfo.has_section(constants.INISECT_HYP):
9861 # use the export parameters but do not override the ones
9862 # specified by the user
9863 for name, value in einfo.items(constants.INISECT_HYP):
9864 if name not in self.op.hvparams:
9865 self.op.hvparams[name] = value
9867 if einfo.has_section(constants.INISECT_BEP):
9868 # use the parameters, without overriding
9869 for name, value in einfo.items(constants.INISECT_BEP):
9870 if name not in self.op.beparams:
9871 self.op.beparams[name] = value
9872 # Compatibility for the old "memory" be param
9873 if name == constants.BE_MEMORY:
9874 if constants.BE_MAXMEM not in self.op.beparams:
9875 self.op.beparams[constants.BE_MAXMEM] = value
9876 if constants.BE_MINMEM not in self.op.beparams:
9877 self.op.beparams[constants.BE_MINMEM] = value
9879 # try to read the parameters old style, from the main section
9880 for name in constants.BES_PARAMETERS:
9881 if (name not in self.op.beparams and
9882 einfo.has_option(constants.INISECT_INS, name)):
9883 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9885 if einfo.has_section(constants.INISECT_OSP):
9886 # use the parameters, without overriding
9887 for name, value in einfo.items(constants.INISECT_OSP):
9888 if name not in self.op.osparams:
9889 self.op.osparams[name] = value
9891 def _RevertToDefaults(self, cluster):
9892 """Revert the instance parameters to the default values.
9896 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9897 for name in self.op.hvparams.keys():
9898 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9899 del self.op.hvparams[name]
9901 be_defs = cluster.SimpleFillBE({})
9902 for name in self.op.beparams.keys():
9903 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9904 del self.op.beparams[name]
9906 nic_defs = cluster.SimpleFillNIC({})
9907 for nic in self.op.nics:
9908 for name in constants.NICS_PARAMETERS:
9909 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9912 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9913 for name in self.op.osparams.keys():
9914 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9915 del self.op.osparams[name]
9917 def _CalculateFileStorageDir(self):
9918 """Calculate final instance file storage dir.
9921 # file storage dir calculation/check
9922 self.instance_file_storage_dir = None
9923 if self.op.disk_template in constants.DTS_FILEBASED:
9924 # build the full file storage dir path
9927 if self.op.disk_template == constants.DT_SHARED_FILE:
9928 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9930 get_fsd_fn = self.cfg.GetFileStorageDir
9932 cfg_storagedir = get_fsd_fn()
9933 if not cfg_storagedir:
9934 raise errors.OpPrereqError("Cluster file storage dir not defined",
9936 joinargs.append(cfg_storagedir)
9938 if self.op.file_storage_dir is not None:
9939 joinargs.append(self.op.file_storage_dir)
9941 joinargs.append(self.op.instance_name)
9943 # pylint: disable=W0142
9944 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
  def CheckPrereq(self): # pylint: disable=R0914
    """Check prerequisites.

    NOTE(review): this copy of the source appears to have lines elided
    (guards such as "if pnode.offline:", "try:" lines, dict literal
    openers and continuation arguments are missing); restore from
    upstream before relying on it.

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)
      self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
      # NOTE(review): an "else:" line appears to be elided here
      self._old_instance_name = None

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if (self.op.hypervisor is None or
        self.op.hypervisor == constants.VALUE_AUTO):
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 (self.op.hypervisor, ",".join(enabled_hvs)),

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    self.be_full = _ComputeFullBeParams(self.op, cluster)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup; continuation argument appears to be elided below
    self.nics = _ComputeNics(self.op, cluster, self.hostname1.ip, self.cfg,

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = _ComputeDisks(self.op, default_vg)

    if self.op.mode == constants.INSTANCE_IMPORT:
      # NOTE(review): the disk_images accumulator initialisation appears
      # to be elided here
      for idx in range(len(self.disks)):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
          disk_images.append(False)
      self.src_images = disk_images

      if self.op.instance_name == self._old_instance_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    if self.op.iallocator is not None:
      self._RunAllocator()

    # Release all unneeded node locks
    _ReleaseLocks(self, locking.LEVEL_NODE,
                  keep=filter(None, [self.op.pnode, self.op.snode,
                                     self.op.src_node]))
    _ReleaseLocks(self, locking.LEVEL_NODE_RES,
                  keep=filter(None, [self.op.pnode, self.op.snode,
                                     self.op.src_node]))

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
      # NOTE(review): the offline/drained guard lines appear to be
      # elided around the two raises below
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

      snode = self.cfg.GetNodeInfo(self.op.snode)
      if pnode.group != snode.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"

    nodenames = [pnode.name] + self.secondaries

    # Verify instance specs
    spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
      # NOTE(review): the "ispec = {" opener appears to be elided here
      constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
      constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
      constants.ISPEC_DISK_COUNT: len(self.disks),
      constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
      constants.ISPEC_NIC_COUNT: len(self.nics),
      constants.ISPEC_SPINDLE_USE: spindle_use,

    group_info = self.cfg.GetNodeGroup(pnode.group)
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
    res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
    if not self.op.ignore_ipolicy and res:
      msg = ("Instance allocation to group %s (%s) violates policy: %s" %
             (pnode.group, group_info.name, utils.CommaJoin(res)))
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

    if not self.adopt_disks:
      if self.op.disk_template == constants.DT_RBD:
        # _CheckRADOSFreeSpace() is just a placeholder.
        # Any function that checks prerequisites can be placed here.
        # Check if there is enough space on the RADOS cluster.
        _CheckRADOSFreeSpace()
        # Check lv size requirements, if not adopting
        req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
        _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    #TODO(dynmem): use MINMEM for checking

    _CheckNodeFreeMemory(self, self.pnode.name,
                         "creating instance %s" % self.op.instance_name,
                         self.be_full[constants.BE_MAXMEM],
                         self.op.hypervisor)

    self.dry_run_result = list(nodenames)
10218 def Exec(self, feedback_fn):
10219 """Create and add the instance to the cluster.
10222 instance = self.op.instance_name
10223 pnode_name = self.pnode.name
10225 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10226 self.owned_locks(locking.LEVEL_NODE)), \
10227 "Node locks differ from node resource locks"
10229 ht_kind = self.op.hypervisor
10230 if ht_kind in constants.HTS_REQ_PORT:
10231 network_port = self.cfg.AllocatePort()
10233 network_port = None
10235 # This is ugly but we got a chicken-egg problem here
10236 # We can only take the group disk parameters, as the instance
10237 # has no disks yet (we are generating them right here).
10238 node = self.cfg.GetNodeInfo(pnode_name)
10239 nodegroup = self.cfg.GetNodeGroup(node.group)
10240 disks = _GenerateDiskTemplate(self,
10241 self.op.disk_template,
10242 instance, pnode_name,
10245 self.instance_file_storage_dir,
10246 self.op.file_driver,
10249 self.cfg.GetGroupDiskParams(nodegroup))
10251 iobj = objects.Instance(name=instance, os=self.op.os_type,
10252 primary_node=pnode_name,
10253 nics=self.nics, disks=disks,
10254 disk_template=self.op.disk_template,
10255 admin_state=constants.ADMINST_DOWN,
10256 network_port=network_port,
10257 beparams=self.op.beparams,
10258 hvparams=self.op.hvparams,
10259 hypervisor=self.op.hypervisor,
10260 osparams=self.op.osparams,
10264 for tag in self.op.tags:
10267 if self.adopt_disks:
10268 if self.op.disk_template == constants.DT_PLAIN:
10269 # rename LVs to the newly-generated names; we need to construct
10270 # 'fake' LV disks with the old data, plus the new unique_id
10271 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10273 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10274 rename_to.append(t_dsk.logical_id)
10275 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10276 self.cfg.SetDiskID(t_dsk, pnode_name)
10277 result = self.rpc.call_blockdev_rename(pnode_name,
10278 zip(tmp_disks, rename_to))
10279 result.Raise("Failed to rename adoped LVs")
10281 feedback_fn("* creating instance disks...")
10283 _CreateDisks(self, iobj)
10284 except errors.OpExecError:
10285 self.LogWarning("Device creation failed, reverting...")
10287 _RemoveDisks(self, iobj)
10289 self.cfg.ReleaseDRBDMinors(instance)
10292 feedback_fn("adding instance %s to cluster config" % instance)
10294 self.cfg.AddInstance(iobj, self.proc.GetECId())
10296 # Declare that we don't want to remove the instance lock anymore, as we've
10297 # added the instance to the config
10298 del self.remove_locks[locking.LEVEL_INSTANCE]
10300 if self.op.mode == constants.INSTANCE_IMPORT:
10301 # Release unused nodes
10302 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10304 # Release all nodes
10305 _ReleaseLocks(self, locking.LEVEL_NODE)
10308 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10309 feedback_fn("* wiping instance disks...")
10311 _WipeDisks(self, iobj)
10312 except errors.OpExecError, err:
10313 logging.exception("Wiping disks failed")
10314 self.LogWarning("Wiping instance disks failed (%s)", err)
10318 # Something is already wrong with the disks, don't do anything else
10320 elif self.op.wait_for_sync:
10321 disk_abort = not _WaitForSync(self, iobj)
10322 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10323 # make sure the disks are not degraded (still sync-ing is ok)
10324 feedback_fn("* checking mirrors status")
10325 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10330 _RemoveDisks(self, iobj)
10331 self.cfg.RemoveInstance(iobj.name)
10332 # Make sure the instance lock gets removed
10333 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10334 raise errors.OpExecError("There are some degraded disks for"
10337 # Release all node resource locks
10338 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10340 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10341 # we need to set the disks ID to the primary node, since the
10342 # preceding code might or might have not done it, depending on
10343 # disk template and other options
10344 for disk in iobj.disks:
10345 self.cfg.SetDiskID(disk, pnode_name)
10346 if self.op.mode == constants.INSTANCE_CREATE:
10347 if not self.op.no_install:
10348 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10349 not self.op.wait_for_sync)
10351 feedback_fn("* pausing disk sync to install instance OS")
10352 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10355 for idx, success in enumerate(result.payload):
10357 logging.warn("pause-sync of instance %s for disk %d failed",
10360 feedback_fn("* running the instance OS create scripts...")
10361 # FIXME: pass debug option from opcode to backend
10363 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10364 self.op.debug_level)
10366 feedback_fn("* resuming disk sync")
10367 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10370 for idx, success in enumerate(result.payload):
10372 logging.warn("resume-sync of instance %s for disk %d failed",
10375 os_add_result.Raise("Could not add os for instance %s"
10376 " on node %s" % (instance, pnode_name))
10379 if self.op.mode == constants.INSTANCE_IMPORT:
10380 feedback_fn("* running the instance OS import scripts...")
10384 for idx, image in enumerate(self.src_images):
10388 # FIXME: pass debug option from opcode to backend
10389 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10390 constants.IEIO_FILE, (image, ),
10391 constants.IEIO_SCRIPT,
10392 (iobj.disks[idx], idx),
10394 transfers.append(dt)
10397 masterd.instance.TransferInstanceData(self, feedback_fn,
10398 self.op.src_node, pnode_name,
10399 self.pnode.secondary_ip,
10401 if not compat.all(import_result):
10402 self.LogWarning("Some disks for instance %s on node %s were not"
10403 " imported successfully" % (instance, pnode_name))
10405 rename_from = self._old_instance_name
10407 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10408 feedback_fn("* preparing remote import...")
10409 # The source cluster will stop the instance before attempting to make
10410 # a connection. In some cases stopping an instance can take a long
10411 # time, hence the shutdown timeout is added to the connection
10413 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10414 self.op.source_shutdown_timeout)
10415 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10417 assert iobj.primary_node == self.pnode.name
10419 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10420 self.source_x509_ca,
10421 self._cds, timeouts)
10422 if not compat.all(disk_results):
10423 # TODO: Should the instance still be started, even if some disks
10424 # failed to import (valid for local imports, too)?
10425 self.LogWarning("Some disks for instance %s on node %s were not"
10426 " imported successfully" % (instance, pnode_name))
10428 rename_from = self.source_instance_name
10431 # also checked in the prereq part
10432 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10435 # Run rename script on newly imported instance
10436 assert iobj.name == instance
10437 feedback_fn("Running rename script for %s" % instance)
10438 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10440 self.op.debug_level)
10441 if result.fail_msg:
10442 self.LogWarning("Failed to run rename script for %s on node"
10443 " %s: %s" % (instance, pnode_name, result.fail_msg))
10445 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10448 iobj.admin_state = constants.ADMINST_UP
10449 self.cfg.Update(iobj, feedback_fn)
10450 logging.info("Starting instance %s on node %s", instance, pnode_name)
10451 feedback_fn("* starting instance...")
10452 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10454 result.Raise("Could not start instance")
10456 return list(iobj.all_nodes)
class LUInstanceMultiAlloc(NoHooksLU):
  """Allocates multiple instances at the same time.

  """
  # NOTE(review): several structural lines (list initialisations, "else:"
  # branches, guard conditions and some literal delimiters) appear to be
  # elided in this copy of the file; verify against upstream before editing.

  def CheckArguments(self):
    """Check arguments.

    """
    # Per-instance iallocators are rejected; placement is driven either by
    # the LU-level iallocator or by explicit pnode/snode on every instance.
    for inst in self.op.instances:
      if inst.iallocator is not None:
        raise errors.OpPrereqError("iallocator are not allowed to be set on"
                                   " instance objects", errors.ECODE_INVAL)
      nodes.append(bool(inst.pnode))
      if inst.disk_template in constants.DTS_INT_MIRROR:
        # Mirrored disk templates also need a secondary node.
        nodes.append(bool(inst.snode))

    has_nodes = compat.any(nodes)
    # all(nodes) XOR any(nodes): a mixture of with-nodes and without-nodes
    # instance specifications is rejected.
    if compat.all(nodes) ^ has_nodes:
      raise errors.OpPrereqError("There are instance objects providing"
                                 " pnode/snode while others do not",
                                 errors.ECODE_INVAL)

    if self.op.iallocator is None:
      # Fall back to the cluster-wide default iallocator, if one is set.
      default_iallocator = self.cfg.GetDefaultIAllocator()
      if default_iallocator and has_nodes:
        self.op.iallocator = default_iallocator
        # NOTE(review): this raise is presumably in an "else:" branch that
        # is elided in this copy.
        raise errors.OpPrereqError("No iallocator or nodes on the instances"
                                   " given and no cluster-wide default"
                                   " iallocator found; please specify either"
                                   " an iallocator or nodes on the instances"
                                   " or set a cluster-wide default iallocator",
                                   errors.ECODE_INVAL)

    # Duplicate instance names would collide in the cluster configuration.
    dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
    raise errors.OpPrereqError("There are duplicate instance names: %s" %
                               utils.CommaJoin(dups), errors.ECODE_INVAL)

  def ExpandNames(self):
    """Calculate the locks.

    """
    # All locks are shared with other LUs.
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.iallocator:
      # The allocator may place instances on any node, so lock everything.
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
    # Otherwise only the explicitly named nodes are locked.
    for inst in self.op.instances:
      inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
      nodeslist.append(inst.pnode)
      if inst.snode is not None:
        inst.snode = _ExpandNodeName(self.cfg, inst.snode)
        nodeslist.append(inst.snode)

    self.needed_locks[locking.LEVEL_NODE] = nodeslist
    # Lock resources of instance's primary and secondary nodes (copy to
    # prevent accidential modification)
    self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)

  def CheckPrereq(self):
    """Check prerequisite.

    """
    cluster = self.cfg.GetClusterInfo()
    default_vg = self.cfg.GetVGName()
    # Build one allocation request covering every requested instance.
    insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
                                         _ComputeNics(op, cluster, None,
                                                      self.cfg, self.proc),
                                         _ComputeFullBeParams(op, cluster))
             for op in self.op.instances]
    req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    # (allocatable, failed) pair as returned for a multi-alloc request.
    self.ia_result = ial.result

    if self.op.dry_run:
      self.dry_run_rsult = objects.FillDict(self._ConstructPartialResult(), {
        constants.JOB_IDS_KEY: [],

  def _ConstructPartialResult(self):
    """Contructs the partial result.

    """
    (allocatable, failed) = self.ia_result
    # NOTE(review): the "return {" opener of this dict literal is elided
    # in this copy.
      opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
        map(compat.fst, allocatable),
      opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,

  def Exec(self, feedback_fn):
    """Executes the opcode.

    """
    # Map instance name -> original opcode, so allocator results can be
    # matched back to the request that produced them.
    op2inst = dict((op.instance_name, op) for op in self.op.instances)
    (allocatable, failed) = self.ia_result

    for (name, nodes) in allocatable:
      op = op2inst.pop(name)
      # The allocator returns either (pnode, snode) or just (pnode,),
      # depending on the disk template.
      (op.pnode, op.snode) = nodes
      (op.pnode,) = nodes

    # Everything the allocator neither placed nor reported as failed would
    # be silently lost; assert that this does not happen.
    missing = set(op2inst.keys()) - set(failed)
    assert not missing, \
      "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)

    return ResultWithJobs(jobs, **self._ConstructPartialResult())
10590 def _CheckRADOSFreeSpace():
10591 """Compute disk size requirements inside the RADOS cluster.
10594 # For the RADOS cluster we assume there is always enough space.
class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  # NOTE(review): a few lines (e.g. an "else:" branch in Exec) appear to be
  # elided in this copy of the file; verify against upstream before editing.

  def ExpandNames(self):
    # Read-only operation: share all locks.
    self.share_locks = _ShareAll()
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    # Ask the primary node which instances it is actually running.
    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      # Map the configured admin state to the most descriptive error state.
      if instance.admin_state == constants.ADMINST_UP:
        state = constants.INSTST_ERRORDOWN
      elif instance.admin_state == constants.ADMINST_DOWN:
        state = constants.INSTST_ADMINDOWN
      # NOTE(review): the final "else:" header is elided in this copy.
      state = constants.INSTST_ADMINOFFLINE
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
def _GetInstanceConsole(cluster, instance):
  """Build the console access information for an instance.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster configuration object
  @type instance: L{objects.Instance}
  @param instance: the instance whose console is requested
  @rtype: dict
  @return: dictionary form of the hypervisor's console object

  """
  # Fill the parameters separately rather than updating the instance
  # object, so the cluster defaults do not get saved on the instance.
  filled_hvparams = cluster.FillHV(instance)
  filled_beparams = cluster.FillBE(instance)

  hv = hypervisor.GetHypervisor(instance.hypervisor)
  console = hv.GetInstanceConsole(instance, filled_hvparams, filled_beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  # NOTE(review): several structural lines ("else:" branches, the "env = {"
  # and "nl = [" literal openers, a list-comprehension opener, return
  # statements and some guards) appear to be elided in this copy of the
  # file; verify against upstream before editing.
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    remote_node = self.op.remote_node
    ialloc = self.op.iallocator
    if self.op.mode == constants.REPLACE_DISK_CHG:
      # Changing the secondary needs a target: either an explicit node or
      # an iallocator that will pick one — but not both.
      if remote_node is None and ialloc is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

    elif remote_node is not None or ialloc is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODE_RES not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    # NOTE(review): presumably an "else:" branch — header elided here.
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    if self.op.iallocator is not None:
      # iallocator will select a new node in the same group
      self.needed_locks[locking.LEVEL_NODEGROUP] = []

    self.needed_locks[locking.LEVEL_NODE_RES] = []

    # The tasklet carries out the actual replacement; False here is the
    # delay_iallocator flag.
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release,
                                   self.op.ignore_ipolicy)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock all groups used by instance optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        # NOTE(review): the "[node_name" opener of this list comprehension
        # is elided in this copy.
        self.needed_locks[locking.LEVEL_NODE] = \
           for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      # NOTE(review): presumably an "else:" branch — header elided here.
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Reuse the node-level locks for the node resource level.
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    # NOTE(review): the "env = {" opener is elided in this copy.
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
    env.update(_BuildInstanceHookEnvByObject(self, instance))

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    # NOTE(review): the "nl = [" opener and the "return nl, nl" line are
    # elided in this copy.
      self.cfg.GetMasterNode(),
      instance.primary_node,
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Either we own the group locks (iallocator case) or no iallocator was
    # given at all.
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

    return LogicalUnit.CheckPrereq(self)
10806 class TLReplaceDisks(Tasklet):
10807 """Replaces disks for an instance.
10809 Note: Locking is not within the scope of this class.
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release, ignore_ipolicy):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    # NOTE(review): the assignments of self.mode and self.disks appear to
    # be elided in this copy; both attributes are read later by
    # _CheckPrereq2/Exec — verify against upstream.
    self.instance_name = instance_name
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release
    self.ignore_ipolicy = ignore_ipolicy

    # Runtime data, filled in by CheckPrereq/_CheckPrereq2
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None
10838 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10839 """Compute a new secondary node using an IAllocator.
10842 req = iallocator.IAReqRelocate(name=instance_name,
10843 relocate_from=list(relocate_from))
10844 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
10846 ial.Run(iallocator_name)
10848 if not ial.success:
10849 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10850 " %s" % (iallocator_name, ial.info),
10851 errors.ECODE_NORES)
10853 remote_node_name = ial.result[0]
10855 lu.LogInfo("Selected new secondary for instance '%s': %s",
10856 instance_name, remote_node_name)
10858 return remote_node_name
  def _FindFaultyDisks(self, node_name):
    """Wrapper for L{_FindFaultyInstanceDisks}.

    @type node_name: string
    @param node_name: the node on which to look for faulty disks

    """
    # NOTE(review): the continuation line of this call is elided in this
    # copy; presumably it passes node_name — verify against upstream.
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    for idx, dev in enumerate(instance.disks):
      # NOTE(review): the inner "for node in nodes:" loop header and the
      # early-return branches (offline node / missing device) are elided
      # in this copy; verify against upstream.
      self.lu.LogInfo("Checking disk/%d on %s", idx, node)
      self.cfg.SetDiskID(dev, node)

      result = _BlockdevFind(self, node, dev, instance)

      elif result.fail_msg or not result.payload:
10890 def CheckPrereq(self):
10891 """Check prerequisites.
10893 This checks that the instance is in the cluster.
10896 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10897 assert instance is not None, \
10898 "Cannot retrieve locked instance %s" % self.instance_name
10900 if instance.disk_template != constants.DT_DRBD8:
10901 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10902 " instances", errors.ECODE_INVAL)
10904 if len(instance.secondary_nodes) != 1:
10905 raise errors.OpPrereqError("The instance has a strange layout,"
10906 " expected one secondary but found %d" %
10907 len(instance.secondary_nodes),
10908 errors.ECODE_FAULT)
10910 if not self.delay_iallocator:
10911 self._CheckPrereq2()
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    # NOTE(review): several structural lines ("else:" branches, guards such
    # as "if faulty_primary:" / "if not self.disks:", call continuations and
    # the touched_nodes list tail) are elided in this copy of the file;
    # verify against upstream before editing.
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
      # With an iallocator, let it pick the new secondary instead.
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
        "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    # Explicit disk lists only make sense for same-node replacement modes.
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      # Auto mode can repair faults on at most one side of the mirror.
      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %

    # If not specified all disks should be replaced
      self.disks = range(len(self.instance.disks))

    # TODO: This is ugly, but right now we can't distinguish between internal
    # submitted opcode and external one. We should fix that.
    if self.remote_node_info:
      # We change the node, lets verify it still meets instance policy
      new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
      cluster = self.cfg.GetClusterInfo()
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
      _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
                              ignore=self.ignore_ipolicy)

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                              if node_name is not None)

    # Release unneeded node and node resource locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    # NOTE(review): several structural lines ("if not self.disks:" guard,
    # the try/finally around disk activation, "else:" branches and the
    # final "return result") are elided in this copy of the file; verify
    # against upstream before editing.
    if self.delay_iallocator:
      # Second half of the prerequisite checks was postponed until now.
      self._CheckPrereq2()

    # Verify owned locks before starting operation
    owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
    assert set(owned_nodes) == set(self.node_secondary_ip), \
      ("Incorrect node locks, owning %s, expected %s" %
       (owned_nodes, self.node_secondary_ip.keys()))
    assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
            self.lu.owned_locks(locking.LEVEL_NODE_RES))

    owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
    assert list(owned_instances) == [self.instance_name], \
      "Instance '%s' not locked" % self.instance_name

    assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
      "Should not own any node group lock at this point"

      feedback_fn("No disks need replacement for instance '%s'" %
                  self.instance.name)

    feedback_fn("Replacing disk(s) %s for instance '%s'" %
                (utils.CommaJoin(self.disks), self.instance.name))
    # NOTE(review): unlike the %-formatted call above, the next two calls
    # pass extra positional arguments to feedback_fn, and "seconary" is a
    # typo in a user-visible message — worth fixing upstream.
    feedback_fn("Current primary node: %s", self.instance.primary_node)
    feedback_fn("Current seconary node: %s",
                utils.CommaJoin(self.instance.secondary_nodes))

    # Disks may be replaced while the instance is down; remember whether
    # they had to be activated first so they can be shut down again.
    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)

    # Activate the instance disks if we're replacing them on a down instance
      _StartInstanceDisks(self.lu, self.instance, True)

    # Should we replace the secondary node?
    if self.new_node is not None:
      fn = self._ExecDrbd8Secondary
      fn = self._ExecDrbd8DiskOnly

    result = fn(feedback_fn)

    # Deactivate the instance disks if we're replacing them on a
    # down instance
      _SafeShutdownInstanceDisks(self.lu, self.instance)

    assert not self.lu.owned_locks(locking.LEVEL_NODE)

    # Verify owned locks
    owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
    nodes = frozenset(self.node_secondary_ip)
    assert ((self.early_release and not owned_nodes) or
            (not self.early_release and not (set(owned_nodes) - nodes))), \
      ("Not owning the correct locks, early_release=%s, owned=%r,"
       " nodes=%r" % (self.early_release, owned_nodes, nodes))
  def _CheckVolumeGroup(self, nodes):
    """Verify that the cluster volume group exists on the given nodes.

    @param nodes: list of node names to check

    """
    # NOTE(review): the guard before the first raise, the "for node in
    # nodes:" loop header and the final call continuation are elided in
    # this copy; verify against upstream before editing.
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
      raise errors.OpExecError("Can't list volume groups on the nodes")
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
  def _CheckDisksExistence(self, nodes):
    """Assert that every disk selected for replacement exists on the nodes.

    """
    # NOTE(review): the "continue" after the index filter, the "for node in
    # nodes:" loop header, the "if not result.payload:" guard and the final
    # call continuation are elided in this copy; verify against upstream.
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = _BlockdevFind(self, node, dev, self.instance)

        msg = result.fail_msg
        if msg or not result.payload:
          msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    """Abort if any selected disk is degraded on the given node.

    @param node_name: node whose copies of the disks are checked
    @param on_primary: whether node_name is the instance's primary node
    @param ldisk: passed through to _CheckDiskConsistency (check the local
        disk status rather than the overall DRBD status)

    """
    # NOTE(review): the "continue" after the index filter and the LogInfo
    # call continuation are elided in this copy; verify against upstream.
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        self.lu.LogInfo("Checking disk/%d consistency on node %s" %

        if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
                                     on_primary, ldisk=ldisk):
          raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                   " replace disks for instance %s" %
                                   (node_name, self.instance.name))
  def _CreateNewStorage(self, node_name):
    """Create new storage on the primary or secondary node.

    This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    # NOTE(review): the "iv_names = {}" initialisation, the "continue"
    # after the index filter and the final "return iv_names" are elided in
    # this copy; verify against upstream before editing.
    disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
    for idx, dev in enumerate(disks):
      if idx not in self.disks:

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      (data_disk, meta_disk) = dev.children
      # New data LV: same size and VG as the existing data child.
      vg_data = data_disk.logical_id[0]
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_data, names[0]),
                             params=data_disk.params)
      # New meta LV: fixed DRBD metadata size.
      vg_meta = meta_disk.logical_id[0]
      lv_meta = objects.Disk(dev_type=constants.LD_LV,
                             size=constants.DRBD_META_SIZE,
                             logical_id=(vg_meta, names[1]),
                             params=meta_disk.params)

      new_lvs = [lv_data, lv_meta]
      # Keep copies of the old children so they can be renamed/removed later.
      old_lvs = [child.Copy() for child in dev.children]
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
                             _GetInstanceInfoText(self.instance), False)
  def _CheckDevices(self, node_name, iv_names):
    """Verify every replaced DRBD device is present and not degraded.

    @param node_name: node on which the devices are checked
    @param iv_names: mapping of iv_name -> (dev, old_lvs, new_lvs) as built
        by _CreateNewStorage

    """
    # NOTE(review): the "if not result.payload:" guard and the raise
    # continuation are elided in this copy; verify against upstream.
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = _BlockdevFind(self, node_name, dev, self.instance)

      msg = result.fail_msg
      if msg or not result.payload:
        msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
  def _RemoveOldStorage(self, node_name, iv_names):
    """Remove the renamed old LVs after a successful replacement.

    Removal failures are only warned about (with a manual-cleanup hint),
    since the replacement itself has already succeeded.

    """
    # NOTE(review): the "for lv in old_lvs:" loop header and the "if msg:"
    # guard are elided in this copy; verify against upstream.
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaces.<time_t>)

    Failures are not very well handled.

    """
    # NOTE(review): several lines (the "steps_total" initialisation, call
    # continuations, "if msg:" guards and "else:" branches) are elided in
    # this copy of the file; verify against upstream before editing.
    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Intermediate steps of in memory modifications
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # We need to modify old_lvs so that removal later removes the
      # right LVs, not the newly added ones; note that old_lvs is a
      # copy here
      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node,
                                                  (dev, self.instance), new_lvs)
      msg = result.fail_msg
        # Attaching failed: try to remove the freshly created LVs again
        # before aborting.
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

    # Step numbering continues from here, depending on early_release.
    cstep = itertools.count(5)

    if self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
      # TODO: Check if releasing locks early still makes sense
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
    # Release all resource locks except those used by the instance
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
                  keep=self.node_secondary_ip.keys())

    # Release all node locks while waiting for sync
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)

    # TODO: Can the instance lock be downgraded here? Take the optional disk
    # shutdown in the caller into consideration.

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
# NOTE(review): this extract has dropped source lines (the embedded original
# line numbers jump, e.g. 11449-11450 with the p_minor assignment and the
# matching "try:" around _CreateSingleBlockDev are missing).  Code below is
# kept byte-identical; verify against the complete file before modifying.
11387 def _ExecDrbd8Secondary(self, feedback_fn):
11388 """Replace the secondary node for DRBD 8.
11390 The algorithm for replace is quite complicated:
11391 - for all disks of the instance:
11392 - create new LVs on the new node with same names
11393 - shutdown the drbd device on the old secondary
11394 - disconnect the drbd network on the primary
11395 - create the drbd device on the new secondary
11396 - network attach the drbd on the primary, using an artifice:
11397 the drbd code for Attach() will connect to the network if it
11398 finds a device which is connected to the good local disks but
11399 not network enabled
11400 - wait for sync across all devices
11401 - remove all disks from the old secondary
11403 Failures are not very well handled.
11408 pnode = self.instance.primary_node
11410 # Step: check device activation
11411 self.lu.LogStep(1, steps_total, "Check device existence")
11412 self._CheckDisksExistence([self.instance.primary_node])
11413 self._CheckVolumeGroup([self.instance.primary_node])
11415 # Step: check other node consistency
11416 self.lu.LogStep(2, steps_total, "Check peer consistency")
11417 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11419 # Step: create new storage
11420 self.lu.LogStep(3, steps_total, "Allocate new storage")
11421 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11422 for idx, dev in enumerate(disks):
11423 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11424 (self.new_node, idx))
11425 # we pass force_create=True to force LVM creation
11426 for new_lv in dev.children:
11427 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11428 True, _GetInstanceInfoText(self.instance), False)
11430 # Step 4: dbrd minors and drbd setups changes
11431 # after this, we must manually remove the drbd minors on both the
11432 # error and the success paths
11433 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11434 minors = self.cfg.AllocateDRBDMinor([self.new_node
11435 for dev in self.instance.disks],
11436 self.instance.name)
11437 logging.debug("Allocated minors %r", minors)
11440 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11441 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11442 (self.new_node, idx))
11443 # create new devices on new_node; note that we create two IDs:
11444 # one without port, so the drbd will be activated without
11445 # networking information on the new node at this stage, and one
11446 # with network, for the latter activation in step 4
11447 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11448 if self.instance.primary_node == o_node1:
# NOTE(review): lines assigning p_minor (from o_minor1/o_minor2) are missing
# from this extract; p_minor is used in the IDs built below.
11451 assert self.instance.primary_node == o_node2, "Three-node instance?"
11454 new_alone_id = (self.instance.primary_node, self.new_node, None,
11455 p_minor, new_minor, o_secret)
11456 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11457 p_minor, new_minor, o_secret)
11459 iv_names[idx] = (dev, dev.children, new_net_id)
11460 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11462 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11463 logical_id=new_alone_id,
11464 children=dev.children,
11467 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11470 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11472 _GetInstanceInfoText(self.instance), False)
11473 except errors.GenericError:
# On failure the freshly allocated DRBD minors are released again before
# re-raising (the "raise" line is missing from this extract).
11474 self.cfg.ReleaseDRBDMinors(self.instance.name)
11477 # We have new devices, shutdown the drbd on the old secondary
11478 for idx, dev in enumerate(self.instance.disks):
11479 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11480 self.cfg.SetDiskID(dev, self.target_node)
11481 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11482 (dev, self.instance)).fail_msg
11484 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11485 "node: %s" % (idx, msg),
11486 hint=("Please cleanup this device manually as"
11487 " soon as possible"))
11489 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11490 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11491 self.instance.disks)[pnode]
11493 msg = result.fail_msg
11495 # detaches didn't succeed (unlikely)
11496 self.cfg.ReleaseDRBDMinors(self.instance.name)
11497 raise errors.OpExecError("Can't detach the disks from the network on"
11498 " old node: %s" % (msg,))
11500 # if we managed to detach at least one, we update all the disks of
11501 # the instance to point to the new secondary
11502 self.lu.LogInfo("Updating instance configuration")
11503 for dev, _, new_logical_id in iv_names.itervalues():
11504 dev.logical_id = new_logical_id
11505 self.cfg.SetDiskID(dev, self.instance.primary_node)
11507 self.cfg.Update(self.instance, feedback_fn)
11509 # Release all node locks (the configuration has been updated)
11510 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11512 # and now perform the drbd attach
11513 self.lu.LogInfo("Attaching primary drbds to new secondary"
11514 " (standalone => connected)")
11515 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11517 self.node_secondary_ip,
11518 (self.instance.disks, self.instance),
11519 self.instance.name,
11521 for to_node, to_result in result.items():
11522 msg = to_result.fail_msg
# Attach failures are only warned about (not fatal): sync/check below will
# surface persistent problems.
11524 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11526 hint=("please do a gnt-instance info to see the"
11527 " status of disks"))
# Step counter resumes at 5: steps 1-4 were logged explicitly above.
11529 cstep = itertools.count(5)
11531 if self.early_release:
11532 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11533 self._RemoveOldStorage(self.target_node, iv_names)
11534 # TODO: Check if releasing locks early still makes sense
11535 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11537 # Release all resource locks except those used by the instance
11538 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11539 keep=self.node_secondary_ip.keys())
11541 # TODO: Can the instance lock be downgraded here? Take the optional disk
11542 # shutdown in the caller into consideration.
11545 # This can fail as the old devices are degraded and _WaitForSync
11546 # does a combined result over all disks, so we don't check its return value
11547 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11548 _WaitForSync(self.lu, self.instance)
11550 # Check all devices manually
11551 self._CheckDevices(self.instance.primary_node, iv_names)
11553 # Step: remove old storage
11554 if not self.early_release:
11555 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11556 self._RemoveOldStorage(self.target_node, iv_names)
# NOTE(review): this extract is missing source lines (embedded original line
# numbering jumps); code kept byte-identical — verify against the full file.
11559 class LURepairNodeStorage(NoHooksLU):
11560 """Repairs the volume group on a node.
11565 def CheckArguments(self):
# Resolve the node name and reject storage types that do not support the
# "fix consistency" operation.
11566 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11568 storage_type = self.op.storage_type
11570 if (constants.SO_FIX_CONSISTENCY not in
11571 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11572 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11573 " repaired" % storage_type,
11574 errors.ECODE_INVAL)
11576 def ExpandNames(self):
# Only the node being repaired needs to be locked.
11577 self.needed_locks = {
11578 locking.LEVEL_NODE: [self.op.node_name],
11581 def _CheckFaultyDisks(self, instance, node_name):
11582 """Ensure faulty disks abort the opcode or at least warn."""
# NOTE(review): the matching "try:" line (and part of the
# _FindFaultyInstanceDisks argument list) is missing from this extract.
11584 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11586 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11587 " node '%s'" % (instance.name, node_name),
11588 errors.ECODE_STATE)
11589 except errors.OpPrereqError, err:
# With ignore_consistency the failure is downgraded to a warning.
11590 if self.op.ignore_consistency:
11591 self.proc.LogWarning(str(err.args[0]))
11595 def CheckPrereq(self):
11596 """Check prerequisites.
11599 # Check whether any instance on this node has faulty disks
11600 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11601 if inst.admin_state != constants.ADMINST_UP:
11603 check_nodes = set(inst.all_nodes)
11604 check_nodes.discard(self.op.node_name)
11605 for inst_node_name in check_nodes:
11606 self._CheckFaultyDisks(inst, inst_node_name)
11608 def Exec(self, feedback_fn):
# Delegate the actual repair to the node daemon via RPC and raise on failure.
11609 feedback_fn("Repairing storage unit '%s' on %s ..." %
11610 (self.op.name, self.op.node_name))
11612 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11613 result = self.rpc.call_storage_execute(self.op.node_name,
11614 self.op.storage_type, st_args,
11616 constants.SO_FIX_CONSISTENCY)
11617 result.Raise("Failed to repair storage unit '%s' on %s" %
11618 (self.op.name, self.op.node_name))
# NOTE(review): this extract is missing source lines (embedded original line
# numbering jumps, e.g. "else:" branches and the jobs assignment in Exec);
# code kept byte-identical — verify against the full file before editing.
11621 class LUNodeEvacuate(NoHooksLU):
11622 """Evacuates instances off a list of nodes.
11627 _MODE2IALLOCATOR = {
11628 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11629 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11630 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
# Sanity checks: the mapping must cover exactly the declared evacuation
# modes on both sides.
11632 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11633 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11634 constants.IALLOCATOR_NEVAC_MODES)
11636 def CheckArguments(self):
11637 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11639 def ExpandNames(self):
11640 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11642 if self.op.remote_node is not None:
11643 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11644 assert self.op.remote_node
11646 if self.op.remote_node == self.op.node_name:
11647 raise errors.OpPrereqError("Can not use evacuated node as a new"
11648 " secondary node", errors.ECODE_INVAL)
11650 if self.op.mode != constants.NODE_EVAC_SEC:
11651 raise errors.OpPrereqError("Without the use of an iallocator only"
11652 " secondary instances can be evacuated",
11653 errors.ECODE_INVAL)
11656 self.share_locks = _ShareAll()
11657 self.needed_locks = {
11658 locking.LEVEL_INSTANCE: [],
11659 locking.LEVEL_NODEGROUP: [],
11660 locking.LEVEL_NODE: [],
11663 # Determine nodes (via group) optimistically, needs verification once locks
11664 # have been acquired
11665 self.lock_nodes = self._DetermineNodes()
11667 def _DetermineNodes(self):
11668 """Gets the list of nodes to operate on.
11671 if self.op.remote_node is None:
11672 # Iallocator will choose any node(s) in the same group
11673 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11675 group_nodes = frozenset([self.op.remote_node])
11677 # Determine nodes to be locked
11678 return set([self.op.node_name]) | group_nodes
11680 def _DetermineInstances(self):
11681 """Builds list of instances to operate on.
11684 assert self.op.mode in constants.NODE_EVAC_MODES
11686 if self.op.mode == constants.NODE_EVAC_PRI:
11687 # Primary instances only
11688 inst_fn = _GetNodePrimaryInstances
11689 assert self.op.remote_node is None, \
11690 "Evacuating primary instances requires iallocator"
11691 elif self.op.mode == constants.NODE_EVAC_SEC:
11692 # Secondary instances only
11693 inst_fn = _GetNodeSecondaryInstances
11696 assert self.op.mode == constants.NODE_EVAC_ALL
11697 inst_fn = _GetNodeInstances
11698 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11700 raise errors.OpPrereqError("Due to an issue with the iallocator"
11701 " interface it is not possible to evacuate"
11702 " all instances at once; specify explicitly"
11703 " whether to evacuate primary or secondary"
11705 errors.ECODE_INVAL)
11707 return inst_fn(self.cfg, self.op.node_name)
11709 def DeclareLocks(self, level):
11710 if level == locking.LEVEL_INSTANCE:
11711 # Lock instances optimistically, needs verification once node and group
11712 # locks have been acquired
11713 self.needed_locks[locking.LEVEL_INSTANCE] = \
11714 set(i.name for i in self._DetermineInstances())
11716 elif level == locking.LEVEL_NODEGROUP:
11717 # Lock node groups for all potential target nodes optimistically, needs
11718 # verification once nodes have been acquired
11719 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11720 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11722 elif level == locking.LEVEL_NODE:
11723 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11725 def CheckPrereq(self):
# Re-derive nodes/groups/instances and verify they still match what was
# locked optimistically in ExpandNames/DeclareLocks.
11727 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11728 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11729 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11731 need_nodes = self._DetermineNodes()
11733 if not owned_nodes.issuperset(need_nodes):
11734 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11735 " locks were acquired, current nodes are"
11736 " are '%s', used to be '%s'; retry the"
11738 (self.op.node_name,
11739 utils.CommaJoin(need_nodes),
11740 utils.CommaJoin(owned_nodes)),
11741 errors.ECODE_STATE)
11743 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11744 if owned_groups != wanted_groups:
11745 raise errors.OpExecError("Node groups changed since locks were acquired,"
11746 " current groups are '%s', used to be '%s';"
11747 " retry the operation" %
11748 (utils.CommaJoin(wanted_groups),
11749 utils.CommaJoin(owned_groups)))
11751 # Determine affected instances
11752 self.instances = self._DetermineInstances()
11753 self.instance_names = [i.name for i in self.instances]
11755 if set(self.instance_names) != owned_instances:
11756 raise errors.OpExecError("Instances on node '%s' changed since locks"
11757 " were acquired, current instances are '%s',"
11758 " used to be '%s'; retry the operation" %
11759 (self.op.node_name,
11760 utils.CommaJoin(self.instance_names),
11761 utils.CommaJoin(owned_instances)))
11763 if self.instance_names:
11764 self.LogInfo("Evacuating instances from node '%s': %s",
11766 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11768 self.LogInfo("No instances to evacuate from node '%s'",
11771 if self.op.remote_node is not None:
11772 for i in self.instances:
11773 if i.primary_node == self.op.remote_node:
11774 raise errors.OpPrereqError("Node %s is the primary node of"
11775 " instance %s, cannot use it as"
11777 (self.op.remote_node, i.name),
11778 errors.ECODE_INVAL)
11780 def Exec(self, feedback_fn):
11781 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11783 if not self.instance_names:
11784 # No instances to evacuate
11787 elif self.op.iallocator is not None:
11788 # TODO: Implement relocation to other group
11789 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
11790 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
11791 instances=list(self.instance_names))
11792 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11794 ial.Run(self.op.iallocator)
11796 if not ial.success:
11797 raise errors.OpPrereqError("Can't compute node evacuation using"
11798 " iallocator '%s': %s" %
11799 (self.op.iallocator, ial.info),
11800 errors.ECODE_NORES)
11802 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11804 elif self.op.remote_node is not None:
11805 assert self.op.mode == constants.NODE_EVAC_SEC
# NOTE(review): the "jobs = [" line opening this list comprehension is
# missing from this extract.
11807 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11808 remote_node=self.op.remote_node,
11810 mode=constants.REPLACE_DISK_CHG,
11811 early_release=self.op.early_release)]
11812 for instance_name in self.instance_names
11816 raise errors.ProgrammerError("No iallocator or remote node")
11818 return ResultWithJobs(jobs)
# NOTE(review): the "try:" and trailing "return op" lines of this helper are
# missing from this extract; code kept byte-identical.
11821 def _SetOpEarlyRelease(early_release, op):
11822 """Sets C{early_release} flag on opcodes if available.
11826 op.early_release = early_release
11827 except AttributeError:
# Only opcodes lacking the attribute may land here; OpInstanceReplaceDisks
# is asserted to always support early_release.
11828 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
# NOTE(review): the branch selecting between nodes and group (the
# "if use_nodes:" / "else:" lines) is missing from this extract.
11833 def _NodeEvacDest(use_nodes, group, nodes):
11834 """Returns group or nodes depending on caller's choice.
11838 return utils.CommaJoin(nodes)
# NOTE(review): this extract is missing source lines (e.g. the "if failed:"
# and "if moved:" guards, and the closing "for ops in jobs]" of the return
# expression); code kept byte-identical.
11843 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11844 """Unpacks the result of change-group and node-evacuate iallocator requests.
11846 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11847 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11849 @type lu: L{LogicalUnit}
11850 @param lu: Logical unit instance
11851 @type alloc_result: tuple/list
11852 @param alloc_result: Result from iallocator
11853 @type early_release: bool
11854 @param early_release: Whether to release locks early if possible
11855 @type use_nodes: bool
11856 @param use_nodes: Whether to display node names instead of groups
11859 (moved, failed, jobs) = alloc_result
11862 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11863 for (name, reason) in failed)
11864 lu.LogWarning("Unable to evacuate instances %s", failreason)
11865 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11868 lu.LogInfo("Instances to be moved: %s",
11869 utils.CommaJoin("%s (to %s)" %
11870 (name, _NodeEvacDest(use_nodes, group, nodes))
11871 for (name, group, nodes) in moved))
# Each job's serialized opcodes are re-loaded and the early_release flag is
# propagated onto every opcode that supports it.
11873 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11874 map(opcodes.OpCode.LoadOpCode, ops))
# NOTE(review): the rounding-up branch ("if remainder != 0:", the "mib += 1"
# adjustment and the "return mib" line) is missing from this extract; the
# docstring documents that behavior.
11878 def _DiskSizeInBytesToMebibytes(lu, size):
11879 """Converts a disk size in bytes to mebibytes.
11881 Warns and rounds up if the size isn't an even multiple of 1 MiB.
11884 (mib, remainder) = divmod(size, 1024 * 1024)
11887 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
11888 " to not overwrite existing data (%s bytes will not be"
11889 " wiped)", (1024 * 1024) - remainder)
# NOTE(review): this extract is missing source lines (embedded original line
# numbering jumps, e.g. the "env = {" opener in BuildHooksEnv, "return"
# statements and several "else:" branches); code kept byte-identical —
# verify against the full file before editing.
11895 class LUInstanceGrowDisk(LogicalUnit):
11896 """Grow a disk of an instance.
11899 HPATH = "disk-grow"
11900 HTYPE = constants.HTYPE_INSTANCE
11903 def ExpandNames(self):
11904 self._ExpandAndLockInstance()
11905 self.needed_locks[locking.LEVEL_NODE] = []
11906 self.needed_locks[locking.LEVEL_NODE_RES] = []
11907 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11908 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11910 def DeclareLocks(self, level):
11911 if level == locking.LEVEL_NODE:
11912 self._LockInstancesNodes()
11913 elif level == locking.LEVEL_NODE_RES:
# Resource locks are copied from the node locks computed above.
11915 self.needed_locks[locking.LEVEL_NODE_RES] = \
11916 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
11918 def BuildHooksEnv(self):
11919 """Build hooks env.
11921 This runs on the master, the primary and all the secondaries.
11925 "DISK": self.op.disk,
11926 "AMOUNT": self.op.amount,
11927 "ABSOLUTE": self.op.absolute,
11929 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11932 def BuildHooksNodes(self):
11933 """Build hooks nodes.
11936 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11939 def CheckPrereq(self):
11940 """Check prerequisites.
11942 This checks that the instance is in the cluster.
11945 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11946 assert instance is not None, \
11947 "Cannot retrieve locked instance %s" % self.op.instance_name
11948 nodenames = list(instance.all_nodes)
11949 for node in nodenames:
11950 _CheckNodeOnline(self, node)
11952 self.instance = instance
11954 if instance.disk_template not in constants.DTS_GROWABLE:
11955 raise errors.OpPrereqError("Instance's disk layout does not support"
11956 " growing", errors.ECODE_INVAL)
11958 self.disk = instance.FindDisk(self.op.disk)
11960 if self.op.absolute:
# Absolute mode: op.amount is the desired final size; delta is derived.
11961 self.target = self.op.amount
11962 self.delta = self.target - self.disk.size
11964 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11965 "current disk size (%s)" %
11966 (utils.FormatUnit(self.target, "h"),
11967 utils.FormatUnit(self.disk.size, "h")),
11968 errors.ECODE_STATE)
# Relative mode: op.amount is the increment; target is derived.
11970 self.delta = self.op.amount
11971 self.target = self.disk.size + self.delta
11973 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11974 utils.FormatUnit(self.delta, "h"),
11975 errors.ECODE_INVAL)
11977 if instance.disk_template not in (constants.DT_FILE,
11978 constants.DT_SHARED_FILE,
11980 # TODO: check the free disk space for file, when that feature will be
11982 _CheckNodesFreeDiskPerVG(self, nodenames,
11983 self.disk.ComputeGrowth(self.delta))
11985 def Exec(self, feedback_fn):
11986 """Execute disk grow.
11989 instance = self.instance
11992 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11993 assert (self.owned_locks(locking.LEVEL_NODE) ==
11994 self.owned_locks(locking.LEVEL_NODE_RES))
# Cluster-level flag: whether newly allocated disk space must be wiped
# (consumed by the _WipeDisks call further below).
11996 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
11998 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12000 raise errors.OpExecError("Cannot activate block device to grow")
12002 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12003 (self.op.disk, instance.name,
12004 utils.FormatUnit(self.delta, "h"),
12005 utils.FormatUnit(self.target, "h")))
12007 # First run all grow ops in dry-run mode
12008 for node in instance.all_nodes:
12009 self.cfg.SetDiskID(disk, node)
12010 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12012 result.Raise("Dry-run grow request failed to node %s" % node)
12015 # Get disk size from primary node for wiping
12016 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12017 result.Raise("Failed to retrieve disk size from node '%s'" %
12018 instance.primary_node)
12020 (disk_size_in_bytes, ) = result.payload
12022 if disk_size_in_bytes is None:
12023 raise errors.OpExecError("Failed to retrieve disk size from primary"
12024 " node '%s'" % instance.primary_node)
12026 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12028 assert old_disk_size >= disk.size, \
12029 ("Retrieved disk size too small (got %s, should be at least %s)" %
12030 (old_disk_size, disk.size))
12032 old_disk_size = None
12034 # We know that (as far as we can test) operations across different
12035 # nodes will succeed, time to run it for real on the backing storage
12036 for node in instance.all_nodes:
12037 self.cfg.SetDiskID(disk, node)
12038 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12040 result.Raise("Grow request failed to node %s" % node)
12042 # And now execute it for logical storage, on the primary node
12043 node = instance.primary_node
12044 self.cfg.SetDiskID(disk, node)
12045 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12047 result.Raise("Grow request failed to node %s" % node)
12049 disk.RecordGrow(self.delta)
12050 self.cfg.Update(instance, feedback_fn)
12052 # Changes have been recorded, release node lock
12053 _ReleaseLocks(self, locking.LEVEL_NODE)
12055 # Downgrade lock while waiting for sync
12056 self.glm.downgrade(locking.LEVEL_INSTANCE)
# old_disk_size is only computed when wiping is enabled, never otherwise.
12058 assert wipe_disks ^ (old_disk_size is None)
12061 assert instance.disks[self.op.disk] == disk
12063 # Wipe newly added disk space
12064 _WipeDisks(self, instance,
12065 disks=[(self.op.disk, disk, old_disk_size)])
12067 if self.op.wait_for_sync:
12068 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12070 self.proc.LogWarning("Disk sync-ing has not returned a good"
12071 " status; please check the instance")
12072 if instance.admin_state != constants.ADMINST_UP:
12073 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12074 elif instance.admin_state != constants.ADMINST_UP:
12075 self.proc.LogWarning("Not shutting down the disk even if the instance is"
12076 " not supposed to be running because no wait for"
12077 " sync mode was requested")
12079 assert self.owned_locks(locking.LEVEL_NODE_RES)
12080 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
# NOTE(review): this extract is missing source lines (embedded original line
# numbering jumps, e.g. several "else:" branches, the closing of the result
# dict and the final "return result" in Exec); code kept byte-identical —
# verify against the full file before editing.
12083 class LUInstanceQueryData(NoHooksLU):
12084 """Query runtime instance data.
12089 def ExpandNames(self):
12090 self.needed_locks = {}
12092 # Use locking if requested or when non-static information is wanted
12093 if not (self.op.static or self.op.use_locking):
12094 self.LogWarning("Non-static data requested, locks need to be acquired")
12095 self.op.use_locking = True
12097 if self.op.instances or not self.op.use_locking:
12098 # Expand instance names right here
12099 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12101 # Will use acquired locks
12102 self.wanted_names = None
12104 if self.op.use_locking:
12105 self.share_locks = _ShareAll()
12107 if self.wanted_names is None:
12108 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12110 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12112 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12113 self.needed_locks[locking.LEVEL_NODE] = []
12114 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12116 def DeclareLocks(self, level):
12117 if self.op.use_locking:
12118 if level == locking.LEVEL_NODEGROUP:
12119 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12121 # Lock all groups used by instances optimistically; this requires going
12122 # via the node before it's locked, requiring verification later on
12123 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12124 frozenset(group_uuid
12125 for instance_name in owned_instances
12127 self.cfg.GetInstanceNodeGroups(instance_name))
12129 elif level == locking.LEVEL_NODE:
12130 self._LockInstancesNodes()
12132 def CheckPrereq(self):
12133 """Check prerequisites.
12135 This only checks the optional instance list against the existing names.
12138 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12139 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12140 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12142 if self.wanted_names is None:
12143 assert self.op.use_locking, "Locking was not used"
12144 self.wanted_names = owned_instances
12146 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12148 if self.op.use_locking:
12149 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12152 assert not (owned_instances or owned_groups or owned_nodes)
12154 self.wanted_instances = instances.values()
12156 def _ComputeBlockdevStatus(self, node, instance, dev):
12157 """Returns the status of a block device
12160 if self.op.static or not node:
12163 self.cfg.SetDiskID(dev, node)
12165 result = self.rpc.call_blockdev_find(node, dev)
12169 result.Raise("Can't compute disk status for %s" % instance.name)
12171 status = result.payload
12175 return (status.dev_path, status.major, status.minor,
12176 status.sync_percent, status.estimated_time,
12177 status.is_degraded, status.ldisk_status)
12179 def _ComputeDiskStatus(self, instance, snode, dev):
12180 """Compute block device status.
# Annotate the disk with its parameters first, then delegate to the
# annotation-expecting inner helper.
12183 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12185 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12187 def _ComputeDiskStatusInner(self, instance, snode, dev):
12188 """Compute block device status.
12190 @attention: The device has to be annotated already.
12193 if dev.dev_type in constants.LDS_DRBD:
12194 # we change the snode then (otherwise we use the one passed in)
12195 if dev.logical_id[0] == instance.primary_node:
12196 snode = dev.logical_id[1]
12198 snode = dev.logical_id[0]
12200 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12202 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
# Child devices are computed recursively with the same snode.
12205 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12212 "iv_name": dev.iv_name,
12213 "dev_type": dev.dev_type,
12214 "logical_id": dev.logical_id,
12215 "physical_id": dev.physical_id,
12216 "pstatus": dev_pstatus,
12217 "sstatus": dev_sstatus,
12218 "children": dev_children,
12223 def Exec(self, feedback_fn):
12224 """Gather and return data"""
12227 cluster = self.cfg.GetClusterInfo()
12229 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12230 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12232 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12233 for node in nodes.values()))
12235 group2name_fn = lambda uuid: groups[uuid].name
12237 for instance in self.wanted_instances:
12238 pnode = nodes[instance.primary_node]
12240 if self.op.static or pnode.offline:
12241 remote_state = None
12243 self.LogWarning("Primary node %s is marked offline, returning static"
12244 " information only for instance %s" %
12245 (pnode.name, instance.name))
12247 remote_info = self.rpc.call_instance_info(instance.primary_node,
12249 instance.hypervisor)
12250 remote_info.Raise("Error checking node %s" % instance.primary_node)
12251 remote_info = remote_info.payload
12252 if remote_info and "state" in remote_info:
12253 remote_state = "up"
12255 if instance.admin_state == constants.ADMINST_UP:
12256 remote_state = "down"
12258 remote_state = instance.admin_state
12260 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12263 snodes_group_uuids = [nodes[snode_name].group
12264 for snode_name in instance.secondary_nodes]
12266 result[instance.name] = {
12267 "name": instance.name,
12268 "config_state": instance.admin_state,
12269 "run_state": remote_state,
12270 "pnode": instance.primary_node,
12271 "pnode_group_uuid": pnode.group,
12272 "pnode_group_name": group2name_fn(pnode.group),
12273 "snodes": instance.secondary_nodes,
12274 "snodes_group_uuids": snodes_group_uuids,
12275 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12277 # this happens to be the same format used for hooks
12278 "nics": _NICListToTuple(self, instance.nics),
12279 "disk_template": instance.disk_template,
12281 "hypervisor": instance.hypervisor,
12282 "network_port": instance.network_port,
12283 "hv_instance": instance.hvparams,
12284 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12285 "be_instance": instance.beparams,
12286 "be_actual": cluster.FillBE(instance),
12287 "os_instance": instance.osparams,
12288 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12289 "serial_no": instance.serial_no,
12290 "mtime": instance.mtime,
12291 "ctime": instance.ctime,
12292 "uuid": instance.uuid,
# NOTE(review): lines between the "if private_fn is None:" guard and the
# return (the "fn = ..." assignments for both branches) are missing from
# this extract; code kept byte-identical.
12298 def PrepareContainerMods(mods, private_fn):
12299 """Prepares a list of container modifications by adding a private data field.
12301 @type mods: list of tuples; (operation, index, parameters)
12302 @param mods: List of modifications
12303 @type private_fn: callable or None
12304 @param private_fn: Callable for constructing a private data field for a
12309 if private_fn is None:
# Each (op, idx, params) tuple is extended with a freshly constructed
# private data object.
12314 return [(op, idx, params, fn()) for (op, idx, params) in mods]
#: Type description for changes as returned by L{ApplyContainerMods}'s
#: callbacks: either C{None} or a list of two-element (name, value) tuples
# NOTE(review): the expression below is truncated in this view; the item
# list and its closing brackets continue on lines not shown here
_TApplyContModsCbChanges = \
  ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
    ht.TNonEmptyString,
def ApplyContainerMods(kind, container, chgdesc, mods,
                       create_fn, modify_fn, remove_fn):
  """Applies descriptions in C{mods} to C{container}.

  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to modify
  @type chgdesc: None or list
  @param chgdesc: List of applied changes
  @type mods: list
  @param mods: Modifications as returned by L{PrepareContainerMods}
  @type create_fn: callable
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
    receives absolute item index, parameters and private data object as added
    by L{PrepareContainerMods}, returns tuple containing new item and changes
  @type modify_fn: callable
  @param modify_fn: Callback for modifying an existing item
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
    and private data object as added by L{PrepareContainerMods}, returns
    changes
  @type remove_fn: callable
  @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{PrepareContainerMods}

  """
  for (op, idx, params, private) in mods:
    # NOTE(review): branches mapping "idx" to "absidx" (the -1/else cases)
    # are not fully visible in this view
      absidx = len(container) - 1
      raise IndexError("Not accepting negative indices other than -1")
    elif idx > len(container):
      raise IndexError("Got %s index %s, but there are only %s" %
                       (kind, idx, len(container)))

    if op == constants.DDM_ADD:
      # Calculate where item will be added
        addidx = len(container)

      if create_fn is None:
      (item, changes) = create_fn(addidx, params, private)

        container.append(item)
        assert idx <= len(container)
        # list.insert does so before the specified index
        container.insert(idx, item)
      # Retrieve existing item
        item = container[absidx]
        raise IndexError("Invalid %s index %s" % (kind, idx))

      if op == constants.DDM_REMOVE:
        # Removal may run a callback first, then records a "remove" change
        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]
      elif op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

    # Callback results must match the declared changes type
    assert _TApplyContModsCbChanges(changes)

    if not (chgdesc is None or changes is None):
      chgdesc.extend(changes)
12415 def _UpdateIvNames(base_index, disks):
12416 """Updates the C{iv_name} attribute of disks.
12418 @type disks: list of L{objects.Disk}
12421 for (idx, disk) in enumerate(disks):
12422 disk.iv_name = "disk/%s" % (base_index + idx, )
class _InstNicModPrivate:
  """Data structure for network interface modifications.

  Used by L{LUInstanceSetParams}.

  """
  def __init__(self):
    # NOTE(review): initializer body is not visible in this view;
    # L{LUInstanceSetParams._PrepareNicModification} later assigns the
    # C{params} and C{filled} attributes on instances of this class
class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  Covers runtime memory ballooning, hypervisor/backend/OS parameter
  updates, disk and NIC add/modify/remove, disk template conversion
  (plain <-> drbd) and offline/online state changes.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE

  def _UpgradeDiskNicMods(kind, mods, verify_fn):
    # Upgrades legacy 2-tuple (op, params) modifications to the canonical
    # 3-tuple (op, index, params) format.
    # NOTE(review): no "self" parameter -- presumably a @staticmethod whose
    # decorator is not visible here; "result" is also initialized on lines
    # not shown in this view
    assert ht.TList(mods)
    assert not mods or len(mods[0]) in (2, 3)

    if mods and len(mods[0]) == 2:
      # Legacy format: add/remove always operate on the end (index -1)
      for op, params in mods:
        if op in (constants.DDM_ADD, constants.DDM_REMOVE):
          result.append((op, -1, params))
            # Only a single add or remove operation is allowed per request
            raise errors.OpPrereqError("Only one %s add or remove operation is"
                                       " supported at a time" % kind,
                                       errors.ECODE_INVAL)
          # Any other "op" value is an item index to be modified
          result.append((constants.DDM_MODIFY, op, params))

    assert verify_fn(result)

  def _CheckMods(kind, mods, key_types, item_fn):
    """Ensures requested disk/NIC modifications are valid.

    Forces parameter types via L{utils.ForceDictType} and dispatches
    add/modify parameter checks to C{item_fn}.

    """
    # NOTE(review): presumably a @staticmethod (no "self" parameter)
    for (op, _, params) in mods:
      assert ht.TDict(params)
      utils.ForceDictType(params, key_types)

      if op == constants.DDM_REMOVE:
          # Removal must not carry any parameters
          raise errors.OpPrereqError("No settings should be passed when"
                                     " removing a %s" % kind,
                                     errors.ECODE_INVAL)
      elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
        item_fn(op, params)
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

  def _VerifyDiskModification(op, params):
    """Verifies a disk modification.

    """
    if op == constants.DDM_ADD:
      # Access mode defaults to read-write and must be a known mode
      mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                   errors.ECODE_INVAL)

      # Size is mandatory when adding a disk
      size = params.get(constants.IDISK_SIZE, None)
        raise errors.OpPrereqError("Required disk parameter '%s' missing" %
                                   constants.IDISK_SIZE, errors.ECODE_INVAL)

      # Size is coerced to an integer; the conversion itself is on lines
      # not visible in this view
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
                                   errors.ECODE_INVAL)

      params[constants.IDISK_SIZE] = size

    elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
      # Resizing is handled by the dedicated grow-disk opcode instead
      raise errors.OpPrereqError("Disk size change not possible, use"
                                 " grow-disk", errors.ECODE_INVAL)

  def _VerifyNicModification(op, params):
    """Verifies a network interface modification.

    """
    if op in (constants.DDM_ADD, constants.DDM_MODIFY):
      ip = params.get(constants.INIC_IP, None)
      # The string "none" clears the IP; anything else must be a valid address
      elif ip.lower() == constants.VALUE_NONE:
        params[constants.INIC_IP] = None
      elif not netutils.IPAddress.IsValid(ip):
        raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                   errors.ECODE_INVAL)

      # "bridge" is a backwards-compatible alias for "link"
      bridge = params.get("bridge", None)
      link = params.get(constants.INIC_LINK, None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif bridge and bridge.lower() == constants.VALUE_NONE:
        params["bridge"] = None
      elif link and link.lower() == constants.VALUE_NONE:
        params[constants.INIC_LINK] = None

      if op == constants.DDM_ADD:
        # New NICs get an automatically generated MAC unless one is given
        macaddr = params.get(constants.INIC_MAC, None)
        if macaddr is None:
          params[constants.INIC_MAC] = constants.VALUE_AUTO

      if constants.INIC_MAC in params:
        macaddr = params[constants.INIC_MAC]
        if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          macaddr = utils.NormalizeAndValidateMac(macaddr)

        if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing NIC",
                                     errors.ECODE_INVAL)

  def CheckArguments(self):
    """Verifies the opcode arguments and normalizes disk/NIC modifications.

    """
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name or
            self.op.offline is not None or self.op.runtime_mem):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Convert legacy modification formats to the canonical 3-tuple form
    self.op.disks = self._UpgradeDiskNicMods(
      "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
    self.op.nics = self._UpgradeDiskNicMods(
      "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)

    # Check disk modifications
    self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
                    self._VerifyDiskModification)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # Check NIC modifications
    self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
                    self._VerifyNicModification)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # Can't even acquire node locks in shared mode as upcoming changes in
    # Ganeti 2.6 will start to modify the node object on disk conversion
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    # TODO: Acquire group lock in shared mode (disk parameters)
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        # The new secondary node for the template conversion must be locked
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    # NOTE(review): initialization of "args"/"nics" and the final
    # "return env" are not visible in this view
    if constants.BE_MINMEM in self.be_new:
      args["minmem"] = self.be_new[constants.BE_MINMEM]
    if constants.BE_MAXMEM in self.be_new:
      args["maxmem"] = self.be_new[constants.BE_MAXMEM]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.

    if self._new_nics is not None:
      # Export the (cluster-filled) NIC configuration the instance will have
      for nic in self._new_nics:
        nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        nics.append((nic.ip, nic.mac, mode, link))

      args["nics"] = nics

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    if self.op.runtime_mem:
      env["RUNTIME_MEMORY"] = self.op.runtime_mem

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Master node plus all of the instance's nodes; the return statement is
    # not visible in this view
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  # NOTE(review): the remainder of this signature (presumably "cluster" and
  # "pnode", both used below) is not visible in this view
  def _PrepareNicModification(self, params, private, old_ip, old_params,
    update_params_dict = dict([(key, params[key])
                               for key in constants.NICS_PARAMETERS

    if "bridge" in params:
      # Legacy alias: "bridge" maps onto the link parameter
      update_params_dict[constants.NIC_LINK] = params["bridge"]

    new_params = _GetUpdatedParams(old_params, update_params_dict)
    utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)

    # Fill with cluster defaults and validate the resulting NIC parameters
    new_filled_params = cluster.SimpleFillNIC(new_params)
    objects.NIC.CheckParameterSyntax(new_filled_params)

    new_mode = new_filled_params[constants.NIC_MODE]
    if new_mode == constants.NIC_MODE_BRIDGED:
      # Verify the target bridge exists on the primary node
      bridge = new_filled_params[constants.NIC_LINK]
      msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
        msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
          # With --force, a missing bridge is only a warning
          self.warn.append(msg)
          raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)

    elif new_mode == constants.NIC_MODE_ROUTED:
      ip = params.get(constants.INIC_IP, old_ip)
        raise errors.OpPrereqError("Cannot set the NIC IP address to None"
                                   " on a routed NIC", errors.ECODE_INVAL)

    if constants.INIC_MAC in params:
      mac = params[constants.INIC_MAC]
        raise errors.OpPrereqError("Cannot unset the NIC MAC address",
                                   errors.ECODE_INVAL)
      elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        # otherwise generate the MAC address
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(self.proc.GetECId())
        # or validate/reserve the current one
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address '%s' already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

    # Stash results for later application in CheckPrereq/Exec
    private.params = new_params
    private.filled = new_filled_params

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)
    pnode_info = self.cfg.GetNodeInfo(pnode)
    self.diskparams = self.cfg.GetInstanceDiskParams(instance)

    # Prepare disk/NIC modifications
    self.diskmod = PrepareContainerMods(self.op.disks, None)
    self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)

    # OS change: verify the new OS exists unless forced
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
      instance_os = instance.os

    assert not (self.op.disk_template and self.op.disks), \
      "Can't modify disk template and apply disk changes at the same time"

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
                          msg="cannot change disk template")
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        disks = [{constants.IDISK_SIZE: d.size,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

        snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
        snode_group = self.cfg.GetNodeGroup(snode_info.group)
        # The new secondary must also satisfy its group's instance policy
        ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
        _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
                                ignore=self.op.ignore_ipolicy)
        if pnode_info.group != snode_info.group:
          self.LogWarning("The primary and secondary nodes are in two"
                          " different node groups; the disk parameters"
                          " from the first disk's node group will be"

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local syntax check first, then remote verification on all nodes
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_proposed = self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
      objects.UpgradeBeParams(i_bedict)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_proposed = self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
      self.be_new = self.be_inst = {}
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
    be_old = cluster.FillBE(instance)

    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # changed
    if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
      # Verify mask is consistent with number of vCPUs. Can skip this
      # test if only 1 entry in the CPU mask, which means same mask
      # is applied to all vCPUs.
      if (len(cpu_list) > 1 and
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   (self.be_proposed[constants.BE_VCPUS],
                                    self.hv_proposed[constants.HV_CPU_MASK]),
                                   errors.ECODE_INVAL)

      # Only perform this test if a new CPU mask is given
      if constants.HV_CPU_MASK in self.hv_new:
        # Calculate the largest CPU number requested
        max_requested_cpu = max(map(max, cpu_list))
        # Check that all of the instance's nodes have enough physical CPUs to
        # satisfy the requested CPU mask
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
                                max_requested_cpu + 1, instance.hypervisor)

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)

    #TODO(dynmem): do the appropriate check involving MINMEM
    if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
        be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         [instance.hypervisor])
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
        (_, _, (pnhvinfo, )) = pninfo.payload
        if not isinstance(pnhvinfo.get("memory_free", None), int):
          self.warn.append("Node data from primary node %s doesn't contain"
                           " free memory information" % pnode)
        elif instance_info.fail_msg:
          self.warn.append("Can't get instance runtime information: %s" %
                           instance_info.fail_msg)
          if instance_info.payload:
            current_mem = int(instance_info.payload["memory"])
            # Assume instance not running
            # (there is a slight race condition here, but it's not very
            # probable, and we have no other way to check)
            # TODO: Describe race condition
          #TODO(dynmem): do the appropriate check involving MINMEM
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
                      pnhvinfo["memory_free"])
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from starting, due to %d MB of memory"
                                       " missing on its primary node" %
                                       miss_mem, errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        # Secondaries must be able to host the instance on failover
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          (_, _, (nhvinfo, )) = nres.payload
          if not isinstance(nhvinfo.get("memory_free", None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          #TODO(dynmem): do the appropriate check involving MINMEM
          elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)

    if self.op.runtime_mem:
      # Ballooning requires the instance to actually be running
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node)
      if not remote_info.payload: # not running already
        raise errors.OpPrereqError("Instance %s is not running" %
                                   instance.name, errors.ECODE_STATE)

      current_memory = remote_info.payload["memory"]
      if (not self.op.force and
          (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
           self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
        raise errors.OpPrereqError("Instance %s must have memory between %d"
                                   " and %d MB of memory unless --force is"
                                    self.be_proposed[constants.BE_MINMEM],
                                    self.be_proposed[constants.BE_MAXMEM]),
                                   errors.ECODE_INVAL)

      if self.op.runtime_mem > current_memory:
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "ballooning memory for instance %s" %
                             # NOTE(review): "self.op.memory" looks like a
                             # leftover; surrounding code consistently uses
                             # "self.op.runtime_mem" -- confirm
                             self.op.memory - current_memory,
                             instance.hypervisor)

    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances", errors.ECODE_INVAL)

    def _PrepareNicCreate(_, params, private):
      self._PrepareNicModification(params, private, None, {}, cluster, pnode)
      return (None, None)

    def _PrepareNicMod(_, nic, params, private):
      self._PrepareNicModification(params, private, nic.ip,
                                   nic.nicparams, cluster, pnode)

    # Verify NIC changes (operating on copy)
    nics = instance.nics[:]
    ApplyContainerMods("NIC", nics, None, self.nicmod,
                       _PrepareNicCreate, _PrepareNicMod, None)
    if len(nics) > constants.MAX_NICS:
      raise errors.OpPrereqError("Instance has too many network interfaces"
                                 " (%d), cannot add more" % constants.MAX_NICS,
                                 errors.ECODE_STATE)

    # Verify disk changes (operating on a copy)
    disks = instance.disks[:]
    ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
    if len(disks) > constants.MAX_DISKS:
      raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
                                 " more" % constants.MAX_DISKS,
                                 errors.ECODE_STATE)

    if self.op.offline is not None:
      if self.op.offline:
        msg = "can't change to offline"
        msg = "can't change to online"
      _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)

    # Pre-compute NIC changes (necessary to use result in hooks)
    self._nic_chgdesc = []
      # Operate on copies as this is still in prereq
      nics = [nic.Copy() for nic in instance.nics]
      ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
                         self._CreateNewNic, self._ApplyNicMods, None)
      self._new_nics = nics
      self._new_nics = None

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    assert instance.disk_template == constants.DT_PLAIN

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn,
    anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in anno_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in anno_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # Release node locks while waiting for sync
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

    # Node resource locks will be released by caller

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance

    assert len(instance.secondary_nodes) == 1
    assert instance.disk_template == constants.DT_DRBD8

    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
    new_disks = [d.children[0] for d in instance.disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # this is a DRBD disk, return its port to the pool
    # NOTE: this must be done right before the call to cfg.Update!
    for disk in old_disks:
      tcp_port = disk.logical_id[2]
      self.cfg.AddTcpUdpPort(tcp_port)

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    # Release locks in case removing disks takes a while
    _ReleaseLocks(self, locking.LEVEL_NODE)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      # Removal failures are only warnings; the conversion is already done
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

  def _CreateNewDisk(self, idx, params, _):
    """Creates a new disk.

    """
    instance = self.instance

    # File-based disks live next to the instance's first disk
    if instance.disk_template in constants.DTS_FILEBASED:
      (file_driver, file_path) = instance.disks[0].logical_id
      file_path = os.path.dirname(file_path)
      file_driver = file_path = None

      _GenerateDiskTemplate(self, instance.disk_template, instance.name,
                            instance.primary_node, instance.secondary_nodes,
                            [params], file_path, file_driver, idx,
                            self.Log, self.diskparams)[0]

    info = _GetInstanceInfoText(instance)

    logging.info("Creating volume %s for instance %s",
                 disk.iv_name, instance.name)
    # Note: this needs to be kept in sync with _CreateDisks
    for node in instance.all_nodes:
      f_create = (node == instance.primary_node)
        _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
      except errors.OpExecError, err:
        self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
                        disk.iv_name, disk, node, err)

      ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),

  def _ModifyDisk(idx, disk, params, _):
    """Modifies a disk.

    """
    # Only the access mode of an existing disk can be changed
    disk.mode = params[constants.IDISK_MODE]

      ("disk.mode/%d" % idx, disk.mode),

  def _RemoveDisk(self, idx, root, _):
    """Removes a disk.

    """
    (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
    for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
      self.cfg.SetDiskID(disk, node)
      msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
        self.LogWarning("Could not remove disk/%d on node '%s': %s,"
                        " continuing anyway", idx, node, msg)

    # if this is a DRBD disk, return its port to the pool
    if root.dev_type in constants.LDS_DRBD:
      self.cfg.AddTcpUdpPort(root.logical_id[2])

  def _CreateNewNic(idx, params, private):
    """Creates data structure for a new network interface.

    """
    mac = params[constants.INIC_MAC]
    ip = params.get(constants.INIC_IP, None)
    # NIC parameters were prepared in _PrepareNicModification
    nicparams = private.params

    return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
       "add:mac=%s,ip=%s,mode=%s,link=%s" %
       (mac, ip, private.filled[constants.NIC_MODE],
        private.filled[constants.NIC_LINK])),

  def _ApplyNicMods(idx, nic, params, private):
    """Modifies a network interface.

    """
    # MAC and IP are plain attributes on the NIC object
    for key in [constants.INIC_MAC, constants.INIC_IP]:
        changes.append(("nic.%s/%d" % (key, idx), params[key]))
        setattr(nic, key, params[key])

      nic.nicparams = private.params

      for (key, val) in params.items():
        changes.append(("nic.%s/%d" % (key, idx), val))

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    # TODO: Replace with self.LogWarning
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    assert ((self.op.disk_template is None) ^
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning any node resource locks"

    instance = self.instance

    # runtime memory ballooning
    if self.op.runtime_mem:
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     self.op.runtime_mem)
      rpcres.Raise("Cannot modify instance runtime memory")
      result.append(("runtime_memory", self.op.runtime_mem))

    # Apply disk changes
    ApplyContainerMods("disk", instance.disks, result, self.diskmod,
                       self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
    _UpdateIvNames(0, instance.disks)

    if self.op.disk_template:
        check_nodes = set(instance.all_nodes)
        if self.op.remote_node:
          check_nodes.add(self.op.remote_node)
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
          owned = self.owned_locks(level)
          assert not (check_nodes - owned), \
            ("Not owning the correct locks, owning %r, expected at least %r" %
             (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
        # On failure, give reserved DRBD minors back to the pool
        self.cfg.ReleaseDRBDMinors(instance.name)
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # Apply NIC changes
    if self._new_nics is not None:
      instance.nics = self._new_nics
      result.extend(self._nic_chgdesc)

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    if self.op.offline is None:
      # Ignore: no offline/online change requested
    elif self.op.offline:
      # Mark instance as offline
      self.cfg.MarkInstanceOffline(instance.name)
      result.append(("admin_state", constants.ADMINST_OFFLINE))
      # Mark instance as online, but stopped
      self.cfg.MarkInstanceDown(instance.name)
      result.append(("admin_state", constants.ADMINST_DOWN))

    self.cfg.Update(instance, feedback_fn)

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

  #: Supported disk template conversions, mapping (old, new) template pairs
  #: to the method implementing the conversion
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    # NOTE(review): the closing brace of this dict is not visible in this view
class LUInstanceChangeGroup(LogicalUnit):
  """Moves an instance's nodes to other node groups via the iallocator.

  """
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Declare shared locks for the instance and (later) groups/nodes."""
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    """Acquire group locks (optimistically) and then node locks."""
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Verify locks are still valid and compute the set of target groups."""
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = frozenset(self.req_target_uuids)
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Ask the iallocator for evacuation jobs and return them for submission."""
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    req = iallocator.IAReqGroupChange(instances=instances,
                                      target_groups=list(self.target_uuids))
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info), errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Build the query helper from the opcode's node filter."""
    self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
                             ["node", "export"], self.op.use_locking)

  def ExpandNames(self):
    self.expq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.expq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Return a dict mapping node name to False (query failed) or the
    list of export names found on that node.

    """
    result = {}

    for (node, expname) in self.expq.OldStyleQuery(self):
      if expname is None:
        # RPC to this node failed; mark it instead of listing exports
        result[node] = False
      else:
        result.setdefault(node, []).append(expname)

    return result
class _ExportQuery(_QueryBase):
  FIELDS = query.EXPORT_FIELDS

  #: The node name is not a unique key for this query
  SORT_FIELD = "node"

  def ExpandNames(self, lu):
    """Set up (optional) node locks for the owning LU."""
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

    if self.do_locking:
      lu.share_locks = _ShareAll()
      lu.needed_locks = {
        locking.LEVEL_NODE: self.wanted,
        }

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)

    result = []

    for (node, nres) in lu.rpc.call_export_list(nodes).items():
      if nres.fail_msg:
        # Failed node is reported with a None export name
        result.append((node, None))
      else:
        result.extend((node, expname) for expname in nres.payload)

    return result
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    For remote exports, generates an X509 certificate on the primary node
    and returns handshake/signing data; returns None for other modes.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None
13586 class LUBackupExport(LogicalUnit):
13587 """Export an instance to an image in the cluster.
13590 HPATH = "instance-export"
13591 HTYPE = constants.HTYPE_INSTANCE
13594 def CheckArguments(self):
13595 """Check the arguments.
13598 self.x509_key_name = self.op.x509_key_name
13599 self.dest_x509_ca_pem = self.op.destination_x509_ca
13601 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13602 if not self.x509_key_name:
13603 raise errors.OpPrereqError("Missing X509 key name for encryption",
13604 errors.ECODE_INVAL)
13606 if not self.dest_x509_ca_pem:
13607 raise errors.OpPrereqError("Missing destination X509 CA",
13608 errors.ECODE_INVAL)
13610 def ExpandNames(self):
13611 self._ExpandAndLockInstance()
13613 # Lock all nodes for local exports
13614 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13615 # FIXME: lock only instance primary and destination node
13617 # Sad but true, for now we have do lock all nodes, as we don't know where
13618 # the previous export might be, and in this LU we search for it and
13619 # remove it from its current node. In the future we could fix this by:
13620 # - making a tasklet to search (share-lock all), then create the
13621 # new one, then one to remove, after
13622 # - removing the removal operation altogether
13623 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13625 def DeclareLocks(self, level):
13626 """Last minute lock declaration."""
13627 # All nodes are locked anyway, so nothing to do here.
13629 def BuildHooksEnv(self):
13630 """Build hooks env.
13632 This will run on the master, primary node and target node.
13636 "EXPORT_MODE": self.op.mode,
13637 "EXPORT_NODE": self.op.target_node,
13638 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13639 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13640 # TODO: Generic function for boolean env variables
13641 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13644 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13648 def BuildHooksNodes(self):
13649 """Build hooks nodes.
13652 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13654 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13655 nl.append(self.op.target_node)
13659 def CheckPrereq(self):
13660 """Check prerequisites.
13662 This checks that the instance and node names are valid.
13665 instance_name = self.op.instance_name
13667 self.instance = self.cfg.GetInstanceInfo(instance_name)
13668 assert self.instance is not None, \
13669 "Cannot retrieve locked instance %s" % self.op.instance_name
13670 _CheckNodeOnline(self, self.instance.primary_node)
13672 if (self.op.remove_instance and
13673 self.instance.admin_state == constants.ADMINST_UP and
13674 not self.op.shutdown):
13675 raise errors.OpPrereqError("Can not remove instance without shutting it"
13676 " down before", errors.ECODE_STATE)
13678 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13679 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13680 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13681 assert self.dst_node is not None
13683 _CheckNodeOnline(self, self.dst_node.name)
13684 _CheckNodeNotDrained(self, self.dst_node.name)
13687 self.dest_disk_info = None
13688 self.dest_x509_ca = None
13690 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13691 self.dst_node = None
13693 if len(self.op.target_node) != len(self.instance.disks):
13694 raise errors.OpPrereqError(("Received destination information for %s"
13695 " disks, but instance %s has %s disks") %
13696 (len(self.op.target_node), instance_name,
13697 len(self.instance.disks)),
13698 errors.ECODE_INVAL)
13700 cds = _GetClusterDomainSecret()
13702 # Check X509 key name
13704 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13705 except (TypeError, ValueError), err:
13706 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
13707 errors.ECODE_INVAL)
13709 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13710 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13711 errors.ECODE_INVAL)
13713 # Load and verify CA
13715 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13716 except OpenSSL.crypto.Error, err:
13717 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13718 (err, ), errors.ECODE_INVAL)
13720 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13721 if errcode is not None:
13722 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13723 (msg, ), errors.ECODE_INVAL)
13725 self.dest_x509_ca = cert
13727 # Verify target information
13729 for idx, disk_data in enumerate(self.op.target_node):
13731 (host, port, magic) = \
13732 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13733 except errors.GenericError, err:
13734 raise errors.OpPrereqError("Target info for disk %s: %s" %
13735 (idx, err), errors.ECODE_INVAL)
13737 disk_info.append((host, port, magic))
13739 assert len(disk_info) == len(self.op.target_node)
13740 self.dest_disk_info = disk_info
13743 raise errors.ProgrammerError("Unhandled export mode %r" %
13746 # instance disk type verification
13747 # TODO: Implement export support for file-based disks
13748 for disk in self.instance.disks:
13749 if disk.dev_type == constants.LD_FILE:
13750 raise errors.OpPrereqError("Export not supported for instances with"
13751 " file-based disks", errors.ECODE_INVAL)
13753 def _CleanupExports(self, feedback_fn):
13754 """Removes exports of current instance from all other nodes.
13756 If an instance in a cluster with nodes A..D was exported to node C, its
13757 exports will be removed from the nodes A, B and D.
13760 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13762 nodelist = self.cfg.GetNodeList()
13763 nodelist.remove(self.dst_node.name)
13765 # on one-node clusters nodelist will be empty after the removal
13766 # if we proceed the backup would be removed because OpBackupQuery
13767 # substitutes an empty list with the full cluster node list.
13768 iname = self.instance.name
13770 feedback_fn("Removing old exports for instance %s" % iname)
13771 exportlist = self.rpc.call_export_list(nodelist)
13772 for node in exportlist:
13773 if exportlist[node].fail_msg:
13775 if iname in exportlist[node].payload:
13776 msg = self.rpc.call_export_remove(node, iname).fail_msg
13778 self.LogWarning("Could not remove older export for instance %s"
13779 " on node %s: %s", iname, node, msg)
13781 def Exec(self, feedback_fn):
13782 """Export an instance to an image in the cluster.
13785 assert self.op.mode in constants.EXPORT_MODES
13787 instance = self.instance
13788 src_node = instance.primary_node
13790 if self.op.shutdown:
13791 # shutdown the instance, but not the disks
13792 feedback_fn("Shutting down instance %s" % instance.name)
13793 result = self.rpc.call_instance_shutdown(src_node, instance,
13794 self.op.shutdown_timeout)
13795 # TODO: Maybe ignore failures if ignore_remove_failures is set
13796 result.Raise("Could not shutdown instance %s on"
13797 " node %s" % (instance.name, src_node))
13799 # set the disks ID correctly since call_instance_start needs the
13800 # correct drbd minor to create the symlinks
13801 for disk in instance.disks:
13802 self.cfg.SetDiskID(disk, src_node)
13804 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13807 # Activate the instance disks if we'exporting a stopped instance
13808 feedback_fn("Activating disks for %s" % instance.name)
13809 _StartInstanceDisks(self, instance, None)
13812 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13815 helper.CreateSnapshots()
13817 if (self.op.shutdown and
13818 instance.admin_state == constants.ADMINST_UP and
13819 not self.op.remove_instance):
13820 assert not activate_disks
13821 feedback_fn("Starting instance %s" % instance.name)
13822 result = self.rpc.call_instance_start(src_node,
13823 (instance, None, None), False)
13824 msg = result.fail_msg
13826 feedback_fn("Failed to start instance: %s" % msg)
13827 _ShutdownInstanceDisks(self, instance)
13828 raise errors.OpExecError("Could not start instance: %s" % msg)
13830 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13831 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13832 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13833 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13834 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13836 (key_name, _, _) = self.x509_key_name
13839 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13842 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13843 key_name, dest_ca_pem,
13848 # Check for backwards compatibility
13849 assert len(dresults) == len(instance.disks)
13850 assert compat.all(isinstance(i, bool) for i in dresults), \
13851 "Not all results are boolean: %r" % dresults
13855 feedback_fn("Deactivating disks for %s" % instance.name)
13856 _ShutdownInstanceDisks(self, instance)
13858 if not (compat.all(dresults) and fin_resu):
13861 failures.append("export finalization")
13862 if not compat.all(dresults):
13863 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13865 failures.append("disk export: disk(s) %s" % fdsk)
13867 raise errors.OpExecError("Export failed, errors in %s" %
13868 utils.CommaJoin(failures))
13870 # At this point, the export was successful, we can cleanup/finish
13872 # Remove instance if requested
13873 if self.op.remove_instance:
13874 feedback_fn("Removing instance %s" % instance.name)
13875 _RemoveInstance(self, feedback_fn, instance,
13876 self.op.ignore_remove_failures)
13878 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13879 self._CleanupExports(feedback_fn)
13881 return fin_resu, dresults
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
13931 class LUGroupAdd(LogicalUnit):
13932 """Logical unit for creating node groups.
13935 HPATH = "group-add"
13936 HTYPE = constants.HTYPE_GROUP
13939 def ExpandNames(self):
13940 # We need the new group's UUID here so that we can create and acquire the
13941 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13942 # that it should not check whether the UUID exists in the configuration.
13943 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13944 self.needed_locks = {}
13945 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13947 def CheckPrereq(self):
13948 """Check prerequisites.
13950 This checks that the given group name is not an existing node group
13955 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13956 except errors.OpPrereqError:
13959 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13960 " node group (UUID: %s)" %
13961 (self.op.group_name, existing_uuid),
13962 errors.ECODE_EXISTS)
13964 if self.op.ndparams:
13965 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13967 if self.op.hv_state:
13968 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13970 self.new_hv_state = None
13972 if self.op.disk_state:
13973 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13975 self.new_disk_state = None
13977 if self.op.diskparams:
13978 for templ in constants.DISK_TEMPLATES:
13979 if templ in self.op.diskparams:
13980 utils.ForceDictType(self.op.diskparams[templ],
13981 constants.DISK_DT_TYPES)
13982 self.new_diskparams = self.op.diskparams
13984 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13985 except errors.OpPrereqError, err:
13986 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
13987 errors.ECODE_INVAL)
13989 self.new_diskparams = {}
13991 if self.op.ipolicy:
13992 cluster = self.cfg.GetClusterInfo()
13993 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13995 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13996 except errors.ConfigurationError, err:
13997 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13998 errors.ECODE_INVAL)
14000 def BuildHooksEnv(self):
14001 """Build hooks env.
14005 "GROUP_NAME": self.op.group_name,
14008 def BuildHooksNodes(self):
14009 """Build hooks nodes.
14012 mn = self.cfg.GetMasterNode()
14013 return ([mn], [mn])
14015 def Exec(self, feedback_fn):
14016 """Add the node group to the cluster.
14019 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14020 uuid=self.group_uuid,
14021 alloc_policy=self.op.alloc_policy,
14022 ndparams=self.op.ndparams,
14023 diskparams=self.new_diskparams,
14024 ipolicy=self.op.ipolicy,
14025 hv_state_static=self.new_hv_state,
14026 disk_state_static=self.new_disk_state)
14028 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14029 del self.remove_locks[locking.LEVEL_NODEGROUP]
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information later on.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1

      # Try to get all affected nodes' groups without having the group or node
      # lock yet. Needs verification later in the code flow.
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)

      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    expected_locks = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if actual_locks != expected_locks:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(expected_locks),
                                utils.CommaJoin(actual_locks)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]

    self.cfg.AssignGroupNodes(mods)

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as a
      consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))
class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    """Resolve requested group names/UUIDs into self.wanted (list of UUIDs)."""
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    self._cluster = lu.cfg.GetClusterInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData(self._cluster,
                                [self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances,
                                query.GQ_DISKPARAMS in self.requested_data)
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Build the query helper from the opcode's name filter."""
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)
14265 class LUGroupSetParams(LogicalUnit):
14266 """Modifies the parameters of a node group.
14269 HPATH = "group-modify"
14270 HTYPE = constants.HTYPE_GROUP
14273 def CheckArguments(self):
14276 self.op.diskparams,
14277 self.op.alloc_policy,
14279 self.op.disk_state,
14283 if all_changes.count(None) == len(all_changes):
14284 raise errors.OpPrereqError("Please pass at least one modification",
14285 errors.ECODE_INVAL)
14287 def ExpandNames(self):
14288 # This raises errors.OpPrereqError on its own:
14289 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14291 self.needed_locks = {
14292 locking.LEVEL_INSTANCE: [],
14293 locking.LEVEL_NODEGROUP: [self.group_uuid],
14296 self.share_locks[locking.LEVEL_INSTANCE] = 1
14298 def DeclareLocks(self, level):
14299 if level == locking.LEVEL_INSTANCE:
14300 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14302 # Lock instances optimistically, needs verification once group lock has
14304 self.needed_locks[locking.LEVEL_INSTANCE] = \
14305 self.cfg.GetNodeGroupInstances(self.group_uuid)
14308 def _UpdateAndVerifyDiskParams(old, new):
14309 """Updates and verifies disk parameters.
14312 new_params = _GetUpdatedParams(old, new)
14313 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14316 def CheckPrereq(self):
14317 """Check prerequisites.
14320 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14322 # Check if locked instances are still correct
14323 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14325 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14326 cluster = self.cfg.GetClusterInfo()
14328 if self.group is None:
14329 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14330 (self.op.group_name, self.group_uuid))
14332 if self.op.ndparams:
14333 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14334 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14335 self.new_ndparams = new_ndparams
14337 if self.op.diskparams:
14338 diskparams = self.group.diskparams
14339 uavdp = self._UpdateAndVerifyDiskParams
14340 # For each disktemplate subdict update and verify the values
14341 new_diskparams = dict((dt,
14342 uavdp(diskparams.get(dt, {}),
14343 self.op.diskparams[dt]))
14344 for dt in constants.DISK_TEMPLATES
14345 if dt in self.op.diskparams)
14346 # As we've all subdicts of diskparams ready, lets merge the actual
14347 # dict with all updated subdicts
14348 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14350 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14351 except errors.OpPrereqError, err:
14352 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14353 errors.ECODE_INVAL)
14355 if self.op.hv_state:
14356 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14357 self.group.hv_state_static)
14359 if self.op.disk_state:
14360 self.new_disk_state = \
14361 _MergeAndVerifyDiskState(self.op.disk_state,
14362 self.group.disk_state_static)
14364 if self.op.ipolicy:
14365 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14369 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14370 inst_filter = lambda inst: inst.name in owned_instances
14371 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14372 gmi = ganeti.masterd.instance
14374 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14376 new_ipolicy, instances)
14379 self.LogWarning("After the ipolicy change the following instances"
14380 " violate them: %s",
14381 utils.CommaJoin(violations))
14383 def BuildHooksEnv(self):
14384 """Build hooks env.
14388 "GROUP_NAME": self.op.group_name,
14389 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14392 def BuildHooksNodes(self):
14393 """Build hooks nodes.
14396 mn = self.cfg.GetMasterNode()
14397 return ([mn], [mn])
14399 def Exec(self, feedback_fn):
14400 """Modifies the node group.
14405 if self.op.ndparams:
14406 self.group.ndparams = self.new_ndparams
14407 result.append(("ndparams", str(self.group.ndparams)))
14409 if self.op.diskparams:
14410 self.group.diskparams = self.new_diskparams
14411 result.append(("diskparams", str(self.group.diskparams)))
14413 if self.op.alloc_policy:
14414 self.group.alloc_policy = self.op.alloc_policy
14416 if self.op.hv_state:
14417 self.group.hv_state_static = self.new_hv_state
14419 if self.op.disk_state:
14420 self.group.disk_state_static = self.new_disk_state
14422 if self.op.ipolicy:
14423 self.group.ipolicy = self.new_ipolicy
14425 self.cfg.Update(self.group, feedback_fn)
class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This will raise errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that is
    empty (i.e., contains no nodes), and that is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
                                 " removed" % self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      # The new name is not existing; that's what we want
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name
class LUGroupEvacuate(LogicalUnit):
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              owned_groups, owned_nodes, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    req = iallocator.IAReqGroupChange(instances=instances,
                                      target_groups=self.target_uuids)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)
class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}

    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      lock_level = locking.LEVEL_NODE
      lock_name = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      lock_level = locking.LEVEL_INSTANCE
      lock_name = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
      lock_level = locking.LEVEL_NODEGROUP
      lock_name = self.group_uuid
    else:
      # Cluster tags need no per-object lock
      lock_level = None
      lock_name = None

    if lock_level and getattr(self.op, "use_locking", True):
      self.needed_locks[lock_level] = lock_name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())
14781 class LUTagsSearch(NoHooksLU):
14782 """Searches the tags for a given pattern.
14787 def ExpandNames(self):
14788 self.needed_locks = {}
14790 def CheckPrereq(self):
14791 """Check prerequisites.
14793 This checks the pattern passed for validity by compiling it.
14797 self.re = re.compile(self.op.pattern)
14798 except re.error, err:
14799 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14800 (self.op.pattern, err), errors.ECODE_INVAL)
14802 def Exec(self, feedback_fn):
14803 """Returns the tag list.
14807 tgts = [("/cluster", cfg.GetClusterInfo())]
14808 ilist = cfg.GetAllInstancesInfo().values()
14809 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14810 nlist = cfg.GetAllNodesInfo().values()
14811 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14812 tgts.extend(("/nodegroup/%s" % n.name, n)
14813 for n in cfg.GetAllNodeGroupsInfo().values())
14815 for path, target in tgts:
14816 for tag in target.GetTags():
14817 if self.re.search(tag):
14818 results.append((path, tag))
14822 class LUTagsSet(TagsLU):
14823 """Sets a tag on a given object.
14828 def CheckPrereq(self):
14829 """Check prerequisites.
14831 This checks the type and length of the tag name and value.
14834 TagsLU.CheckPrereq(self)
14835 for tag in self.op.tags:
14836 objects.TaggableObject.ValidateTag(tag)
14838 def Exec(self, feedback_fn):
14843 for tag in self.op.tags:
14844 self.target.AddTag(tag)
14845 except errors.TagError, err:
14846 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14847 self.cfg.Update(self.target, feedback_fn)
class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
14932 class LUTestJqueue(NoHooksLU):
14933 """Utility LU to test some aspects of the job queue.
14938 # Must be lower than default timeout for WaitForJobChange to see whether it
14939 # notices changed jobs
14940 _CLIENT_CONNECT_TIMEOUT = 20.0
14941 _CLIENT_CONFIRM_TIMEOUT = 60.0
14944 def _NotifyUsingSocket(cls, cb, errcls):
14945 """Opens a Unix socket and waits for another program to connect.
14948 @param cb: Callback to send socket name to client
14949 @type errcls: class
14950 @param errcls: Exception class to use for errors
14953 # Using a temporary directory as there's no easy way to create temporary
14954 # sockets without writing a custom loop around tempfile.mktemp and
14956 tmpdir = tempfile.mkdtemp()
14958 tmpsock = utils.PathJoin(tmpdir, "sock")
14960 logging.debug("Creating temporary socket at %s", tmpsock)
14961 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14966 # Send details to client
14969 # Wait for client to connect before continuing
14970 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14972 (conn, _) = sock.accept()
14973 except socket.error, err:
14974 raise errcls("Client didn't connect in time (%s)" % err)
14978 # Remove as soon as client is connected
14979 shutil.rmtree(tmpdir)
14981 # Wait for client to close
14984 # pylint: disable=E1101
14985 # Instance of '_socketobject' has no ... member
14986 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14988 except socket.error, err:
14989 raise errcls("Client failed to confirm notification (%s)" % err)
14993 def _SendNotification(self, test, arg, sockname):
14994 """Sends a notification to the client.
14997 @param test: Test name
14998 @param arg: Test argument (depends on test)
14999 @type sockname: string
15000 @param sockname: Socket path
15003 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15005 def _Notify(self, prereq, test, arg):
15006 """Notifies the client of a test.
15009 @param prereq: Whether this is a prereq-phase test
15011 @param test: Test name
15012 @param arg: Test argument (depends on test)
15016 errcls = errors.OpPrereqError
15018 errcls = errors.OpExecError
15020 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
15024 def CheckArguments(self):
15025 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
15026 self.expandnames_calls = 0
15028 def ExpandNames(self):
15029 checkargs_calls = getattr(self, "checkargs_calls", 0)
15030 if checkargs_calls < 1:
15031 raise errors.ProgrammerError("CheckArguments was not called")
15033 self.expandnames_calls += 1
15035 if self.op.notify_waitlock:
15036 self._Notify(True, constants.JQT_EXPANDNAMES, None)
15038 self.LogInfo("Expanding names")
15040 # Get lock on master node (just to get a lock, not for a particular reason)
15041 self.needed_locks = {
15042 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
15045 def Exec(self, feedback_fn):
15046 if self.expandnames_calls < 1:
15047 raise errors.ProgrammerError("ExpandNames was not called")
15049 if self.op.notify_exec:
15050 self._Notify(False, constants.JQT_EXEC, None)
15052 self.LogInfo("Executing")
15054 if self.op.log_messages:
15055 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
15056 for idx, msg in enumerate(self.op.log_messages):
15057 self.LogInfo("Sending log message %s", idx + 1)
15058 feedback_fn(constants.JQT_MSGPREFIX + msg)
15059 # Report how many test messages have been sent
15060 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
15063 raise errors.OpExecError("Opcode failure was requested")
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the director and mode test.

    """
    if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
                        constants.IALLOCATOR_MODE_MULTI_ALLOC):
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      req = iallocator.IAReqInstanceAlloc(name=self.op.name,
                                          memory=self.op.memory,
                                          disks=self.op.disks,
                                          disk_template=self.op.disk_template,
                                          os=self.op.os,
                                          tags=self.op.tags,
                                          nics=self.op.nics,
                                          vcpus=self.op.vcpus,
                                          spindle_use=self.op.spindle_use,
                                          hypervisor=self.op.hypervisor)
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      req = iallocator.IAReqRelocate(name=self.op.name,
                                     relocate_from=list(self.relocate_from))
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      req = iallocator.IAReqGroupChange(instances=self.op.instances,
                                        target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      req = iallocator.IAReqNodeEvac(instances=self.op.instances,
                                     evac_mode=self.op.evac_mode)
    elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
      disk_template = self.op.disk_template
      insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
                                             memory=self.op.memory,
                                             disks=self.op.disks,
                                             disk_template=disk_template,
                                             os=self.op.os,
                                             tags=self.op.tags,
                                             nics=self.op.nics,
                                             vcpus=self.op.vcpus,
                                             spindle_use=self.op.spindle_use,
                                             hypervisor=self.op.hypervisor)
               for idx in range(self.op.count)]
      req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
    else:
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    ial = iallocator.IAllocator(self.cfg, self.rpc, req)
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)