4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
import copy
import itertools
import logging
import re

import OpenSSL

44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
56 from ganeti import query
57 from ganeti import qlang
58 from ganeti import opcodes
60 from ganeti import rpc
61 from ganeti import runtime
62 from ganeti import pathutils
63 from ganeti import vcluster
64 from ganeti.masterd import iallocator
66 import ganeti.masterd.instance # pylint: disable=W0611
70 INSTANCE_DOWN = [constants.ADMINST_DOWN]
71 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
72 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
74 #: Instance status in which an instance can be marked as offline/online
75 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
76 constants.ADMINST_OFFLINE,
81 """Data container for LU results with jobs.
83 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
84 by L{mcpu._ProcessResult}. The latter will then submit the jobs
85 contained in the C{jobs} attribute and include the job IDs in the opcode
89 def __init__(self, jobs, **kwargs):
90 """Initializes this class.
92 Additional return values can be specified as keyword arguments.
94 @type jobs: list of lists of L{opcodes.OpCode}
95 @param jobs: A list of lists of opcode objects
102 class LogicalUnit(object):
103 """Logical Unit base class.
105 Subclasses must follow these rules:
106 - implement ExpandNames
107 - implement CheckPrereq (except when tasklets are used)
108 - implement Exec (except when tasklets are used)
109 - implement BuildHooksEnv
110 - implement BuildHooksNodes
111 - redefine HPATH and HTYPE
112 - optionally redefine their run requirements:
113 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
115 Note that all commands require root permissions.
117 @ivar dry_run_result: the value (if any) that will be returned to the caller
118 in dry-run mode (signalled by opcode dry_run parameter)
125 def __init__(self, processor, op, context, rpc_runner):
126 """Constructor for LogicalUnit.
128 This needs to be overridden in derived classes in order to check op
132 self.proc = processor
134 self.cfg = context.cfg
135 self.glm = context.glm
137 self.owned_locks = context.glm.list_owned
138 self.context = context
139 self.rpc = rpc_runner
140 # Dicts used to declare locking needs to mcpu
141 self.needed_locks = None
142 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
144 self.remove_locks = {}
145 # Used to force good behavior when calling helper functions
146 self.recalculate_locks = {}
148 self.Log = processor.Log # pylint: disable=C0103
149 self.LogWarning = processor.LogWarning # pylint: disable=C0103
150 self.LogInfo = processor.LogInfo # pylint: disable=C0103
151 self.LogStep = processor.LogStep # pylint: disable=C0103
152 # support for dry-run
153 self.dry_run_result = None
154 # support for generic debug attribute
155 if (not hasattr(self.op, "debug_level") or
156 not isinstance(self.op.debug_level, int)):
157 self.op.debug_level = 0
162 # Validate opcode parameters and set defaults
163 self.op.Validate(True)
165 self.CheckArguments()
167 def CheckArguments(self):
168 """Check syntactic validity for the opcode arguments.
170 This method is for doing a simple syntactic check and ensuring the
171 validity of opcode parameters, without any cluster-related
172 checks. While the same can be accomplished in ExpandNames and/or
173 CheckPrereq, doing these separately is better because:
175 - ExpandNames is left purely as a lock-related function
176 - CheckPrereq is run after we have acquired locks (and possible
179 The function is allowed to change the self.op attribute so that
180 later methods need no longer worry about missing parameters.
185 def ExpandNames(self):
186 """Expand names for this LU.
188 This method is called before starting to execute the opcode, and it should
189 update all the parameters of the opcode to their canonical form (e.g. a
190 short node name must be fully expanded after this method has successfully
191 completed). This way locking, hooks, logging, etc. can work correctly.
193 LUs which implement this method must also populate the self.needed_locks
194 member, as a dict with lock levels as keys, and a list of needed lock names
197 - use an empty dict if you don't need any lock
198 - if you don't need any lock at a particular level omit that
199 level (note that in this case C{DeclareLocks} won't be called
200 at all for that level)
201 - if you need locks at a level, but you can't calculate it in
202 this function, initialise that level with an empty list and do
203 further processing in L{LogicalUnit.DeclareLocks} (see that
204 function's docstring)
205 - don't put anything for the BGL level
206 - if you want all locks at a level use L{locking.ALL_SET} as a value
208 If you need to share locks (rather than acquire them exclusively) at one
209 level you can modify self.share_locks, setting a true value (usually 1) for
210 that level. By default locks are not shared.
212 This function can also define a list of tasklets, which then will be
213 executed in order instead of the usual LU-level CheckPrereq and Exec
214 functions, if those are not defined by the LU.
218 # Acquire all nodes and one instance
219 self.needed_locks = {
220 locking.LEVEL_NODE: locking.ALL_SET,
221 locking.LEVEL_INSTANCE: ['instance1.example.com'],
223 # Acquire just two nodes
224 self.needed_locks = {
225 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
228 self.needed_locks = {} # No, you can't leave it to the default value None
231 # The implementation of this method is mandatory only if the new LU is
232 # concurrent, so that old LUs don't need to be changed all at the same
235 self.needed_locks = {} # Exclusive LUs don't need locks.
237 raise NotImplementedError
239 def DeclareLocks(self, level):
240 """Declare LU locking needs for a level
242 While most LUs can just declare their locking needs at ExpandNames time,
243 sometimes there's the need to calculate some locks after having acquired
244 the ones before. This function is called just before acquiring locks at a
245 particular level, but after acquiring the ones at lower levels, and permits
246 such calculations. It can be used to modify self.needed_locks, and by
247 default it does nothing.
249 This function is only called if you have something already set in
250 self.needed_locks for the level.
252 @param level: Locking level which is going to be locked
253 @type level: member of L{ganeti.locking.LEVELS}
257 def CheckPrereq(self):
258 """Check prerequisites for this LU.
260 This method should check that the prerequisites for the execution
261 of this LU are fulfilled. It can do internode communication, but
262 it should be idempotent - no cluster or system changes are
265 The method should raise errors.OpPrereqError in case something is
266 not fulfilled. Its return value is ignored.
268 This method should also update all the parameters of the opcode to
269 their canonical form if it hasn't been done by ExpandNames before.
272 if self.tasklets is not None:
273 for (idx, tl) in enumerate(self.tasklets):
274 logging.debug("Checking prerequisites for tasklet %s/%s",
275 idx + 1, len(self.tasklets))
280 def Exec(self, feedback_fn):
283 This method should implement the actual work. It should raise
284 errors.OpExecError for failures that are somewhat dealt with in
288 if self.tasklets is not None:
289 for (idx, tl) in enumerate(self.tasklets):
290 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
293 raise NotImplementedError
295 def BuildHooksEnv(self):
296 """Build hooks environment for this LU.
299 @return: Dictionary containing the environment that will be used for
300 running the hooks for this LU. The keys of the dict must not be prefixed
301 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
302 will extend the environment with additional variables. If no environment
303 should be defined, an empty dictionary should be returned (not C{None}).
304 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
308 raise NotImplementedError
310 def BuildHooksNodes(self):
311 """Build list of nodes to run LU's hooks.
313 @rtype: tuple; (list, list)
314 @return: Tuple containing a list of node names on which the hook
315 should run before the execution and a list of node names on which the
316 hook should run after the execution. If no nodes are needed, return an
317 empty list (and not None).
318 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
322 raise NotImplementedError
324 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
325 """Notify the LU about the results of its hooks.
327 This method is called every time a hooks phase is executed, and notifies
328 the Logical Unit about the hooks' result. The LU can then use it to alter
329 its result based on the hooks. By default the method does nothing and the
330 previous result is passed back unchanged but any LU can define it if it
331 wants to use the local cluster hook-scripts somehow.
333 @param phase: one of L{constants.HOOKS_PHASE_POST} or
334 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
335 @param hook_results: the results of the multi-node hooks rpc call
336 @param feedback_fn: function used to send feedback back to the caller
337 @param lu_result: the previous Exec result this LU had, or None
339 @return: the new Exec result, based on the previous result
343 # API must be kept, thus we ignore the unused-argument and
344 # method-could-be-a-function warnings
345 # pylint: disable=W0613,R0201
348 def _ExpandAndLockInstance(self):
349 """Helper function to expand and lock an instance.
351 Many LUs that work on an instance take its name in self.op.instance_name
352 and need to expand it and then declare the expanded name for locking. This
353 function does it, and then updates self.op.instance_name to the expanded
354 name. It also initializes needed_locks as a dict, if this hasn't been done
358 if self.needed_locks is None:
359 self.needed_locks = {}
361 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
362 "_ExpandAndLockInstance called with instance-level locks set"
363 self.op.instance_name = _ExpandInstanceName(self.cfg,
364 self.op.instance_name)
365 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
367 def _LockInstancesNodes(self, primary_only=False,
368 level=locking.LEVEL_NODE):
369 """Helper function to declare instances' nodes for locking.
371 This function should be called after locking one or more instances to lock
372 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
373 with all primary or secondary nodes for instances already locked and
374 present in self.needed_locks[locking.LEVEL_INSTANCE].
376 It should be called from DeclareLocks, and for safety only works if
377 self.recalculate_locks[locking.LEVEL_NODE] is set.
379 In the future it may grow parameters to lock just some instances' nodes, or
380 to lock just primary or secondary nodes, if needed.
382 It should be called in DeclareLocks in a way similar to::
384 if level == locking.LEVEL_NODE:
385 self._LockInstancesNodes()
387 @type primary_only: boolean
388 @param primary_only: only lock primary nodes of locked instances
389 @param level: Which lock level to use for locking nodes
392 assert level in self.recalculate_locks, \
393 "_LockInstancesNodes helper function called with no nodes to recalculate"
395 # TODO: check if we've really been called with the instance locks held
397 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
398 # future we might want to have different behaviors depending on the value
399 # of self.recalculate_locks[locking.LEVEL_NODE]
401 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
402 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
403 wanted_nodes.append(instance.primary_node)
405 wanted_nodes.extend(instance.secondary_nodes)
407 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
408 self.needed_locks[level] = wanted_nodes
409 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
410 self.needed_locks[level].extend(wanted_nodes)
412 raise errors.ProgrammerError("Unknown recalculation mode")
414 del self.recalculate_locks[level]
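# A hedged illustration (hypothetical LU, not from this module) of how the
# helpers above are typically combined: an instance LU expands and locks its
# instance in ExpandNames, then picks up the node locks in DeclareLocks:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()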
417 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
418 """Simple LU which runs no hooks.
420 This LU is intended as a parent for other LogicalUnits which will
421 run no hooks, in order to reduce duplicate code.
427 def BuildHooksEnv(self):
428 """Empty BuildHooksEnv for NoHooksLU.
430 This just raises an error.
433 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
435 def BuildHooksNodes(self):
436 """Empty BuildHooksNodes for NoHooksLU.
439 raise AssertionError("BuildHooksNodes called for NoHooksLU")
443 """Tasklet base class.
445 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
446 they can mix legacy code with tasklets. Locking needs to be done in the LU,
447 tasklets know nothing about locks.
449 Subclasses must follow these rules:
450 - Implement CheckPrereq
454 def __init__(self, lu):
461 def CheckPrereq(self):
462 """Check prerequisites for this tasklet.
464 This method should check whether the prerequisites for the execution of
465 this tasklet are fulfilled. It can do internode communication, but it
466 should be idempotent - no cluster or system changes are allowed.
468 The method should raise errors.OpPrereqError in case something is not
469 fulfilled. Its return value is ignored.
471 This method should also update all parameters to their canonical form if it
472 hasn't been done before.
477 def Exec(self, feedback_fn):
478 """Execute the tasklet.
480 This method should implement the actual work. It should raise
481 errors.OpExecError for failures that are somewhat dealt with in code, or
485 raise NotImplementedError
489 """Base for query utility classes.
492 #: Attribute holding field definitions
498 def __init__(self, qfilter, fields, use_locking):
499 """Initializes this class.
502 self.use_locking = use_locking
504 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
505 namefield=self.SORT_FIELD)
506 self.requested_data = self.query.RequestedData()
507 self.names = self.query.RequestedNames()
509 # Sort only if no names were requested
510 self.sort_by_name = not self.names
512 self.do_locking = None
515 def _GetNames(self, lu, all_names, lock_level):
516 """Helper function to determine names asked for in the query.
520 names = lu.owned_locks(lock_level)
524 if self.wanted == locking.ALL_SET:
525 assert not self.names
526 # caller didn't specify names, so ordering is not important
527 return utils.NiceSort(names)
529 # caller specified names and we must keep the same order
531 assert not self.do_locking or lu.glm.is_owned(lock_level)
533 missing = set(self.wanted).difference(names)
535 raise errors.OpExecError("Some items were removed before retrieving"
536 " their data: %s" % missing)
538 # Return expanded names
541 def ExpandNames(self, lu):
542 """Expand names for this query.
544 See L{LogicalUnit.ExpandNames}.
547 raise NotImplementedError()
549 def DeclareLocks(self, lu, level):
550 """Declare locks for this query.
552 See L{LogicalUnit.DeclareLocks}.
555 raise NotImplementedError()
557 def _GetQueryData(self, lu):
558 """Collects all data for this query.
560 @return: Query data object
563 raise NotImplementedError()
565 def NewStyleQuery(self, lu):
566 """Collect data and execute query.
569 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
570 sort_by_name=self.sort_by_name)
572 def OldStyleQuery(self, lu):
573 """Collect data and execute query.
576 return self.query.OldStyleQuery(self._GetQueryData(lu),
577 sort_by_name=self.sort_by_name)
581 """Returns a dict declaring all lock levels shared.
584 return dict.fromkeys(locking.LEVELS, 1)
587 def _AnnotateDiskParams(instance, devs, cfg):
588 """Little helper wrapper to the rpc annotation method.
590 @param instance: The instance object
591 @type devs: List of L{objects.Disk}
592 @param devs: The root devices (not any of its children!)
593 @param cfg: The config object
594 @return: The annotated disk copies
595 @see L{rpc.AnnotateDiskParams}
598 return rpc.AnnotateDiskParams(instance.disk_template, devs,
599 cfg.GetInstanceDiskParams(instance))
602 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
604 """Checks if node groups for locked instances are still correct.
606 @type cfg: L{config.ConfigWriter}
607 @param cfg: Cluster configuration
608 @type instances: dict; string as key, L{objects.Instance} as value
609 @param instances: Dictionary, instance name as key, instance object as value
610 @type owned_groups: iterable of string
611 @param owned_groups: List of owned groups
612 @type owned_nodes: iterable of string
613 @param owned_nodes: List of owned nodes
614 @type cur_group_uuid: string or None
615 @param cur_group_uuid: Optional group UUID to check against instance's groups
618 for (name, inst) in instances.items():
619 assert owned_nodes.issuperset(inst.all_nodes), \
620 "Instance %s's nodes changed while we kept the lock" % name
622 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
624 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
625 "Instance %s has no node in group %s" % (name, cur_group_uuid)
628 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
630 """Checks if the owned node groups are still correct for an instance.
632 @type cfg: L{config.ConfigWriter}
633 @param cfg: The cluster configuration
634 @type instance_name: string
635 @param instance_name: Instance name
636 @type owned_groups: set or frozenset
637 @param owned_groups: List of currently owned node groups
638 @type primary_only: boolean
639 @param primary_only: Whether to check node groups for only the primary node
642 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
644 if not owned_groups.issuperset(inst_groups):
645 raise errors.OpPrereqError("Instance %s's node groups changed since"
646 " locks were acquired, current groups"
647 " are '%s', owning groups '%s'; retry the"
650 utils.CommaJoin(inst_groups),
651 utils.CommaJoin(owned_groups)),
657 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
658 """Checks if the instances in a node group are still correct.
660 @type cfg: L{config.ConfigWriter}
661 @param cfg: The cluster configuration
662 @type group_uuid: string
663 @param group_uuid: Node group UUID
664 @type owned_instances: set or frozenset
665 @param owned_instances: List of currently owned instances
668 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
669 if owned_instances != wanted_instances:
670 raise errors.OpPrereqError("Instances in node group '%s' changed since"
671 " locks were acquired, wanted '%s', have '%s';"
672 " retry the operation" %
674 utils.CommaJoin(wanted_instances),
675 utils.CommaJoin(owned_instances)),
678 return wanted_instances
681 def _SupportsOob(cfg, node):
682 """Tells whether a node supports OOB.
684 @type cfg: L{config.ConfigWriter}
685 @param cfg: The cluster configuration
686 @type node: L{objects.Node}
687 @param node: The node
688 @return: The OOB script if supported or an empty string otherwise
691 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
694 def _CopyLockList(names):
695 """Makes a copy of a list of lock names.
697 Handles L{locking.ALL_SET} correctly.
700 if names == locking.ALL_SET:
701 return locking.ALL_SET
706 def _GetWantedNodes(lu, nodes):
707 """Returns list of checked and expanded node names.
709 @type lu: L{LogicalUnit}
710 @param lu: the logical unit on whose behalf we execute
712 @param nodes: list of node names or None for all nodes
714 @return: the list of nodes, sorted
715 @raise errors.ProgrammerError: if the nodes parameter is wrong type
719 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
721 return utils.NiceSort(lu.cfg.GetNodeList())
724 def _GetWantedInstances(lu, instances):
725 """Returns list of checked and expanded instance names.
727 @type lu: L{LogicalUnit}
728 @param lu: the logical unit on whose behalf we execute
729 @type instances: list
730 @param instances: list of instance names or None for all instances
732 @return: the list of instances, sorted
733 @raise errors.OpPrereqError: if the instances parameter is wrong type
734 @raise errors.OpPrereqError: if any of the passed instances is not found
738 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
740 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
744 def _GetUpdatedParams(old_params, update_dict,
745 use_default=True, use_none=False):
746 """Return the new version of a parameter dictionary.
748 @type old_params: dict
749 @param old_params: old parameters
750 @type update_dict: dict
751 @param update_dict: dict containing new parameter values, or
752 constants.VALUE_DEFAULT to reset the parameter to its default
754 @type use_default: boolean
755 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
756 values as 'to be deleted' values
757 @type use_none: boolean
758 @param use_none: whether to recognise C{None} values as 'to be
761 @return: the new parameter dictionary
764 params_copy = copy.deepcopy(old_params)
765 for key, val in update_dict.iteritems():
766 if ((use_default and val == constants.VALUE_DEFAULT) or
767 (use_none and val is None)):
773 params_copy[key] = val
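# A usage sketch for _GetUpdatedParams (values illustrative only): with the
# default use_default=True, VALUE_DEFAULT entries are dropped so the parameter
# reverts to its inherited default, while other keys are added or overwritten:
#
#   old = {"kernel_path": "/vmlinuz", "root_path": "/dev/xvda1"}
#   upd = {"kernel_path": constants.VALUE_DEFAULT, "serial_console": True}
#   _GetUpdatedParams(old, upd)
#   # -> {"root_path": "/dev/xvda1", "serial_console": True}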
777 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
778 """Return the new version of an instance policy.
780 @param group_policy: whether this policy applies to a group and thus
781 we should support removal of policy entries
784 use_none = use_default = group_policy
785 ipolicy = copy.deepcopy(old_ipolicy)
786 for key, value in new_ipolicy.items():
787 if key not in constants.IPOLICY_ALL_KEYS:
788 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
790 if key in constants.IPOLICY_ISPECS:
791 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
792 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
794 use_default=use_default)
796 if (not value or value == [constants.VALUE_DEFAULT] or
797 value == constants.VALUE_DEFAULT):
801 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
802 " on the cluster" % key,
805 if key in constants.IPOLICY_PARAMETERS:
806 # FIXME: we assume all such values are float
808 ipolicy[key] = float(value)
809 except (TypeError, ValueError), err:
810 raise errors.OpPrereqError("Invalid value for attribute"
811 " '%s': '%s', error: %s" %
812 (key, value, err), errors.ECODE_INVAL)
814 # FIXME: we assume all others are lists; this should be redone
816 ipolicy[key] = list(value)
818 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
819 except errors.ConfigurationError, err:
820 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
825 def _UpdateAndVerifySubDict(base, updates, type_check):
826 """Updates and verifies a dict with sub dicts of the same type.
828 @param base: The dict with the old data
829 @param updates: The dict with the new data
830 @param type_check: Dict suitable to ForceDictType to verify correct types
831 @returns: A new dict with updated and verified values
835 new = _GetUpdatedParams(old, value)
836 utils.ForceDictType(new, type_check)
839 ret = copy.deepcopy(base)
840 ret.update(dict((key, fn(base.get(key, {}), value))
841 for key, value in updates.items()))
845 def _MergeAndVerifyHvState(op_input, obj_input):
846 """Combines the hv state from an opcode with that of the object.
848 @param op_input: The input dict from the opcode
849 @param obj_input: The input dict from the objects
850 @return: The verified and updated dict
854 invalid_hvs = set(op_input) - constants.HYPER_TYPES
856 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
857 " %s" % utils.CommaJoin(invalid_hvs),
859 if obj_input is None:
861 type_check = constants.HVSTS_PARAMETER_TYPES
862 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
867 def _MergeAndVerifyDiskState(op_input, obj_input):
868 """Combines the disk state from an opcode with that of the object.
870 @param op_input: The input dict from the opcode
871 @param obj_input: The input dict from the objects
872 @return: The verified and updated dict
875 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
877 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
878 utils.CommaJoin(invalid_dst),
880 type_check = constants.DSS_PARAMETER_TYPES
881 if obj_input is None:
883 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
885 for key, value in op_input.items())
890 def _ReleaseLocks(lu, level, names=None, keep=None):
891 """Releases locks owned by an LU.
893 @type lu: L{LogicalUnit}
894 @param level: Lock level
895 @type names: list or None
896 @param names: Names of locks to release
897 @type keep: list or None
898 @param keep: Names of locks to retain
901 assert not (keep is not None and names is not None), \
902 "Only one of the 'names' and the 'keep' parameters can be given"
904 if names is not None:
905 should_release = names.__contains__
907 should_release = lambda name: name not in keep
909 should_release = None
911 owned = lu.owned_locks(level)
913 # Not owning any lock at this level, do nothing
920 # Determine which locks to release
922 if should_release(name):
927 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
929 # Release just some locks
930 lu.glm.release(level, names=release)
932 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
935 lu.glm.release(level)
937 assert not lu.glm.is_owned(level), "No locks should be owned"
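# A hedged usage sketch: once an LU has settled on a single target node, it
# can drop every other node lock it still holds (the attribute name
# self.op.node_name is illustrative):
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])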
940 def _MapInstanceDisksToNodes(instances):
941 """Creates a map from (node, volume) to instance name.
943 @type instances: list of L{objects.Instance}
944 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
947 return dict(((node, vol), inst.name)
948 for inst in instances
949 for (node, vols) in inst.MapLVsByNode().items()
953 def _RunPostHook(lu, node_name):
954 """Runs the post-hook for an opcode on a single node.
957 hm = lu.proc.BuildHooksManager(lu)
959 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
960 except Exception, err: # pylint: disable=W0703
961 lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
964 def _CheckOutputFields(static, dynamic, selected):
965 """Checks whether all selected fields are valid.
967 @type static: L{utils.FieldSet}
968 @param static: static fields set
969 @type dynamic: L{utils.FieldSet}
970 @param dynamic: dynamic fields set
977 delta = f.NonMatching(selected)
979 raise errors.OpPrereqError("Unknown output fields selected: %s"
980 % ",".join(delta), errors.ECODE_INVAL)
983 def _CheckGlobalHvParams(params):
984 """Validates that given hypervisor params are not global ones.
986 This will ensure that instances don't get customised versions of
990 used_globals = constants.HVC_GLOBALS.intersection(params)
992 msg = ("The following hypervisor parameters are global and cannot"
993 " be customized at instance level, please modify them at"
994 " cluster level: %s" % utils.CommaJoin(used_globals))
995 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
998 def _CheckNodeOnline(lu, node, msg=None):
999 """Ensure that a given node is online.
1001 @param lu: the LU on behalf of which we make the check
1002 @param node: the node to check
1003 @param msg: if passed, should be a message to replace the default one
1004 @raise errors.OpPrereqError: if the node is offline
1008 msg = "Can't use offline node"
1009 if lu.cfg.GetNodeInfo(node).offline:
1010 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1013 def _CheckNodeNotDrained(lu, node):
1014 """Ensure that a given node is not drained.
1016 @param lu: the LU on behalf of which we make the check
1017 @param node: the node to check
1018 @raise errors.OpPrereqError: if the node is drained
1021 if lu.cfg.GetNodeInfo(node).drained:
1022 raise errors.OpPrereqError("Can't use drained node %s" % node,
1026 def _CheckNodeVmCapable(lu, node):
1027 """Ensure that a given node is vm capable.
1029 @param lu: the LU on behalf of which we make the check
1030 @param node: the node to check
1031 @raise errors.OpPrereqError: if the node is not vm capable
1034 if not lu.cfg.GetNodeInfo(node).vm_capable:
1035 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1039 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1040 """Ensure that a node supports a given OS.
1042 @param lu: the LU on behalf of which we make the check
1043 @param node: the node to check
1044 @param os_name: the OS to query about
1045 @param force_variant: whether to ignore variant errors
1046 @raise errors.OpPrereqError: if the node does not support the OS
1049 result = lu.rpc.call_os_get(node, os_name)
1050 result.Raise("OS '%s' not in supported OS list for node %s" %
1052 prereq=True, ecode=errors.ECODE_INVAL)
1053 if not force_variant:
1054 _CheckOSVariant(result.payload, os_name)
1057 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1058 """Ensure that a node has the given secondary ip.
1060 @type lu: L{LogicalUnit}
1061 @param lu: the LU on behalf of which we make the check
1063 @param node: the node to check
1064 @type secondary_ip: string
1065 @param secondary_ip: the ip to check
1066 @type prereq: boolean
1067 @param prereq: whether to throw a prerequisite or an execute error
1068 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1069 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1072 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1073 result.Raise("Failure checking secondary ip on node %s" % node,
1074 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1075 if not result.payload:
1076 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1077 " please fix and re-run this command" % secondary_ip)
1079 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1081 raise errors.OpExecError(msg)
1084 def _GetClusterDomainSecret():
1085 """Reads the cluster domain secret.
1088 return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
1092 def _CheckInstanceState(lu, instance, req_states, msg=None):
1093 """Ensure that an instance is in one of the required states.
1095 @param lu: the LU on behalf of which we make the check
1096 @param instance: the instance to check
1097 @param msg: if passed, should be a message to replace the default one
1098 @raise errors.OpPrereqError: if the instance is not in the required state
1102 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1103 if instance.admin_state not in req_states:
1104 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1105 (instance.name, instance.admin_state, msg),
1108 if constants.ADMINST_UP not in req_states:
1109 pnode = instance.primary_node
1110 if not lu.cfg.GetNodeInfo(pnode).offline:
1111 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1112 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1113 prereq=True, ecode=errors.ECODE_ENVIRON)
1114 if instance.name in ins_l.payload:
1115 raise errors.OpPrereqError("Instance %s is running, %s" %
1116 (instance.name, msg), errors.ECODE_STATE)
1118 lu.LogWarning("Primary node offline, ignoring check that instance"
1122 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1123 """Computes if value is in the desired range.
1125 @param name: name of the parameter for which we perform the check
1126 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1128 @param ipolicy: dictionary containing min, max and std values
1129 @param value: actual value that we want to use
1130 @return: None or element not meeting the criteria
1134 if value in [None, constants.VALUE_AUTO]:
1136 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1137 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1138 if value > max_v or min_v > value:
1140 fqn = "%s/%s" % (name, qualifier)
1143 return ("%s value %s is not in range [%s, %s]" %
1144 (fqn, value, min_v, max_v))
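# A worked sketch of the range check above, assuming a minimal policy built
# from the ISPECS_MIN/ISPECS_MAX constants (numbers illustrative):
#
#   ipolicy = {
#     constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
#     constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 4096},
#   }
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, ipolicy, 8192)
#   # -> message saying 8192 is not in range [128, 4096]
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, ipolicy, 2048)
#   # -> None (value within range)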
1148 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1149 nic_count, disk_sizes, spindle_use,
1150 _compute_fn=_ComputeMinMaxSpec):
1151 """Verifies ipolicy against provided specs.
1154 @param ipolicy: The ipolicy
1156 @param mem_size: The memory size
1157 @type cpu_count: int
1158 @param cpu_count: Used cpu cores
1159 @type disk_count: int
1160 @param disk_count: Number of disks used
1161 @type nic_count: int
1162 @param nic_count: Number of nics used
1163 @type disk_sizes: list of ints
1164 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1165 @type spindle_use: int
1166 @param spindle_use: The number of spindles this instance uses
1167 @param _compute_fn: The compute function (unittest only)
1168 @return: A list of violations, or an empty list if no violations are found
1171 assert disk_count == len(disk_sizes)
1174 (constants.ISPEC_MEM_SIZE, "", mem_size),
1175 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1176 (constants.ISPEC_DISK_COUNT, "", disk_count),
1177 (constants.ISPEC_NIC_COUNT, "", nic_count),
1178 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1179 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1180 for idx, d in enumerate(disk_sizes)]
1183 (_compute_fn(name, qualifier, ipolicy, value)
1184 for (name, qualifier, value) in test_settings))
1187 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1188 _compute_fn=_ComputeIPolicySpecViolation):
1189 """Compute if instance meets the specs of ipolicy.
1192 @param ipolicy: The ipolicy to verify against
1193 @type instance: L{objects.Instance}
1194 @param instance: The instance to verify
1195 @param _compute_fn: The function to verify ipolicy (unittest only)
1196 @see: L{_ComputeIPolicySpecViolation}
1199 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1200 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1201 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1202 disk_count = len(instance.disks)
1203 disk_sizes = [disk.size for disk in instance.disks]
1204 nic_count = len(instance.nics)
1206 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1207 disk_sizes, spindle_use)
1210 def _ComputeIPolicyInstanceSpecViolation(
1211 ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
1212 """Compute if instance specs meet the specs of ipolicy.
1215 @param ipolicy: The ipolicy to verify against
1216 @type instance_spec: dict
1217 @param instance_spec: The instance spec to verify
1218 @param _compute_fn: The function to verify ipolicy (unittest only)
1219 @see: L{_ComputeIPolicySpecViolation}
1222 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1223 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1224 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1225 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1226 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1227 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1229 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1230 disk_sizes, spindle_use)
1233 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1235 _compute_fn=_ComputeIPolicyInstanceViolation):
1236 """Compute if instance meets the specs of the new target group.
1238 @param ipolicy: The ipolicy to verify
1239 @param instance: The instance object to verify
1240 @param current_group: The current group of the instance
1241 @param target_group: The new group of the instance
1242 @param _compute_fn: The function to verify ipolicy (unittest only)
1243 @see: L{_ComputeIPolicySpecViolation}
1246 if current_group == target_group:
1249 return _compute_fn(ipolicy, instance)
1252 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1253 _compute_fn=_ComputeIPolicyNodeViolation):
1254 """Checks that the target node is correct in terms of instance policy.
1256 @param ipolicy: The ipolicy to verify
1257 @param instance: The instance object to verify
1258 @param node: The node to relocate the instance to
1259 @param ignore: Ignore violations of the ipolicy
1260 @param _compute_fn: The function to verify ipolicy (unittest only)
1261 @see: L{_ComputeIPolicySpecViolation}
1264 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1265 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1268 msg = ("Instance does not meet target node group's (%s) instance"
1269 " policy: %s") % (node.group, utils.CommaJoin(res))
1273 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1276 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1277 """Computes a set of any instances that would violate the new ipolicy.
1279 @param old_ipolicy: The current (still in-place) ipolicy
1280 @param new_ipolicy: The new (to become) ipolicy
1281 @param instances: List of instances to verify
1282 @return: A list of instances which violate the new ipolicy but
1286 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1287 _ComputeViolatingInstances(old_ipolicy, instances))
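# A hedged example of the set difference above: instances that already broke
# the old policy are not reported again, only newly violating ones are
# (instance objects and names illustrative):
#
#   # inst_a violates both policies, inst_b only the new one
#   _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, [inst_a, inst_b])
#   # -> a frozenset containing only inst_b's name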
1290 def _ExpandItemName(fn, name, kind):
1291 """Expand an item name.
1293 @param fn: the function to use for expansion
1294 @param name: requested item name
1295 @param kind: text description ('Node' or 'Instance')
1296 @return: the resolved (full) name
1297 @raise errors.OpPrereqError: if the item is not found
1300 full_name = fn(name)
1301 if full_name is None:
1302 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1307 def _ExpandNodeName(cfg, name):
1308 """Wrapper over L{_ExpandItemName} for nodes."""
1309 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1312 def _ExpandInstanceName(cfg, name):
1313 """Wrapper over L{_ExpandItemName} for instance."""
1314 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1317 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1318 minmem, maxmem, vcpus, nics, disk_template, disks,
1319 bep, hvp, hypervisor_name, tags):
1320 """Builds instance related env variables for hooks
1322 This builds the hook environment from individual variables.
1325 @param name: the name of the instance
1326 @type primary_node: string
1327 @param primary_node: the name of the instance's primary node
1328 @type secondary_nodes: list
1329 @param secondary_nodes: list of secondary nodes as strings
1330 @type os_type: string
1331 @param os_type: the name of the instance's OS
1332 @type status: string
1333 @param status: the desired status of the instance
1334 @type minmem: string
1335 @param minmem: the minimum memory size of the instance
1336 @type maxmem: string
1337 @param maxmem: the maximum memory size of the instance
1339 @param vcpus: the count of VCPUs the instance has
1341 @param nics: list of tuples (ip, mac, mode, link) representing
1342 the NICs the instance has
1343 @type disk_template: string
1344 @param disk_template: the disk template of the instance
1346 @param disks: the list of (size, mode) pairs
1348 @param bep: the backend parameters for the instance
1350 @param hvp: the hypervisor parameters for the instance
1351 @type hypervisor_name: string
1352 @param hypervisor_name: the hypervisor for the instance
1354 @param tags: list of instance tags as strings
1356 @return: the hook environment for this instance
1361 "INSTANCE_NAME": name,
1362 "INSTANCE_PRIMARY": primary_node,
1363 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1364 "INSTANCE_OS_TYPE": os_type,
1365 "INSTANCE_STATUS": status,
1366 "INSTANCE_MINMEM": minmem,
1367 "INSTANCE_MAXMEM": maxmem,
1368 # TODO(2.7) remove deprecated "memory" value
1369 "INSTANCE_MEMORY": maxmem,
1370 "INSTANCE_VCPUS": vcpus,
1371 "INSTANCE_DISK_TEMPLATE": disk_template,
1372 "INSTANCE_HYPERVISOR": hypervisor_name,
1375 nic_count = len(nics)
1376 for idx, (ip, mac, mode, link) in enumerate(nics):
1379 env["INSTANCE_NIC%d_IP" % idx] = ip
1380 env["INSTANCE_NIC%d_MAC" % idx] = mac
1381 env["INSTANCE_NIC%d_MODE" % idx] = mode
1382 env["INSTANCE_NIC%d_LINK" % idx] = link
1383 if mode == constants.NIC_MODE_BRIDGED:
1384 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1388 env["INSTANCE_NIC_COUNT"] = nic_count
1391 disk_count = len(disks)
1392 for idx, (size, mode) in enumerate(disks):
1393 env["INSTANCE_DISK%d_SIZE" % idx] = size
1394 env["INSTANCE_DISK%d_MODE" % idx] = mode
1398 env["INSTANCE_DISK_COUNT"] = disk_count
1403 env["INSTANCE_TAGS"] = " ".join(tags)
1405 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1406 for key, value in source.items():
1407 env["INSTANCE_%s_%s" % (kind, key)] = value
1412 def _NICListToTuple(lu, nics):
1413 """Build a list of nic information tuples.
1415 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1416 value in LUInstanceQueryData.
1418 @type lu: L{LogicalUnit}
1419 @param lu: the logical unit on whose behalf we execute
1420 @type nics: list of L{objects.NIC}
1421 @param nics: list of nics to convert to hooks tuples
1425 cluster = lu.cfg.GetClusterInfo()
1429 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1430 mode = filled_params[constants.NIC_MODE]
1431 link = filled_params[constants.NIC_LINK]
1432 hooks_nics.append((ip, mac, mode, link))
1436 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1437 """Builds instance related env variables for hooks from an object.
1439 @type lu: L{LogicalUnit}
1440 @param lu: the logical unit on whose behalf we execute
1441 @type instance: L{objects.Instance}
1442 @param instance: the instance for which we should build the
1444 @type override: dict
1445 @param override: dictionary with key/values that will override
1448 @return: the hook environment dictionary
1451 cluster = lu.cfg.GetClusterInfo()
1452 bep = cluster.FillBE(instance)
1453 hvp = cluster.FillHV(instance)
1455 "name": instance.name,
1456 "primary_node": instance.primary_node,
1457 "secondary_nodes": instance.secondary_nodes,
1458 "os_type": instance.os,
1459 "status": instance.admin_state,
1460 "maxmem": bep[constants.BE_MAXMEM],
1461 "minmem": bep[constants.BE_MINMEM],
1462 "vcpus": bep[constants.BE_VCPUS],
1463 "nics": _NICListToTuple(lu, instance.nics),
1464 "disk_template": instance.disk_template,
1465 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1468 "hypervisor_name": instance.hypervisor,
1469 "tags": instance.tags,
1472 args.update(override)
1473 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1476 def _AdjustCandidatePool(lu, exceptions):
1477 """Adjust the candidate pool after node operations.
1480 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1482 lu.LogInfo("Promoted nodes to master candidate role: %s",
1483 utils.CommaJoin(node.name for node in mod_list))
1484 for name in mod_list:
1485 lu.context.ReaddNode(name)
1486 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1488 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1492 def _DecideSelfPromotion(lu, exceptions=None):
1493 """Decide whether I should promote myself as a master candidate.
1496 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1497 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1498 # the new node will increase mc_max by one, so:
1499 mc_should = min(mc_should + 1, cp_size)
1500 return mc_now < mc_should
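# A worked example of the decision above (numbers illustrative): with
# candidate_pool_size = 10, 3 current candidates and 3 desired, the new node
# raises the desired count to min(3 + 1, 10) = 4, and since 3 < 4 the node
# promotes itself to master candidate.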
1503 def _ComputeViolatingInstances(ipolicy, instances):
1504 """Computes the set of instances that violate the given ipolicy.
1506 @param ipolicy: The ipolicy to verify
1507 @type instances: list of L{objects.Instance}
1508 @param instances: List of instances to verify
1509 @return: A frozenset of instance names violating the ipolicy
1512 return frozenset([inst.name for inst in instances
1513 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1516 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1517 """Check that the bridges needed by a list of nics exist.
1520 cluster = lu.cfg.GetClusterInfo()
1521 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1522 brlist = [params[constants.NIC_LINK] for params in paramslist
1523 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1525 result = lu.rpc.call_bridges_exist(target_node, brlist)
1526 result.Raise("Error checking bridges on destination node '%s'" %
1527 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1530 def _CheckInstanceBridgesExist(lu, instance, node=None):
1531 """Check that the bridges needed by an instance exist.
1535 node = instance.primary_node
1536 _CheckNicsBridgesExist(lu, instance.nics, node)
1539 def _CheckOSVariant(os_obj, name):
1540 """Check whether an OS name conforms to the os variants specification.
1542 @type os_obj: L{objects.OS}
1543 @param os_obj: OS object to check
1545 @param name: OS name passed by the user, to check for validity
1548 variant = objects.OS.GetVariant(name)
1549 if not os_obj.supported_variants:
1551 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1552 " passed)" % (os_obj.name, variant),
1556 raise errors.OpPrereqError("OS name must include a variant",
1559 if variant not in os_obj.supported_variants:
1560 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1563 def _GetNodeInstancesInner(cfg, fn):
1564 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1567 def _GetNodeInstances(cfg, node_name):
1568 """Returns a list of all primary and secondary instances on a node.
1572 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1575 def _GetNodePrimaryInstances(cfg, node_name):
1576 """Returns primary instances on a node.
1579 return _GetNodeInstancesInner(cfg,
1580 lambda inst: node_name == inst.primary_node)
1583 def _GetNodeSecondaryInstances(cfg, node_name):
1584 """Returns secondary instances on a node.
1587 return _GetNodeInstancesInner(cfg,
1588 lambda inst: node_name in inst.secondary_nodes)
1591 def _GetStorageTypeArgs(cfg, storage_type):
1592 """Returns the arguments for a storage type.
1595 # Special case for file storage
1596 if storage_type == constants.ST_FILE:
1597 # storage.FileStorage wants a list of storage directories
1598 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1603 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1606 for dev in instance.disks:
1607 cfg.SetDiskID(dev, node_name)
1609 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1611 result.Raise("Failed to get disk status from node %s" % node_name,
1612 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1614 for idx, bdev_status in enumerate(result.payload):
1615 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1621 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1622 """Check the sanity of iallocator and node arguments and use the
1623 cluster-wide iallocator if appropriate.
1625 Check that at most one of (iallocator, node) is specified. If none is
1626 specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
1627 then the LU's opcode's iallocator slot is filled with the cluster-wide
1630 @type iallocator_slot: string
1631 @param iallocator_slot: the name of the opcode iallocator slot
1632 @type node_slot: string
1633 @param node_slot: the name of the opcode target node slot
1636 node = getattr(lu.op, node_slot, None)
1637 ialloc = getattr(lu.op, iallocator_slot, None)
1641 if node is not None and ialloc is not None:
1642 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1644 elif ((node is None and ialloc is None) or
1645 ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
1646 default_iallocator = lu.cfg.GetDefaultIAllocator()
1647 if default_iallocator:
1648 setattr(lu.op, iallocator_slot, default_iallocator)
1650 raise errors.OpPrereqError("No iallocator or node given and no"
1651 " cluster-wide default iallocator found;"
1652 " please specify either an iallocator or a"
1653 " node, or set a cluster-wide default"
1654 " iallocator", errors.ECODE_INVAL)
1657 def _GetDefaultIAllocator(cfg, ialloc):
1658 """Decides on which iallocator to use.
1660 @type cfg: L{config.ConfigWriter}
1661 @param cfg: Cluster configuration object
1662 @type ialloc: string or None
1663 @param ialloc: Iallocator specified in opcode
1665 @return: Iallocator name
1669 # Use default iallocator
1670 ialloc = cfg.GetDefaultIAllocator()
1673 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1674 " opcode nor as a cluster-wide default",
1680 def _CheckHostnameSane(lu, name):
1681 """Ensures that a given hostname resolves to a 'sane' name.
1683 The given name is required to be a prefix of the resolved hostname,
1684 to prevent accidental mismatches.
1686 @param lu: the logical unit on behalf of which we're checking
1687 @param name: the name we should resolve and check
1688 @return: the resolved hostname object
1691 hostname = netutils.GetHostname(name=name)
1692 if hostname.name != name:
1693 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1694 if not utils.MatchNameComponent(name, [hostname.name]):
1695 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1696 " same as given hostname '%s'") %
1697 (hostname.name, name), errors.ECODE_INVAL)
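# A hedged example of the check above (names illustrative): a given name
# "web1" that resolves to "web1.example.com" is accepted (and the expansion is
# logged), while a name that resolves to something it is not a prefix of, say
# "mail.example.com", raises OpPrereqError.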
1701 class LUClusterPostInit(LogicalUnit):
1702 """Logical unit for running hooks after cluster initialization.
1705 HPATH = "cluster-init"
1706 HTYPE = constants.HTYPE_CLUSTER
1708 def BuildHooksEnv(self):
1713 "OP_TARGET": self.cfg.GetClusterName(),
1716 def BuildHooksNodes(self):
1717 """Build hooks nodes.
1720 return ([], [self.cfg.GetMasterNode()])
1722 def Exec(self, feedback_fn):
1729 class LUClusterDestroy(LogicalUnit):
1730 """Logical unit for destroying the cluster.
1733 HPATH = "cluster-destroy"
1734 HTYPE = constants.HTYPE_CLUSTER
1736 def BuildHooksEnv(self):
1741 "OP_TARGET": self.cfg.GetClusterName(),
1744 def BuildHooksNodes(self):
1745 """Build hooks nodes.
1750 def CheckPrereq(self):
1751 """Check prerequisites.
1753 This checks whether the cluster is empty.
1755 Any errors are signaled by raising errors.OpPrereqError.
1758 master = self.cfg.GetMasterNode()
1760 nodelist = self.cfg.GetNodeList()
1761 if len(nodelist) != 1 or nodelist[0] != master:
1762 raise errors.OpPrereqError("There are still %d node(s) in"
1763 " this cluster." % (len(nodelist) - 1),
1765 instancelist = self.cfg.GetInstanceList()
1767 raise errors.OpPrereqError("There are still %d instance(s) in"
1768 " this cluster." % len(instancelist),
1771 def Exec(self, feedback_fn):
1772 """Destroys the cluster.
1775 master_params = self.cfg.GetMasterNetworkParameters()
1777 # Run post hooks on master node before it's removed
1778 _RunPostHook(self, master_params.name)
1780 ems = self.cfg.GetUseExternalMipScript()
1781 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1784 self.LogWarning("Error disabling the master IP address: %s",
1787 return master_params.name
1790 def _VerifyCertificate(filename):
1791 """Verifies a certificate for L{LUClusterVerifyConfig}.
1793 @type filename: string
1794 @param filename: Path to PEM file
1798 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1799 utils.ReadFile(filename))
1800 except Exception, err: # pylint: disable=W0703
1801 return (LUClusterVerifyConfig.ETYPE_ERROR,
1802 "Failed to load X509 certificate %s: %s" % (filename, err))
1805 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1806 constants.SSL_CERT_EXPIRATION_ERROR)
1809 fnamemsg = "While verifying %s: %s" % (filename, msg)
1814 return (None, fnamemsg)
1815 elif errcode == utils.CERT_WARNING:
1816 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1817 elif errcode == utils.CERT_ERROR:
1818 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1820 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1823 def _GetAllHypervisorParameters(cluster, instances):
1824 """Compute the set of all hypervisor parameters.
1826 @type cluster: L{objects.Cluster}
1827 @param cluster: the cluster object
1828 @type instances: list of L{objects.Instance}
1829 @param instances: additional instances from which to obtain parameters
1830 @rtype: list of (origin, hypervisor, parameters)
1831 @return: a list with all parameters found, indicating the hypervisor they
1832 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1837 for hv_name in cluster.enabled_hypervisors:
1838 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1840 for os_name, os_hvp in cluster.os_hvp.items():
1841 for hv_name, hv_params in os_hvp.items():
1843 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1844 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1846 # TODO: collapse identical parameter values in a single one
1847 for instance in instances:
1848 if instance.hvparams:
1849 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1850 cluster.FillHV(instance)))
1855 class _VerifyErrors(object):
1856 """Mix-in for cluster/group verify LUs.
1858 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1859 self.op and self._feedback_fn to be available.)
1863 ETYPE_FIELD = "code"
1864 ETYPE_ERROR = "ERROR"
1865 ETYPE_WARNING = "WARNING"
1867 def _Error(self, ecode, item, msg, *args, **kwargs):
1868 """Format an error message.
1870 Based on the opcode's error_codes parameter, either format a
1871 parseable error code, or a simpler error string.
1873 This must be called only from Exec and functions called from Exec.
1876 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1877 itype, etxt, _ = ecode
1878 # first complete the msg
1881 # then format the whole message
1882 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1883 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1889 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1890 # and finally report it via the feedback_fn
1891 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1893 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1894 """Log an error message if the passed condition is True.
1898 or self.op.debug_simulate_errors) # pylint: disable=E1101
1900 # If the error code is in the list of ignored errors, demote the error to a
1902 (_, etxt, _) = ecode
1903 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1904 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1907 self._Error(ecode, *args, **kwargs)
1909 # do not mark the operation as failed for WARN cases only
1910 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1911 self.bad = self.bad or cond
1914 class LUClusterVerify(NoHooksLU):
1915 """Submits all jobs necessary to verify the cluster.
1920 def ExpandNames(self):
1921 self.needed_locks = {}
1923 def Exec(self, feedback_fn):
1926 if self.op.group_name:
1927 groups = [self.op.group_name]
1928 depends_fn = lambda: None
1930 groups = self.cfg.GetNodeGroupList()
1932 # Verify global configuration
1934 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1937 # Always depend on global verification
1938 depends_fn = lambda: [(-len(jobs), [])]
1941 [opcodes.OpClusterVerifyGroup(group_name=group,
1942 ignore_errors=self.op.ignore_errors,
1943 depends=depends_fn())]
1944 for group in groups)
1946 # Fix up all parameters
1947 for op in itertools.chain(*jobs): # pylint: disable=W0142
1948 op.debug_simulate_errors = self.op.debug_simulate_errors
1949 op.verbose = self.op.verbose
1950 op.error_codes = self.op.error_codes
1952 op.skip_checks = self.op.skip_checks
1953 except AttributeError:
1954 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1956 return ResultWithJobs(jobs)
1959 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1960 """Verifies the cluster config.
1965 def _VerifyHVP(self, hvp_data):
1966 """Verifies locally the syntax of the hypervisor parameters.
1969 for item, hv_name, hv_params in hvp_data:
1970 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1973 hv_class = hypervisor.GetHypervisor(hv_name)
1974 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1975 hv_class.CheckParameterSyntax(hv_params)
1976 except errors.GenericError, err:
1977 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1979 def ExpandNames(self):
1980 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1981 self.share_locks = _ShareAll()
1983 def CheckPrereq(self):
1984 """Check prerequisites.
1987 # Retrieve all information
1988 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1989 self.all_node_info = self.cfg.GetAllNodesInfo()
1990 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1992 def Exec(self, feedback_fn):
1993 """Verify integrity of cluster, performing various tests on nodes.
1997 self._feedback_fn = feedback_fn
1999 feedback_fn("* Verifying cluster config")
2001 for msg in self.cfg.VerifyConfig():
2002 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2004 feedback_fn("* Verifying cluster certificate files")
2006 for cert_filename in pathutils.ALL_CERT_FILES:
2007 (errcode, msg) = _VerifyCertificate(cert_filename)
2008 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2010 feedback_fn("* Verifying hypervisor parameters")
2012 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2013 self.all_inst_info.values()))
2015 feedback_fn("* Verifying all nodes belong to an existing group")
2017 # We do this verification here because, should this bogus circumstance
2018 # occur, it would never be caught by VerifyGroup, which only acts on
2019 # nodes/instances reachable from existing node groups.
2021 dangling_nodes = set(node.name for node in self.all_node_info.values()
2022 if node.group not in self.all_group_info)
2024 dangling_instances = {}
2025 no_node_instances = []
2027 for inst in self.all_inst_info.values():
2028 if inst.primary_node in dangling_nodes:
2029 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2030 elif inst.primary_node not in self.all_node_info:
2031 no_node_instances.append(inst.name)
2036 utils.CommaJoin(dangling_instances.get(node.name,
2038 for node in dangling_nodes]
2040 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2042 "the following nodes (and their instances) belong to a non"
2043 " existing group: %s", utils.CommaJoin(pretty_dangling))
2045 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2047 "the following instances have a non-existing primary-node:"
2048 " %s", utils.CommaJoin(no_node_instances))
2053 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2054 """Verifies the status of a node group.
2057 HPATH = "cluster-verify"
2058 HTYPE = constants.HTYPE_CLUSTER
2061 _HOOKS_INDENT_RE = re.compile("^", re.M)
2063 class NodeImage(object):
2064 """A class representing the logical and physical status of a node.
2067 @ivar name: the node name to which this object refers
2068 @ivar volumes: a structure as returned from
2069 L{ganeti.backend.GetVolumeList} (runtime)
2070 @ivar instances: a list of running instances (runtime)
2071 @ivar pinst: list of configured primary instances (config)
2072 @ivar sinst: list of configured secondary instances (config)
2073 @ivar sbp: dictionary of {primary-node: list of instances} for all
2074 instances for which this node is secondary (config)
2075 @ivar mfree: free memory, as reported by hypervisor (runtime)
2076 @ivar dfree: free disk, as reported by the node (runtime)
2077 @ivar offline: the offline status (config)
2078 @type rpc_fail: boolean
2079 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2080 not whether the individual keys were correct) (runtime)
2081 @type lvm_fail: boolean
2082 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2083 @type hyp_fail: boolean
2084 @ivar hyp_fail: whether the RPC call didn't return the instance list
2085 @type ghost: boolean
2086 @ivar ghost: whether this node is absent from the configuration (config)
2087 @type os_fail: boolean
2088 @ivar os_fail: whether the RPC call didn't return valid OS data
2090 @ivar oslist: dict of OSes as diagnosed by DiagnoseOS
2091 @type vm_capable: boolean
2092 @ivar vm_capable: whether the node can host instances
2095 def __init__(self, offline=False, name=None, vm_capable=True):
2104 self.offline = offline
2105 self.vm_capable = vm_capable
2106 self.rpc_fail = False
2107 self.lvm_fail = False
2108 self.hyp_fail = False
2110 self.os_fail = False
2113 def ExpandNames(self):
2114 # This raises errors.OpPrereqError on its own:
2115 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2117 # Get instances in node group; this is unsafe and needs verification later
2119 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2121 self.needed_locks = {
2122 locking.LEVEL_INSTANCE: inst_names,
2123 locking.LEVEL_NODEGROUP: [self.group_uuid],
2124 locking.LEVEL_NODE: [],
2127 self.share_locks = _ShareAll()
2129 def DeclareLocks(self, level):
2130 if level == locking.LEVEL_NODE:
2131 # Get members of node group; this is unsafe and needs verification later
2132 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2134 all_inst_info = self.cfg.GetAllInstancesInfo()
2136 # In Exec(), we warn about mirrored instances that have primary and
2137 # secondary living in separate node groups. To fully verify that
2138 # volumes for these instances are healthy, we will need to do an
2139 # extra call to their secondaries. We ensure here those nodes will
2141 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2142 # Important: access only the instances whose lock is owned
2143 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2144 nodes.update(all_inst_info[inst].secondary_nodes)
2146 self.needed_locks[locking.LEVEL_NODE] = nodes
2148 def CheckPrereq(self):
2149 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2150 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2152 group_nodes = set(self.group_info.members)
2154 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2157 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2159 unlocked_instances = \
2160 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2163 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2164 utils.CommaJoin(unlocked_nodes),
2167 if unlocked_instances:
2168 raise errors.OpPrereqError("Missing lock for instances: %s" %
2169 utils.CommaJoin(unlocked_instances),
2172 self.all_node_info = self.cfg.GetAllNodesInfo()
2173 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2175 self.my_node_names = utils.NiceSort(group_nodes)
2176 self.my_inst_names = utils.NiceSort(group_instances)
2178 self.my_node_info = dict((name, self.all_node_info[name])
2179 for name in self.my_node_names)
2181 self.my_inst_info = dict((name, self.all_inst_info[name])
2182 for name in self.my_inst_names)
2184 # We detect here the nodes that will need the extra RPC calls for verifying
2185 # split LV volumes; they should be locked.
2186 extra_lv_nodes = set()
2188 for inst in self.my_inst_info.values():
2189 if inst.disk_template in constants.DTS_INT_MIRROR:
2190 for nname in inst.all_nodes:
2191 if self.all_node_info[nname].group != self.group_uuid:
2192 extra_lv_nodes.add(nname)
2194 unlocked_lv_nodes = \
2195 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2197 if unlocked_lv_nodes:
2198 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2199 utils.CommaJoin(unlocked_lv_nodes),
2201 self.extra_lv_nodes = list(extra_lv_nodes)
2203 def _VerifyNode(self, ninfo, nresult):
2204 """Perform some basic validation on data returned from a node.
2206 - check the result data structure is well formed and has all the
2208 - check ganeti version
2210 @type ninfo: L{objects.Node}
2211 @param ninfo: the node to check
2212 @param nresult: the results from the node
2214 @return: whether overall this call was successful (and we can expect
2215 reasonable values in the response)
2219 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2221 # main result, nresult should be a non-empty dict
2222 test = not nresult or not isinstance(nresult, dict)
2223 _ErrorIf(test, constants.CV_ENODERPC, node,
2224 "unable to verify node: no data returned")
2228 # compares ganeti version
2229 local_version = constants.PROTOCOL_VERSION
2230 remote_version = nresult.get("version", None)
2231 test = not (remote_version and
2232 isinstance(remote_version, (list, tuple)) and
2233 len(remote_version) == 2)
2234 _ErrorIf(test, constants.CV_ENODERPC, node,
2235 "connection to node returned invalid data")
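# remote_version is a (protocol, release) pair: index 0 is matched exactly
# against PROTOCOL_VERSION below, index 1 against RELEASE_VERSION (the
# latter only as a warning).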
2239 test = local_version != remote_version[0]
2240 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2241 "incompatible protocol versions: master %s,"
2242 " node %s", local_version, remote_version[0])
2246 # node seems compatible, we can actually try to look into its results
2248 # full package version
2249 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2250 constants.CV_ENODEVERSION, node,
2251 "software version mismatch: master %s, node %s",
2252 constants.RELEASE_VERSION, remote_version[1],
2253 code=self.ETYPE_WARNING)
2255 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2256 if ninfo.vm_capable and isinstance(hyp_result, dict):
2257 for hv_name, hv_result in hyp_result.iteritems():
2258 test = hv_result is not None
2259 _ErrorIf(test, constants.CV_ENODEHV, node,
2260 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2262 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2263 if ninfo.vm_capable and isinstance(hvp_result, list):
2264 for item, hv_name, hv_result in hvp_result:
2265 _ErrorIf(True, constants.CV_ENODEHV, node,
2266 "hypervisor %s parameter verify failure (source %s): %s",
2267 hv_name, item, hv_result)
2269 test = nresult.get(constants.NV_NODESETUP,
2270 ["Missing NODESETUP results"])
2271 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2276 def _VerifyNodeTime(self, ninfo, nresult,
2277 nvinfo_starttime, nvinfo_endtime):
2278 """Check the node time.
2280 @type ninfo: L{objects.Node}
2281 @param ninfo: the node to check
2282 @param nresult: the remote results for the node
2283 @param nvinfo_starttime: the start time of the RPC call
2284 @param nvinfo_endtime: the end time of the RPC call
2288 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2290 ntime = nresult.get(constants.NV_TIME, None)
2292 ntime_merged = utils.MergeTime(ntime)
2293 except (ValueError, TypeError):
2294 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2297 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2298 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2299 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2300 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
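# A divergence is only reported when the node's clock falls outside the
# window [nvinfo_starttime - NODE_MAX_CLOCK_SKEW,
#         nvinfo_endtime + NODE_MAX_CLOCK_SKEW].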
2304 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2305 "Node time diverges by at least %s from master node time",
2308 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2309 """Check the node LVM results.
2311 @type ninfo: L{objects.Node}
2312 @param ninfo: the node to check
2313 @param nresult: the remote results for the node
2314 @param vg_name: the configured VG name
2321 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2323 # checks vg existence and size > 20G
2324 vglist = nresult.get(constants.NV_VGLIST, None)
2326 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2328 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2329 constants.MIN_VG_SIZE)
2330 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2333 pvlist = nresult.get(constants.NV_PVLIST, None)
2334 test = pvlist is None
2335 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2337 # check that ':' is not present in PV names, since it's a
2338 # special character for lvcreate (denotes the range of PEs to
2340 for _, pvname, owner_vg in pvlist:
2341 test = ":" in pvname
2342 _ErrorIf(test, constants.CV_ENODELVM, node,
2343 "Invalid character ':' in PV '%s' of VG '%s'",
2346 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2347 """Check the node bridges.
2349 @type ninfo: L{objects.Node}
2350 @param ninfo: the node to check
2351 @param nresult: the remote results for the node
2352 @param bridges: the expected list of bridges
2359 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2361 missing = nresult.get(constants.NV_BRIDGES, None)
2362 test = not isinstance(missing, list)
2363 _ErrorIf(test, constants.CV_ENODENET, node,
2364 "did not return valid bridge information")
2366 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2367 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2369 def _VerifyNodeUserScripts(self, ninfo, nresult):
2370 """Check the results of user-script presence and executability on the node
2372 @type ninfo: L{objects.Node}
2373 @param ninfo: the node to check
2374 @param nresult: the remote results for the node
2379 test = constants.NV_USERSCRIPTS not in nresult
2380 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2381 "did not return user scripts information")
2383 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2385 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2386 "user scripts not present or not executable: %s" %
2387 utils.CommaJoin(sorted(broken_scripts)))
2389 def _VerifyNodeNetwork(self, ninfo, nresult):
2390 """Check the node network connectivity results.
2392 @type ninfo: L{objects.Node}
2393 @param ninfo: the node to check
2394 @param nresult: the remote results for the node
2398 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2400 test = constants.NV_NODELIST not in nresult
2401 _ErrorIf(test, constants.CV_ENODESSH, node,
2402 "node hasn't returned node ssh connectivity data")
2404 if nresult[constants.NV_NODELIST]:
2405 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2406 _ErrorIf(True, constants.CV_ENODESSH, node,
2407 "ssh communication with node '%s': %s", a_node, a_msg)
2409 test = constants.NV_NODENETTEST not in nresult
2410 _ErrorIf(test, constants.CV_ENODENET, node,
2411 "node hasn't returned node tcp connectivity data")
2413 if nresult[constants.NV_NODENETTEST]:
2414 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2416 _ErrorIf(True, constants.CV_ENODENET, node,
2417 "tcp communication with node '%s': %s",
2418 anode, nresult[constants.NV_NODENETTEST][anode])
2420 test = constants.NV_MASTERIP not in nresult
2421 _ErrorIf(test, constants.CV_ENODENET, node,
2422 "node hasn't returned node master IP reachability data")
2424 if not nresult[constants.NV_MASTERIP]:
2425 if node == self.master_node:
2426 msg = "the master node cannot reach the master IP (not configured?)"
2428 msg = "cannot reach the master IP"
2429 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2431 def _VerifyInstance(self, instance, instanceconfig, node_image,
2433 """Verify an instance.
2435 This function checks to see if the required block devices are
2436 available on the instance's node.
2439 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2440 node_current = instanceconfig.primary_node
2442 node_vol_should = {}
2443 instanceconfig.MapLVsByNode(node_vol_should)
2445 cluster = self.cfg.GetClusterInfo()
2446 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2448 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2449 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2451 for node in node_vol_should:
2452 n_img = node_image[node]
2453 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2454 # ignore missing volumes on offline or broken nodes
2456 for volume in node_vol_should[node]:
2457 test = volume not in n_img.volumes
2458 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2459 "volume %s missing on node %s", volume, node)
2461 if instanceconfig.admin_state == constants.ADMINST_UP:
2462 pri_img = node_image[node_current]
2463 test = instance not in pri_img.instances and not pri_img.offline
2464 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2465 "instance not running on its primary node %s",
2468 diskdata = [(nname, success, status, idx)
2469 for (nname, disks) in diskstatus.items()
2470 for idx, (success, status) in enumerate(disks)]
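# diskstatus maps node name -> [(success, status), ...] with one entry per
# disk (see _CollectDiskInfo), so each diskdata item is
# (node_name, success, bdev_status, disk_index).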
2472 for nname, success, bdev_status, idx in diskdata:
2473 # the 'ghost node' construction in Exec() ensures that we have a
2475 snode = node_image[nname]
2476 bad_snode = snode.ghost or snode.offline
2477 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2478 not success and not bad_snode,
2479 constants.CV_EINSTANCEFAULTYDISK, instance,
2480 "couldn't retrieve status for disk/%s on %s: %s",
2481 idx, nname, bdev_status)
2482 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2483 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2484 constants.CV_EINSTANCEFAULTYDISK, instance,
2485 "disk/%s on %s is faulty", idx, nname)
2487 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2488 """Verify if there are any unknown volumes in the cluster.
2490 The .os, .swap and backup volumes are ignored. All other volumes are
2491 reported as unknown.
2493 @type reserved: L{ganeti.utils.FieldSet}
2494 @param reserved: a FieldSet of reserved volume names
2497 for node, n_img in node_image.items():
2498 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2499 self.all_node_info[node].group != self.group_uuid):
2500 # skip non-healthy nodes
2502 for volume in n_img.volumes:
2503 test = ((node not in node_vol_should or
2504 volume not in node_vol_should[node]) and
2505 not reserved.Matches(volume))
2506 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2507 "volume %s is unknown", volume)
2509 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2510 """Verify N+1 Memory Resilience.
2512 Check that if one single node dies we can still start all the
2513 instances it was primary for.
2516 cluster_info = self.cfg.GetClusterInfo()
2517 for node, n_img in node_image.items():
2518 # This code checks that every node which is now listed as
2519 # secondary has enough memory to host all instances it is
2520 # supposed to, should a single other node in the cluster fail.
2521 # FIXME: not ready for failover to an arbitrary node
2522 # FIXME: does not support file-backed instances
2523 # WARNING: we currently take into account down instances as well
2524 # as up ones, considering that even if they're down someone
2525 # might want to start them even in the event of a node failure.
2526 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2527 # we're skipping nodes marked offline and nodes in other groups from
2528 # the N+1 warning, since most likely we don't have good memory
2529 # information from them; we already list instances living on such
2530 # nodes, and that's enough warning
2532 #TODO(dynmem): also consider ballooning out other instances
2533 for prinode, instances in n_img.sbp.items():
2535 for instance in instances:
2536 bep = cluster_info.FillBE(instance_cfg[instance])
2537 if bep[constants.BE_AUTO_BALANCE]:
2538 needed_mem += bep[constants.BE_MINMEM]
2539 test = n_img.mfree < needed_mem
2540 self._ErrorIf(test, constants.CV_ENODEN1, node,
2541 "not enough memory to accommodate instance failovers"
2542 " should node %s fail (%dMiB needed, %dMiB available)",
2543 prinode, needed_mem, n_img.mfree)
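# Worked example (hypothetical figures): if this node is secondary for two
# auto-balanced instances whose primary is prinode, with BE_MINMEM of 512
# and 1024 MiB, it must report at least 1536 MiB free to pass the N+1 check
# for prinode.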
2546 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2547 (files_all, files_opt, files_mc, files_vm)):
2548 """Verifies file checksums collected from all nodes.
2550 @param errorif: Callback for reporting errors
2551 @param nodeinfo: List of L{objects.Node} objects
2552 @param master_node: Name of master node
2553 @param all_nvinfo: RPC results
2556 # Define functions determining which nodes to consider for a file
2559 (files_mc, lambda node: (node.master_candidate or
2560 node.name == master_node)),
2561 (files_vm, lambda node: node.vm_capable),
2564 # Build mapping from filename to list of nodes which should have the file
2566 for (files, fn) in files2nodefn:
2568 filenodes = nodeinfo
2570 filenodes = filter(fn, nodeinfo)
2571 nodefiles.update((filename,
2572 frozenset(map(operator.attrgetter("name"), filenodes)))
2573 for filename in files)
2575 assert set(nodefiles) == (files_all | files_mc | files_vm)
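# nodefiles now maps every expected filename to the frozenset of node names
# that should carry it (all nodes, master candidates plus the master, or
# vm_capable nodes, depending on which input set the file came from).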
2577 fileinfo = dict((filename, {}) for filename in nodefiles)
2578 ignore_nodes = set()
2580 for node in nodeinfo:
2582 ignore_nodes.add(node.name)
2585 nresult = all_nvinfo[node.name]
2587 if nresult.fail_msg or not nresult.payload:
2590 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2591 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2592 for (key, value) in fingerprints.items())
2595 test = not (node_files and isinstance(node_files, dict))
2596 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2597 "Node did not return file checksum data")
2599 ignore_nodes.add(node.name)
2602 # Build per-checksum mapping from filename to nodes having it
2603 for (filename, checksum) in node_files.items():
2604 assert filename in nodefiles
2605 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2607 for (filename, checksums) in fileinfo.items():
2608 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2610 # Nodes having the file
2611 with_file = frozenset(node_name
2612 for nodes in fileinfo[filename].values()
2613 for node_name in nodes) - ignore_nodes
2615 expected_nodes = nodefiles[filename] - ignore_nodes
2617 # Nodes missing file
2618 missing_file = expected_nodes - with_file
2620 if filename in files_opt:
2622 errorif(missing_file and missing_file != expected_nodes,
2623 constants.CV_ECLUSTERFILECHECK, None,
2624 "File %s is optional, but it must exist on all or no"
2625 " nodes (not found on %s)",
2626 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2628 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2629 "File %s is missing from node(s) %s", filename,
2630 utils.CommaJoin(utils.NiceSort(missing_file)))
2632 # Warn if a node has a file it shouldn't
2633 unexpected = with_file - expected_nodes
2635 constants.CV_ECLUSTERFILECHECK, None,
2636 "File %s should not exist on node(s) %s",
2637 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2639 # See if there are multiple versions of the file
2640 test = len(checksums) > 1
2642 variants = ["variant %s on %s" %
2643 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2644 for (idx, (checksum, nodes)) in
2645 enumerate(sorted(checksums.items()))]
2649 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2650 "File %s found with %s different checksums (%s)",
2651 filename, len(checksums), "; ".join(variants))
2653 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2655 """Verifies the node DRBD status.
2657 @type ninfo: L{objects.Node}
2658 @param ninfo: the node to check
2659 @param nresult: the remote results for the node
2660 @param instanceinfo: the dict of instances
2661 @param drbd_helper: the configured DRBD usermode helper
2662 @param drbd_map: the DRBD map as returned by
2663 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2667 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2670 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2671 test = (helper_result is None)
2672 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2673 "no drbd usermode helper returned")
2675 status, payload = helper_result
2677 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2678 "drbd usermode helper check unsuccessful: %s", payload)
2679 test = status and (payload != drbd_helper)
2680 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2681 "wrong drbd usermode helper: %s", payload)
2683 # compute the DRBD minors
2685 for minor, instance in drbd_map[node].items():
2686 test = instance not in instanceinfo
2687 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2688 "ghost instance '%s' in temporary DRBD map", instance)
2689 # ghost instance should not be running, but otherwise we
2690 # don't give double warnings (both ghost instance and
2691 # unallocated minor in use)
2693 node_drbd[minor] = (instance, False)
2695 instance = instanceinfo[instance]
2696 node_drbd[minor] = (instance.name,
2697 instance.admin_state == constants.ADMINST_UP)
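# node_drbd maps every minor expected on this node to
# (instance_name, should_be_active); ghost instances are recorded with
# should_be_active=False so only the "ghost instance" error is raised for
# them.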
2699 # and now check them
2700 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2701 test = not isinstance(used_minors, (tuple, list))
2702 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2703 "cannot parse drbd status file: %s", str(used_minors))
2705 # we cannot check drbd status
2708 for minor, (iname, must_exist) in node_drbd.items():
2709 test = minor not in used_minors and must_exist
2710 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2711 "drbd minor %d of instance %s is not active", minor, iname)
2712 for minor in used_minors:
2713 test = minor not in node_drbd
2714 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2715 "unallocated drbd minor %d is in use", minor)
2717 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2718 """Builds the node OS structures.
2720 @type ninfo: L{objects.Node}
2721 @param ninfo: the node to check
2722 @param nresult: the remote results for the node
2723 @param nimg: the node image object
2727 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2729 remote_os = nresult.get(constants.NV_OSLIST, None)
2730 test = (not isinstance(remote_os, list) or
2731 not compat.all(isinstance(v, list) and len(v) == 7
2732 for v in remote_os))
2734 _ErrorIf(test, constants.CV_ENODEOS, node,
2735 "node hasn't returned valid OS data")
2744 for (name, os_path, status, diagnose,
2745 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2747 if name not in os_dict:
2750 # parameters is a list of lists instead of list of tuples due to
2751 # JSON lacking a real tuple type, fix it:
2752 parameters = [tuple(v) for v in parameters]
2753 os_dict[name].append((os_path, status, diagnose,
2754 set(variants), set(parameters), set(api_ver)))
2756 nimg.oslist = os_dict
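# nimg.oslist maps each OS name to a list of
# (path, status, diagnose, variants, parameters, api_versions) tuples; on a
# healthy node there is normally exactly one entry per OS name.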
2758 def _VerifyNodeOS(self, ninfo, nimg, base):
2759 """Verifies the node OS list.
2761 @type ninfo: L{objects.Node}
2762 @param ninfo: the node to check
2763 @param nimg: the node image object
2764 @param base: the 'template' node we match against (e.g. from the master)
2768 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2770 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2772 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2773 for os_name, os_data in nimg.oslist.items():
2774 assert os_data, "Empty OS status for OS %s?!" % os_name
2775 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2776 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2777 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2778 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2779 "OS '%s' has multiple entries (first one shadows the rest): %s",
2780 os_name, utils.CommaJoin([v[0] for v in os_data]))
2781 # comparisons with the 'base' image
2782 test = os_name not in base.oslist
2783 _ErrorIf(test, constants.CV_ENODEOS, node,
2784 "Extra OS %s not present on reference node (%s)",
2788 assert base.oslist[os_name], "Base node has empty OS status?"
2789 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2791 # base OS is invalid, skipping
2793 for kind, a, b in [("API version", f_api, b_api),
2794 ("variants list", f_var, b_var),
2795 ("parameters", beautify_params(f_param),
2796 beautify_params(b_param))]:
2797 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2798 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2799 kind, os_name, base.name,
2800 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2802 # check any missing OSes
2803 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2804 _ErrorIf(missing, constants.CV_ENODEOS, node,
2805 "OSes present on reference node %s but missing on this node: %s",
2806 base.name, utils.CommaJoin(missing))
2808 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
2809 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2811 @type ninfo: L{objects.Node}
2812 @param ninfo: the node to check
2813 @param nresult: the remote results for the node
2814 @type is_master: bool
2815 @param is_master: Whether node is the master node
2821 (constants.ENABLE_FILE_STORAGE or
2822 constants.ENABLE_SHARED_FILE_STORAGE)):
2824 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
2826 # This should never happen
2827 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
2828 "Node did not return forbidden file storage paths")
2830 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
2831 "Found forbidden file storage paths: %s",
2832 utils.CommaJoin(fspaths))
2834 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
2835 constants.CV_ENODEFILESTORAGEPATHS, node,
2836 "Node should not have returned forbidden file storage"
2839 def _VerifyOob(self, ninfo, nresult):
2840 """Verifies out of band functionality of a node.
2842 @type ninfo: L{objects.Node}
2843 @param ninfo: the node to check
2844 @param nresult: the remote results for the node
2848 # We just have to verify the paths on master and/or master candidates
2849 # as the oob helper is invoked on the master
2850 if ((ninfo.master_candidate or ninfo.master_capable) and
2851 constants.NV_OOB_PATHS in nresult):
2852 for path_result in nresult[constants.NV_OOB_PATHS]:
2853 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2855 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2856 """Verifies and updates the node volume data.
2858 This function will update a L{NodeImage}'s internal structures
2859 with data from the remote call.
2861 @type ninfo: L{objects.Node}
2862 @param ninfo: the node to check
2863 @param nresult: the remote results for the node
2864 @param nimg: the node image object
2865 @param vg_name: the configured VG name
2869 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2871 nimg.lvm_fail = True
2872 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2875 elif isinstance(lvdata, basestring):
2876 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2877 utils.SafeEncode(lvdata))
2878 elif not isinstance(lvdata, dict):
2879 _ErrorIf(True, constants.CV_ENODELVM, node,
2880 "rpc call to node failed (lvlist)")
2882 nimg.volumes = lvdata
2883 nimg.lvm_fail = False
2885 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2886 """Verifies and updates the node instance list.
2888 If the listing was successful, then updates this node's instance
2889 list. Otherwise, it marks the RPC call as failed for the instance
2892 @type ninfo: L{objects.Node}
2893 @param ninfo: the node to check
2894 @param nresult: the remote results for the node
2895 @param nimg: the node image object
2898 idata = nresult.get(constants.NV_INSTANCELIST, None)
2899 test = not isinstance(idata, list)
2900 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2901 "rpc call to node failed (instancelist): %s",
2902 utils.SafeEncode(str(idata)))
2904 nimg.hyp_fail = True
2906 nimg.instances = idata
2908 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2909 """Verifies and computes a node information map
2911 @type ninfo: L{objects.Node}
2912 @param ninfo: the node to check
2913 @param nresult: the remote results for the node
2914 @param nimg: the node image object
2915 @param vg_name: the configured VG name
2919 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2921 # try to read free memory (from the hypervisor)
2922 hv_info = nresult.get(constants.NV_HVINFO, None)
2923 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2924 _ErrorIf(test, constants.CV_ENODEHV, node,
2925 "rpc call to node failed (hvinfo)")
2928 nimg.mfree = int(hv_info["memory_free"])
2929 except (ValueError, TypeError):
2930 _ErrorIf(True, constants.CV_ENODERPC, node,
2931 "node returned invalid nodeinfo, check hypervisor")
2933 # FIXME: devise a free space model for file based instances as well
2934 if vg_name is not None:
2935 test = (constants.NV_VGLIST not in nresult or
2936 vg_name not in nresult[constants.NV_VGLIST])
2937 _ErrorIf(test, constants.CV_ENODELVM, node,
2938 "node didn't return data for the volume group '%s'"
2939 " - it is either missing or broken", vg_name)
2942 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2943 except (ValueError, TypeError):
2944 _ErrorIf(True, constants.CV_ENODERPC, node,
2945 "node returned invalid LVM info, check LVM status")
2947 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2948 """Gets per-disk status information for all instances.
2950 @type nodelist: list of strings
2951 @param nodelist: Node names
2952 @type node_image: dict of (name, L{objects.Node})
2953 @param node_image: Node objects
2954 @type instanceinfo: dict of (name, L{objects.Instance})
2955 @param instanceinfo: Instance objects
2956 @rtype: {instance: {node: [(success, payload)]}}
2957 @return: a dictionary of per-instance dictionaries with nodes as
2958 keys and disk information as values; the disk information is a
2959 list of tuples (success, payload)
2962 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2965 node_disks_devonly = {}
2966 diskless_instances = set()
2967 diskless = constants.DT_DISKLESS
2969 for nname in nodelist:
2970 node_instances = list(itertools.chain(node_image[nname].pinst,
2971 node_image[nname].sinst))
2972 diskless_instances.update(inst for inst in node_instances
2973 if instanceinfo[inst].disk_template == diskless)
2974 disks = [(inst, disk)
2975 for inst in node_instances
2976 for disk in instanceinfo[inst].disks]
2979 # No need to collect data
2982 node_disks[nname] = disks
2984 # _AnnotateDiskParams makes already copies of the disks
2986 for (inst, dev) in disks:
2987 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2988 self.cfg.SetDiskID(anno_disk, nname)
2989 devonly.append(anno_disk)
2991 node_disks_devonly[nname] = devonly
2993 assert len(node_disks) == len(node_disks_devonly)
2995 # Collect data from all nodes with disks
2996 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2999 assert len(result) == len(node_disks)
3003 for (nname, nres) in result.items():
3004 disks = node_disks[nname]
3007 # No data from this node
3008 data = len(disks) * [(False, "node offline")]
3011 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3012 "while getting disk information: %s", msg)
3014 # No data from this node
3015 data = len(disks) * [(False, msg)]
3018 for idx, i in enumerate(nres.payload):
3019 if isinstance(i, (tuple, list)) and len(i) == 2:
3022 logging.warning("Invalid result from node %s, entry %d: %s",
3024 data.append((False, "Invalid result from the remote node"))
3026 for ((inst, _), status) in zip(disks, data):
3027 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3029 # Add empty entries for diskless instances.
3030 for inst in diskless_instances:
3031 assert inst not in instdisk
3034 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3035 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3036 compat.all(isinstance(s, (tuple, list)) and
3037 len(s) == 2 for s in statuses)
3038 for inst, nnames in instdisk.items()
3039 for nname, statuses in nnames.items())
3040 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
3045 def _SshNodeSelector(group_uuid, all_nodes):
3046 """Create endless iterators for all potential SSH check hosts.
3049 nodes = [node for node in all_nodes
3050 if (node.group != group_uuid and
3052 keyfunc = operator.attrgetter("group")
3054 return map(itertools.cycle,
3055 [sorted(map(operator.attrgetter("name"), names))
3056 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3060 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3061 """Choose which nodes should talk to which other nodes.
3063 We will make nodes contact all nodes in their group, and one node from
3066 @warning: This algorithm has a known issue if one node group is much
3067 smaller than others (e.g. just one node). In such a case all other
3068 nodes will talk to the single node.
3071 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3072 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3074 return (online_nodes,
3075 dict((name, sorted([i.next() for i in sel]))
3076 for name in online_nodes))
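# The result is (online_nodes, {node_name: [one peer node from each other
# group]}); because the per-group iterators are endless cycles, successive
# nodes in this group get spread across different remote targets.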
3078 def BuildHooksEnv(self):
3081 Cluster-Verify hooks just ran in the post phase and their failure makes
3082 the output be logged in the verify output and the verification to fail.
3086 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3089 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3090 for node in self.my_node_info.values())
3094 def BuildHooksNodes(self):
3095 """Build hooks nodes.
3098 return ([], self.my_node_names)
3100 def Exec(self, feedback_fn):
3101 """Verify integrity of the node group, performing various tests on nodes.
3104 # This method has too many local variables. pylint: disable=R0914
3105 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3107 if not self.my_node_names:
3109 feedback_fn("* Empty node group, skipping verification")
3113 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3114 verbose = self.op.verbose
3115 self._feedback_fn = feedback_fn
3117 vg_name = self.cfg.GetVGName()
3118 drbd_helper = self.cfg.GetDRBDHelper()
3119 cluster = self.cfg.GetClusterInfo()
3120 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3121 hypervisors = cluster.enabled_hypervisors
3122 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3124 i_non_redundant = [] # Non redundant instances
3125 i_non_a_balanced = [] # Non auto-balanced instances
3126 i_offline = 0 # Count of offline instances
3127 n_offline = 0 # Count of offline nodes
3128 n_drained = 0 # Count of nodes being drained
3129 node_vol_should = {}
3131 # FIXME: verify OS list
3134 filemap = _ComputeAncillaryFiles(cluster, False)
3136 # do local checksums
3137 master_node = self.master_node = self.cfg.GetMasterNode()
3138 master_ip = self.cfg.GetMasterIP()
3140 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3143 if self.cfg.GetUseExternalMipScript():
3144 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3146 node_verify_param = {
3147 constants.NV_FILELIST:
3148 map(vcluster.MakeVirtualPath,
3149 utils.UniqueSequence(filename
3150 for files in filemap
3151 for filename in files)),
3152 constants.NV_NODELIST:
3153 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3154 self.all_node_info.values()),
3155 constants.NV_HYPERVISOR: hypervisors,
3156 constants.NV_HVPARAMS:
3157 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3158 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3159 for node in node_data_list
3160 if not node.offline],
3161 constants.NV_INSTANCELIST: hypervisors,
3162 constants.NV_VERSION: None,
3163 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3164 constants.NV_NODESETUP: None,
3165 constants.NV_TIME: None,
3166 constants.NV_MASTERIP: (master_node, master_ip),
3167 constants.NV_OSLIST: None,
3168 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3169 constants.NV_USERSCRIPTS: user_scripts,
3172 if vg_name is not None:
3173 node_verify_param[constants.NV_VGLIST] = None
3174 node_verify_param[constants.NV_LVLIST] = vg_name
3175 node_verify_param[constants.NV_PVLIST] = [vg_name]
3178 node_verify_param[constants.NV_DRBDLIST] = None
3179 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3181 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3182 # Load file storage paths only from master node
3183 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3186 # FIXME: this needs to be changed per node-group, not cluster-wide
3188 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3189 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3190 bridges.add(default_nicpp[constants.NIC_LINK])
3191 for instance in self.my_inst_info.values():
3192 for nic in instance.nics:
3193 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3194 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3195 bridges.add(full_nic[constants.NIC_LINK])
3198 node_verify_param[constants.NV_BRIDGES] = list(bridges)
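# node_verify_param is the request payload passed to rpc.call_node_verify
# below; each NV_* key asks the remote node to run the corresponding check
# and report its result under the same key.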
3200 # Build our expected cluster state
3201 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3203 vm_capable=node.vm_capable))
3204 for node in node_data_list)
3208 for node in self.all_node_info.values():
3209 path = _SupportsOob(self.cfg, node)
3210 if path and path not in oob_paths:
3211 oob_paths.append(path)
3214 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3216 for instance in self.my_inst_names:
3217 inst_config = self.my_inst_info[instance]
3218 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3221 for nname in inst_config.all_nodes:
3222 if nname not in node_image:
3223 gnode = self.NodeImage(name=nname)
3224 gnode.ghost = (nname not in self.all_node_info)
3225 node_image[nname] = gnode
3227 inst_config.MapLVsByNode(node_vol_should)
3229 pnode = inst_config.primary_node
3230 node_image[pnode].pinst.append(instance)
3232 for snode in inst_config.secondary_nodes:
3233 nimg = node_image[snode]
3234 nimg.sinst.append(instance)
3235 if pnode not in nimg.sbp:
3236 nimg.sbp[pnode] = []
3237 nimg.sbp[pnode].append(instance)
3239 # At this point, we have the in-memory data structures complete,
3240 # except for the runtime information, which we'll gather next
3242 # Due to the way our RPC system works, exact response times cannot be
3243 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3244 # time before and after executing the request, we can at least have a time
3246 nvinfo_starttime = time.time()
3247 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3249 self.cfg.GetClusterName())
3250 nvinfo_endtime = time.time()
3252 if self.extra_lv_nodes and vg_name is not None:
3254 self.rpc.call_node_verify(self.extra_lv_nodes,
3255 {constants.NV_LVLIST: vg_name},
3256 self.cfg.GetClusterName())
3258 extra_lv_nvinfo = {}
3260 all_drbd_map = self.cfg.ComputeDRBDMap()
3262 feedback_fn("* Gathering disk information (%s nodes)" %
3263 len(self.my_node_names))
3264 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3267 feedback_fn("* Verifying configuration file consistency")
3269 # If not all nodes are being checked, we need to make sure the master node
3270 # and a non-checked vm_capable node are in the list.
3271 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3273 vf_nvinfo = all_nvinfo.copy()
3274 vf_node_info = list(self.my_node_info.values())
3275 additional_nodes = []
3276 if master_node not in self.my_node_info:
3277 additional_nodes.append(master_node)
3278 vf_node_info.append(self.all_node_info[master_node])
3279 # Add the first vm_capable node we find which is not included,
3280 # excluding the master node (which we already have)
3281 for node in absent_nodes:
3282 nodeinfo = self.all_node_info[node]
3283 if (nodeinfo.vm_capable and not nodeinfo.offline and
3284 node != master_node):
3285 additional_nodes.append(node)
3286 vf_node_info.append(self.all_node_info[node])
3288 key = constants.NV_FILELIST
3289 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3290 {key: node_verify_param[key]},
3291 self.cfg.GetClusterName()))
3293 vf_nvinfo = all_nvinfo
3294 vf_node_info = self.my_node_info.values()
3296 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3298 feedback_fn("* Verifying node status")
3302 for node_i in node_data_list:
3304 nimg = node_image[node]
3308 feedback_fn("* Skipping offline node %s" % (node,))
3312 if node == master_node:
3314 elif node_i.master_candidate:
3315 ntype = "master candidate"
3316 elif node_i.drained:
3322 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3324 msg = all_nvinfo[node].fail_msg
3325 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3328 nimg.rpc_fail = True
3331 nresult = all_nvinfo[node].payload
3333 nimg.call_ok = self._VerifyNode(node_i, nresult)
3334 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3335 self._VerifyNodeNetwork(node_i, nresult)
3336 self._VerifyNodeUserScripts(node_i, nresult)
3337 self._VerifyOob(node_i, nresult)
3338 self._VerifyFileStoragePaths(node_i, nresult,
3339 node == master_node)
3342 self._VerifyNodeLVM(node_i, nresult, vg_name)
3343 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3346 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3347 self._UpdateNodeInstances(node_i, nresult, nimg)
3348 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3349 self._UpdateNodeOS(node_i, nresult, nimg)
3351 if not nimg.os_fail:
3352 if refos_img is None:
3354 self._VerifyNodeOS(node_i, nimg, refos_img)
3355 self._VerifyNodeBridges(node_i, nresult, bridges)
3357 # Check whether all running instances are primary for the node. (This
3358 # can no longer be done from _VerifyInstance below, since some of the
3359 # wrong instances could be from other node groups.)
3360 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3362 for inst in non_primary_inst:
3363 test = inst in self.all_inst_info
3364 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3365 "instance should not run on node %s", node_i.name)
3366 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3367 "node is running unknown instance %s", inst)
3369 for node, result in extra_lv_nvinfo.items():
3370 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3371 node_image[node], vg_name)
3373 feedback_fn("* Verifying instance status")
3374 for instance in self.my_inst_names:
3376 feedback_fn("* Verifying instance %s" % instance)
3377 inst_config = self.my_inst_info[instance]
3378 self._VerifyInstance(instance, inst_config, node_image,
3380 inst_nodes_offline = []
3382 pnode = inst_config.primary_node
3383 pnode_img = node_image[pnode]
3384 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3385 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3386 " primary node failed", instance)
3388 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3390 constants.CV_EINSTANCEBADNODE, instance,
3391 "instance is marked as running and lives on offline node %s",
3392 inst_config.primary_node)
3394 # If the instance is non-redundant we cannot survive losing its primary
3395 # node, so we are not N+1 compliant.
3396 if inst_config.disk_template not in constants.DTS_MIRRORED:
3397 i_non_redundant.append(instance)
3399 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3400 constants.CV_EINSTANCELAYOUT,
3401 instance, "instance has multiple secondary nodes: %s",
3402 utils.CommaJoin(inst_config.secondary_nodes),
3403 code=self.ETYPE_WARNING)
3405 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3406 pnode = inst_config.primary_node
3407 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3408 instance_groups = {}
3410 for node in instance_nodes:
3411 instance_groups.setdefault(self.all_node_info[node].group,
3415 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3416 # Sort so that we always list the primary node first.
3417 for group, nodes in sorted(instance_groups.items(),
3418 key=lambda (_, nodes): pnode in nodes,
3421 self._ErrorIf(len(instance_groups) > 1,
3422 constants.CV_EINSTANCESPLITGROUPS,
3423 instance, "instance has primary and secondary nodes in"
3424 " different groups: %s", utils.CommaJoin(pretty_list),
3425 code=self.ETYPE_WARNING)
3427 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3428 i_non_a_balanced.append(instance)
3430 for snode in inst_config.secondary_nodes:
3431 s_img = node_image[snode]
3432 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3433 snode, "instance %s, connection to secondary node failed",
3437 inst_nodes_offline.append(snode)
3439 # warn that the instance lives on offline nodes
3440 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3441 "instance has offline secondary node(s) %s",
3442 utils.CommaJoin(inst_nodes_offline))
3443 # ... or ghost/non-vm_capable nodes
3444 for node in inst_config.all_nodes:
3445 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3446 instance, "instance lives on ghost node %s", node)
3447 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3448 instance, "instance lives on non-vm_capable node %s", node)
3450 feedback_fn("* Verifying orphan volumes")
3451 reserved = utils.FieldSet(*cluster.reserved_lvs)
3453 # We will get spurious "unknown volume" warnings if any node of this group
3454 # is secondary for an instance whose primary is in another group. To avoid
3455 # them, we find these instances and add their volumes to node_vol_should.
3456 for inst in self.all_inst_info.values():
3457 for secondary in inst.secondary_nodes:
3458 if (secondary in self.my_node_info
3459 and inst.name not in self.my_inst_info):
3460 inst.MapLVsByNode(node_vol_should)
3463 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3465 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3466 feedback_fn("* Verifying N+1 Memory redundancy")
3467 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3469 feedback_fn("* Other Notes")
3471 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3472 % len(i_non_redundant))
3474 if i_non_a_balanced:
3475 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3476 % len(i_non_a_balanced))
3479 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3482 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3485 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3489 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3490 """Analyze the post-hooks' result
3492 This method analyses the hook result, handles it, and sends some
3493 nicely-formatted feedback back to the user.
3495 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3496 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3497 @param hooks_results: the results of the multi-node hooks rpc call
3498 @param feedback_fn: function used to send feedback back to the caller
3499 @param lu_result: previous Exec result
3500 @return: the new Exec result, based on the previous result
3504 # We only really run POST phase hooks, only for non-empty groups,
3505 # and are only interested in their results
3506 if not self.my_node_names:
3509 elif phase == constants.HOOKS_PHASE_POST:
3510 # Used to change hooks' output to proper indentation
3511 feedback_fn("* Hooks Results")
3512 assert hooks_results, "invalid result from hooks"
3514 for node_name in hooks_results:
3515 res = hooks_results[node_name]
3517 test = msg and not res.offline
3518 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3519 "Communication failure in hooks execution: %s", msg)
3520 if res.offline or msg:
3521 # No need to investigate payload if node is offline or gave
3524 for script, hkr, output in res.payload:
3525 test = hkr == constants.HKR_FAIL
3526 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3527 "Script %s failed, output:", script)
3529 output = self._HOOKS_INDENT_RE.sub(" ", output)
3530 feedback_fn("%s" % output)
3536 class LUClusterVerifyDisks(NoHooksLU):
3537 """Verifies the cluster disk status.
3542 def ExpandNames(self):
3543 self.share_locks = _ShareAll()
3544 self.needed_locks = {
3545 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3548 def Exec(self, feedback_fn):
3549 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3551 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3552 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3553 for group in group_names])
3556 class LUGroupVerifyDisks(NoHooksLU):
3557 """Verifies the status of all disks in a node group.
3562 def ExpandNames(self):
3563 # Raises errors.OpPrereqError on its own if group can't be found
3564 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3566 self.share_locks = _ShareAll()
3567 self.needed_locks = {
3568 locking.LEVEL_INSTANCE: [],
3569 locking.LEVEL_NODEGROUP: [],
3570 locking.LEVEL_NODE: [],
3573 def DeclareLocks(self, level):
3574 if level == locking.LEVEL_INSTANCE:
3575 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3577 # Lock instances optimistically, needs verification once node and group
3578 # locks have been acquired
3579 self.needed_locks[locking.LEVEL_INSTANCE] = \
3580 self.cfg.GetNodeGroupInstances(self.group_uuid)
3582 elif level == locking.LEVEL_NODEGROUP:
3583 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3585 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3586 set([self.group_uuid] +
3587 # Lock all groups used by instances optimistically; this requires
3588 # going via the node before it's locked, requiring verification
3591 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3592 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3594 elif level == locking.LEVEL_NODE:
3595 # This will only lock the nodes in the group to be verified which contain
3597 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3598 self._LockInstancesNodes()
3600 # Lock all nodes in group to be verified
3601 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3602 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3603 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3605 def CheckPrereq(self):
3606 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3607 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3608 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3610 assert self.group_uuid in owned_groups
3612 # Check if locked instances are still correct
3613 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3615 # Get instance information
3616 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3618 # Check if node groups for locked instances are still correct
3619 _CheckInstancesNodeGroups(self.cfg, self.instances,
3620 owned_groups, owned_nodes, self.group_uuid)
3622 def Exec(self, feedback_fn):
3623 """Verify integrity of cluster disks.
3625 @rtype: tuple of three items
3626 @return: a tuple of (dict of node-to-node_error, list of instances
3627 which need activate-disks, dict of instance: (node, volume) for
3632 res_instances = set()
3635 nv_dict = _MapInstanceDisksToNodes(
3636 [inst for inst in self.instances.values()
3637 if inst.admin_state == constants.ADMINST_UP])
3640 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3641 set(self.cfg.GetVmCapableNodeList()))
3643 node_lvs = self.rpc.call_lv_list(nodes, [])
3645 for (node, node_res) in node_lvs.items():
3646 if node_res.offline:
3649 msg = node_res.fail_msg
3651 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3652 res_nodes[node] = msg
3655 for lv_name, (_, _, lv_online) in node_res.payload.items():
3656 inst = nv_dict.pop((node, lv_name), None)
3657 if not (lv_online or inst is None):
3658 res_instances.add(inst)
3660 # any leftover items in nv_dict are missing LVs, let's arrange the data
3662 for key, inst in nv_dict.iteritems():
3663 res_missing.setdefault(inst, []).append(list(key))
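# res_nodes: {node_name: error message} for nodes that failed the LV query,
# res_instances: instances with offline LVs (they need activate-disks),
# res_missing: {instance: [[node, lv_name], ...]} for LVs not found at all.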
3665 return (res_nodes, list(res_instances), res_missing)
3668 class LUClusterRepairDiskSizes(NoHooksLU):
3669 """Verifies the cluster disk sizes.
3674 def ExpandNames(self):
3675 if self.op.instances:
3676 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3677 self.needed_locks = {
3678 locking.LEVEL_NODE_RES: [],
3679 locking.LEVEL_INSTANCE: self.wanted_names,
3681 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3683 self.wanted_names = None
3684 self.needed_locks = {
3685 locking.LEVEL_NODE_RES: locking.ALL_SET,
3686 locking.LEVEL_INSTANCE: locking.ALL_SET,
3688 self.share_locks = {
3689 locking.LEVEL_NODE_RES: 1,
3690 locking.LEVEL_INSTANCE: 0,
3693 def DeclareLocks(self, level):
3694 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3695 self._LockInstancesNodes(primary_only=True, level=level)
3697 def CheckPrereq(self):
3698 """Check prerequisites.
3700 This only checks the optional instance list against the existing names.
3703 if self.wanted_names is None:
3704 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3706 self.wanted_instances = \
3707 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3709 def _EnsureChildSizes(self, disk):
3710 """Ensure children of the disk have the needed disk size.
3712 This is valid mainly for DRBD8 and fixes an issue where the
3713 children have smaller disk size.
3715 @param disk: an L{ganeti.objects.Disk} object
3718 if disk.dev_type == constants.LD_DRBD8:
3719 assert disk.children, "Empty children for DRBD8?"
3720 fchild = disk.children[0]
3721 mismatch = fchild.size < disk.size
3722 if mismatch:
3723 self.LogInfo("Child disk has size %d, parent %d, fixing",
3724 fchild.size, disk.size)
3725 fchild.size = disk.size
3727 # and we recurse on this child only, not on the metadev
3728 return self._EnsureChildSizes(fchild) or mismatch
3729 else:
3730 return False
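# Editor's illustration (hypothetical values, not from the original source):
# for a DRBD8 disk recorded with size 10240 whose data child reports only
# 10200, the helper above grows the child and reports the mismatch so the
# caller knows the configuration must be written back:
#
#   # disk.size == 10240, disk.children[0].size == 10200
#   if self._EnsureChildSizes(disk):      # -> True, child size is now 10240
#       self.cfg.Update(instance, feedback_fn)
#
# The DRBD metadata child is deliberately left untouched.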
3732 def Exec(self, feedback_fn):
3733 """Verify the size of cluster disks.
3736 # TODO: check child disks too
3737 # TODO: check differences in size between primary/secondary nodes
3738 per_node_disks = {}
3739 for instance in self.wanted_instances:
3740 pnode = instance.primary_node
3741 if pnode not in per_node_disks:
3742 per_node_disks[pnode] = []
3743 for idx, disk in enumerate(instance.disks):
3744 per_node_disks[pnode].append((instance, idx, disk))
3746 assert not (frozenset(per_node_disks.keys()) -
3747 self.owned_locks(locking.LEVEL_NODE_RES)), \
3748 "Not owning correct locks"
3749 assert not self.owned_locks(locking.LEVEL_NODE)
3751 changed = []
3752 for node, dskl in per_node_disks.items():
3753 newl = [v[2].Copy() for v in dskl]
3754 for dsk in newl:
3755 self.cfg.SetDiskID(dsk, node)
3756 result = self.rpc.call_blockdev_getsize(node, newl)
3757 if result.fail_msg:
3758 self.LogWarning("Failure in blockdev_getsize call to node"
3759 " %s, ignoring", node)
3760 continue
3761 if len(result.payload) != len(dskl):
3762 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3763 " result.payload=%s", node, len(dskl), result.payload)
3764 self.LogWarning("Invalid result from node %s, ignoring node results",
3767 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3769 self.LogWarning("Disk %d of instance %s did not return size"
3770 " information, ignoring", idx, instance.name)
3772 if not isinstance(size, (int, long)):
3773 self.LogWarning("Disk %d of instance %s did not return valid"
3774 " size information, ignoring", idx, instance.name)
3775 continue
3776 size = size >> 20
3777 if size != disk.size:
3778 self.LogInfo("Disk %d of instance %s has mismatched size,"
3779 " correcting: recorded %d, actual %d", idx,
3780 instance.name, disk.size, size)
3781 disk.size = size
3782 self.cfg.Update(instance, feedback_fn)
3783 changed.append((instance.name, idx, size))
3784 if self._EnsureChildSizes(disk):
3785 self.cfg.Update(instance, feedback_fn)
3786 changed.append((instance.name, idx, disk.size))
3787 return changed
3790 class LUClusterRename(LogicalUnit):
3791 """Rename the cluster.
3794 HPATH = "cluster-rename"
3795 HTYPE = constants.HTYPE_CLUSTER
3797 def BuildHooksEnv(self):
3802 "OP_TARGET": self.cfg.GetClusterName(),
3803 "NEW_NAME": self.op.name,
3806 def BuildHooksNodes(self):
3807 """Build hooks nodes.
3810 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3812 def CheckPrereq(self):
3813 """Verify that the passed name is a valid one.
3816 hostname = netutils.GetHostname(name=self.op.name,
3817 family=self.cfg.GetPrimaryIPFamily())
3819 new_name = hostname.name
3820 self.ip = new_ip = hostname.ip
3821 old_name = self.cfg.GetClusterName()
3822 old_ip = self.cfg.GetMasterIP()
3823 if new_name == old_name and new_ip == old_ip:
3824 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3825 " cluster has changed",
3827 if new_ip != old_ip:
3828 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3829 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3830 " reachable on the network" %
3831 new_ip, errors.ECODE_NOTUNIQUE)
3833 self.op.name = new_name
3835 def Exec(self, feedback_fn):
3836 """Rename the cluster.
3839 clustername = self.op.name
3840 new_ip = self.ip
3842 # shutdown the master IP
3843 master_params = self.cfg.GetMasterNetworkParameters()
3844 ems = self.cfg.GetUseExternalMipScript()
3845 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3847 result.Raise("Could not disable the master role")
3850 cluster = self.cfg.GetClusterInfo()
3851 cluster.cluster_name = clustername
3852 cluster.master_ip = new_ip
3853 self.cfg.Update(cluster, feedback_fn)
3855 # update the known hosts file
3856 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
3857 node_list = self.cfg.GetOnlineNodeList()
3858 try:
3859 node_list.remove(master_params.name)
3860 except ValueError:
3861 pass
3862 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
3864 master_params.ip = new_ip
3865 result = self.rpc.call_node_activate_master_ip(master_params.name,
3867 msg = result.fail_msg
3869 self.LogWarning("Could not re-enable the master role on"
3870 " the master, please restart manually: %s", msg)
3875 def _ValidateNetmask(cfg, netmask):
3876 """Checks if a netmask is valid.
3878 @type cfg: L{config.ConfigWriter}
3879 @param cfg: The cluster configuration
3881 @param netmask: the netmask to be verified
3882 @raise errors.OpPrereqError: if the validation fails
3885 ip_family = cfg.GetPrimaryIPFamily()
3886 try:
3887 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3888 except errors.ProgrammerError:
3889 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3890 ip_family, errors.ECODE_INVAL)
3891 if not ipcls.ValidateNetmask(netmask):
3892 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3893 (netmask), errors.ECODE_INVAL)
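# Editor's sketch of the helper's behaviour (hypothetical cfg object, not from
# the original source): for an IPv4 cluster the netmask is a CIDR prefix
# length, so
#
#   _ValidateNetmask(cfg, 24)    # accepted
#   _ValidateNetmask(cfg, 64)    # raises errors.OpPrereqError, since /64 is
#                                # not a valid IPv4 netmask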
3896 class LUClusterSetParams(LogicalUnit):
3897 """Change the parameters of the cluster.
3900 HPATH = "cluster-modify"
3901 HTYPE = constants.HTYPE_CLUSTER
3904 def CheckArguments(self):
3908 if self.op.uid_pool:
3909 uidpool.CheckUidPool(self.op.uid_pool)
3911 if self.op.add_uids:
3912 uidpool.CheckUidPool(self.op.add_uids)
3914 if self.op.remove_uids:
3915 uidpool.CheckUidPool(self.op.remove_uids)
3917 if self.op.master_netmask is not None:
3918 _ValidateNetmask(self.cfg, self.op.master_netmask)
3920 if self.op.diskparams:
3921 for dt_params in self.op.diskparams.values():
3922 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3923 try:
3924 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3925 except errors.OpPrereqError, err:
3926 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
3927 errors.ECODE_INVAL)
3929 def ExpandNames(self):
3930 # FIXME: in the future maybe other cluster params won't require checking on
3931 # all nodes to be modified.
3932 self.needed_locks = {
3933 locking.LEVEL_NODE: locking.ALL_SET,
3934 locking.LEVEL_INSTANCE: locking.ALL_SET,
3935 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3937 self.share_locks = {
3938 locking.LEVEL_NODE: 1,
3939 locking.LEVEL_INSTANCE: 1,
3940 locking.LEVEL_NODEGROUP: 1,
3943 def BuildHooksEnv(self):
3948 "OP_TARGET": self.cfg.GetClusterName(),
3949 "NEW_VG_NAME": self.op.vg_name,
3952 def BuildHooksNodes(self):
3953 """Build hooks nodes.
3956 mn = self.cfg.GetMasterNode()
3957 return ([mn], [mn])
3959 def CheckPrereq(self):
3960 """Check prerequisites.
3962 This checks whether the given params don't conflict and
3963 if the given volume group is valid.
3966 if self.op.vg_name is not None and not self.op.vg_name:
3967 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3968 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3969 " instances exist", errors.ECODE_INVAL)
3971 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3972 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3973 raise errors.OpPrereqError("Cannot disable drbd helper while"
3974 " drbd-based instances exist",
3977 node_list = self.owned_locks(locking.LEVEL_NODE)
3979 # if vg_name not None, checks given volume group on all nodes
3980 if self.op.vg_name:
3981 vglist = self.rpc.call_vg_list(node_list)
3982 for node in node_list:
3983 msg = vglist[node].fail_msg
3984 if msg:
3985 # ignoring down node
3986 self.LogWarning("Error while gathering data on node %s"
3987 " (ignoring node): %s", node, msg)
3988 continue
3989 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3990 self.op.vg_name,
3991 constants.MIN_VG_SIZE)
3992 if vgstatus:
3993 raise errors.OpPrereqError("Error on node '%s': %s" %
3994 (node, vgstatus), errors.ECODE_ENVIRON)
3996 if self.op.drbd_helper:
3997 # checks given drbd helper on all nodes
3998 helpers = self.rpc.call_drbd_helper(node_list)
3999 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4000 if ninfo.offline:
4001 self.LogInfo("Not checking drbd helper on offline node %s", node)
4002 continue
4003 msg = helpers[node].fail_msg
4004 if msg:
4005 raise errors.OpPrereqError("Error checking drbd helper on node"
4006 " '%s': %s" % (node, msg),
4007 errors.ECODE_ENVIRON)
4008 node_helper = helpers[node].payload
4009 if node_helper != self.op.drbd_helper:
4010 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4011 (node, node_helper), errors.ECODE_ENVIRON)
4013 self.cluster = cluster = self.cfg.GetClusterInfo()
4014 # validate params changes
4015 if self.op.beparams:
4016 objects.UpgradeBeParams(self.op.beparams)
4017 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4018 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4020 if self.op.ndparams:
4021 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4022 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4024 # TODO: we need a more general way to handle resetting
4025 # cluster-level parameters to default values
4026 if self.new_ndparams["oob_program"] == "":
4027 self.new_ndparams["oob_program"] = \
4028 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4030 if self.op.hv_state:
4031 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4032 self.cluster.hv_state_static)
4033 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4034 for hv, values in new_hv_state.items())
4036 if self.op.disk_state:
4037 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4038 self.cluster.disk_state_static)
4039 self.new_disk_state = \
4040 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4041 for name, values in svalues.items()))
4042 for storage, svalues in new_disk_state.items())
4044 if self.op.ipolicy:
4045 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4046 group_policy=False)
4048 all_instances = self.cfg.GetAllInstancesInfo().values()
4049 violations = set()
4050 for group in self.cfg.GetAllNodeGroupsInfo().values():
4051 instances = frozenset([inst for inst in all_instances
4052 if compat.any(node in group.members
4053 for node in inst.all_nodes)])
4054 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4055 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4056 new = _ComputeNewInstanceViolations(ipol,
4057 new_ipolicy, instances)
4058 if new:
4059 violations.update(new)
4061 if violations:
4062 self.LogWarning("After the ipolicy change the following instances"
4063 " violate them: %s",
4064 utils.CommaJoin(utils.NiceSort(violations)))
4066 if self.op.nicparams:
4067 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4068 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4069 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4070 nic_errors = []
4072 # check all instances for consistency
4073 for instance in self.cfg.GetAllInstancesInfo().values():
4074 for nic_idx, nic in enumerate(instance.nics):
4075 params_copy = copy.deepcopy(nic.nicparams)
4076 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4078 # check parameter syntax
4079 try:
4080 objects.NIC.CheckParameterSyntax(params_filled)
4081 except errors.ConfigurationError, err:
4082 nic_errors.append("Instance %s, nic/%d: %s" %
4083 (instance.name, nic_idx, err))
4085 # if we're moving instances to routed, check that they have an ip
4086 target_mode = params_filled[constants.NIC_MODE]
4087 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4088 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4089 " address" % (instance.name, nic_idx))
4091 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4092 "\n".join(nic_errors), errors.ECODE_INVAL)
4094 # hypervisor list/parameters
4095 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4096 if self.op.hvparams:
4097 for hv_name, hv_dict in self.op.hvparams.items():
4098 if hv_name not in self.new_hvparams:
4099 self.new_hvparams[hv_name] = hv_dict
4100 else:
4101 self.new_hvparams[hv_name].update(hv_dict)
4103 # disk template parameters
4104 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4105 if self.op.diskparams:
4106 for dt_name, dt_params in self.op.diskparams.items():
4107 if dt_name not in self.new_diskparams:
4108 self.new_diskparams[dt_name] = dt_params
4109 else:
4110 self.new_diskparams[dt_name].update(dt_params)
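# Editor's illustration of the merge above (hypothetical values, not from the
# original source): cluster-level disk parameters are copied first, then each
# submitted template either gets added or updated key by key, e.g.
#
#   cluster.diskparams == {"drbd": {"resync-rate": 1024}}
#   self.op.diskparams == {"drbd": {"metavg": "xenvg"}, "plain": {}}
#   # => self.new_diskparams ==
#   #    {"drbd": {"resync-rate": 1024, "metavg": "xenvg"}, "plain": {}}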
4112 # os hypervisor parameters
4113 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4114 if self.op.os_hvp:
4115 for os_name, hvs in self.op.os_hvp.items():
4116 if os_name not in self.new_os_hvp:
4117 self.new_os_hvp[os_name] = hvs
4118 else:
4119 for hv_name, hv_dict in hvs.items():
4120 if hv_name not in self.new_os_hvp[os_name]:
4121 self.new_os_hvp[os_name][hv_name] = hv_dict
4122 else:
4123 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4126 self.new_osp = objects.FillDict(cluster.osparams, {})
4127 if self.op.osparams:
4128 for os_name, osp in self.op.osparams.items():
4129 if os_name not in self.new_osp:
4130 self.new_osp[os_name] = {}
4132 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4133 use_none=True)
4135 if not self.new_osp[os_name]:
4136 # we removed all parameters
4137 del self.new_osp[os_name]
4139 # check the parameter validity (remote check)
4140 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4141 os_name, self.new_osp[os_name])
4143 # changes to the hypervisor list
4144 if self.op.enabled_hypervisors is not None:
4145 self.hv_list = self.op.enabled_hypervisors
4146 for hv in self.hv_list:
4147 # if the hypervisor doesn't already exist in the cluster
4148 # hvparams, we initialize it to empty, and then (in both
4149 # cases) we make sure to fill the defaults, as we might not
4150 # have a complete defaults list if the hypervisor wasn't
4151 # enabled before
4152 if hv not in new_hvp:
4153 new_hvp[hv] = {}
4154 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4155 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4156 else:
4157 self.hv_list = cluster.enabled_hypervisors
4159 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4160 # either the enabled list has changed, or the parameters have, validate
4161 for hv_name, hv_params in self.new_hvparams.items():
4162 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4163 (self.op.enabled_hypervisors and
4164 hv_name in self.op.enabled_hypervisors)):
4165 # either this is a new hypervisor, or its parameters have changed
4166 hv_class = hypervisor.GetHypervisor(hv_name)
4167 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4168 hv_class.CheckParameterSyntax(hv_params)
4169 _CheckHVParams(self, node_list, hv_name, hv_params)
4171 if self.op.os_hvp:
4172 # no need to check any newly-enabled hypervisors, since the
4173 # defaults have already been checked in the above code-block
4174 for os_name, os_hvp in self.new_os_hvp.items():
4175 for hv_name, hv_params in os_hvp.items():
4176 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4177 # we need to fill in the new os_hvp on top of the actual hv_p
4178 cluster_defaults = self.new_hvparams.get(hv_name, {})
4179 new_osp = objects.FillDict(cluster_defaults, hv_params)
4180 hv_class = hypervisor.GetHypervisor(hv_name)
4181 hv_class.CheckParameterSyntax(new_osp)
4182 _CheckHVParams(self, node_list, hv_name, new_osp)
4184 if self.op.default_iallocator:
4185 alloc_script = utils.FindFile(self.op.default_iallocator,
4186 constants.IALLOCATOR_SEARCH_PATH,
4188 if alloc_script is None:
4189 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4190 " specified" % self.op.default_iallocator,
4193 def Exec(self, feedback_fn):
4194 """Change the parameters of the cluster.
4197 if self.op.vg_name is not None:
4198 new_volume = self.op.vg_name
4199 if not new_volume:
4200 new_volume = None
4201 if new_volume != self.cfg.GetVGName():
4202 self.cfg.SetVGName(new_volume)
4203 else:
4204 feedback_fn("Cluster LVM configuration already in desired"
4205 " state, not changing")
4206 if self.op.drbd_helper is not None:
4207 new_helper = self.op.drbd_helper
4208 if not new_helper:
4209 new_helper = None
4210 if new_helper != self.cfg.GetDRBDHelper():
4211 self.cfg.SetDRBDHelper(new_helper)
4212 else:
4213 feedback_fn("Cluster DRBD helper already in desired state,"
4214 " not changing")
4215 if self.op.hvparams:
4216 self.cluster.hvparams = self.new_hvparams
4217 if self.op.os_hvp:
4218 self.cluster.os_hvp = self.new_os_hvp
4219 if self.op.enabled_hypervisors is not None:
4220 self.cluster.hvparams = self.new_hvparams
4221 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4222 if self.op.beparams:
4223 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4224 if self.op.nicparams:
4225 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4226 if self.op.ipolicy:
4227 self.cluster.ipolicy = self.new_ipolicy
4228 if self.op.osparams:
4229 self.cluster.osparams = self.new_osp
4230 if self.op.ndparams:
4231 self.cluster.ndparams = self.new_ndparams
4232 if self.op.diskparams:
4233 self.cluster.diskparams = self.new_diskparams
4234 if self.op.hv_state:
4235 self.cluster.hv_state_static = self.new_hv_state
4236 if self.op.disk_state:
4237 self.cluster.disk_state_static = self.new_disk_state
4239 if self.op.candidate_pool_size is not None:
4240 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4241 # we need to update the pool size here, otherwise the save will fail
4242 _AdjustCandidatePool(self, [])
4244 if self.op.maintain_node_health is not None:
4245 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4246 feedback_fn("Note: CONFD was disabled at build time, node health"
4247 " maintenance is not useful (still enabling it)")
4248 self.cluster.maintain_node_health = self.op.maintain_node_health
4250 if self.op.prealloc_wipe_disks is not None:
4251 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4253 if self.op.add_uids is not None:
4254 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4256 if self.op.remove_uids is not None:
4257 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4259 if self.op.uid_pool is not None:
4260 self.cluster.uid_pool = self.op.uid_pool
4262 if self.op.default_iallocator is not None:
4263 self.cluster.default_iallocator = self.op.default_iallocator
4265 if self.op.reserved_lvs is not None:
4266 self.cluster.reserved_lvs = self.op.reserved_lvs
4268 if self.op.use_external_mip_script is not None:
4269 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4271 def helper_os(aname, mods, desc):
4272 desc += " OS list"
4273 lst = getattr(self.cluster, aname)
4274 for key, val in mods:
4275 if key == constants.DDM_ADD:
4276 if val in lst:
4277 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4278 else:
4279 lst.append(val)
4280 elif key == constants.DDM_REMOVE:
4281 if val in lst:
4282 lst.remove(val)
4283 else:
4284 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4285 else:
4286 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4288 if self.op.hidden_os:
4289 helper_os("hidden_os", self.op.hidden_os, "hidden")
4291 if self.op.blacklisted_os:
4292 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4294 if self.op.master_netdev:
4295 master_params = self.cfg.GetMasterNetworkParameters()
4296 ems = self.cfg.GetUseExternalMipScript()
4297 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4298 self.cluster.master_netdev)
4299 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4301 result.Raise("Could not disable the master ip")
4302 feedback_fn("Changing master_netdev from %s to %s" %
4303 (master_params.netdev, self.op.master_netdev))
4304 self.cluster.master_netdev = self.op.master_netdev
4306 if self.op.master_netmask:
4307 master_params = self.cfg.GetMasterNetworkParameters()
4308 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4309 result = self.rpc.call_node_change_master_netmask(master_params.name,
4310 master_params.netmask,
4311 self.op.master_netmask,
4313 master_params.netdev)
4315 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4318 self.cluster.master_netmask = self.op.master_netmask
4320 self.cfg.Update(self.cluster, feedback_fn)
4322 if self.op.master_netdev:
4323 master_params = self.cfg.GetMasterNetworkParameters()
4324 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4325 self.op.master_netdev)
4326 ems = self.cfg.GetUseExternalMipScript()
4327 result = self.rpc.call_node_activate_master_ip(master_params.name,
4328 master_params, ems)
4329 if result.fail_msg:
4330 self.LogWarning("Could not re-enable the master ip on"
4331 " the master, please restart manually: %s",
4332 result.fail_msg)
4335 def _UploadHelper(lu, nodes, fname):
4336 """Helper for uploading a file and showing warnings.
4339 if os.path.exists(fname):
4340 result = lu.rpc.call_upload_file(nodes, fname)
4341 for to_node, to_result in result.items():
4342 msg = to_result.fail_msg
4344 msg = ("Copy of file %s to node %s failed: %s" %
4345 (fname, to_node, msg))
4346 lu.proc.LogWarning(msg)
4349 def _ComputeAncillaryFiles(cluster, redist):
4350 """Compute files external to Ganeti which need to be consistent.
4352 @type redist: boolean
4353 @param redist: Whether to include files which need to be redistributed
4356 # Compute files for all nodes
4357 files_all = set([
4358 pathutils.SSH_KNOWN_HOSTS_FILE,
4359 pathutils.CONFD_HMAC_KEY,
4360 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4361 pathutils.SPICE_CERT_FILE,
4362 pathutils.SPICE_CACERT_FILE,
4363 pathutils.RAPI_USERS_FILE,
4364 ])
4366 if redist:
4367 # we need to ship at least the RAPI certificate
4368 files_all.add(pathutils.RAPI_CERT_FILE)
4369 else:
4370 files_all.update(pathutils.ALL_CERT_FILES)
4371 files_all.update(ssconf.SimpleStore().GetFileList())
4373 if cluster.modify_etc_hosts:
4374 files_all.add(pathutils.ETC_HOSTS)
4376 if cluster.use_external_mip_script:
4377 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4379 # Files which are optional, these must:
4380 # - be present in one other category as well
4381 # - either exist or not exist on all nodes of that category (mc, vm all)
4382 files_opt = set([
4383 pathutils.RAPI_USERS_FILE,
4384 ])
4386 # Files which should only be on master candidates
4387 files_mc = set()
4389 if not redist:
4390 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4393 if (not redist and
4394 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4395 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4396 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4398 # Files which should only be on VM-capable nodes
4399 files_vm = set(
4400 filename
4401 for hv_name in cluster.enabled_hypervisors
4402 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4404 files_opt |= set(
4405 filename
4406 for hv_name in cluster.enabled_hypervisors
4407 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4409 # Filenames in each category must be unique
4410 all_files_set = files_all | files_mc | files_vm
4411 assert (len(all_files_set) ==
4412 sum(map(len, [files_all, files_mc, files_vm]))), \
4413 "Found file listed in more than one file list"
4415 # Optional files must be present in one other category
4416 assert all_files_set.issuperset(files_opt), \
4417 "Optional file not in a different required list"
4419 # This one file should never ever be re-distributed via RPC
4420 assert not (redist and
4421 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4423 return (files_all, files_opt, files_mc, files_vm)
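# Editor's sketch of how the tuple returned above is typically consumed
# (hypothetical call site, not from the original source):
#
#   (files_all, files_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(cluster, True)
#   # files_all -> every node, files_mc -> master candidates only,
#   # files_vm  -> vm_capable nodes only; anything also listed in files_opt
#   # may legitimately be missing and should not be reported as an error.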
4426 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4427 """Distribute additional files which are part of the cluster configuration.
4429 ConfigWriter takes care of distributing the config and ssconf files, but
4430 there are more files which should be distributed to all nodes. This function
4431 makes sure those are copied.
4433 @param lu: calling logical unit
4434 @param additional_nodes: list of nodes not in the config to distribute to
4435 @type additional_vm: boolean
4436 @param additional_vm: whether the additional nodes are vm-capable or not
4439 # Gather target nodes
4440 cluster = lu.cfg.GetClusterInfo()
4441 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4443 online_nodes = lu.cfg.GetOnlineNodeList()
4444 online_set = frozenset(online_nodes)
4445 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4447 if additional_nodes is not None:
4448 online_nodes.extend(additional_nodes)
4449 if additional_vm:
4450 vm_nodes.extend(additional_nodes)
4452 # Never distribute to master node
4453 for nodelist in [online_nodes, vm_nodes]:
4454 if master_info.name in nodelist:
4455 nodelist.remove(master_info.name)
4458 (files_all, _, files_mc, files_vm) = \
4459 _ComputeAncillaryFiles(cluster, True)
4461 # Never re-distribute configuration file from here
4462 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4463 pathutils.CLUSTER_CONF_FILE in files_vm)
4464 assert not files_mc, "Master candidates not handled in this function"
4466 filemap = [
4467 (online_nodes, files_all),
4468 (vm_nodes, files_vm),
4469 ]
4472 for (node_list, files) in filemap:
4473 for fname in files:
4474 _UploadHelper(lu, node_list, fname)
4477 class LUClusterRedistConf(NoHooksLU):
4478 """Force the redistribution of cluster configuration.
4480 This is a very simple LU.
4485 def ExpandNames(self):
4486 self.needed_locks = {
4487 locking.LEVEL_NODE: locking.ALL_SET,
4489 self.share_locks[locking.LEVEL_NODE] = 1
4491 def Exec(self, feedback_fn):
4492 """Redistribute the configuration.
4495 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4496 _RedistributeAncillaryFiles(self)
4499 class LUClusterActivateMasterIp(NoHooksLU):
4500 """Activate the master IP on the master node.
4503 def Exec(self, feedback_fn):
4504 """Activate the master IP.
4507 master_params = self.cfg.GetMasterNetworkParameters()
4508 ems = self.cfg.GetUseExternalMipScript()
4509 result = self.rpc.call_node_activate_master_ip(master_params.name,
4511 result.Raise("Could not activate the master IP")
4514 class LUClusterDeactivateMasterIp(NoHooksLU):
4515 """Deactivate the master IP on the master node.
4518 def Exec(self, feedback_fn):
4519 """Deactivate the master IP.
4522 master_params = self.cfg.GetMasterNetworkParameters()
4523 ems = self.cfg.GetUseExternalMipScript()
4524 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4526 result.Raise("Could not deactivate the master IP")
4529 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4530 """Sleep and poll for an instance's disk to sync.
4533 if not instance.disks or disks is not None and not disks:
4534 return True
4536 disks = _ExpandCheckDisks(instance, disks)
4539 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4541 node = instance.primary_node
4543 for dev in disks:
4544 lu.cfg.SetDiskID(dev, node)
4546 # TODO: Convert to utils.Retry
4549 degr_retries = 10 # in seconds, as we sleep 1 second each time
4550 while True:
4551 max_time = 0
4552 done = True
4553 cumul_degraded = False
4554 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4555 msg = rstats.fail_msg
4557 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4560 raise errors.RemoteError("Can't contact node %s for mirror data,"
4561 " aborting." % node)
4564 rstats = rstats.payload
4566 for i, mstat in enumerate(rstats):
4567 if mstat is None:
4568 lu.LogWarning("Can't compute data for node %s/%s",
4569 node, disks[i].iv_name)
4570 continue
4572 cumul_degraded = (cumul_degraded or
4573 (mstat.is_degraded and mstat.sync_percent is None))
4574 if mstat.sync_percent is not None:
4575 done = False
4576 if mstat.estimated_time is not None:
4577 rem_time = ("%s remaining (estimated)" %
4578 utils.FormatSeconds(mstat.estimated_time))
4579 max_time = mstat.estimated_time
4581 rem_time = "no time estimate"
4582 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4583 (disks[i].iv_name, mstat.sync_percent, rem_time))
4585 # if we're done but degraded, let's do a few small retries, to
4586 # make sure we see a stable and not transient situation; therefore
4587 # we force restart of the loop
4588 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4589 logging.info("Degraded disks found, %d retries left", degr_retries)
4590 degr_retries -= 1
4591 time.sleep(1)
4592 continue
4594 if done or oneshot:
4595 break
4597 time.sleep(min(60, max_time))
4600 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4601 return not cumul_degraded
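# Editor's sketch of a typical call site (hypothetical, not from the original
# source): after creating or replacing mirrored disks, an LU waits for the
# resync to finish before declaring success:
#
#   if not _WaitForSync(self, instance, disks=new_disks):
#     raise errors.OpExecError("Disks of instance %s are degraded" %
#                              instance.name)
#
# With oneshot=True the helper reports the current status once instead of
# polling until the synchronisation completes.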
4604 def _BlockdevFind(lu, node, dev, instance):
4605 """Wrapper around call_blockdev_find to annotate diskparams.
4607 @param lu: A reference to the lu object
4608 @param node: The node to call out
4609 @param dev: The device to find
4610 @param instance: The instance object the device belongs to
4611 @returns The result of the rpc call
4614 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4615 return lu.rpc.call_blockdev_find(node, disk)
4618 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4619 """Wrapper around L{_CheckDiskConsistencyInner}.
4622 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4623 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4627 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4628 ldisk=False):
4629 """Check that mirrors are not degraded.
4631 @attention: The device has to be annotated already.
4633 The ldisk parameter, if True, will change the test from the
4634 is_degraded attribute (which represents overall non-ok status for
4635 the device(s)) to the ldisk (representing the local storage status).
4638 lu.cfg.SetDiskID(dev, node)
4640 result = True
4642 if on_primary or dev.AssembleOnSecondary():
4643 rstats = lu.rpc.call_blockdev_find(node, dev)
4644 msg = rstats.fail_msg
4645 if msg:
4646 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4647 result = False
4648 elif not rstats.payload:
4649 lu.LogWarning("Can't find disk on node %s", node)
4650 result = False
4651 else:
4652 if ldisk:
4653 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4654 else:
4655 result = result and not rstats.payload.is_degraded
4657 if dev.children:
4658 for child in dev.children:
4659 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4660 on_primary)
4662 return result
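# Editor's sketch (hypothetical, not from the original source): callers use the
# ldisk flag to distinguish "the mirror as a whole is degraded" from "the local
# storage underneath is broken", e.g.
#
#   if not _CheckDiskConsistency(self, instance, dev, node, False, ldisk=True):
#     raise errors.OpExecError("Local storage for %s on %s is faulty" %
#                              (dev.iv_name, node))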
4665 class LUOobCommand(NoHooksLU):
4666 """Logical unit for OOB handling.
4670 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4672 def ExpandNames(self):
4673 """Gather locks we need.
4676 if self.op.node_names:
4677 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4678 lock_names = self.op.node_names
4680 lock_names = locking.ALL_SET
4682 self.needed_locks = {
4683 locking.LEVEL_NODE: lock_names,
4686 def CheckPrereq(self):
4687 """Check prerequisites.
4690 - the node exists in the configuration
4693 Any errors are signaled by raising errors.OpPrereqError.
4697 self.master_node = self.cfg.GetMasterNode()
4699 assert self.op.power_delay >= 0.0
4701 if self.op.node_names:
4702 if (self.op.command in self._SKIP_MASTER and
4703 self.master_node in self.op.node_names):
4704 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4705 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4707 if master_oob_handler:
4708 additional_text = ("run '%s %s %s' if you want to operate on the"
4709 " master regardless") % (master_oob_handler,
4713 additional_text = "it does not support out-of-band operations"
4715 raise errors.OpPrereqError(("Operating on the master node %s is not"
4716 " allowed for %s; %s") %
4717 (self.master_node, self.op.command,
4718 additional_text), errors.ECODE_INVAL)
4719 else:
4720 self.op.node_names = self.cfg.GetNodeList()
4721 if self.op.command in self._SKIP_MASTER:
4722 self.op.node_names.remove(self.master_node)
4724 if self.op.command in self._SKIP_MASTER:
4725 assert self.master_node not in self.op.node_names
4726 self.nodes = []
4727 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4728 if node is None:
4729 raise errors.OpPrereqError("Node %s not found" % node_name,
4730 errors.ECODE_NOENT)
4731 else:
4732 self.nodes.append(node)
4734 if (not self.op.ignore_status and
4735 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4736 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4737 " not marked offline") % node_name,
4740 def Exec(self, feedback_fn):
4741 """Execute OOB and return result if we expect any.
4744 master_node = self.master_node
4745 ret = []
4747 for idx, node in enumerate(utils.NiceSort(self.nodes,
4748 key=lambda node: node.name)):
4749 node_entry = [(constants.RS_NORMAL, node.name)]
4750 ret.append(node_entry)
4752 oob_program = _SupportsOob(self.cfg, node)
4754 if not oob_program:
4755 node_entry.append((constants.RS_UNAVAIL, None))
4756 continue
4758 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4759 self.op.command, oob_program, node.name)
4760 result = self.rpc.call_run_oob(master_node, oob_program,
4761 self.op.command, node.name,
4762 self.op.timeout)
4764 if result.fail_msg:
4765 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4766 node.name, result.fail_msg)
4767 node_entry.append((constants.RS_NODATA, None))
4768 else:
4769 try:
4770 self._CheckPayload(result)
4771 except errors.OpExecError, err:
4772 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4773 node.name, err)
4774 node_entry.append((constants.RS_NODATA, None))
4775 else:
4776 if self.op.command == constants.OOB_HEALTH:
4777 # For health we should log important events
4778 for item, status in result.payload:
4779 if status in [constants.OOB_STATUS_WARNING,
4780 constants.OOB_STATUS_CRITICAL]:
4781 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4782 item, node.name, status)
4784 if self.op.command == constants.OOB_POWER_ON:
4785 node.powered = True
4786 elif self.op.command == constants.OOB_POWER_OFF:
4787 node.powered = False
4788 elif self.op.command == constants.OOB_POWER_STATUS:
4789 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4790 if powered != node.powered:
4791 logging.warning(("Recorded power state (%s) of node '%s' does not"
4792 " match actual power state (%s)"), node.powered,
4795 # For configuration changing commands we should update the node
4796 if self.op.command in (constants.OOB_POWER_ON,
4797 constants.OOB_POWER_OFF):
4798 self.cfg.Update(node, feedback_fn)
4800 node_entry.append((constants.RS_NORMAL, result.payload))
4802 if (self.op.command == constants.OOB_POWER_ON and
4803 idx < len(self.nodes) - 1):
4804 time.sleep(self.op.power_delay)
4806 return ret
4808 def _CheckPayload(self, result):
4809 """Checks if the payload is valid.
4811 @param result: RPC result
4812 @raises errors.OpExecError: If payload is not valid
4815 errs = []
4816 if self.op.command == constants.OOB_HEALTH:
4817 if not isinstance(result.payload, list):
4818 errs.append("command 'health' is expected to return a list but got %s" %
4819 type(result.payload))
4820 else:
4821 for item, status in result.payload:
4822 if status not in constants.OOB_STATUSES:
4823 errs.append("health item '%s' has invalid status '%s'" %
4826 if self.op.command == constants.OOB_POWER_STATUS:
4827 if not isinstance(result.payload, dict):
4828 errs.append("power-status is expected to return a dict but got %s" %
4829 type(result.payload))
4831 if self.op.command in [
4832 constants.OOB_POWER_ON,
4833 constants.OOB_POWER_OFF,
4834 constants.OOB_POWER_CYCLE,
4836 if result.payload is not None:
4837 errs.append("%s is expected to not return payload but got '%s'" %
4838 (self.op.command, result.payload))
4840 if errs:
4841 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4842 utils.CommaJoin(errs))
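# Editor's illustration of the payload shapes checked above (hypothetical
# values, not from the original source):
#
#   constants.OOB_HEALTH        -> [["disk0", "OK"], ["psu1", "CRITICAL"]]
#   constants.OOB_POWER_STATUS  -> {constants.OOB_POWER_STATUS_POWERED: True}
#   constants.OOB_POWER_ON/_OFF/_CYCLE -> None (no payload expected)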
4845 class _OsQuery(_QueryBase):
4846 FIELDS = query.OS_FIELDS
4848 def ExpandNames(self, lu):
4849 # Lock all nodes in shared mode
4850 # Temporary removal of locks, should be reverted later
4851 # TODO: reintroduce locks when they are lighter-weight
4852 lu.needed_locks = {}
4853 #self.share_locks[locking.LEVEL_NODE] = 1
4854 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4856 # The following variables interact with _QueryBase._GetNames
4857 if self.names:
4858 self.wanted = self.names
4859 else:
4860 self.wanted = locking.ALL_SET
4862 self.do_locking = self.use_locking
4864 def DeclareLocks(self, lu, level):
4865 pass
4867 @staticmethod
4868 def _DiagnoseByOS(rlist):
4869 """Remaps a per-node return list into an a per-os per-node dictionary
4871 @param rlist: a map with node names as keys and OS objects as values
4874 @return: a dictionary with osnames as keys and as value another
4875 map, with nodes as keys and tuples of (path, status, diagnose,
4876 variants, parameters, api_versions) as values, eg::
4878 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4879 (/srv/..., False, "invalid api")],
4880 "node2": [(/srv/..., True, "", [], [])]}
4884 all_os = {}
4885 # we build here the list of nodes that didn't fail the RPC (at RPC
4886 # level), so that nodes with a non-responding node daemon don't
4887 # make all OSes invalid
4888 good_nodes = [node_name for node_name in rlist
4889 if not rlist[node_name].fail_msg]
4890 for node_name, nr in rlist.items():
4891 if nr.fail_msg or not nr.payload:
4892 continue
4893 for (name, path, status, diagnose, variants,
4894 params, api_versions) in nr.payload:
4895 if name not in all_os:
4896 # build a list of nodes for this os containing empty lists
4897 # for each node in node_list
4898 all_os[name] = {}
4899 for nname in good_nodes:
4900 all_os[name][nname] = []
4901 # convert params from [name, help] to (name, help)
4902 params = [tuple(v) for v in params]
4903 all_os[name][node_name].append((path, status, diagnose,
4904 variants, params, api_versions))
4905 return all_os
4907 def _GetQueryData(self, lu):
4908 """Computes the list of nodes and their attributes.
4911 # Locking is not used
4912 assert not (compat.any(lu.glm.is_owned(level)
4913 for level in locking.LEVELS
4914 if level != locking.LEVEL_CLUSTER) or
4915 self.do_locking or self.use_locking)
4917 valid_nodes = [node.name
4918 for node in lu.cfg.GetAllNodesInfo().values()
4919 if not node.offline and node.vm_capable]
4920 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4921 cluster = lu.cfg.GetClusterInfo()
4923 data = {}
4925 for (os_name, os_data) in pol.items():
4926 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4927 hidden=(os_name in cluster.hidden_os),
4928 blacklisted=(os_name in cluster.blacklisted_os))
4930 variants = set()
4931 parameters = set()
4932 api_versions = set()
4934 for idx, osl in enumerate(os_data.values()):
4935 info.valid = bool(info.valid and osl and osl[0][1])
4936 if not info.valid:
4937 break
4939 (node_variants, node_params, node_api) = osl[0][3:6]
4940 if idx == 0:
4941 # First entry
4942 variants.update(node_variants)
4943 parameters.update(node_params)
4944 api_versions.update(node_api)
4945 else:
4946 # Filter out inconsistent values
4947 variants.intersection_update(node_variants)
4948 parameters.intersection_update(node_params)
4949 api_versions.intersection_update(node_api)
4951 info.variants = list(variants)
4952 info.parameters = list(parameters)
4953 info.api_versions = list(api_versions)
4955 data[os_name] = info
4957 # Prepare data in requested order
4958 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4959 if name in data]
4962 class LUOsDiagnose(NoHooksLU):
4963 """Logical unit for OS diagnose/query.
4968 @staticmethod
4969 def _BuildFilter(fields, names):
4970 """Builds a filter for querying OSes.
4973 name_filter = qlang.MakeSimpleFilter("name", names)
4975 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4976 # respective field is not requested
4977 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4978 for fname in ["hidden", "blacklisted"]
4979 if fname not in fields]
4980 if "valid" not in fields:
4981 status_filter.append([qlang.OP_TRUE, "valid"])
4983 if status_filter:
4984 status_filter.insert(0, qlang.OP_AND)
4985 else:
4986 status_filter = None
4988 if name_filter and status_filter:
4989 return [qlang.OP_AND, name_filter, status_filter]
4990 elif name_filter:
4991 return name_filter
4992 else:
4993 return status_filter
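# Editor's illustration (not from the original source): for a plain
# "gnt-os list -o name" the names list is empty and none of the status fields
# are requested, so the filter built above is roughly
#
#   [qlang.OP_AND,
#    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#    [qlang.OP_TRUE, "valid"]]
#
# whereas explicitly requesting "hidden", "blacklisted" or "valid" drops the
# corresponding clause.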
4995 def CheckArguments(self):
4996 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4997 self.op.output_fields, False)
4999 def ExpandNames(self):
5000 self.oq.ExpandNames(self)
5002 def Exec(self, feedback_fn):
5003 return self.oq.OldStyleQuery(self)
5006 class LUNodeRemove(LogicalUnit):
5007 """Logical unit for removing a node.
5010 HPATH = "node-remove"
5011 HTYPE = constants.HTYPE_NODE
5013 def BuildHooksEnv(self):
5018 "OP_TARGET": self.op.node_name,
5019 "NODE_NAME": self.op.node_name,
5022 def BuildHooksNodes(self):
5023 """Build hooks nodes.
5025 This doesn't run on the target node in the pre phase as a failed
5026 node would then be impossible to remove.
5029 all_nodes = self.cfg.GetNodeList()
5030 try:
5031 all_nodes.remove(self.op.node_name)
5032 except ValueError:
5033 pass
5034 return (all_nodes, all_nodes)
5036 def CheckPrereq(self):
5037 """Check prerequisites.
5040 - the node exists in the configuration
5041 - it does not have primary or secondary instances
5042 - it's not the master
5044 Any errors are signaled by raising errors.OpPrereqError.
5047 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5048 node = self.cfg.GetNodeInfo(self.op.node_name)
5049 assert node is not None
5051 masternode = self.cfg.GetMasterNode()
5052 if node.name == masternode:
5053 raise errors.OpPrereqError("Node is the master node, failover to another"
5054 " node is required", errors.ECODE_INVAL)
5056 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5057 if node.name in instance.all_nodes:
5058 raise errors.OpPrereqError("Instance %s is still running on the node,"
5059 " please remove first" % instance_name,
5061 self.op.node_name = node.name
5062 self.node = node
5064 def Exec(self, feedback_fn):
5065 """Removes the node from the cluster.
5069 logging.info("Stopping the node daemon and removing configs from node %s",
5072 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5074 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5075 "Not owning BGL"
5077 # Promote nodes to master candidate as needed
5078 _AdjustCandidatePool(self, exceptions=[node.name])
5079 self.context.RemoveNode(node.name)
5081 # Run post hooks on the node before it's removed
5082 _RunPostHook(self, node.name)
5084 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5085 msg = result.fail_msg
5087 self.LogWarning("Errors encountered on the remote node while leaving"
5088 " the cluster: %s", msg)
5090 # Remove node from our /etc/hosts
5091 if self.cfg.GetClusterInfo().modify_etc_hosts:
5092 master_node = self.cfg.GetMasterNode()
5093 result = self.rpc.call_etc_hosts_modify(master_node,
5094 constants.ETC_HOSTS_REMOVE,
5096 result.Raise("Can't update hosts file with new host data")
5097 _RedistributeAncillaryFiles(self)
5100 class _NodeQuery(_QueryBase):
5101 FIELDS = query.NODE_FIELDS
5103 def ExpandNames(self, lu):
5104 lu.needed_locks = {}
5105 lu.share_locks = _ShareAll()
5107 if self.names:
5108 self.wanted = _GetWantedNodes(lu, self.names)
5109 else:
5110 self.wanted = locking.ALL_SET
5112 self.do_locking = (self.use_locking and
5113 query.NQ_LIVE in self.requested_data)
5115 if self.do_locking:
5116 # If any non-static field is requested we need to lock the nodes
5117 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5119 def DeclareLocks(self, lu, level):
5120 pass
5122 def _GetQueryData(self, lu):
5123 """Computes the list of nodes and their attributes.
5126 all_info = lu.cfg.GetAllNodesInfo()
5128 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5130 # Gather data as requested
5131 if query.NQ_LIVE in self.requested_data:
5132 # filter out non-vm_capable nodes
5133 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5135 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5136 [lu.cfg.GetHypervisorType()])
5137 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5138 for (name, nresult) in node_data.items()
5139 if not nresult.fail_msg and nresult.payload)
5140 else:
5141 live_data = None
5143 if query.NQ_INST in self.requested_data:
5144 node_to_primary = dict([(name, set()) for name in nodenames])
5145 node_to_secondary = dict([(name, set()) for name in nodenames])
5147 inst_data = lu.cfg.GetAllInstancesInfo()
5149 for inst in inst_data.values():
5150 if inst.primary_node in node_to_primary:
5151 node_to_primary[inst.primary_node].add(inst.name)
5152 for secnode in inst.secondary_nodes:
5153 if secnode in node_to_secondary:
5154 node_to_secondary[secnode].add(inst.name)
5155 else:
5156 node_to_primary = None
5157 node_to_secondary = None
5159 if query.NQ_OOB in self.requested_data:
5160 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5161 for name, node in all_info.iteritems())
5162 else:
5163 oob_support = None
5165 if query.NQ_GROUP in self.requested_data:
5166 groups = lu.cfg.GetAllNodeGroupsInfo()
5167 else:
5168 groups = {}
5170 return query.NodeQueryData([all_info[name] for name in nodenames],
5171 live_data, lu.cfg.GetMasterNode(),
5172 node_to_primary, node_to_secondary, groups,
5173 oob_support, lu.cfg.GetClusterInfo())
5176 class LUNodeQuery(NoHooksLU):
5177 """Logical unit for querying nodes.
5180 # pylint: disable=W0142
5183 def CheckArguments(self):
5184 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5185 self.op.output_fields, self.op.use_locking)
5187 def ExpandNames(self):
5188 self.nq.ExpandNames(self)
5190 def DeclareLocks(self, level):
5191 self.nq.DeclareLocks(self, level)
5193 def Exec(self, feedback_fn):
5194 return self.nq.OldStyleQuery(self)
5197 class LUNodeQueryvols(NoHooksLU):
5198 """Logical unit for getting volumes on node(s).
5202 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5203 _FIELDS_STATIC = utils.FieldSet("node")
5205 def CheckArguments(self):
5206 _CheckOutputFields(static=self._FIELDS_STATIC,
5207 dynamic=self._FIELDS_DYNAMIC,
5208 selected=self.op.output_fields)
5210 def ExpandNames(self):
5211 self.share_locks = _ShareAll()
5212 self.needed_locks = {}
5214 if not self.op.nodes:
5215 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5217 self.needed_locks[locking.LEVEL_NODE] = \
5218 _GetWantedNodes(self, self.op.nodes)
5220 def Exec(self, feedback_fn):
5221 """Computes the list of nodes and their attributes.
5224 nodenames = self.owned_locks(locking.LEVEL_NODE)
5225 volumes = self.rpc.call_node_volumes(nodenames)
5227 ilist = self.cfg.GetAllInstancesInfo()
5228 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5230 output = []
5231 for node in nodenames:
5232 nresult = volumes[node]
5233 if nresult.offline:
5234 continue
5235 msg = nresult.fail_msg
5236 if msg:
5237 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5238 continue
5240 node_vols = sorted(nresult.payload,
5241 key=operator.itemgetter("dev"))
5243 for vol in node_vols:
5244 node_output = []
5245 for field in self.op.output_fields:
5246 if field == "node":
5247 val = node
5248 elif field == "phys":
5249 val = vol["dev"]
5250 elif field == "vg":
5251 val = vol["vg"]
5252 elif field == "name":
5253 val = vol["name"]
5254 elif field == "size":
5255 val = int(float(vol["size"]))
5256 elif field == "instance":
5257 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5258 else:
5259 raise errors.ParameterError(field)
5260 node_output.append(str(val))
5262 output.append(node_output)
5264 return output
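# Editor's illustration of the rows returned above (hypothetical values, not
# from the original source): with output_fields ["node", "name", "size",
# "instance"] each logical volume becomes one list of strings, e.g.
#
#   [["node1.example.com", "disk0", "10240", "instance1.example.com"],
#    ["node1.example.com", "disk1", "2048", "-"]]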
5267 class LUNodeQueryStorage(NoHooksLU):
5268 """Logical unit for getting information on storage units on node(s).
5271 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5274 def CheckArguments(self):
5275 _CheckOutputFields(static=self._FIELDS_STATIC,
5276 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5277 selected=self.op.output_fields)
5279 def ExpandNames(self):
5280 self.share_locks = _ShareAll()
5281 self.needed_locks = {}
5284 self.needed_locks[locking.LEVEL_NODE] = \
5285 _GetWantedNodes(self, self.op.nodes)
5287 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5289 def Exec(self, feedback_fn):
5290 """Computes the list of nodes and their attributes.
5293 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5295 # Always get name to sort by
5296 if constants.SF_NAME in self.op.output_fields:
5297 fields = self.op.output_fields[:]
5298 else:
5299 fields = [constants.SF_NAME] + self.op.output_fields
5301 # Never ask for node or type as it's only known to the LU
5302 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5303 while extra in fields:
5304 fields.remove(extra)
5306 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5307 name_idx = field_idx[constants.SF_NAME]
5309 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5310 data = self.rpc.call_storage_list(self.nodes,
5311 self.op.storage_type, st_args,
5312 self.op.name, fields)
5316 for node in utils.NiceSort(self.nodes):
5317 nresult = data[node]
5318 if nresult.offline:
5319 continue
5321 msg = nresult.fail_msg
5322 if msg:
5323 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5324 continue
5326 rows = dict([(row[name_idx], row) for row in nresult.payload])
5328 for name in utils.NiceSort(rows.keys()):
5333 for field in self.op.output_fields:
5334 if field == constants.SF_NODE:
5335 val = node
5336 elif field == constants.SF_TYPE:
5337 val = self.op.storage_type
5338 elif field in field_idx:
5339 val = row[field_idx[field]]
5340 else:
5341 raise errors.ParameterError(field)
5350 class _InstanceQuery(_QueryBase):
5351 FIELDS = query.INSTANCE_FIELDS
5353 def ExpandNames(self, lu):
5354 lu.needed_locks = {}
5355 lu.share_locks = _ShareAll()
5357 if self.names:
5358 self.wanted = _GetWantedInstances(lu, self.names)
5359 else:
5360 self.wanted = locking.ALL_SET
5362 self.do_locking = (self.use_locking and
5363 query.IQ_LIVE in self.requested_data)
5364 if self.do_locking:
5365 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5366 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5367 lu.needed_locks[locking.LEVEL_NODE] = []
5368 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5370 self.do_grouplocks = (self.do_locking and
5371 query.IQ_NODES in self.requested_data)
5373 def DeclareLocks(self, lu, level):
5375 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5376 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5378 # Lock all groups used by instances optimistically; this requires going
5379 # via the node before it's locked, requiring verification later on
5380 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5381 set(group_uuid
5382 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5383 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5384 elif level == locking.LEVEL_NODE:
5385 lu._LockInstancesNodes() # pylint: disable=W0212
5387 @staticmethod
5388 def _CheckGroupLocks(lu):
5389 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5390 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5392 # Check if node groups for locked instances are still correct
5393 for instance_name in owned_instances:
5394 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5396 def _GetQueryData(self, lu):
5397 """Computes the list of instances and their attributes.
5400 if self.do_grouplocks:
5401 self._CheckGroupLocks(lu)
5403 cluster = lu.cfg.GetClusterInfo()
5404 all_info = lu.cfg.GetAllInstancesInfo()
5406 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5408 instance_list = [all_info[name] for name in instance_names]
5409 nodes = frozenset(itertools.chain(*(inst.all_nodes
5410 for inst in instance_list)))
5411 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5412 bad_nodes = []
5413 offline_nodes = []
5414 wrongnode_inst = set()
5416 # Gather data as requested
5417 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5418 live_data = {}
5419 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5420 for name in nodes:
5421 result = node_data[name]
5422 if result.offline:
5423 # offline nodes will be in both lists
5424 assert result.fail_msg
5425 offline_nodes.append(name)
5426 if result.fail_msg:
5427 bad_nodes.append(name)
5428 elif result.payload:
5429 for inst in result.payload:
5430 if inst in all_info:
5431 if all_info[inst].primary_node == name:
5432 live_data.update(result.payload)
5433 else:
5434 wrongnode_inst.add(inst)
5436 # orphan instance; we don't list it here as we don't
5437 # handle this case yet in the output of instance listing
5438 logging.warning("Orphan instance '%s' found on node %s",
5439 inst, name)
5440 # else no instance is alive
5441 else:
5442 live_data = {}
5444 if query.IQ_DISKUSAGE in self.requested_data:
5445 gmi = ganeti.masterd.instance
5446 disk_usage = dict((inst.name,
5447 gmi.ComputeDiskSize(inst.disk_template,
5448 [{constants.IDISK_SIZE: disk.size}
5449 for disk in inst.disks]))
5450 for inst in instance_list)
5451 else:
5452 disk_usage = None
5454 if query.IQ_CONSOLE in self.requested_data:
5455 consinfo = {}
5456 for inst in instance_list:
5457 if inst.name in live_data:
5458 # Instance is running
5459 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5460 else:
5461 consinfo[inst.name] = None
5462 assert set(consinfo.keys()) == set(instance_names)
5463 else:
5464 consinfo = None
5466 if query.IQ_NODES in self.requested_data:
5467 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5468 instance_list)))
5469 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5470 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5471 for uuid in set(map(operator.attrgetter("group"),
5472 nodes.values())))
5473 else:
5474 nodes = None
5475 groups = None
5477 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5478 disk_usage, offline_nodes, bad_nodes,
5479 live_data, wrongnode_inst, consinfo,
5480 nodes, groups)
5483 class LUQuery(NoHooksLU):
5484 """Query for resources/items of a certain kind.
5487 # pylint: disable=W0142
5490 def CheckArguments(self):
5491 qcls = _GetQueryImplementation(self.op.what)
5493 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5495 def ExpandNames(self):
5496 self.impl.ExpandNames(self)
5498 def DeclareLocks(self, level):
5499 self.impl.DeclareLocks(self, level)
5501 def Exec(self, feedback_fn):
5502 return self.impl.NewStyleQuery(self)
5505 class LUQueryFields(NoHooksLU):
5506 """Query for resources/items of a certain kind.
5509 # pylint: disable=W0142
5512 def CheckArguments(self):
5513 self.qcls = _GetQueryImplementation(self.op.what)
5515 def ExpandNames(self):
5516 self.needed_locks = {}
5518 def Exec(self, feedback_fn):
5519 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5522 class LUNodeModifyStorage(NoHooksLU):
5523 """Logical unit for modifying a storage volume on a node.
5528 def CheckArguments(self):
5529 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5531 storage_type = self.op.storage_type
5533 try:
5534 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5535 except KeyError:
5536 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5537 " modified" % storage_type,
5540 diff = set(self.op.changes.keys()) - modifiable
5542 raise errors.OpPrereqError("The following fields can not be modified for"
5543 " storage units of type '%s': %r" %
5544 (storage_type, list(diff)),
5547 def ExpandNames(self):
5548 self.needed_locks = {
5549 locking.LEVEL_NODE: self.op.node_name,
5552 def Exec(self, feedback_fn):
5553 """Computes the list of nodes and their attributes.
5556 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5557 result = self.rpc.call_storage_modify(self.op.node_name,
5558 self.op.storage_type, st_args,
5559 self.op.name, self.op.changes)
5560 result.Raise("Failed to modify storage unit '%s' on %s" %
5561 (self.op.name, self.op.node_name))
5564 class LUNodeAdd(LogicalUnit):
5565 """Logical unit for adding node to the cluster.
5568 HPATH = "node-add"
5569 HTYPE = constants.HTYPE_NODE
5570 _NFLAGS = ["master_capable", "vm_capable"]
5572 def CheckArguments(self):
5573 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5574 # validate/normalize the node name
5575 self.hostname = netutils.GetHostname(name=self.op.node_name,
5576 family=self.primary_ip_family)
5577 self.op.node_name = self.hostname.name
5579 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5580 raise errors.OpPrereqError("Cannot readd the master node",
5583 if self.op.readd and self.op.group:
5584 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5585 " being readded", errors.ECODE_INVAL)
5587 def BuildHooksEnv(self):
5590 This will run on all nodes before, and on all nodes + the new node after.
5594 "OP_TARGET": self.op.node_name,
5595 "NODE_NAME": self.op.node_name,
5596 "NODE_PIP": self.op.primary_ip,
5597 "NODE_SIP": self.op.secondary_ip,
5598 "MASTER_CAPABLE": str(self.op.master_capable),
5599 "VM_CAPABLE": str(self.op.vm_capable),
5602 def BuildHooksNodes(self):
5603 """Build hooks nodes.
5606 # Exclude added node
5607 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5608 post_nodes = pre_nodes + [self.op.node_name, ]
5610 return (pre_nodes, post_nodes)
5612 def CheckPrereq(self):
5613 """Check prerequisites.
5616 - the new node is not already in the config
5618 - its parameters (single/dual homed) matches the cluster
5620 Any errors are signaled by raising errors.OpPrereqError.
5623 cfg = self.cfg
5624 hostname = self.hostname
5625 node = hostname.name
5626 primary_ip = self.op.primary_ip = hostname.ip
5627 if self.op.secondary_ip is None:
5628 if self.primary_ip_family == netutils.IP6Address.family:
5629 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5630 " IPv4 address must be given as secondary",
5632 self.op.secondary_ip = primary_ip
5634 secondary_ip = self.op.secondary_ip
5635 if not netutils.IP4Address.IsValid(secondary_ip):
5636 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5637 " address" % secondary_ip, errors.ECODE_INVAL)
5639 node_list = cfg.GetNodeList()
5640 if not self.op.readd and node in node_list:
5641 raise errors.OpPrereqError("Node %s is already in the configuration" %
5642 node, errors.ECODE_EXISTS)
5643 elif self.op.readd and node not in node_list:
5644 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5647 self.changed_primary_ip = False
5649 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5650 if self.op.readd and node == existing_node_name:
5651 if existing_node.secondary_ip != secondary_ip:
5652 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5653 " address configuration as before",
5655 if existing_node.primary_ip != primary_ip:
5656 self.changed_primary_ip = True
5658 continue
5660 if (existing_node.primary_ip == primary_ip or
5661 existing_node.secondary_ip == primary_ip or
5662 existing_node.primary_ip == secondary_ip or
5663 existing_node.secondary_ip == secondary_ip):
5664 raise errors.OpPrereqError("New node ip address(es) conflict with"
5665 " existing node %s" % existing_node.name,
5666 errors.ECODE_NOTUNIQUE)
5668 # After this 'if' block, None is no longer a valid value for the
5669 # _capable op attributes
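# For re-adds, flags that were not specified inherit the node's previous
# values; for new nodes, unspecified flags default to True.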
5671 old_node = self.cfg.GetNodeInfo(node)
5672 assert old_node is not None, "Can't retrieve locked node %s" % node
5673 for attr in self._NFLAGS:
5674 if getattr(self.op, attr) is None:
5675 setattr(self.op, attr, getattr(old_node, attr))
5677 for attr in self._NFLAGS:
5678 if getattr(self.op, attr) is None:
5679 setattr(self.op, attr, True)
5681 if self.op.readd and not self.op.vm_capable:
5682 pri, sec = cfg.GetNodeInstances(node)
5684 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5685 " flag set to false, but it already holds"
5686 " instances" % node,
5689 # check that the type of the node (single versus dual homed) is the
5690 # same as for the master
5691 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5692 master_singlehomed = myself.secondary_ip == myself.primary_ip
5693 newbie_singlehomed = secondary_ip == primary_ip
5694 if master_singlehomed != newbie_singlehomed:
5695 if master_singlehomed:
5696 raise errors.OpPrereqError("The master has no secondary ip but the"
5697 " new node has one",
5700 raise errors.OpPrereqError("The master has a secondary ip but the"
5701 " new node doesn't have one",
5704 # checks reachability
5705 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5706 raise errors.OpPrereqError("Node not reachable by ping",
5707 errors.ECODE_ENVIRON)
5709 if not newbie_singlehomed:
5710 # check reachability from my secondary ip to newbie's secondary ip
5711 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5712 source=myself.secondary_ip):
5713 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5714 " based ping to node daemon port",
5715 errors.ECODE_ENVIRON)
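# Decide whether the new node should also join the master candidate pool;
# nodes that are not master capable are never promoted, otherwise
# _DecideSelfPromotion makes the call.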
5722 if self.op.master_capable:
5723 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5725 self.master_candidate = False
5728 self.new_node = old_node
5730 node_group = cfg.LookupNodeGroup(self.op.group)
5731 self.new_node = objects.Node(name=node,
5732 primary_ip=primary_ip,
5733 secondary_ip=secondary_ip,
5734 master_candidate=self.master_candidate,
5735 offline=False, drained=False,
5738 if self.op.ndparams:
5739 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5741 if self.op.hv_state:
5742 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5744 if self.op.disk_state:
5745 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5747 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5748 # it a property on the base class.
5749 result = rpc.DnsOnlyRunner().call_version([node])[node]
5750 result.Raise("Can't get version information from node %s" % node)
5751 if constants.PROTOCOL_VERSION == result.payload:
5752 logging.info("Communication to node %s fine, sw version %s match",
5753 node, result.payload)
5755 raise errors.OpPrereqError("Version mismatch master version %s,"
5756 " node version %s" %
5757 (constants.PROTOCOL_VERSION, result.payload),
5758 errors.ECODE_ENVIRON)
5760 def Exec(self, feedback_fn):
5761 """Adds the new node to the cluster.
5764 new_node = self.new_node
5765 node = new_node.name
5767 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5770 # We are adding a new node, so we assume it's powered
5771 new_node.powered = True
5773 # for re-adds, reset the offline/drained/master-candidate flags;
5774 # we need to reset here, otherwise offline would prevent RPC calls
5775 # later in the procedure; this also means that if the re-add
5776 # fails, we are left with a non-offlined, broken node
5778 new_node.drained = new_node.offline = False # pylint: disable=W0201
5779 self.LogInfo("Readding a node, the offline/drained flags were reset")
5780 # if we demote the node, we do cleanup later in the procedure
5781 new_node.master_candidate = self.master_candidate
5782 if self.changed_primary_ip:
5783 new_node.primary_ip = self.op.primary_ip
5785 # copy the master/vm_capable flags
5786 for attr in self._NFLAGS:
5787 setattr(new_node, attr, getattr(self.op, attr))
5789 # notify the user about any possible mc promotion
5790 if new_node.master_candidate:
5791 self.LogInfo("Node will be a master candidate")
5793 if self.op.ndparams:
5794 new_node.ndparams = self.op.ndparams
5796 new_node.ndparams = {}
5798 if self.op.hv_state:
5799 new_node.hv_state_static = self.new_hv_state
5801 if self.op.disk_state:
5802 new_node.disk_state_static = self.new_disk_state
5804 # Add node to our /etc/hosts, and add key to known_hosts
5805 if self.cfg.GetClusterInfo().modify_etc_hosts:
5806 master_node = self.cfg.GetMasterNode()
5807 result = self.rpc.call_etc_hosts_modify(master_node,
5808 constants.ETC_HOSTS_ADD,
5811 result.Raise("Can't update hosts file with new host data")
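# If the node is dual-homed, verify that its secondary IP is actually
# configured and reachable; then run a node-verify pass from the master to
# confirm SSH connectivity and hostname resolution before registering the
# node.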
5813 if new_node.secondary_ip != new_node.primary_ip:
5814 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5817 node_verify_list = [self.cfg.GetMasterNode()]
5818 node_verify_param = {
5819 constants.NV_NODELIST: ([node], {}),
5820 # TODO: do a node-net-test as well?
5823 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5824 self.cfg.GetClusterName())
5825 for verifier in node_verify_list:
5826 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5827 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5829 for failed in nl_payload:
5830 feedback_fn("ssh/hostname verification failed"
5831 " (checking from %s): %s" %
5832 (verifier, nl_payload[failed]))
5833 raise errors.OpExecError("ssh/hostname verification failed")
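# For a re-add, update the existing configuration entry, redistribute the
# ancillary files and, if the node lost its master candidate role, tell it
# to demote itself; for a fresh add, push the ancillary files to the new
# node and register it in the configuration.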
5836 _RedistributeAncillaryFiles(self)
5837 self.context.ReaddNode(new_node)
5838 # make sure we redistribute the config
5839 self.cfg.Update(new_node, feedback_fn)
5840 # and make sure the new node will not have old files around
5841 if not new_node.master_candidate:
5842 result = self.rpc.call_node_demote_from_mc(new_node.name)
5843 msg = result.fail_msg
5845 self.LogWarning("Node failed to demote itself from master"
5846 " candidate status: %s" % msg)
5848 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5849 additional_vm=self.op.vm_capable)
5850 self.context.AddNode(new_node, self.proc.GetECId())
5853 class LUNodeSetParams(LogicalUnit):
5854 """Modifies the parameters of a node.
5856 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5857 to the node role (as _ROLE_*)
5858 @cvar _R2F: a dictionary from node role to tuples of flags
5859 @cvar _FLAGS: a list of attribute names corresponding to the flags
5862 HPATH = "node-modify"
5863 HTYPE = constants.HTYPE_NODE
5865 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5867 (True, False, False): _ROLE_CANDIDATE,
5868 (False, True, False): _ROLE_DRAINED,
5869 (False, False, True): _ROLE_OFFLINE,
5870 (False, False, False): _ROLE_REGULAR,
5872 _R2F = dict((v, k) for k, v in _F2R.items())
5873 _FLAGS = ["master_candidate", "drained", "offline"]
5875 def CheckArguments(self):
5876 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5877 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5878 self.op.master_capable, self.op.vm_capable,
5879 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5881 if all_mods.count(None) == len(all_mods):
5882 raise errors.OpPrereqError("Please pass at least one modification",
5884 if all_mods.count(True) > 1:
5885 raise errors.OpPrereqError("Can't set the node into more than one"
5886 " state at the same time",
5889 # Boolean value that tells us whether we might be demoting from MC
5890 self.might_demote = (self.op.master_candidate is False or
5891 self.op.offline is True or
5892 self.op.drained is True or
5893 self.op.master_capable is False)
5895 if self.op.secondary_ip:
5896 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5897 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5898 " address" % self.op.secondary_ip,
5901 self.lock_all = self.op.auto_promote and self.might_demote
5902 self.lock_instances = self.op.secondary_ip is not None
5904 def _InstanceFilter(self, instance):
5905 """Filter for getting affected instances.
5908 return (instance.disk_template in constants.DTS_INT_MIRROR and
5909 self.op.node_name in instance.all_nodes)
5911 def ExpandNames(self):
5913 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5915 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5917 # Since modifying a node can have severe effects on currently running
5918 # operations, the resource lock is at least acquired in shared mode
5919 self.needed_locks[locking.LEVEL_NODE_RES] = \
5920 self.needed_locks[locking.LEVEL_NODE]
5922 # Get node resource and instance locks in shared mode; they are not used
5923 # for anything but read-only access
5924 self.share_locks[locking.LEVEL_NODE_RES] = 1
5925 self.share_locks[locking.LEVEL_INSTANCE] = 1
5927 if self.lock_instances:
5928 self.needed_locks[locking.LEVEL_INSTANCE] = \
5929 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5931 def BuildHooksEnv(self):
5934 This runs on the master node.
5938 "OP_TARGET": self.op.node_name,
5939 "MASTER_CANDIDATE": str(self.op.master_candidate),
5940 "OFFLINE": str(self.op.offline),
5941 "DRAINED": str(self.op.drained),
5942 "MASTER_CAPABLE": str(self.op.master_capable),
5943 "VM_CAPABLE": str(self.op.vm_capable),
5946 def BuildHooksNodes(self):
5947 """Build hooks nodes.
5950 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5953 def CheckPrereq(self):
5954 """Check prerequisites.
5956 This only checks the instance list against the existing names.
5959 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5961 if self.lock_instances:
5962 affected_instances = \
5963 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5965 # Verify instance locks
5966 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5967 wanted_instances = frozenset(affected_instances.keys())
5968 if wanted_instances - owned_instances:
5969 raise errors.OpPrereqError("Instances affected by changing node %s's"
5970 " secondary IP address have changed since"
5971 " locks were acquired, wanted '%s', have"
5972 " '%s'; retry the operation" %
5974 utils.CommaJoin(wanted_instances),
5975 utils.CommaJoin(owned_instances)),
5978 affected_instances = None
5980 if (self.op.master_candidate is not None or
5981 self.op.drained is not None or
5982 self.op.offline is not None):
5983 # we can't change the master's node flags
5984 if self.op.node_name == self.cfg.GetMasterNode():
5985 raise errors.OpPrereqError("The master role can be changed"
5986 " only via master-failover",
5989 if self.op.master_candidate and not node.master_capable:
5990 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5991 " it a master candidate" % node.name,
5994 if self.op.vm_capable is False:
5995 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5997 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5998 " the vm_capable flag" % node.name,
6001 if node.master_candidate and self.might_demote and not self.lock_all:
6002 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6003 # check if after removing the current node, we're missing master
6005 (mc_remaining, mc_should, _) = \
6006 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6007 if mc_remaining < mc_should:
6008 raise errors.OpPrereqError("Not enough master candidates, please"
6009 " pass auto promote option to allow"
6010 " promotion (--auto-promote or RAPI"
6011 " auto_promote=True)", errors.ECODE_STATE)
6013 self.old_flags = old_flags = (node.master_candidate,
6014 node.drained, node.offline)
6015 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6016 self.old_role = old_role = self._F2R[old_flags]
6018 # Check for ineffective changes
6019 for attr in self._FLAGS:
6020 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6021 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6022 setattr(self.op, attr, None)
6024 # Past this point, any flag change to False means a transition
6025 # away from the respective state, as only real changes are kept
6027 # TODO: We might query the real power state if it supports OOB
6028 if _SupportsOob(self.cfg, node):
6029 if self.op.offline is False and not (node.powered or
6030 self.op.powered is True):
6031 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6032 " offline status can be reset") %
6033 self.op.node_name, errors.ECODE_STATE)
6034 elif self.op.powered is not None:
6035 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6036 " as it does not support out-of-band"
6037 " handling") % self.op.node_name,
6040 # If we're being deofflined/drained, we'll MC ourself if needed
6041 if (self.op.drained is False or self.op.offline is False or
6042 (self.op.master_capable and not node.master_capable)):
6043 if _DecideSelfPromotion(self):
6044 self.op.master_candidate = True
6045 self.LogInfo("Auto-promoting node to master candidate")
6047 # If we're no longer master capable, we'll demote ourselves from MC
6048 if self.op.master_capable is False and node.master_candidate:
6049 self.LogInfo("Demoting from master candidate")
6050 self.op.master_candidate = False
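# Derive the new node role: at most one flag can be True, a remaining False
# flag un-sets the corresponding state, and if nothing was requested the old
# role is kept.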
6053 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6054 if self.op.master_candidate:
6055 new_role = self._ROLE_CANDIDATE
6056 elif self.op.drained:
6057 new_role = self._ROLE_DRAINED
6058 elif self.op.offline:
6059 new_role = self._ROLE_OFFLINE
6060 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6061 # False is still in new flags, which means we're un-setting (the
6063 new_role = self._ROLE_REGULAR
6064 else: # no new flags, nothing, keep old role
6067 self.new_role = new_role
6069 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6070 # Trying to transition out of offline status
6071 result = self.rpc.call_version([node.name])[node.name]
6073 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6074 " to report its version: %s" %
6075 (node.name, result.fail_msg),
6078 self.LogWarning("Transitioning node from offline to online state"
6079 " without using re-add. Please make sure the node"
6082 # When changing the secondary ip, verify if this is a single-homed to
6083 # multi-homed transition or vice versa, and apply the relevant
6085 if self.op.secondary_ip:
6086 # Ok even without locking, because this can't be changed by any LU
6087 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6088 master_singlehomed = master.secondary_ip == master.primary_ip
6089 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6090 if self.op.force and node.name == master.name:
6091 self.LogWarning("Transitioning from single-homed to multi-homed"
6092 " cluster. All nodes will require a secondary ip.")
6094 raise errors.OpPrereqError("Changing the secondary ip on a"
6095 " single-homed cluster requires the"
6096 " --force option to be passed, and the"
6097 " target node to be the master",
6099 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6100 if self.op.force and node.name == master.name:
6101 self.LogWarning("Transitioning from multi-homed to single-homed"
6102 " cluster. Secondary IPs will have to be removed.")
6104 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6105 " same as the primary IP on a multi-homed"
6106 " cluster, unless the --force option is"
6107 " passed, and the target node is the"
6108 " master", errors.ECODE_INVAL)
6110 assert not (frozenset(affected_instances) -
6111 self.owned_locks(locking.LEVEL_INSTANCE))
6114 if affected_instances:
6115 msg = ("Cannot change secondary IP address: offline node has"
6116 " instances (%s) configured to use it" %
6117 utils.CommaJoin(affected_instances.keys()))
6118 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6120 # On online nodes, check that no instances are running, and that
6121 # the node has the new ip and we can reach it.
6122 for instance in affected_instances.values():
6123 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6124 msg="cannot change secondary ip")
6126 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6127 if master.name != node.name:
6128 # check reachability from master secondary ip to new secondary ip
6129 if not netutils.TcpPing(self.op.secondary_ip,
6130 constants.DEFAULT_NODED_PORT,
6131 source=master.secondary_ip):
6132 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6133 " based ping to node daemon port",
6134 errors.ECODE_ENVIRON)
6136 if self.op.ndparams:
6137 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6138 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6139 self.new_ndparams = new_ndparams
6141 if self.op.hv_state:
6142 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6143 self.node.hv_state_static)
6145 if self.op.disk_state:
6146 self.new_disk_state = \
6147 _MergeAndVerifyDiskState(self.op.disk_state,
6148 self.node.disk_state_static)
6150 def Exec(self, feedback_fn):
6155 old_role = self.old_role
6156 new_role = self.new_role
6160 if self.op.ndparams:
6161 node.ndparams = self.new_ndparams
6163 if self.op.powered is not None:
6164 node.powered = self.op.powered
6166 if self.op.hv_state:
6167 node.hv_state_static = self.new_hv_state
6169 if self.op.disk_state:
6170 node.disk_state_static = self.new_disk_state
6172 for attr in ["master_capable", "vm_capable"]:
6173 val = getattr(self.op, attr)
6175 setattr(node, attr, val)
6176 result.append((attr, str(val)))
6178 if new_role != old_role:
6179 # Tell the node to demote itself, if no longer MC and not offline
6180 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6181 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6183 self.LogWarning("Node failed to demote itself: %s", msg)
6185 new_flags = self._R2F[new_role]
6186 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6188 result.append((desc, str(nf)))
6189 (node.master_candidate, node.drained, node.offline) = new_flags
6191 # we locked all nodes, we adjust the CP before updating this node
6193 _AdjustCandidatePool(self, [node.name])
6195 if self.op.secondary_ip:
6196 node.secondary_ip = self.op.secondary_ip
6197 result.append(("secondary_ip", self.op.secondary_ip))
6199 # this will trigger configuration file update, if needed
6200 self.cfg.Update(node, feedback_fn)
6202 # this will trigger job queue propagation or cleanup if the mc
6204 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6205 self.context.ReaddNode(node)
6210 class LUNodePowercycle(NoHooksLU):
6211 """Powercycles a node.
6216 def CheckArguments(self):
6217 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6218 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6219 raise errors.OpPrereqError("The node is the master and the force"
6220 " parameter was not set",
6223 def ExpandNames(self):
6224 """Locking for PowercycleNode.
6226 This is a last-resort option and shouldn't block on other
6227 jobs. Therefore, we grab no locks.
6230 self.needed_locks = {}
6232 def Exec(self, feedback_fn):
6236 result = self.rpc.call_node_powercycle(self.op.node_name,
6237 self.cfg.GetHypervisorType())
6238 result.Raise("Failed to schedule the reboot")
6239 return result.payload
6242 class LUClusterQuery(NoHooksLU):
6243 """Query cluster configuration.
6248 def ExpandNames(self):
6249 self.needed_locks = {}
6251 def Exec(self, feedback_fn):
6252 """Return cluster config.
6255 cluster = self.cfg.GetClusterInfo()
6258 # Filter just for enabled hypervisors
6259 for os_name, hv_dict in cluster.os_hvp.items():
6260 os_hvp[os_name] = {}
6261 for hv_name, hv_params in hv_dict.items():
6262 if hv_name in cluster.enabled_hypervisors:
6263 os_hvp[os_name][hv_name] = hv_params
6265 # Convert ip_family to ip_version
6266 primary_ip_version = constants.IP4_VERSION
6267 if cluster.primary_ip_family == netutils.IP6Address.family:
6268 primary_ip_version = constants.IP6_VERSION
6271 "software_version": constants.RELEASE_VERSION,
6272 "protocol_version": constants.PROTOCOL_VERSION,
6273 "config_version": constants.CONFIG_VERSION,
6274 "os_api_version": max(constants.OS_API_VERSIONS),
6275 "export_version": constants.EXPORT_VERSION,
6276 "architecture": runtime.GetArchInfo(),
6277 "name": cluster.cluster_name,
6278 "master": cluster.master_node,
6279 "default_hypervisor": cluster.primary_hypervisor,
6280 "enabled_hypervisors": cluster.enabled_hypervisors,
6281 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6282 for hypervisor_name in cluster.enabled_hypervisors]),
6284 "beparams": cluster.beparams,
6285 "osparams": cluster.osparams,
6286 "ipolicy": cluster.ipolicy,
6287 "nicparams": cluster.nicparams,
6288 "ndparams": cluster.ndparams,
6289 "diskparams": cluster.diskparams,
6290 "candidate_pool_size": cluster.candidate_pool_size,
6291 "master_netdev": cluster.master_netdev,
6292 "master_netmask": cluster.master_netmask,
6293 "use_external_mip_script": cluster.use_external_mip_script,
6294 "volume_group_name": cluster.volume_group_name,
6295 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6296 "file_storage_dir": cluster.file_storage_dir,
6297 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6298 "maintain_node_health": cluster.maintain_node_health,
6299 "ctime": cluster.ctime,
6300 "mtime": cluster.mtime,
6301 "uuid": cluster.uuid,
6302 "tags": list(cluster.GetTags()),
6303 "uid_pool": cluster.uid_pool,
6304 "default_iallocator": cluster.default_iallocator,
6305 "reserved_lvs": cluster.reserved_lvs,
6306 "primary_ip_version": primary_ip_version,
6307 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6308 "hidden_os": cluster.hidden_os,
6309 "blacklisted_os": cluster.blacklisted_os,
6315 class LUClusterConfigQuery(NoHooksLU):
6316 """Return configuration values.
6321 def CheckArguments(self):
6322 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6324 def ExpandNames(self):
6325 self.cq.ExpandNames(self)
6327 def DeclareLocks(self, level):
6328 self.cq.DeclareLocks(self, level)
6330 def Exec(self, feedback_fn):
6331 result = self.cq.OldStyleQuery(self)
6333 assert len(result) == 1
6338 class _ClusterQuery(_QueryBase):
6339 FIELDS = query.CLUSTER_FIELDS
6341 #: Do not sort (there is only one item)
6344 def ExpandNames(self, lu):
6345 lu.needed_locks = {}
6347 # The following variables interact with _QueryBase._GetNames
6348 self.wanted = locking.ALL_SET
6349 self.do_locking = self.use_locking
6352 raise errors.OpPrereqError("Can not use locking for cluster queries",
6355 def DeclareLocks(self, lu, level):
6358 def _GetQueryData(self, lu):
6359 """Computes the list of nodes and their attributes.
6362 # Locking is not used
6363 assert not (compat.any(lu.glm.is_owned(level)
6364 for level in locking.LEVELS
6365 if level != locking.LEVEL_CLUSTER) or
6366 self.do_locking or self.use_locking)
6368 if query.CQ_CONFIG in self.requested_data:
6369 cluster = lu.cfg.GetClusterInfo()
6371 cluster = NotImplemented
6373 if query.CQ_QUEUE_DRAINED in self.requested_data:
6374 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6376 drain_flag = NotImplemented
6378 if query.CQ_WATCHER_PAUSE in self.requested_data:
6379 watcher_pause = utils.ReadWatcherPauseFile(pathutils.WATCHER_PAUSEFILE)
6381 watcher_pause = NotImplemented
6383 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6386 class LUInstanceActivateDisks(NoHooksLU):
6387 """Bring up an instance's disks.
6392 def ExpandNames(self):
6393 self._ExpandAndLockInstance()
6394 self.needed_locks[locking.LEVEL_NODE] = []
6395 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6397 def DeclareLocks(self, level):
6398 if level == locking.LEVEL_NODE:
6399 self._LockInstancesNodes()
6401 def CheckPrereq(self):
6402 """Check prerequisites.
6404 This checks that the instance is in the cluster.
6407 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6408 assert self.instance is not None, \
6409 "Cannot retrieve locked instance %s" % self.op.instance_name
6410 _CheckNodeOnline(self, self.instance.primary_node)
6412 def Exec(self, feedback_fn):
6413 """Activate the disks.
6416 disks_ok, disks_info = \
6417 _AssembleInstanceDisks(self, self.instance,
6418 ignore_size=self.op.ignore_size)
6420 raise errors.OpExecError("Cannot activate block devices")
6422 if self.op.wait_for_sync:
6423 if not _WaitForSync(self, self.instance):
6424 raise errors.OpExecError("Some disks of the instance are degraded!")
6429 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6431 """Prepare the block devices for an instance.
6433 This sets up the block devices on all nodes.
6435 @type lu: L{LogicalUnit}
6436 @param lu: the logical unit on whose behalf we execute
6437 @type instance: L{objects.Instance}
6438 @param instance: the instance for whose disks we assemble
6439 @type disks: list of L{objects.Disk} or None
6440 @param disks: which disks to assemble (or all, if None)
6441 @type ignore_secondaries: boolean
6442 @param ignore_secondaries: if true, errors on secondary nodes
6443 won't result in an error return from the function
6444 @type ignore_size: boolean
6445 @param ignore_size: if true, the current known size of the disk
6446 will not be used during the disk activation, useful for cases
6447 when the size is wrong
6448 @return: False if the operation failed, otherwise a list of
6449 (host, instance_visible_name, node_visible_name)
6450 with the mapping from node devices to instance devices
6455 iname = instance.name
6456 disks = _ExpandCheckDisks(instance, disks)
6458 # With the two passes mechanism we try to reduce the window of
6459 # opportunity for the race condition of switching DRBD to primary
6460 # before handshaking occurred, but we do not eliminate it
6462 # The proper fix would be to wait (with some limits) until the
6463 # connection has been made and drbd transitions from WFConnection
6464 # into any other network-connected state (Connected, SyncTarget,
6467 # 1st pass, assemble on all nodes in secondary mode
6468 for idx, inst_disk in enumerate(disks):
6469 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6471 node_disk = node_disk.Copy()
6472 node_disk.UnsetSize()
6473 lu.cfg.SetDiskID(node_disk, node)
6474 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6476 msg = result.fail_msg
6478 is_offline_secondary = (node in instance.secondary_nodes and
6480 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6481 " (is_primary=False, pass=1): %s",
6482 inst_disk.iv_name, node, msg)
6483 if not (ignore_secondaries or is_offline_secondary):
6486 # FIXME: race condition on drbd migration to primary
6488 # 2nd pass, do only the primary node
6489 for idx, inst_disk in enumerate(disks):
6492 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6493 if node != instance.primary_node:
6496 node_disk = node_disk.Copy()
6497 node_disk.UnsetSize()
6498 lu.cfg.SetDiskID(node_disk, node)
6499 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6501 msg = result.fail_msg
6503 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6504 " (is_primary=True, pass=2): %s",
6505 inst_disk.iv_name, node, msg)
6508 dev_path = result.payload
6510 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6512 # leave the disks configured for the primary node
6513 # this is a workaround that would be better fixed by
6514 # improving the logical/physical id handling
6516 lu.cfg.SetDiskID(disk, instance.primary_node)
6518 return disks_ok, device_info
6521 def _StartInstanceDisks(lu, instance, force):
6522 """Start the disks of an instance.
6525 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6526 ignore_secondaries=force)
6528 _ShutdownInstanceDisks(lu, instance)
6529 if force is not None and not force:
6530 lu.proc.LogWarning("", hint="If the message above refers to a"
6532 " you can retry the operation using '--force'.")
6533 raise errors.OpExecError("Disk consistency error")
6536 class LUInstanceDeactivateDisks(NoHooksLU):
6537 """Shutdown an instance's disks.
6542 def ExpandNames(self):
6543 self._ExpandAndLockInstance()
6544 self.needed_locks[locking.LEVEL_NODE] = []
6545 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6547 def DeclareLocks(self, level):
6548 if level == locking.LEVEL_NODE:
6549 self._LockInstancesNodes()
6551 def CheckPrereq(self):
6552 """Check prerequisites.
6554 This checks that the instance is in the cluster.
6557 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6558 assert self.instance is not None, \
6559 "Cannot retrieve locked instance %s" % self.op.instance_name
6561 def Exec(self, feedback_fn):
6562 """Deactivate the disks
6565 instance = self.instance
6567 _ShutdownInstanceDisks(self, instance)
6569 _SafeShutdownInstanceDisks(self, instance)
6572 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6573 """Shutdown block devices of an instance.
6575 This function checks if an instance is running, before calling
6576 _ShutdownInstanceDisks.
6579 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6580 _ShutdownInstanceDisks(lu, instance, disks=disks)
6583 def _ExpandCheckDisks(instance, disks):
6584 """Return the instance disks selected by the disks list
6586 @type disks: list of L{objects.Disk} or None
6587 @param disks: selected disks
6588 @rtype: list of L{objects.Disk}
6589 @return: selected instance disks to act on
6593 return instance.disks
6595 if not set(disks).issubset(instance.disks):
6596 raise errors.ProgrammerError("Can only act on disks belonging to the"
6601 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6602 """Shutdown block devices of an instance.
6604 This does the shutdown on all nodes of the instance.
6606 If ignore_primary is false, errors on the primary node are
6611 disks = _ExpandCheckDisks(instance, disks)
6614 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6615 lu.cfg.SetDiskID(top_disk, node)
6616 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6617 msg = result.fail_msg
6619 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6620 disk.iv_name, node, msg)
6621 if ((node == instance.primary_node and not ignore_primary) or
6622 (node != instance.primary_node and not result.offline)):
6627 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6628 """Checks if a node has enough free memory.
6630 This function checks if a given node has the needed amount of free
6631 memory. In case the node has less memory or we cannot get the
6632 information from the node, this function raises an OpPrereqError
6635 @type lu: C{LogicalUnit}
6636 @param lu: a logical unit from which we get configuration data
6638 @param node: the node to check
6639 @type reason: C{str}
6640 @param reason: string to use in the error message
6641 @type requested: C{int}
6642 @param requested: the amount of memory in MiB to check for
6643 @type hypervisor_name: C{str}
6644 @param hypervisor_name: the hypervisor to ask for memory stats
6646 @return: node current free memory
6647 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6648 we cannot check the node
6651 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6652 nodeinfo[node].Raise("Can't get data from node %s" % node,
6653 prereq=True, ecode=errors.ECODE_ENVIRON)
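# The node_info payload is a 3-tuple whose second element carries per-VG
# data and whose third carries per-hypervisor data; since exactly one
# hypervisor was requested, a single hv_info dict is unpacked here.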
6654 (_, _, (hv_info, )) = nodeinfo[node].payload
6656 free_mem = hv_info.get("memory_free", None)
6657 if not isinstance(free_mem, int):
6658 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6659 " was '%s'" % (node, free_mem),
6660 errors.ECODE_ENVIRON)
6661 if requested > free_mem:
6662 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6663 " needed %s MiB, available %s MiB" %
6664 (node, reason, requested, free_mem),
6669 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6670 """Checks if nodes have enough free disk space in the all VGs.
6672 This function check if all given nodes have the needed amount of
6673 free disk. In case any node has less disk or we cannot get the
6674 information from the node, this function raise an OpPrereqError
6677 @type lu: C{LogicalUnit}
6678 @param lu: a logical unit from which we get configuration data
6679 @type nodenames: C{list}
6680 @param nodenames: the list of node names to check
6681 @type req_sizes: C{dict}
6682 @param req_sizes: the hash of vg and corresponding amount of disk in
6684 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6685 or we cannot check the node
6688 for vg, req_size in req_sizes.items():
6689 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6692 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6693 """Checks if nodes have enough free disk space in the specified VG.
6695 This function checks if all given nodes have the needed amount of
6696 free disk. In case any node has less disk or we cannot get the
6697 information from the node, this function raises an OpPrereqError
6700 @type lu: C{LogicalUnit}
6701 @param lu: a logical unit from which we get configuration data
6702 @type nodenames: C{list}
6703 @param nodenames: the list of node names to check
6705 @param vg: the volume group to check
6706 @type requested: C{int}
6707 @param requested: the amount of disk in MiB to check for
6708 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6709 or we cannot check the node
6712 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6713 for node in nodenames:
6714 info = nodeinfo[node]
6715 info.Raise("Cannot get current information from node %s" % node,
6716 prereq=True, ecode=errors.ECODE_ENVIRON)
6717 (_, (vg_info, ), _) = info.payload
6718 vg_free = vg_info.get("vg_free", None)
6719 if not isinstance(vg_free, int):
6720 raise errors.OpPrereqError("Can't compute free disk space on node"
6721 " %s for vg %s, result was '%s'" %
6722 (node, vg, vg_free), errors.ECODE_ENVIRON)
6723 if requested > vg_free:
6724 raise errors.OpPrereqError("Not enough disk space on target node %s"
6725 " vg %s: required %d MiB, available %d MiB" %
6726 (node, vg, requested, vg_free),
6730 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6731 """Checks if nodes have enough physical CPUs
6733 This function checks if all given nodes have the needed number of
6734 physical CPUs. In case any node has fewer CPUs or we cannot get the
6735 information from the node, this function raises an OpPrereqError
6738 @type lu: C{LogicalUnit}
6739 @param lu: a logical unit from which we get configuration data
6740 @type nodenames: C{list}
6741 @param nodenames: the list of node names to check
6742 @type requested: C{int}
6743 @param requested: the minimum acceptable number of physical CPUs
6744 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6745 or we cannot check the node
6748 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6749 for node in nodenames:
6750 info = nodeinfo[node]
6751 info.Raise("Cannot get current information from node %s" % node,
6752 prereq=True, ecode=errors.ECODE_ENVIRON)
6753 (_, _, (hv_info, )) = info.payload
6754 num_cpus = hv_info.get("cpu_total", None)
6755 if not isinstance(num_cpus, int):
6756 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6757 " on node %s, result was '%s'" %
6758 (node, num_cpus), errors.ECODE_ENVIRON)
6759 if requested > num_cpus:
6760 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6761 "required" % (node, num_cpus, requested),
6765 class LUInstanceStartup(LogicalUnit):
6766 """Starts an instance.
6769 HPATH = "instance-start"
6770 HTYPE = constants.HTYPE_INSTANCE
6773 def CheckArguments(self):
6775 if self.op.beparams:
6776 # fill the beparams dict
6777 objects.UpgradeBeParams(self.op.beparams)
6778 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6780 def ExpandNames(self):
6781 self._ExpandAndLockInstance()
6782 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6784 def DeclareLocks(self, level):
6785 if level == locking.LEVEL_NODE_RES:
6786 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6788 def BuildHooksEnv(self):
6791 This runs on master, primary and secondary nodes of the instance.
6795 "FORCE": self.op.force,
6798 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6802 def BuildHooksNodes(self):
6803 """Build hooks nodes.
6806 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6809 def CheckPrereq(self):
6810 """Check prerequisites.
6812 This checks that the instance is in the cluster.
6815 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6816 assert self.instance is not None, \
6817 "Cannot retrieve locked instance %s" % self.op.instance_name
6820 if self.op.hvparams:
6821 # check hypervisor parameter syntax (locally)
6822 cluster = self.cfg.GetClusterInfo()
6823 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6824 filled_hvp = cluster.FillHV(instance)
6825 filled_hvp.update(self.op.hvparams)
6826 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6827 hv_type.CheckParameterSyntax(filled_hvp)
6828 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6830 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6832 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6834 if self.primary_offline and self.op.ignore_offline_nodes:
6835 self.proc.LogWarning("Ignoring offline primary node")
6837 if self.op.hvparams or self.op.beparams:
6838 self.proc.LogWarning("Overridden parameters are ignored")
6840 _CheckNodeOnline(self, instance.primary_node)
6842 bep = self.cfg.GetClusterInfo().FillBE(instance)
6843 bep.update(self.op.beparams)
6845 # check bridges existence
6846 _CheckInstanceBridgesExist(self, instance)
6848 remote_info = self.rpc.call_instance_info(instance.primary_node,
6850 instance.hypervisor)
6851 remote_info.Raise("Error checking node %s" % instance.primary_node,
6852 prereq=True, ecode=errors.ECODE_ENVIRON)
6853 if not remote_info.payload: # not running already
6854 _CheckNodeFreeMemory(self, instance.primary_node,
6855 "starting instance %s" % instance.name,
6856 bep[constants.BE_MINMEM], instance.hypervisor)
6858 def Exec(self, feedback_fn):
6859 """Start the instance.
6862 instance = self.instance
6863 force = self.op.force
6865 if not self.op.no_remember:
6866 self.cfg.MarkInstanceUp(instance.name)
6868 if self.primary_offline:
6869 assert self.op.ignore_offline_nodes
6870 self.proc.LogInfo("Primary node offline, marked instance as started")
6872 node_current = instance.primary_node
6874 _StartInstanceDisks(self, instance, force)
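# Assemble the instance's disks first; if the actual start fails, shut the
# disks down again so the node is not left with half-activated devices.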
6877 self.rpc.call_instance_start(node_current,
6878 (instance, self.op.hvparams,
6880 self.op.startup_paused)
6881 msg = result.fail_msg
6883 _ShutdownInstanceDisks(self, instance)
6884 raise errors.OpExecError("Could not start instance: %s" % msg)
6887 class LUInstanceReboot(LogicalUnit):
6888 """Reboot an instance.
6891 HPATH = "instance-reboot"
6892 HTYPE = constants.HTYPE_INSTANCE
6895 def ExpandNames(self):
6896 self._ExpandAndLockInstance()
6898 def BuildHooksEnv(self):
6901 This runs on master, primary and secondary nodes of the instance.
6905 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6906 "REBOOT_TYPE": self.op.reboot_type,
6907 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6910 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6914 def BuildHooksNodes(self):
6915 """Build hooks nodes.
6918 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6921 def CheckPrereq(self):
6922 """Check prerequisites.
6924 This checks that the instance is in the cluster.
6927 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6928 assert self.instance is not None, \
6929 "Cannot retrieve locked instance %s" % self.op.instance_name
6930 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6931 _CheckNodeOnline(self, instance.primary_node)
6933 # check bridges existence
6934 _CheckInstanceBridgesExist(self, instance)
6936 def Exec(self, feedback_fn):
6937 """Reboot the instance.
6940 instance = self.instance
6941 ignore_secondaries = self.op.ignore_secondaries
6942 reboot_type = self.op.reboot_type
6944 remote_info = self.rpc.call_instance_info(instance.primary_node,
6946 instance.hypervisor)
6947 remote_info.Raise("Error checking node %s" % instance.primary_node)
6948 instance_running = bool(remote_info.payload)
6950 node_current = instance.primary_node
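# Soft and hard reboots of a running instance are delegated to the
# hypervisor on the primary node; otherwise (full reboot, or the instance
# is not running) the instance is shut down, its disks are cycled and it
# is started again.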
6952 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6953 constants.INSTANCE_REBOOT_HARD]:
6954 for disk in instance.disks:
6955 self.cfg.SetDiskID(disk, node_current)
6956 result = self.rpc.call_instance_reboot(node_current, instance,
6958 self.op.shutdown_timeout)
6959 result.Raise("Could not reboot instance")
6961 if instance_running:
6962 result = self.rpc.call_instance_shutdown(node_current, instance,
6963 self.op.shutdown_timeout)
6964 result.Raise("Could not shutdown instance for full reboot")
6965 _ShutdownInstanceDisks(self, instance)
6967 self.LogInfo("Instance %s was already stopped, starting now",
6969 _StartInstanceDisks(self, instance, ignore_secondaries)
6970 result = self.rpc.call_instance_start(node_current,
6971 (instance, None, None), False)
6972 msg = result.fail_msg
6974 _ShutdownInstanceDisks(self, instance)
6975 raise errors.OpExecError("Could not start instance for"
6976 " full reboot: %s" % msg)
6978 self.cfg.MarkInstanceUp(instance.name)
6981 class LUInstanceShutdown(LogicalUnit):
6982 """Shutdown an instance.
6985 HPATH = "instance-stop"
6986 HTYPE = constants.HTYPE_INSTANCE
6989 def ExpandNames(self):
6990 self._ExpandAndLockInstance()
6992 def BuildHooksEnv(self):
6995 This runs on master, primary and secondary nodes of the instance.
6998 env = _BuildInstanceHookEnvByObject(self, self.instance)
6999 env["TIMEOUT"] = self.op.timeout
7002 def BuildHooksNodes(self):
7003 """Build hooks nodes.
7006 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7009 def CheckPrereq(self):
7010 """Check prerequisites.
7012 This checks that the instance is in the cluster.
7015 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7016 assert self.instance is not None, \
7017 "Cannot retrieve locked instance %s" % self.op.instance_name
7019 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7021 self.primary_offline = \
7022 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7024 if self.primary_offline and self.op.ignore_offline_nodes:
7025 self.proc.LogWarning("Ignoring offline primary node")
7027 _CheckNodeOnline(self, self.instance.primary_node)
7029 def Exec(self, feedback_fn):
7030 """Shutdown the instance.
7033 instance = self.instance
7034 node_current = instance.primary_node
7035 timeout = self.op.timeout
7037 if not self.op.no_remember:
7038 self.cfg.MarkInstanceDown(instance.name)
7040 if self.primary_offline:
7041 assert self.op.ignore_offline_nodes
7042 self.proc.LogInfo("Primary node offline, marked instance as stopped")
7044 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7045 msg = result.fail_msg
7047 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
7049 _ShutdownInstanceDisks(self, instance)
7052 class LUInstanceReinstall(LogicalUnit):
7053 """Reinstall an instance.
7056 HPATH = "instance-reinstall"
7057 HTYPE = constants.HTYPE_INSTANCE
7060 def ExpandNames(self):
7061 self._ExpandAndLockInstance()
7063 def BuildHooksEnv(self):
7066 This runs on master, primary and secondary nodes of the instance.
7069 return _BuildInstanceHookEnvByObject(self, self.instance)
7071 def BuildHooksNodes(self):
7072 """Build hooks nodes.
7075 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7078 def CheckPrereq(self):
7079 """Check prerequisites.
7081 This checks that the instance is in the cluster and is not running.
7084 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7085 assert instance is not None, \
7086 "Cannot retrieve locked instance %s" % self.op.instance_name
7087 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7088 " offline, cannot reinstall")
7090 if instance.disk_template == constants.DT_DISKLESS:
7091 raise errors.OpPrereqError("Instance '%s' has no disks" %
7092 self.op.instance_name,
7094 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7096 if self.op.os_type is not None:
7098 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7099 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7100 instance_os = self.op.os_type
7102 instance_os = instance.os
7104 nodelist = list(instance.all_nodes)
7106 if self.op.osparams:
7107 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7108 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7109 self.os_inst = i_osdict # the new dict (without defaults)
7113 self.instance = instance
7115 def Exec(self, feedback_fn):
7116 """Reinstall the instance.
7119 inst = self.instance
7121 if self.op.os_type is not None:
7122 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7123 inst.os = self.op.os_type
7124 # Write to configuration
7125 self.cfg.Update(inst, feedback_fn)
7127 _StartInstanceDisks(self, inst, None)
7129 feedback_fn("Running the instance OS create scripts...")
7130 # FIXME: pass debug option from opcode to backend
7131 result = self.rpc.call_instance_os_add(inst.primary_node,
7132 (inst, self.os_inst), True,
7133 self.op.debug_level)
7134 result.Raise("Could not install OS for instance %s on node %s" %
7135 (inst.name, inst.primary_node))
7137 _ShutdownInstanceDisks(self, inst)
7140 class LUInstanceRecreateDisks(LogicalUnit):
7141 """Recreate an instance's missing disks.
7144 HPATH = "instance-recreate-disks"
7145 HTYPE = constants.HTYPE_INSTANCE
7148 _MODIFYABLE = frozenset([
7149 constants.IDISK_SIZE,
7150 constants.IDISK_MODE,
7153 # New or changed disk parameters may have different semantics
7154 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7155 constants.IDISK_ADOPT,
7157 # TODO: Implement support changing VG while recreating
7159 constants.IDISK_METAVG,
7162 def _RunAllocator(self):
7163 """Run the allocator based on input opcode.
7166 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7169 # The allocator should actually run in "relocate" mode, but current
7170 # allocators don't support relocating all the nodes of an instance at
7171 # the same time. As a workaround we use "allocate" mode, but this is
7172 # suboptimal for two reasons:
7173 # - The instance name passed to the allocator is present in the list of
7174 # existing instances, so there could be a conflict within the
7175 # internal structures of the allocator. This doesn't happen with the
7176 # current allocators, but it's a liability.
7177 # - The allocator counts the resources used by the instance twice: once
7178 # because the instance exists already, and once because it tries to
7179 # allocate a new instance.
7180 # The allocator could choose some of the nodes on which the instance is
7181 # running, but that's not a problem. If the instance nodes are broken,
7182 # they should already be marked as drained or offline, and hence
7183 # skipped by the allocator. If instance disks have been lost for other
7184 # reasons, then recreating the disks on the same nodes should be fine.
7185 disk_template = self.instance.disk_template
7186 spindle_use = be_full[constants.BE_SPINDLE_USE]
7187 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7188 disk_template=disk_template,
7189 tags=list(self.instance.GetTags()),
7190 os=self.instance.os,
7192 vcpus=be_full[constants.BE_VCPUS],
7193 memory=be_full[constants.BE_MAXMEM],
7194 spindle_use=spindle_use,
7195 disks=[{constants.IDISK_SIZE: d.size,
7196 constants.IDISK_MODE: d.mode}
7197 for d in self.instance.disks],
7198 hypervisor=self.instance.hypervisor)
7199 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7201 ial.Run(self.op.iallocator)
7203 assert req.RequiredNodes() == len(self.instance.all_nodes)
7206 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7207 " %s" % (self.op.iallocator, ial.info),
7210 self.op.nodes = ial.result
7211 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7212 self.op.instance_name, self.op.iallocator,
7213 utils.CommaJoin(ial.result))
7215 def CheckArguments(self):
7216 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7217 # Normalize and convert deprecated list of disk indices
7218 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7220 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7222 raise errors.OpPrereqError("Some disks have been specified more than"
7223 " once: %s" % utils.CommaJoin(duplicates),
7226 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7227 # when neither iallocator nor nodes are specified
7228 if self.op.iallocator or self.op.nodes:
7229 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7231 for (idx, params) in self.op.disks:
7232 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7233 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7235 raise errors.OpPrereqError("Parameters for disk %s try to change"
7236 " unmodifyable parameter(s): %s" %
7237 (idx, utils.CommaJoin(unsupported)),
7240 def ExpandNames(self):
7241 self._ExpandAndLockInstance()
7242 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7244 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7245 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7247 self.needed_locks[locking.LEVEL_NODE] = []
7248 if self.op.iallocator:
7249 # iallocator will select a new node in the same group
7250 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7251 self.needed_locks[locking.LEVEL_NODE_RES] = []
7253 def DeclareLocks(self, level):
7254 if level == locking.LEVEL_NODEGROUP:
7255 assert self.op.iallocator is not None
7256 assert not self.op.nodes
7257 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7258 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7259 # Lock the primary group used by the instance optimistically; this
7260 # requires going via the node before it's locked, requiring
7261 # verification later on
7262 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7263 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7265 elif level == locking.LEVEL_NODE:
7266 # If an allocator is used, then we lock all the nodes in the current
7267 # instance group, as we don't know yet which ones will be selected;
7268 # if we replace the nodes without using an allocator, locks are
7269 # already declared in ExpandNames; otherwise, we need to lock all the
7270 # instance nodes for disk re-creation
7271 if self.op.iallocator:
7272 assert not self.op.nodes
7273 assert not self.needed_locks[locking.LEVEL_NODE]
7274 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7276 # Lock member nodes of the group of the primary node
7277 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7278 self.needed_locks[locking.LEVEL_NODE].extend(
7279 self.cfg.GetNodeGroup(group_uuid).members)
7280 elif not self.op.nodes:
7281 self._LockInstancesNodes(primary_only=False)
7282 elif level == locking.LEVEL_NODE_RES:
7284 self.needed_locks[locking.LEVEL_NODE_RES] = \
7285 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7287 def BuildHooksEnv(self):
7290 This runs on master, primary and secondary nodes of the instance.
7293 return _BuildInstanceHookEnvByObject(self, self.instance)
7295 def BuildHooksNodes(self):
7296 """Build hooks nodes.
7299 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7302 def CheckPrereq(self):
7303 """Check prerequisites.
7305 This checks that the instance is in the cluster and is not running.
7308 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7309 assert instance is not None, \
7310 "Cannot retrieve locked instance %s" % self.op.instance_name
7312 if len(self.op.nodes) != len(instance.all_nodes):
7313 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7314 " %d replacement nodes were specified" %
7315 (instance.name, len(instance.all_nodes),
7316 len(self.op.nodes)),
7318 assert instance.disk_template != constants.DT_DRBD8 or \
7319 len(self.op.nodes) == 2
7320 assert instance.disk_template != constants.DT_PLAIN or \
7321 len(self.op.nodes) == 1
7322 primary_node = self.op.nodes[0]
7324 primary_node = instance.primary_node
7325 if not self.op.iallocator:
7326 _CheckNodeOnline(self, primary_node)
7328 if instance.disk_template == constants.DT_DISKLESS:
7329 raise errors.OpPrereqError("Instance '%s' has no disks" %
7330 self.op.instance_name, errors.ECODE_INVAL)
7332 # Verify if node group locks are still correct
7333 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7335 # Node group locks are acquired only for the primary node (and only
7336 # when the allocator is used)
7337 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7340 # if we replace nodes *and* the old primary is offline, we don't
7341 # check the instance state
7342 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7343 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7344 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7345 msg="cannot recreate disks")
7348 self.disks = dict(self.op.disks)
7350 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7352 maxidx = max(self.disks.keys())
7353 if maxidx >= len(instance.disks):
7354 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7357 if ((self.op.nodes or self.op.iallocator) and
7358 sorted(self.disks.keys()) != range(len(instance.disks))):
7359 raise errors.OpPrereqError("Can't recreate disks partially and"
7360 " change the nodes at the same time",
7363 self.instance = instance
7365 if self.op.iallocator:
7366 self._RunAllocator()
7367 # Release unneeded node and node resource locks
7368 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7369 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7371 def Exec(self, feedback_fn):
7372 """Recreate the disks.
7375 instance = self.instance
7377 assert (self.owned_locks(locking.LEVEL_NODE) ==
7378 self.owned_locks(locking.LEVEL_NODE_RES))
7381 mods = [] # keeps track of needed changes
7383 for idx, disk in enumerate(instance.disks):
7385 changes = self.disks[idx]
7387 # Disk should not be recreated
7391 # update secondaries for disks, if needed
7392 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7393 # need to update the nodes and minors
7394 assert len(self.op.nodes) == 2
7395 assert len(disk.logical_id) == 6 # otherwise disk internals
7397 (_, _, old_port, _, _, old_secret) = disk.logical_id
7398 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7399 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7400 new_minors[0], new_minors[1], old_secret)
7401 assert len(disk.logical_id) == len(new_id)
7405 mods.append((idx, new_id, changes))
7407 # now that we have passed all asserts above, we can apply the mods
7408 # in a single run (to avoid partial changes)
7409 for idx, new_id, changes in mods:
7410 disk = instance.disks[idx]
7411 if new_id is not None:
7412 assert disk.dev_type == constants.LD_DRBD8
7413 disk.logical_id = new_id
7415 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7416 mode=changes.get(constants.IDISK_MODE, None))
7418 # change primary node, if needed
7420 instance.primary_node = self.op.nodes[0]
7421 self.LogWarning("Changing the instance's nodes, you will have to"
7422 " remove any disks left on the older nodes manually")
7425 self.cfg.Update(instance, feedback_fn)
7427 # All touched nodes must be locked
7428 mylocks = self.owned_locks(locking.LEVEL_NODE)
7429 assert mylocks.issuperset(frozenset(instance.all_nodes))
7430 _CreateDisks(self, instance, to_skip=to_skip)
7433 class LUInstanceRename(LogicalUnit):
7434 """Rename an instance.
7437 HPATH = "instance-rename"
7438 HTYPE = constants.HTYPE_INSTANCE
7440 def CheckArguments(self):
7444 if self.op.ip_check and not self.op.name_check:
7445 # TODO: make the ip check more flexible and not depend on the name check
7446 raise errors.OpPrereqError("IP address check requires a name check",
7449 def BuildHooksEnv(self):
7452 This runs on master, primary and secondary nodes of the instance.
7455 env = _BuildInstanceHookEnvByObject(self, self.instance)
7456 env["INSTANCE_NEW_NAME"] = self.op.new_name
7459 def BuildHooksNodes(self):
7460 """Build hooks nodes.
7463 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7466 def CheckPrereq(self):
7467 """Check prerequisites.
7469 This checks that the instance is in the cluster and is not running.
7472 self.op.instance_name = _ExpandInstanceName(self.cfg,
7473 self.op.instance_name)
7474 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7475 assert instance is not None
7476 _CheckNodeOnline(self, instance.primary_node)
7477 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7478 msg="cannot rename")
7479 self.instance = instance
7481 new_name = self.op.new_name
7482 if self.op.name_check:
7483 hostname = _CheckHostnameSane(self, new_name)
7484 new_name = self.op.new_name = hostname.name
7485 if (self.op.ip_check and
7486 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7487 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7488 (hostname.ip, new_name),
7489 errors.ECODE_NOTUNIQUE)
7491 instance_list = self.cfg.GetInstanceList()
7492 if new_name in instance_list and new_name != instance.name:
7493 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7494 new_name, errors.ECODE_EXISTS)
7496 def Exec(self, feedback_fn):
7497 """Rename the instance.
7500 inst = self.instance
7501 old_name = inst.name
7503 rename_file_storage = False
7504 if (inst.disk_template in constants.DTS_FILEBASED and
7505 self.op.new_name != inst.name):
7506 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7507 rename_file_storage = True
7509 self.cfg.RenameInstance(inst.name, self.op.new_name)
7510 # Change the instance lock. This is definitely safe while we hold the BGL.
7511 # Otherwise the new lock would have to be added in acquired mode.
7513 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7514 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7516 # re-read the instance from the configuration after rename
7517 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7519 if rename_file_storage:
7520 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7521 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7522 old_file_storage_dir,
7523 new_file_storage_dir)
7524 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7525 " (but the instance has been renamed in Ganeti)" %
7526 (inst.primary_node, old_file_storage_dir,
7527 new_file_storage_dir))
7529 _StartInstanceDisks(self, inst, None)
7530 # update info on disks
7531 info = _GetInstanceInfoText(inst)
7532 for (idx, disk) in enumerate(inst.disks):
7533 for node in inst.all_nodes:
7534 self.cfg.SetDiskID(disk, node)
7535 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7537 self.LogWarning("Error setting info on node %s for disk %s: %s",
7538 node, idx, result.fail_msg)
7540 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7541 old_name, self.op.debug_level)
7542 msg = result.fail_msg
7544 msg = ("Could not run OS rename script for instance %s on node %s"
7545 " (but the instance has been renamed in Ganeti): %s" %
7546 (inst.name, inst.primary_node, msg))
7547 self.proc.LogWarning(msg)
7549 _ShutdownInstanceDisks(self, inst)
7554 class LUInstanceRemove(LogicalUnit):
7555 """Remove an instance.
7558 HPATH = "instance-remove"
7559 HTYPE = constants.HTYPE_INSTANCE
7562 def ExpandNames(self):
7563 self._ExpandAndLockInstance()
7564 self.needed_locks[locking.LEVEL_NODE] = []
7565 self.needed_locks[locking.LEVEL_NODE_RES] = []
7566 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7568 def DeclareLocks(self, level):
7569 if level == locking.LEVEL_NODE:
7570 self._LockInstancesNodes()
7571 elif level == locking.LEVEL_NODE_RES:
7573 self.needed_locks[locking.LEVEL_NODE_RES] = \
7574 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7576 def BuildHooksEnv(self):
7579 This runs on master, primary and secondary nodes of the instance.
7582 env = _BuildInstanceHookEnvByObject(self, self.instance)
7583 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7586 def BuildHooksNodes(self):
7587 """Build hooks nodes.
7590 nl = [self.cfg.GetMasterNode()]
7591 nl_post = list(self.instance.all_nodes) + nl
7592 return (nl, nl_post)
7594 def CheckPrereq(self):
7595 """Check prerequisites.
7597 This checks that the instance is in the cluster.
7600 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7601 assert self.instance is not None, \
7602 "Cannot retrieve locked instance %s" % self.op.instance_name
7604 def Exec(self, feedback_fn):
7605 """Remove the instance.
7608 instance = self.instance
7609 logging.info("Shutting down instance %s on node %s",
7610 instance.name, instance.primary_node)
7612 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7613 self.op.shutdown_timeout)
7614 msg = result.fail_msg
7616 if self.op.ignore_failures:
7617 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7619 raise errors.OpExecError("Could not shutdown instance %s on"
7621 (instance.name, instance.primary_node, msg))
7623 assert (self.owned_locks(locking.LEVEL_NODE) ==
7624 self.owned_locks(locking.LEVEL_NODE_RES))
7625 assert not (set(instance.all_nodes) -
7626 self.owned_locks(locking.LEVEL_NODE)), \
7627 "Not owning correct locks"
7629 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7632 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7633 """Utility function to remove an instance.
7636 logging.info("Removing block devices for instance %s", instance.name)
7638 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7639 if not ignore_failures:
7640 raise errors.OpExecError("Can't remove instance's disks")
7641 feedback_fn("Warning: can't remove instance's disks")
7643 logging.info("Removing instance %s out of cluster config", instance.name)
7645 lu.cfg.RemoveInstance(instance.name)
7647 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7648 "Instance lock removal conflict"
7650 # Remove lock for the instance
7651 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
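# The lock recorded in remove_locks is dropped (removed, not merely
# released) by the LU processor once this LU has finished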
7654 class LUInstanceQuery(NoHooksLU):
7655 """Logical unit for querying instances.
7658 # pylint: disable=W0142
7661 def CheckArguments(self):
7662 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7663 self.op.output_fields, self.op.use_locking)
7665 def ExpandNames(self):
7666 self.iq.ExpandNames(self)
7668 def DeclareLocks(self, level):
7669 self.iq.DeclareLocks(self, level)
7671 def Exec(self, feedback_fn):
7672 return self.iq.OldStyleQuery(self)
7675 class LUInstanceFailover(LogicalUnit):
7676 """Failover an instance.
7679 HPATH = "instance-failover"
7680 HTYPE = constants.HTYPE_INSTANCE
7683 def CheckArguments(self):
7684 """Check the arguments.
7687 self.iallocator = getattr(self.op, "iallocator", None)
7688 self.target_node = getattr(self.op, "target_node", None)
7690 def ExpandNames(self):
7691 self._ExpandAndLockInstance()
7693 if self.op.target_node is not None:
7694 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7696 self.needed_locks[locking.LEVEL_NODE] = []
7697 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7699 self.needed_locks[locking.LEVEL_NODE_RES] = []
7700 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7702 ignore_consistency = self.op.ignore_consistency
7703 shutdown_timeout = self.op.shutdown_timeout
7704 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7707 ignore_consistency=ignore_consistency,
7708 shutdown_timeout=shutdown_timeout,
7709 ignore_ipolicy=self.op.ignore_ipolicy)
7710 self.tasklets = [self._migrater]
7712 def DeclareLocks(self, level):
7713 if level == locking.LEVEL_NODE:
7714 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7715 if instance.disk_template in constants.DTS_EXT_MIRROR:
7716 if self.op.target_node is None:
7717 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7719 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7720 self.op.target_node]
7721 del self.recalculate_locks[locking.LEVEL_NODE]
7723 self._LockInstancesNodes()
7724 elif level == locking.LEVEL_NODE_RES:
7726 self.needed_locks[locking.LEVEL_NODE_RES] = \
7727 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7729 def BuildHooksEnv(self):
7732 This runs on master, primary and secondary nodes of the instance.
7735 instance = self._migrater.instance
7736 source_node = instance.primary_node
7737 target_node = self.op.target_node
7739 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7740 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7741 "OLD_PRIMARY": source_node,
7742 "NEW_PRIMARY": target_node,
7745 if instance.disk_template in constants.DTS_INT_MIRROR:
7746 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7747 env["NEW_SECONDARY"] = source_node
7749 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7751 env.update(_BuildInstanceHookEnvByObject(self, instance))
7755 def BuildHooksNodes(self):
7756 """Build hooks nodes.
7759 instance = self._migrater.instance
7760 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7761 return (nl, nl + [instance.primary_node])
7764 class LUInstanceMigrate(LogicalUnit):
7765 """Migrate an instance.
7767 This is migration without shutting down, compared to the failover,
7768 which is done with shutdown.
7771 HPATH = "instance-migrate"
7772 HTYPE = constants.HTYPE_INSTANCE
7775 def ExpandNames(self):
7776 self._ExpandAndLockInstance()
7778 if self.op.target_node is not None:
7779 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7781 self.needed_locks[locking.LEVEL_NODE] = []
7782 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7784 self.needed_locks[locking.LEVEL_NODE] = []
7785 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7788 TLMigrateInstance(self, self.op.instance_name,
7789 cleanup=self.op.cleanup,
7791 fallback=self.op.allow_failover,
7792 allow_runtime_changes=self.op.allow_runtime_changes,
7793 ignore_ipolicy=self.op.ignore_ipolicy)
7794 self.tasklets = [self._migrater]
7796 def DeclareLocks(self, level):
7797 if level == locking.LEVEL_NODE:
7798 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7799 if instance.disk_template in constants.DTS_EXT_MIRROR:
7800 if self.op.target_node is None:
7801 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7803 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7804 self.op.target_node]
7805 del self.recalculate_locks[locking.LEVEL_NODE]
7807 self._LockInstancesNodes()
7808 elif level == locking.LEVEL_NODE_RES:
7810 self.needed_locks[locking.LEVEL_NODE_RES] = \
7811 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7813 def BuildHooksEnv(self):
7816 This runs on master, primary and secondary nodes of the instance.
7819 instance = self._migrater.instance
7820 source_node = instance.primary_node
7821 target_node = self.op.target_node
7822 env = _BuildInstanceHookEnvByObject(self, instance)
7824 "MIGRATE_LIVE": self._migrater.live,
7825 "MIGRATE_CLEANUP": self.op.cleanup,
7826 "OLD_PRIMARY": source_node,
7827 "NEW_PRIMARY": target_node,
7828 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7831 if instance.disk_template in constants.DTS_INT_MIRROR:
7832 env["OLD_SECONDARY"] = target_node
7833 env["NEW_SECONDARY"] = source_node
7835 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7839 def BuildHooksNodes(self):
7840 """Build hooks nodes.
7843 instance = self._migrater.instance
7844 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7845 return (nl, nl + [instance.primary_node])
7848 class LUInstanceMove(LogicalUnit):
7849 """Move an instance by data-copying.
7852 HPATH = "instance-move"
7853 HTYPE = constants.HTYPE_INSTANCE
7856 def ExpandNames(self):
7857 self._ExpandAndLockInstance()
7858 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7859 self.op.target_node = target_node
7860 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7861 self.needed_locks[locking.LEVEL_NODE_RES] = []
7862 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7864 def DeclareLocks(self, level):
7865 if level == locking.LEVEL_NODE:
7866 self._LockInstancesNodes(primary_only=True)
7867 elif level == locking.LEVEL_NODE_RES:
7869 self.needed_locks[locking.LEVEL_NODE_RES] = \
7870 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7872 def BuildHooksEnv(self):
7875 This runs on master, primary and secondary nodes of the instance.
7879 "TARGET_NODE": self.op.target_node,
7880 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7882 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7885 def BuildHooksNodes(self):
7886 """Build hooks nodes.
7890 self.cfg.GetMasterNode(),
7891 self.instance.primary_node,
7892 self.op.target_node,
7896 def CheckPrereq(self):
7897 """Check prerequisites.
7899 This checks that the instance is in the cluster.
7902 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7903 assert self.instance is not None, \
7904 "Cannot retrieve locked instance %s" % self.op.instance_name
7906 node = self.cfg.GetNodeInfo(self.op.target_node)
7907 assert node is not None, \
7908 "Cannot retrieve locked node %s" % self.op.target_node
7910 self.target_node = target_node = node.name
7912 if target_node == instance.primary_node:
7913 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7914 (instance.name, target_node),
7917 bep = self.cfg.GetClusterInfo().FillBE(instance)
7919 for idx, dsk in enumerate(instance.disks):
7920 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7921 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7922 " cannot copy" % idx, errors.ECODE_STATE)
7924 _CheckNodeOnline(self, target_node)
7925 _CheckNodeNotDrained(self, target_node)
7926 _CheckNodeVmCapable(self, target_node)
7927 cluster = self.cfg.GetClusterInfo()
7928 group_info = self.cfg.GetNodeGroup(node.group)
7929 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
7930 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7931 ignore=self.op.ignore_ipolicy)
7933 if instance.admin_state == constants.ADMINST_UP:
7934 # check memory requirements on the target node
7935 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7936 instance.name, bep[constants.BE_MAXMEM],
7937 instance.hypervisor)
7939 self.LogInfo("Not checking memory on the secondary node as"
7940 " instance will not be started")
7942 # check bridge existence
7943 _CheckInstanceBridgesExist(self, instance, node=target_node)
7945 def Exec(self, feedback_fn):
7946 """Move an instance.
7948 The move is done by shutting it down on its present node, copying
7949 the data over (slow) and starting it on the new node.
7952 instance = self.instance
7954 source_node = instance.primary_node
7955 target_node = self.target_node
7957 self.LogInfo("Shutting down instance %s on source node %s",
7958 instance.name, source_node)
7960 assert (self.owned_locks(locking.LEVEL_NODE) ==
7961 self.owned_locks(locking.LEVEL_NODE_RES))
7963 result = self.rpc.call_instance_shutdown(source_node, instance,
7964 self.op.shutdown_timeout)
7965 msg = result.fail_msg
7967 if self.op.ignore_consistency:
7968 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7969 " Proceeding anyway. Please make sure node"
7970 " %s is down. Error details: %s",
7971 instance.name, source_node, source_node, msg)
7973 raise errors.OpExecError("Could not shutdown instance %s on"
7975 (instance.name, source_node, msg))
7977 # create the target disks
7979 _CreateDisks(self, instance, target_node=target_node)
7980 except errors.OpExecError:
7981 self.LogWarning("Device creation failed, reverting...")
7983 _RemoveDisks(self, instance, target_node=target_node)
7985 self.cfg.ReleaseDRBDMinors(instance.name)
7988 cluster_name = self.cfg.GetClusterInfo().cluster_name
7991 # activate, get path, copy the data over
7992 for idx, disk in enumerate(instance.disks):
7993 self.LogInfo("Copying data for disk %d", idx)
7994 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7995 instance.name, True, idx)
7997 self.LogWarning("Can't assemble newly created disk %d: %s",
7998 idx, result.fail_msg)
7999 errs.append(result.fail_msg)
8001 dev_path = result.payload
8002 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8003 target_node, dev_path,
8006 self.LogWarning("Can't copy data over for disk %d: %s",
8007 idx, result.fail_msg)
8008 errs.append(result.fail_msg)
8012 self.LogWarning("Some disks failed to copy, aborting")
8014 _RemoveDisks(self, instance, target_node=target_node)
8016 self.cfg.ReleaseDRBDMinors(instance.name)
8017 raise errors.OpExecError("Errors during disk copy: %s" %
8020 instance.primary_node = target_node
8021 self.cfg.Update(instance, feedback_fn)
8023 self.LogInfo("Removing the disks on the original node")
8024 _RemoveDisks(self, instance, target_node=source_node)
8026 # Only start the instance if it's marked as up
8027 if instance.admin_state == constants.ADMINST_UP:
8028 self.LogInfo("Starting instance %s on node %s",
8029 instance.name, target_node)
8031 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8032 ignore_secondaries=True)
8034 _ShutdownInstanceDisks(self, instance)
8035 raise errors.OpExecError("Can't activate the instance's disks")
8037 result = self.rpc.call_instance_start(target_node,
8038 (instance, None, None), False)
8039 msg = result.fail_msg
8041 _ShutdownInstanceDisks(self, instance)
8042 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8043 (instance.name, target_node, msg))
8046 class LUNodeMigrate(LogicalUnit):
8047 """Migrate all instances from a node.
8050 HPATH = "node-migrate"
8051 HTYPE = constants.HTYPE_NODE
8054 def CheckArguments(self):
8057 def ExpandNames(self):
8058 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8060 self.share_locks = _ShareAll()
8061 self.needed_locks = {
8062 locking.LEVEL_NODE: [self.op.node_name],
8065 def BuildHooksEnv(self):
8068 This runs on the master, the primary and all the secondaries.
8072 "NODE_NAME": self.op.node_name,
8073 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8076 def BuildHooksNodes(self):
8077 """Build hooks nodes.
8080 nl = [self.cfg.GetMasterNode()]
8083 def CheckPrereq(self):
8086 def Exec(self, feedback_fn):
8087 # Prepare jobs for migration instances
8088 allow_runtime_changes = self.op.allow_runtime_changes
8090 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8093 iallocator=self.op.iallocator,
8094 target_node=self.op.target_node,
8095 allow_runtime_changes=allow_runtime_changes,
8096 ignore_ipolicy=self.op.ignore_ipolicy)]
8097 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
8100 # TODO: Run iallocator in this opcode and pass correct placement options to
8101 # OpInstanceMigrate. Since other jobs can modify the cluster between
8102 # running the iallocator and the actual migration, a good consistency model
8103 # will have to be found.
8105 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8106 frozenset([self.op.node_name]))
8108 return ResultWithJobs(jobs)
8111 class TLMigrateInstance(Tasklet):
8112 """Tasklet class for instance migration.
8115 @ivar live: whether the migration will be done live or non-live;
8116 this variable is initialized only after CheckPrereq has run
8117 @type cleanup: boolean
8118 @ivar cleanup: Whether we clean up after a failed migration
8119 @type iallocator: string
8120 @ivar iallocator: The iallocator used to determine target_node
8121 @type target_node: string
8122 @ivar target_node: If given, the target_node to reallocate the instance to
8123 @type failover: boolean
8124 @ivar failover: Whether operation results in failover or migration
8125 @type fallback: boolean
8126 @ivar fallback: Whether fallback to failover is allowed if migration not
8128 @type ignore_consistency: boolean
8129 @ivar ignore_consistency: Whether we should ignore consistency between source
8131 @type shutdown_timeout: int
8132 @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
8133 @type ignore_ipolicy: bool
8134 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8139 _MIGRATION_POLL_INTERVAL = 1 # seconds
8140 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
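# These two values drive the status loop in _ExecMigration: poll the
# hypervisor every _MIGRATION_POLL_INTERVAL seconds and emit a progress
# line at most every _MIGRATION_FEEDBACK_INTERVAL seconds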
8142 def __init__(self, lu, instance_name, cleanup=False,
8143 failover=False, fallback=False,
8144 ignore_consistency=False,
8145 allow_runtime_changes=True,
8146 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
8147 ignore_ipolicy=False):
8148 """Initializes this class.
8151 Tasklet.__init__(self, lu)
8154 self.instance_name = instance_name
8155 self.cleanup = cleanup
8156 self.live = False # will be overridden later
8157 self.failover = failover
8158 self.fallback = fallback
8159 self.ignore_consistency = ignore_consistency
8160 self.shutdown_timeout = shutdown_timeout
8161 self.ignore_ipolicy = ignore_ipolicy
8162 self.allow_runtime_changes = allow_runtime_changes
8164 def CheckPrereq(self):
8165 """Check prerequisites.
8167 This checks that the instance is in the cluster.
8170 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8171 instance = self.cfg.GetInstanceInfo(instance_name)
8172 assert instance is not None
8173 self.instance = instance
8174 cluster = self.cfg.GetClusterInfo()
8176 if (not self.cleanup and
8177 not instance.admin_state == constants.ADMINST_UP and
8178 not self.failover and self.fallback):
8179 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8180 " switching to failover")
8181 self.failover = True
8183 if instance.disk_template not in constants.DTS_MIRRORED:
8188 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8189 " %s" % (instance.disk_template, text),
8192 if instance.disk_template in constants.DTS_EXT_MIRROR:
8193 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8195 if self.lu.op.iallocator:
8196 self._RunAllocator()
8198 # We set self.target_node as it is required by
8200 self.target_node = self.lu.op.target_node
8202 # Check that the target node is correct in terms of instance policy
8203 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8204 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8205 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8207 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8208 ignore=self.ignore_ipolicy)
8210 # self.target_node is already populated, either directly or by the
8212 target_node = self.target_node
8213 if self.target_node == instance.primary_node:
8214 raise errors.OpPrereqError("Cannot migrate instance %s"
8215 " to its primary (%s)" %
8216 (instance.name, instance.primary_node),
8219 if len(self.lu.tasklets) == 1:
8220 # It is safe to release locks only when we're the only tasklet
8222 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8223 keep=[instance.primary_node, self.target_node])
8226 secondary_nodes = instance.secondary_nodes
8227 if not secondary_nodes:
8228 raise errors.ConfigurationError("No secondary node but using"
8229 " %s disk template" %
8230 instance.disk_template)
8231 target_node = secondary_nodes[0]
8232 if self.lu.op.iallocator or (self.lu.op.target_node and
8233 self.lu.op.target_node != target_node):
8235 text = "failed over"
8238 raise errors.OpPrereqError("Instances with disk template %s cannot"
8239 " be %s to arbitrary nodes"
8240 " (neither an iallocator nor a target"
8241 " node can be passed)" %
8242 (instance.disk_template, text),
8244 nodeinfo = self.cfg.GetNodeInfo(target_node)
8245 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8246 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8248 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8249 ignore=self.ignore_ipolicy)
8251 i_be = cluster.FillBE(instance)
8253 # check memory requirements on the secondary node
8254 if (not self.cleanup and
8255 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8256 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8257 "migrating instance %s" %
8259 i_be[constants.BE_MINMEM],
8260 instance.hypervisor)
8262 self.lu.LogInfo("Not checking memory on the secondary node as"
8263 " instance will not be started")
8265 # check if failover must be forced instead of migration
8266 if (not self.cleanup and not self.failover and
8267 i_be[constants.BE_ALWAYS_FAILOVER]):
8268 self.lu.LogInfo("Instance configured to always failover; fallback"
8270 self.failover = True
8272 # check bridge existence
8273 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8275 if not self.cleanup:
8276 _CheckNodeNotDrained(self.lu, target_node)
8277 if not self.failover:
8278 result = self.rpc.call_instance_migratable(instance.primary_node,
8280 if result.fail_msg and self.fallback:
8281 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8283 self.failover = True
8285 result.Raise("Can't migrate, please use failover",
8286 prereq=True, ecode=errors.ECODE_STATE)
8288 assert not (self.failover and self.cleanup)
8290 if not self.failover:
8291 if self.lu.op.live is not None and self.lu.op.mode is not None:
8292 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8293 " parameters are accepted",
8295 if self.lu.op.live is not None:
8297 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8299 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8300 # reset the 'live' parameter to None so that repeated
8301 # invocations of CheckPrereq do not raise an exception
8302 self.lu.op.live = None
8303 elif self.lu.op.mode is None:
8304 # read the default value from the hypervisor
8305 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8306 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8308 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8310 # Failover is never live
8313 if not (self.failover or self.cleanup):
8314 remote_info = self.rpc.call_instance_info(instance.primary_node,
8316 instance.hypervisor)
8317 remote_info.Raise("Error checking instance on node %s" %
8318 instance.primary_node)
8319 instance_running = bool(remote_info.payload)
8320 if instance_running:
8321 self.current_mem = int(remote_info.payload["memory"])
8323 def _RunAllocator(self):
8324 """Run the allocator based on input opcode.
8327 # FIXME: add a self.ignore_ipolicy option
8328 req = iallocator.IAReqRelocate(name=self.instance_name,
8329 relocate_from=[self.instance.primary_node])
8330 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8332 ial.Run(self.lu.op.iallocator)
8335 raise errors.OpPrereqError("Can't compute nodes using"
8336 " iallocator '%s': %s" %
8337 (self.lu.op.iallocator, ial.info),
8339 self.target_node = ial.result[0]
8340 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8341 self.instance_name, self.lu.op.iallocator,
8342 utils.CommaJoin(ial.result))
8344 def _WaitUntilSync(self):
8345 """Poll with custom rpc for disk sync.
8347 This uses our own step-based rpc call.
8350 self.feedback_fn("* wait until resync is done")
8354 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8356 (self.instance.disks,
8359 for node, nres in result.items():
8360 nres.Raise("Cannot resync disks on node %s" % node)
8361 node_done, node_percent = nres.payload
8362 all_done = all_done and node_done
8363 if node_percent is not None:
8364 min_percent = min(min_percent, node_percent)
8366 if min_percent < 100:
8367 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8370 def _EnsureSecondary(self, node):
8371 """Demote a node to secondary.
8374 self.feedback_fn("* switching node %s to secondary mode" % node)
8376 for dev in self.instance.disks:
8377 self.cfg.SetDiskID(dev, node)
8379 result = self.rpc.call_blockdev_close(node, self.instance.name,
8380 self.instance.disks)
8381 result.Raise("Cannot change disk to secondary on node %s" % node)
8383 def _GoStandalone(self):
8384 """Disconnect from the network.
8387 self.feedback_fn("* changing into standalone mode")
8388 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8389 self.instance.disks)
8390 for node, nres in result.items():
8391 nres.Raise("Cannot disconnect disks node %s" % node)
8393 def _GoReconnect(self, multimaster):
8394 """Reconnect to the network.
8400 msg = "single-master"
8401 self.feedback_fn("* changing disks into %s mode" % msg)
8402 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8403 (self.instance.disks, self.instance),
8404 self.instance.name, multimaster)
8405 for node, nres in result.items():
8406 nres.Raise("Cannot change disks config on node %s" % node)
8408 def _ExecCleanup(self):
8409 """Try to cleanup after a failed migration.
8411 The cleanup is done by:
8412 - check that the instance is running only on one node
8413 (and update the config if needed)
8414 - change disks on its secondary node to secondary
8415 - wait until disks are fully synchronized
8416 - disconnect from the network
8417 - change disks into single-master mode
8418 - wait again until disks are fully synchronized
8421 instance = self.instance
8422 target_node = self.target_node
8423 source_node = self.source_node
8425 # check running on only one node
8426 self.feedback_fn("* checking where the instance actually runs"
8427 " (if this hangs, the hypervisor might be in"
8429 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8430 for node, result in ins_l.items():
8431 result.Raise("Can't contact node %s" % node)
8433 runningon_source = instance.name in ins_l[source_node].payload
8434 runningon_target = instance.name in ins_l[target_node].payload
8436 if runningon_source and runningon_target:
8437 raise errors.OpExecError("Instance seems to be running on two nodes,"
8438 " or the hypervisor is confused; you will have"
8439 " to ensure manually that it runs only on one"
8440 " and restart this operation")
8442 if not (runningon_source or runningon_target):
8443 raise errors.OpExecError("Instance does not seem to be running at all;"
8444 " in this case it's safer to repair by"
8445 " running 'gnt-instance stop' to ensure disk"
8446 " shutdown, and then restarting it")
8448 if runningon_target:
8449 # the migration has actually succeeded, we need to update the config
8450 self.feedback_fn("* instance running on secondary node (%s),"
8451 " updating config" % target_node)
8452 instance.primary_node = target_node
8453 self.cfg.Update(instance, self.feedback_fn)
8454 demoted_node = source_node
8456 self.feedback_fn("* instance confirmed to be running on its"
8457 " primary node (%s)" % source_node)
8458 demoted_node = target_node
8460 if instance.disk_template in constants.DTS_INT_MIRROR:
8461 self._EnsureSecondary(demoted_node)
8463 self._WaitUntilSync()
8464 except errors.OpExecError:
8465 # we ignore errors here, since if the device is standalone, it
8466 # won't be able to sync
8468 self._GoStandalone()
8469 self._GoReconnect(False)
8470 self._WaitUntilSync()
8472 self.feedback_fn("* done")
8474 def _RevertDiskStatus(self):
8475 """Try to revert the disk status after a failed migration.
8478 target_node = self.target_node
8479 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8483 self._EnsureSecondary(target_node)
8484 self._GoStandalone()
8485 self._GoReconnect(False)
8486 self._WaitUntilSync()
8487 except errors.OpExecError, err:
8488 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8489 " please try to recover the instance manually;"
8490 " error '%s'" % str(err))
8492 def _AbortMigration(self):
8493 """Call the hypervisor code to abort a started migration.
8496 instance = self.instance
8497 target_node = self.target_node
8498 source_node = self.source_node
8499 migration_info = self.migration_info
8501 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8505 abort_msg = abort_result.fail_msg
8507 logging.error("Aborting migration failed on target node %s: %s",
8508 target_node, abort_msg)
8509 # Don't raise an exception here, as we still have to try to revert the
8510 # disk status, even if this step failed.
8512 abort_result = self.rpc.call_instance_finalize_migration_src(
8513 source_node, instance, False, self.live)
8514 abort_msg = abort_result.fail_msg
8516 logging.error("Aborting migration failed on source node %s: %s",
8517 source_node, abort_msg)
8519 def _ExecMigration(self):
8520 """Migrate an instance.
8522 The migrate is done by:
8523 - change the disks into dual-master mode
8524 - wait until disks are fully synchronized again
8525 - migrate the instance
8526 - change disks on the new secondary node (the old primary) to secondary
8527 - wait until disks are fully synchronized
8528 - change disks into single-master mode
8531 instance = self.instance
8532 target_node = self.target_node
8533 source_node = self.source_node
8535 # Check for hypervisor version mismatch and warn the user.
8536 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8537 None, [self.instance.hypervisor])
8538 for ninfo in nodeinfo.values():
8539 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8541 (_, _, (src_info, )) = nodeinfo[source_node].payload
8542 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8544 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8545 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8546 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8547 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8548 if src_version != dst_version:
8549 self.feedback_fn("* warning: hypervisor version mismatch between"
8550 " source (%s) and target (%s) node" %
8551 (src_version, dst_version))
8553 self.feedback_fn("* checking disk consistency between source and target")
8554 for (idx, dev) in enumerate(instance.disks):
8555 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8556 raise errors.OpExecError("Disk %s is degraded or not fully"
8557 " synchronized on target node,"
8558 " aborting migration" % idx)
8560 if self.current_mem > self.tgt_free_mem:
8561 if not self.allow_runtime_changes:
8562 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8563 " free memory to fit instance %s on target"
8564 " node %s (have %dMB, need %dMB)" %
8565 (instance.name, target_node,
8566 self.tgt_free_mem, self.current_mem))
8567 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8568 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8571 rpcres.Raise("Cannot modify instance runtime memory")
8573 # First get the migration information from the remote node
8574 result = self.rpc.call_migration_info(source_node, instance)
8575 msg = result.fail_msg
8577 log_err = ("Failed fetching source migration information from %s: %s" %
8579 logging.error(log_err)
8580 raise errors.OpExecError(log_err)
8582 self.migration_info = migration_info = result.payload
8584 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8585 # Then switch the disks to master/master mode
8586 self._EnsureSecondary(target_node)
8587 self._GoStandalone()
8588 self._GoReconnect(True)
8589 self._WaitUntilSync()
8591 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8592 result = self.rpc.call_accept_instance(target_node,
8595 self.nodes_ip[target_node])
8597 msg = result.fail_msg
8599 logging.error("Instance pre-migration failed, trying to revert"
8600 " disk status: %s", msg)
8601 self.feedback_fn("Pre-migration failed, aborting")
8602 self._AbortMigration()
8603 self._RevertDiskStatus()
8604 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8605 (instance.name, msg))
8607 self.feedback_fn("* migrating instance to %s" % target_node)
8608 result = self.rpc.call_instance_migrate(source_node, instance,
8609 self.nodes_ip[target_node],
8611 msg = result.fail_msg
8613 logging.error("Instance migration failed, trying to revert"
8614 " disk status: %s", msg)
8615 self.feedback_fn("Migration failed, aborting")
8616 self._AbortMigration()
8617 self._RevertDiskStatus()
8618 raise errors.OpExecError("Could not migrate instance %s: %s" %
8619 (instance.name, msg))
8621 self.feedback_fn("* starting memory transfer")
8622 last_feedback = time.time()
8624 result = self.rpc.call_instance_get_migration_status(source_node,
8626 msg = result.fail_msg
8627 ms = result.payload # MigrationStatus instance
8628 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8629 logging.error("Instance migration failed, trying to revert"
8630 " disk status: %s", msg)
8631 self.feedback_fn("Migration failed, aborting")
8632 self._AbortMigration()
8633 self._RevertDiskStatus()
8635 msg = "hypervisor returned failure"
8636 raise errors.OpExecError("Could not migrate instance %s: %s" %
8637 (instance.name, msg))
8639 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8640 self.feedback_fn("* memory transfer complete")
8643 if (utils.TimeoutExpired(last_feedback,
8644 self._MIGRATION_FEEDBACK_INTERVAL) and
8645 ms.transferred_ram is not None):
8646 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8647 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8648 last_feedback = time.time()
8650 time.sleep(self._MIGRATION_POLL_INTERVAL)
8652 result = self.rpc.call_instance_finalize_migration_src(source_node,
8656 msg = result.fail_msg
8658 logging.error("Instance migration succeeded, but finalization failed"
8659 " on the source node: %s", msg)
8660 raise errors.OpExecError("Could not finalize instance migration: %s" %
8663 instance.primary_node = target_node
8665 # distribute new instance config to the other nodes
8666 self.cfg.Update(instance, self.feedback_fn)
8668 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8672 msg = result.fail_msg
8674 logging.error("Instance migration succeeded, but finalization failed"
8675 " on the target node: %s", msg)
8676 raise errors.OpExecError("Could not finalize instance migration: %s" %
8679 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8680 self._EnsureSecondary(source_node)
8681 self._WaitUntilSync()
8682 self._GoStandalone()
8683 self._GoReconnect(False)
8684 self._WaitUntilSync()
8686 # If the instance's disk template is `rbd' and there was a successful
8687 # migration, unmap the device from the source node.
8688 if self.instance.disk_template == constants.DT_RBD:
8689 disks = _ExpandCheckDisks(instance, instance.disks)
8690 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8692 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8693 msg = result.fail_msg
8695 logging.error("Migration was successful, but couldn't unmap the"
8696 " block device %s on source node %s: %s",
8697 disk.iv_name, source_node, msg)
8698 logging.error("You need to unmap the device %s manually on %s",
8699 disk.iv_name, source_node)
8701 self.feedback_fn("* done")
8703 def _ExecFailover(self):
8704 """Failover an instance.
8706 The failover is done by shutting it down on its present node and
8707 starting it on the secondary.
8710 instance = self.instance
8711 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8713 source_node = instance.primary_node
8714 target_node = self.target_node
8716 if instance.admin_state == constants.ADMINST_UP:
8717 self.feedback_fn("* checking disk consistency between source and target")
8718 for (idx, dev) in enumerate(instance.disks):
8719 # for drbd, these are drbd over lvm
8720 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8722 if primary_node.offline:
8723 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8725 (primary_node.name, idx, target_node))
8726 elif not self.ignore_consistency:
8727 raise errors.OpExecError("Disk %s is degraded on target node,"
8728 " aborting failover" % idx)
8730 self.feedback_fn("* not checking disk consistency as instance is not"
8733 self.feedback_fn("* shutting down instance on source node")
8734 logging.info("Shutting down instance %s on node %s",
8735 instance.name, source_node)
8737 result = self.rpc.call_instance_shutdown(source_node, instance,
8738 self.shutdown_timeout)
8739 msg = result.fail_msg
8741 if self.ignore_consistency or primary_node.offline:
8742 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8743 " proceeding anyway; please make sure node"
8744 " %s is down; error details: %s",
8745 instance.name, source_node, source_node, msg)
8747 raise errors.OpExecError("Could not shutdown instance %s on"
8749 (instance.name, source_node, msg))
8751 self.feedback_fn("* deactivating the instance's disks on source node")
8752 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8753 raise errors.OpExecError("Can't shut down the instance's disks")
8755 instance.primary_node = target_node
8756 # distribute new instance config to the other nodes
8757 self.cfg.Update(instance, self.feedback_fn)
8759 # Only start the instance if it's marked as up
8760 if instance.admin_state == constants.ADMINST_UP:
8761 self.feedback_fn("* activating the instance's disks on target node %s" %
8763 logging.info("Starting instance %s on node %s",
8764 instance.name, target_node)
8766 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8767 ignore_secondaries=True)
8769 _ShutdownInstanceDisks(self.lu, instance)
8770 raise errors.OpExecError("Can't activate the instance's disks")
8772 self.feedback_fn("* starting the instance on the target node %s" %
8774 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8776 msg = result.fail_msg
8778 _ShutdownInstanceDisks(self.lu, instance)
8779 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8780 (instance.name, target_node, msg))
8782 def Exec(self, feedback_fn):
8783 """Perform the migration.
8786 self.feedback_fn = feedback_fn
8787 self.source_node = self.instance.primary_node
8789 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8790 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8791 self.target_node = self.instance.secondary_nodes[0]
8792 # Otherwise self.target_node has been populated either
8793 # directly, or through an iallocator.
8795 self.all_nodes = [self.source_node, self.target_node]
8796 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8797 in self.cfg.GetMultiNodeInfo(self.all_nodes))
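# nodes_ip maps node name -> secondary IP; these are the addresses used by
# the DRBD disconnect/attach RPCs and by the migration RPC itself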
8800 feedback_fn("Failover instance %s" % self.instance.name)
8801 self._ExecFailover()
8803 feedback_fn("Migrating instance %s" % self.instance.name)
8806 return self._ExecCleanup()
8808 return self._ExecMigration()
8811 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8813 """Wrapper around L{_CreateBlockDevInner}.
8815 This method annotates the root device first.
8818 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8819 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8823 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8825 """Create a tree of block devices on a given node.
8827 If this device type has to be created on secondaries, create it and
8830 If not, just recurse to children keeping the same 'force' value.
8832 @attention: The device has to be annotated already.
8834 @param lu: the lu on whose behalf we execute
8835 @param node: the node on which to create the device
8836 @type instance: L{objects.Instance}
8837 @param instance: the instance which owns the device
8838 @type device: L{objects.Disk}
8839 @param device: the device to create
8840 @type force_create: boolean
8841 @param force_create: whether to force creation of this device; this
8842 will be changed to True whenever we find a device which has
8843 CreateOnSecondary() attribute
8844 @param info: the extra 'metadata' we should attach to the device
8845 (this will be represented as a LVM tag)
8846 @type force_open: boolean
8847 @param force_open: this parameter will be passed to the
8848 L{backend.BlockdevCreate} function where it specifies
8849 whether we run on primary or not, and it affects both
8850 the child assembly and the device's own Open() execution
8853 if device.CreateOnSecondary():
8857 for child in device.children:
8858 _CreateBlockDevInner(lu, node, instance, child, force_create,
8861 if not force_create:
8864 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8867 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8868 """Create a single block device on a given node.
8870 This will not recurse over children of the device, so they must be
8873 @param lu: the lu on whose behalf we execute
8874 @param node: the node on which to create the device
8875 @type instance: L{objects.Instance}
8876 @param instance: the instance which owns the device
8877 @type device: L{objects.Disk}
8878 @param device: the device to create
8879 @param info: the extra 'metadata' we should attach to the device
8880 (this will be represented as a LVM tag)
8881 @type force_open: boolean
8882 @param force_open: this parameter will be passed to the
8883 L{backend.BlockdevCreate} function where it specifies
8884 whether we run on primary or not, and it affects both
8885 the child assembly and the device's own Open() execution
8888 lu.cfg.SetDiskID(device, node)
8889 result = lu.rpc.call_blockdev_create(node, device, device.size,
8890 instance.name, force_open, info)
8891 result.Raise("Can't create block device %s on"
8892 " node %s for instance %s" % (device, node, instance.name))
8893 if device.physical_id is None:
8894 device.physical_id = result.payload
8897 def _GenerateUniqueNames(lu, exts):
8898 """Generate a suitable LV name.
8900 This will generate a logical volume name for the given instance.
8905 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8906 results.append("%s%s" % (new_id, val))
8910 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8911 iv_name, p_minor, s_minor):
8912 """Generate a drbd8 device complete with its children.
8915 assert len(vgnames) == len(names) == 2
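# Builds one LD_DRBD8 device whose two LV children hold the data (the
# requested size) and the DRBD metadata (DRBD_META_SIZE)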
8916 port = lu.cfg.AllocatePort()
8917 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8919 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8920 logical_id=(vgnames[0], names[0]),
8922 dev_meta = objects.Disk(dev_type=constants.LD_LV,
8923 size=constants.DRBD_META_SIZE,
8924 logical_id=(vgnames[1], names[1]),
8926 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8927 logical_id=(primary, secondary, port,
8930 children=[dev_data, dev_meta],
8931 iv_name=iv_name, params={})
8935 _DISK_TEMPLATE_NAME_PREFIX = {
8936 constants.DT_PLAIN: "",
8937 constants.DT_RBD: ".rbd",
8941 _DISK_TEMPLATE_DEVICE_TYPE = {
8942 constants.DT_PLAIN: constants.LD_LV,
8943 constants.DT_FILE: constants.LD_FILE,
8944 constants.DT_SHARED_FILE: constants.LD_FILE,
8945 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8946 constants.DT_RBD: constants.LD_RBD,
8950 def _GenerateDiskTemplate(
8951 lu, template_name, instance_name, primary_node, secondary_nodes,
8952 disk_info, file_storage_dir, file_driver, base_index,
8953 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8954 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8955 """Generate the entire disk layout for a given template type.
8958 #TODO: compute space requirements
8960 vgname = lu.cfg.GetVGName()
8961 disk_count = len(disk_info)
8964 if template_name == constants.DT_DISKLESS:
8966 elif template_name == constants.DT_DRBD8:
8967 if len(secondary_nodes) != 1:
8968 raise errors.ProgrammerError("Wrong template configuration")
8969 remote_node = secondary_nodes[0]
8970 minors = lu.cfg.AllocateDRBDMinor(
8971 [primary_node, remote_node] * len(disk_info), instance_name)
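# AllocateDRBDMinor returns one minor per listed node, i.e. two consecutive
# minors per disk, consumed below as minors[idx * 2] and minors[idx * 2 + 1]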
8973 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8975 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8978 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8979 for i in range(disk_count)]):
8980 names.append(lv_prefix + "_data")
8981 names.append(lv_prefix + "_meta")
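# one prefix per disk; each disk gets a '<prefix>_data' and a '<prefix>_meta'
# LV name, consumed pairwise below as names[idx * 2:idx * 2 + 2]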
8982 for idx, disk in enumerate(disk_info):
8983 disk_index = idx + base_index
8984 data_vg = disk.get(constants.IDISK_VG, vgname)
8985 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8986 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8987 disk[constants.IDISK_SIZE],
8989 names[idx * 2:idx * 2 + 2],
8990 "disk/%d" % disk_index,
8991 minors[idx * 2], minors[idx * 2 + 1])
8992 disk_dev.mode = disk[constants.IDISK_MODE]
8993 disks.append(disk_dev)
8996 raise errors.ProgrammerError("Wrong template configuration")
8998 if template_name == constants.DT_FILE:
9000 elif template_name == constants.DT_SHARED_FILE:
9001 _req_shr_file_storage()
9003 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9004 if name_prefix is None:
9007 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9008 (name_prefix, base_index + i)
9009 for i in range(disk_count)])
9011 if template_name == constants.DT_PLAIN:
9012 def logical_id_fn(idx, _, disk):
9013 vg = disk.get(constants.IDISK_VG, vgname)
9014 return (vg, names[idx])
9015 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9017 lambda _, disk_index, disk: (file_driver,
9018 "%s/disk%d" % (file_storage_dir,
9020 elif template_name == constants.DT_BLOCK:
9022 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9023 disk[constants.IDISK_ADOPT])
9024 elif template_name == constants.DT_RBD:
9025 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9027 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9029 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9031 for idx, disk in enumerate(disk_info):
9032 disk_index = idx + base_index
9033 size = disk[constants.IDISK_SIZE]
9034 feedback_fn("* disk %s, size %s" %
9035 (disk_index, utils.FormatUnit(size, "h")))
9036 disks.append(objects.Disk(dev_type=dev_type, size=size,
9037 logical_id=logical_id_fn(idx, disk_index, disk),
9038 iv_name="disk/%d" % disk_index,
9039 mode=disk[constants.IDISK_MODE],
9045 def _GetInstanceInfoText(instance):
9046 """Compute that text that should be added to the disk's metadata.
9049 return "originstname+%s" % instance.name
9052 def _CalcEta(time_taken, written, total_size):
9053 """Calculates the ETA based on size written and total size.
9055 @param time_taken: The time taken so far
9056 @param written: amount written so far
9057 @param total_size: The total size of data to be written
9058 @return: The remaining time in seconds
9061 avg_time = time_taken / float(written)
9062 return (total_size - written) * avg_time
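# Example: 100 MiB written in 50 seconds out of 400 MiB total gives
# avg_time = 0.5 s/MiB and an ETA of (400 - 100) * 0.5 = 150 seconds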
9065 def _WipeDisks(lu, instance, disks=None):
9066 """Wipes instance disks.
9068 @type lu: L{LogicalUnit}
9069 @param lu: the logical unit on whose behalf we execute
9070 @type instance: L{objects.Instance}
9071 @param instance: the instance whose disks we should wipe
9072 @return: the success of the wipe
9075 node = instance.primary_node
9078 disks = [(idx, disk, 0)
9079 for (idx, disk) in enumerate(instance.disks)]
9081 for (_, device, _) in disks:
9082 lu.cfg.SetDiskID(device, node)
9084 logging.info("Pausing synchronization of disks of instance '%s'",
9086 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9087 (map(compat.snd, disks),
9090 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9092 for idx, success in enumerate(result.payload):
9094 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9095 " failed", idx, instance.name)
9098 for (idx, device, offset) in disks:
9099 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9100 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9102 int(min(constants.MAX_WIPE_CHUNK,
9103 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
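# For illustration only (assuming, hypothetically, MIN_WIPE_CHUNK_PERCENT=10
# and MAX_WIPE_CHUNK=1024 MiB): a 100 GiB disk would be wiped in 1024 MiB
# chunks, a 5 GiB disk in 512 MiB chunks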
9107 start_time = time.time()
9112 info_text = (" (from %s to %s)" %
9113 (utils.FormatUnit(offset, "h"),
9114 utils.FormatUnit(size, "h")))
9116 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9118 logging.info("Wiping disk %d for instance %s on node %s using"
9119 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9121 while offset < size:
9122 wipe_size = min(wipe_chunk_size, size - offset)
9124 logging.debug("Wiping disk %d, offset %s, chunk %s",
9125 idx, offset, wipe_size)
9127 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9129 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9130 (idx, offset, wipe_size))
9134 if now - last_output >= 60:
9135 eta = _CalcEta(now - start_time, offset, size)
9136 lu.LogInfo(" - done: %.1f%% ETA: %s",
9137 offset / float(size) * 100, utils.FormatSeconds(eta))
9140 logging.info("Resuming synchronization of disks for instance '%s'",
9143 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9144 (map(compat.snd, disks),
9149 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9150 node, result.fail_msg)
9152 for idx, success in enumerate(result.payload):
9154 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9155 " failed", idx, instance.name)
9158 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9159 """Create all disks for an instance.
9161 This abstracts away some work from AddInstance.
9163 @type lu: L{LogicalUnit}
9164 @param lu: the logical unit on whose behalf we execute
9165 @type instance: L{objects.Instance}
9166 @param instance: the instance whose disks we should create
9168 @param to_skip: list of indices to skip
9169 @type target_node: string
9170 @param target_node: if passed, overrides the target node for creation
9172 @return: the success of the creation
9175 info = _GetInstanceInfoText(instance)
9176 if target_node is None:
9177 pnode = instance.primary_node
9178 all_nodes = instance.all_nodes
9183 if instance.disk_template in constants.DTS_FILEBASED:
9184 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9185 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9187 result.Raise("Failed to create directory '%s' on"
9188 " node %s" % (file_storage_dir, pnode))
9190 # Note: this needs to be kept in sync with adding of disks in
9191 # LUInstanceSetParams
9192 for idx, device in enumerate(instance.disks):
9193 if to_skip and idx in to_skip:
9195 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9197 for node in all_nodes:
9198 f_create = node == pnode
9199 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9202 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9203 """Remove all disks for an instance.
9205 This abstracts away some work from `AddInstance()` and
9206 `RemoveInstance()`. Note that in case some of the devices couldn't
9207 be removed, the removal will continue with the other ones (compare
9208 with `_CreateDisks()`).
9210 @type lu: L{LogicalUnit}
9211 @param lu: the logical unit on whose behalf we execute
9212 @type instance: L{objects.Instance}
9213 @param instance: the instance whose disks we should remove
9214 @type target_node: string
9215 @param target_node: used to override the node on which to remove the disks
9217 @return: the success of the removal
9220 logging.info("Removing block devices for instance %s", instance.name)
9223 ports_to_release = set()
9224 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9225 for (idx, device) in enumerate(anno_disks):
9227 edata = [(target_node, device)]
9229 edata = device.ComputeNodeTree(instance.primary_node)
9230 for node, disk in edata:
9231 lu.cfg.SetDiskID(disk, node)
9232 result = lu.rpc.call_blockdev_remove(node, disk)
9234 lu.LogWarning("Could not remove disk %s on node %s,"
9235 " continuing anyway: %s", idx, node, result.fail_msg)
9236 if not (result.offline and node != instance.primary_node):
9239 # if this is a DRBD disk, return its port to the pool
9240 if device.dev_type in constants.LDS_DRBD:
9241 ports_to_release.add(device.logical_id[2])
9243 if all_result or ignore_failures:
9244 for port in ports_to_release:
9245 lu.cfg.AddTcpUdpPort(port)
9247 if instance.disk_template in constants.DTS_FILEBASED:
9248 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9252 tgt = instance.primary_node
9253 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9255 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9256 file_storage_dir, instance.primary_node, result.fail_msg)
9262 def _ComputeDiskSizePerVG(disk_template, disks):
9263 """Compute disk size requirements in the volume group
9266 def _compute(disks, payload):
9267 """Universal algorithm.
9270 vgs = {}
9271 for disk in disks:
9272 vgs[disk[constants.IDISK_VG]] = \
9273 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9275 return vgs
9277 # Required free disk space as a function of disk and swap space
9278 req_size_dict = {
9279 constants.DT_DISKLESS: {},
9280 constants.DT_PLAIN: _compute(disks, 0),
9281 # 128 MB are added for drbd metadata for each disk
9282 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9283 constants.DT_FILE: {},
9284 constants.DT_SHARED_FILE: {},
9285 }
9287 if disk_template not in req_size_dict:
9288 raise errors.ProgrammerError("Disk template '%s' size requirement"
9289 " is unknown" % disk_template)
9291 return req_size_dict[disk_template]
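# Illustrative example (editor's addition): for two 10 GiB disks on a volume
# group named e.g. "xenvg", the helper returns the required space per VG in
# MiB, roughly:
#
#   _ComputeDiskSizePerVG(constants.DT_PLAIN, disks) == {"xenvg": 20480}
#   _ComputeDiskSizePerVG(constants.DT_DRBD8, disks) == {"xenvg": 20480 + 2 * constants.DRBD_META_SIZE}
#
# while file-based and diskless templates need no VG space and yield {}.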
9294 def _FilterVmNodes(lu, nodenames):
9295 """Filters out non-vm_capable nodes from a list.
9297 @type lu: L{LogicalUnit}
9298 @param lu: the logical unit for which we check
9299 @type nodenames: list
9300 @param nodenames: the list of nodes on which we should check
9302 @return: the list of vm-capable nodes
9305 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9306 return [name for name in nodenames if name not in vm_nodes]
9309 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9310 """Hypervisor parameter validation.
9312 This function abstract the hypervisor parameter validation to be
9313 used in both instance create and instance modify.
9315 @type lu: L{LogicalUnit}
9316 @param lu: the logical unit for which we check
9317 @type nodenames: list
9318 @param nodenames: the list of nodes on which we should check
9319 @type hvname: string
9320 @param hvname: the name of the hypervisor we should use
9321 @type hvparams: dict
9322 @param hvparams: the parameters which we need to check
9323 @raise errors.OpPrereqError: if the parameters are not valid
9326 nodenames = _FilterVmNodes(lu, nodenames)
9328 cluster = lu.cfg.GetClusterInfo()
9329 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9331 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9332 for node in nodenames:
9333 info = hvinfo[node]
9334 if info.offline:
9335 continue
9336 info.Raise("Hypervisor parameter validation failed on node %s" % node)
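# Illustrative sketch (editor's addition): the dict sent to the nodes is the
# cluster-level hvparams for the chosen hypervisor overlaid with the per-call
# overrides; with parameter names picked for illustration only:
#
#   cluster.hvparams["kvm"] = {"acpi": True, "boot_order": "disk"}
#   hvparams = {"boot_order": "network"}
#   hvfull == {"acpi": True, "boot_order": "network"}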
9339 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9340 """OS parameters validation.
9342 @type lu: L{LogicalUnit}
9343 @param lu: the logical unit for which we check
9344 @type required: boolean
9345 @param required: whether the validation should fail if the OS is not found
9347 @type nodenames: list
9348 @param nodenames: the list of nodes on which we should check
9349 @type osname: string
9350 @param osname: the name of the OS we should use
9351 @type osparams: dict
9352 @param osparams: the parameters which we need to check
9353 @raise errors.OpPrereqError: if the parameters are not valid
9356 nodenames = _FilterVmNodes(lu, nodenames)
9357 result = lu.rpc.call_os_validate(nodenames, required, osname,
9358 [constants.OS_VALIDATE_PARAMETERS],
9359 osparams)
9360 for node, nres in result.items():
9361 # we don't check for offline cases since this should be run only
9362 # against the master node and/or an instance's nodes
9363 nres.Raise("OS Parameters validation failed on node %s" % node)
9364 if not nres.payload:
9365 lu.LogInfo("OS %s not found on node %s, validation skipped",
9366 osname, node)
9369 def _CreateInstanceAllocRequest(op, disks, nics, beparams):
9370 """Wrapper around IAReqInstanceAlloc.
9372 @param op: The instance opcode
9373 @param disks: The computed disks
9374 @param nics: The computed nics
9375 @param beparams: The full filled beparams
9377 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9380 spindle_use = beparams[constants.BE_SPINDLE_USE]
9381 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9382 disk_template=op.disk_template,
9385 vcpus=beparams[constants.BE_VCPUS],
9386 memory=beparams[constants.BE_MAXMEM],
9387 spindle_use=spindle_use,
9388 disks=disks,
9389 nics=[n.ToDict() for n in nics],
9390 hypervisor=op.hypervisor)
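# Editor's note: the request above merely bundles the pieces computed
# elsewhere (instance name, disk template, the disk dicts, the NICs serialized
# via ToDict(), vcpus/memory/spindle_use taken from the filled beparams and
# the hypervisor) so that iallocator.IAllocator can hand them to the
# allocation script; see LUInstanceCreate._RunAllocator and
# LUInstanceMultiAlloc.CheckPrereq below for the two callers.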
9393 def _ComputeNics(op, cluster, default_ip, cfg, proc):
9394 """Computes the nics.
9396 @param op: The instance opcode
9397 @param cluster: Cluster configuration object
9398 @param default_ip: The default ip to assign
9399 @param cfg: An instance of the configuration object
9400 @param proc: The executor instance
9402 @returns: the list of built up NICs
9405 nics = []
9406 for idx, nic in enumerate(op.nics):
9407 nic_mode_req = nic.get(constants.INIC_MODE, None)
9408 nic_mode = nic_mode_req
9409 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9410 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9412 # in routed mode, for the first nic, the default ip is 'auto'
9413 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9414 default_ip_mode = constants.VALUE_AUTO
9416 default_ip_mode = constants.VALUE_NONE
9418 # ip validity checks
9419 ip = nic.get(constants.INIC_IP, default_ip_mode)
9420 if ip is None or ip.lower() == constants.VALUE_NONE:
9421 nic_ip = None
9422 elif ip.lower() == constants.VALUE_AUTO:
9423 if not op.name_check:
9424 raise errors.OpPrereqError("IP address set to auto but name checks"
9425 " have been skipped",
9426 errors.ECODE_INVAL)
9427 nic_ip = default_ip
9428 else:
9429 if not netutils.IPAddress.IsValid(ip):
9430 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9431 errors.ECODE_INVAL)
9432 nic_ip = ip
9434 # TODO: check the ip address for uniqueness
9435 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9436 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9439 # MAC address verification
9440 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9441 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9442 mac = utils.NormalizeAndValidateMac(mac)
9444 try:
9445 # TODO: We need to factor this out
9446 cfg.ReserveMAC(mac, proc.GetECId())
9447 except errors.ReservationError:
9448 raise errors.OpPrereqError("MAC address %s already in use"
9449 " in cluster" % mac,
9450 errors.ECODE_NOTUNIQUE)
9452 # Build nic parameters
9453 link = nic.get(constants.INIC_LINK, None)
9454 if link == constants.VALUE_AUTO:
9455 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9456 nicparams = {}
9457 if nic_mode_req:
9458 nicparams[constants.NIC_MODE] = nic_mode
9459 if link:
9460 nicparams[constants.NIC_LINK] = link
9462 check_params = cluster.SimpleFillNIC(nicparams)
9463 objects.NIC.CheckParameterSyntax(check_params)
9464 nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9466 return nics
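# Illustrative example (editor's addition): an empty NIC spec ({}) on a
# cluster whose default NIC mode is "bridged" ends up as
#
#   objects.NIC(mac="auto", ip=None, nicparams={})
#
# i.e. mode/link are only recorded in nicparams when they were requested
# explicitly, so the NIC keeps following the cluster defaults; the "auto" MAC
# is replaced by a generated one later (see LUInstanceCreate.CheckPrereq).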
9469 def _ComputeDisks(op, default_vg):
9470 """Computes the instance disks.
9472 @param op: The instance opcode
9473 @param default_vg: The default_vg to assume
9475 @return: the computed disks
9478 disks = []
9479 for disk in op.disks:
9480 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9481 if mode not in constants.DISK_ACCESS_SET:
9482 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9483 mode, errors.ECODE_INVAL)
9484 size = disk.get(constants.IDISK_SIZE, None)
9485 if size is None:
9486 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9487 try:
9488 size = int(size)
9489 except (TypeError, ValueError):
9490 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9491 errors.ECODE_INVAL)
9493 data_vg = disk.get(constants.IDISK_VG, default_vg)
9494 new_disk = {
9495 constants.IDISK_SIZE: size,
9496 constants.IDISK_MODE: mode,
9497 constants.IDISK_VG: data_vg,
9498 }
9499 if constants.IDISK_METAVG in disk:
9500 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9501 if constants.IDISK_ADOPT in disk:
9502 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9503 disks.append(new_disk)
9505 return disks
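# Illustrative example (editor's addition, key names shown by their string
# values): with default_vg == "xenvg", an opcode carrying
#
#   op.disks = [{"size": "10240"}, {"size": 512, "mode": "ro", "vg": "fast"}]
#
# is normalised by the loop above into
#
#   [{"size": 10240, "mode": "rw", "vg": "xenvg"},
#    {"size": 512, "mode": "ro", "vg": "fast"}]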
9508 def _ComputeFullBeParams(op, cluster):
9509 """Computes the full beparams.
9511 @param op: The instance opcode
9512 @param cluster: The cluster config object
9514 @return: The fully filled beparams
9517 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9518 for param, value in op.beparams.iteritems():
9519 if value == constants.VALUE_AUTO:
9520 op.beparams[param] = default_beparams[param]
9521 objects.UpgradeBeParams(op.beparams)
9522 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
9523 return cluster.SimpleFillBE(op.beparams)
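# Illustrative example (editor's addition): "auto" values are first replaced
# by the cluster defaults, then UpgradeBeParams translates the legacy "memory"
# setting into maxmem/minmem before the dict is type-checked and filled, so
#
#   op.beparams = {"vcpus": "auto", "memory": 512}
#
# roughly becomes a fully filled dict with the cluster's default vcpus and
# maxmem == minmem == 512.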
9526 class LUInstanceCreate(LogicalUnit):
9527 """Create an instance.
9530 HPATH = "instance-add"
9531 HTYPE = constants.HTYPE_INSTANCE
9534 def CheckArguments(self):
9538 # do not require name_check to ease forward/backward compatibility
9540 if self.op.no_install and self.op.start:
9541 self.LogInfo("No-installation mode selected, disabling startup")
9542 self.op.start = False
9543 # validate/normalize the instance name
9544 self.op.instance_name = \
9545 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9547 if self.op.ip_check and not self.op.name_check:
9548 # TODO: make the ip check more flexible and not depend on the name check
9549 raise errors.OpPrereqError("Cannot do IP address check without a name"
9550 " check", errors.ECODE_INVAL)
9552 # check nics' parameter names
9553 for nic in self.op.nics:
9554 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9556 # check disks. parameter names and consistent adopt/no-adopt strategy
9557 has_adopt = has_no_adopt = False
9558 for disk in self.op.disks:
9559 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9560 if constants.IDISK_ADOPT in disk:
9561 has_adopt = True
9562 else:
9563 has_no_adopt = True
9564 if has_adopt and has_no_adopt:
9565 raise errors.OpPrereqError("Either all disks are adopted or none is",
9566 errors.ECODE_INVAL)
9567 if has_adopt:
9568 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9569 raise errors.OpPrereqError("Disk adoption is not supported for the"
9570 " '%s' disk template" %
9571 self.op.disk_template,
9573 if self.op.iallocator is not None:
9574 raise errors.OpPrereqError("Disk adoption not allowed with an"
9575 " iallocator script", errors.ECODE_INVAL)
9576 if self.op.mode == constants.INSTANCE_IMPORT:
9577 raise errors.OpPrereqError("Disk adoption not allowed for"
9578 " instance import", errors.ECODE_INVAL)
9580 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9581 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9582 " but no 'adopt' parameter given" %
9583 self.op.disk_template,
9586 self.adopt_disks = has_adopt
9588 # instance name verification
9589 if self.op.name_check:
9590 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
9591 self.op.instance_name = self.hostname1.name
9592 # used in CheckPrereq for ip ping check
9593 self.check_ip = self.hostname1.ip
9595 self.check_ip = None
9597 # file storage checks
9598 if (self.op.file_driver and
9599 not self.op.file_driver in constants.FILE_DRIVER):
9600 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9601 self.op.file_driver, errors.ECODE_INVAL)
9603 if self.op.disk_template == constants.DT_FILE:
9604 opcodes.RequireFileStorage()
9605 elif self.op.disk_template == constants.DT_SHARED_FILE:
9606 opcodes.RequireSharedFileStorage()
9608 ### Node/iallocator related checks
9609 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9611 if self.op.pnode is not None:
9612 if self.op.disk_template in constants.DTS_INT_MIRROR:
9613 if self.op.snode is None:
9614 raise errors.OpPrereqError("The networked disk templates need"
9615 " a mirror node", errors.ECODE_INVAL)
9616 elif self.op.snode:
9617 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9618 " template")
9619 self.op.snode = None
9621 self._cds = _GetClusterDomainSecret()
9623 if self.op.mode == constants.INSTANCE_IMPORT:
9624 # On import force_variant must be True, because if we forced it at
9625 # initial install, our only chance when importing it back is that it
9627 self.op.force_variant = True
9629 if self.op.no_install:
9630 self.LogInfo("No-installation mode has no effect during import")
9632 elif self.op.mode == constants.INSTANCE_CREATE:
9633 if self.op.os_type is None:
9634 raise errors.OpPrereqError("No guest OS specified",
9636 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9637 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9638 " installation" % self.op.os_type,
9640 if self.op.disk_template is None:
9641 raise errors.OpPrereqError("No disk template specified",
9644 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9645 # Check handshake to ensure both clusters have the same domain secret
9646 src_handshake = self.op.source_handshake
9647 if not src_handshake:
9648 raise errors.OpPrereqError("Missing source handshake",
9651 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9652 src_handshake)
9653 if errmsg:
9654 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9655 errors.ECODE_INVAL)
9657 # Load and check source CA
9658 self.source_x509_ca_pem = self.op.source_x509_ca
9659 if not self.source_x509_ca_pem:
9660 raise errors.OpPrereqError("Missing source X509 CA",
9661 errors.ECODE_INVAL)
9663 try:
9664 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9665 self._cds)
9666 except OpenSSL.crypto.Error, err:
9667 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9668 (err, ), errors.ECODE_INVAL)
9670 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9671 if errcode is not None:
9672 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9675 self.source_x509_ca = cert
9677 src_instance_name = self.op.source_instance_name
9678 if not src_instance_name:
9679 raise errors.OpPrereqError("Missing source instance name",
9682 self.source_instance_name = \
9683 netutils.GetHostname(name=src_instance_name).name
9686 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9687 self.op.mode, errors.ECODE_INVAL)
9689 def ExpandNames(self):
9690 """ExpandNames for CreateInstance.
9692 Figure out the right locks for instance creation.
9695 self.needed_locks = {}
9697 instance_name = self.op.instance_name
9698 # this is just a preventive check, but someone might still add this
9699 # instance in the meantime, and creation will fail at lock-add time
9700 if instance_name in self.cfg.GetInstanceList():
9701 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9702 instance_name, errors.ECODE_EXISTS)
9704 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9706 if self.op.iallocator:
9707 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9708 # specifying a group on instance creation and then selecting nodes from
9710 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9711 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9713 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9714 nodelist = [self.op.pnode]
9715 if self.op.snode is not None:
9716 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9717 nodelist.append(self.op.snode)
9718 self.needed_locks[locking.LEVEL_NODE] = nodelist
9719 # Lock resources of instance's primary and secondary nodes (copy to
9720 # prevent accidental modification)
9721 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9723 # in case of import lock the source node too
9724 if self.op.mode == constants.INSTANCE_IMPORT:
9725 src_node = self.op.src_node
9726 src_path = self.op.src_path
9728 if src_path is None:
9729 self.op.src_path = src_path = self.op.instance_name
9731 if src_node is None:
9732 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9733 self.op.src_node = None
9734 if os.path.isabs(src_path):
9735 raise errors.OpPrereqError("Importing an instance from a path"
9736 " requires a source node option",
9739 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9740 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9741 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9742 if not os.path.isabs(src_path):
9743 self.op.src_path = src_path = \
9744 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
9746 def _RunAllocator(self):
9747 """Run the allocator based on input opcode.
9750 req = _CreateInstanceAllocRequest(self.op, self.disks,
9751 self.nics, self.be_full)
9752 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9754 ial.Run(self.op.iallocator)
9757 raise errors.OpPrereqError("Can't compute nodes using"
9758 " iallocator '%s': %s" %
9759 (self.op.iallocator, ial.info),
9761 self.op.pnode = ial.result[0]
9762 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9763 self.op.instance_name, self.op.iallocator,
9764 utils.CommaJoin(ial.result))
9766 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9768 if req.RequiredNodes() == 2:
9769 self.op.snode = ial.result[1]
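# Editor's note: ial.result is a list of node names chosen by the allocator;
# it has one entry for non-mirrored disk templates and two entries
# (primary, secondary) for DRBD-style templates, which is exactly what the
# RequiredNodes() assertion above encodes.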
9771 def BuildHooksEnv(self):
9774 This runs on master, primary and secondary nodes of the instance.
9778 "ADD_MODE": self.op.mode,
9780 if self.op.mode == constants.INSTANCE_IMPORT:
9781 env["SRC_NODE"] = self.op.src_node
9782 env["SRC_PATH"] = self.op.src_path
9783 env["SRC_IMAGES"] = self.src_images
9785 env.update(_BuildInstanceHookEnv(
9786 name=self.op.instance_name,
9787 primary_node=self.op.pnode,
9788 secondary_nodes=self.secondaries,
9789 status=self.op.start,
9790 os_type=self.op.os_type,
9791 minmem=self.be_full[constants.BE_MINMEM],
9792 maxmem=self.be_full[constants.BE_MAXMEM],
9793 vcpus=self.be_full[constants.BE_VCPUS],
9794 nics=_NICListToTuple(self, self.nics),
9795 disk_template=self.op.disk_template,
9796 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9797 for d in self.disks],
9800 hypervisor_name=self.op.hypervisor,
9806 def BuildHooksNodes(self):
9807 """Build hooks nodes.
9810 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9813 def _ReadExportInfo(self):
9814 """Reads the export information from disk.
9816 It will override the opcode source node and path with the actual
9817 information, if these two were not specified before.
9819 @return: the export information
9822 assert self.op.mode == constants.INSTANCE_IMPORT
9824 src_node = self.op.src_node
9825 src_path = self.op.src_path
9827 if src_node is None:
9828 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9829 exp_list = self.rpc.call_export_list(locked_nodes)
9831 for node in exp_list:
9832 if exp_list[node].fail_msg:
9834 if src_path in exp_list[node].payload:
9836 self.op.src_node = src_node = node
9837 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
9841 raise errors.OpPrereqError("No export found for relative path %s" %
9842 src_path, errors.ECODE_INVAL)
9844 _CheckNodeOnline(self, src_node)
9845 result = self.rpc.call_export_info(src_node, src_path)
9846 result.Raise("No export or invalid export found in dir %s" % src_path)
9848 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9849 if not export_info.has_section(constants.INISECT_EXP):
9850 raise errors.ProgrammerError("Corrupted export config",
9851 errors.ECODE_ENVIRON)
9853 ei_version = export_info.get(constants.INISECT_EXP, "version")
9854 if (int(ei_version) != constants.EXPORT_VERSION):
9855 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9856 (ei_version, constants.EXPORT_VERSION),
9857 errors.ECODE_ENVIRON)
9860 def _ReadExportParams(self, einfo):
9861 """Use export parameters as defaults.
9863 In case the opcode doesn't specify (as in override) some instance
9864 parameters, then try to use them from the export information, if
9868 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9870 if self.op.disk_template is None:
9871 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9872 self.op.disk_template = einfo.get(constants.INISECT_INS,
9874 if self.op.disk_template not in constants.DISK_TEMPLATES:
9875 raise errors.OpPrereqError("Disk template specified in configuration"
9876 " file is not one of the allowed values:"
9878 " ".join(constants.DISK_TEMPLATES),
9881 raise errors.OpPrereqError("No disk template specified and the export"
9882 " is missing the disk_template information",
9885 if not self.op.disks:
9887 # TODO: import the disk iv_name too
9888 for idx in range(constants.MAX_DISKS):
9889 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9890 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9891 disks.append({constants.IDISK_SIZE: disk_sz})
9892 self.op.disks = disks
9893 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9894 raise errors.OpPrereqError("No disk info specified and the export"
9895 " is missing the disk information",
9898 if not self.op.nics:
9900 for idx in range(constants.MAX_NICS):
9901 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9903 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9904 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9911 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9912 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9914 if (self.op.hypervisor is None and
9915 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9916 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9918 if einfo.has_section(constants.INISECT_HYP):
9919 # use the export parameters but do not override the ones
9920 # specified by the user
9921 for name, value in einfo.items(constants.INISECT_HYP):
9922 if name not in self.op.hvparams:
9923 self.op.hvparams[name] = value
9925 if einfo.has_section(constants.INISECT_BEP):
9926 # use the parameters, without overriding
9927 for name, value in einfo.items(constants.INISECT_BEP):
9928 if name not in self.op.beparams:
9929 self.op.beparams[name] = value
9930 # Compatibility for the old "memory" be param
9931 if name == constants.BE_MEMORY:
9932 if constants.BE_MAXMEM not in self.op.beparams:
9933 self.op.beparams[constants.BE_MAXMEM] = value
9934 if constants.BE_MINMEM not in self.op.beparams:
9935 self.op.beparams[constants.BE_MINMEM] = value
9937 # try to read the parameters old style, from the main section
9938 for name in constants.BES_PARAMETERS:
9939 if (name not in self.op.beparams and
9940 einfo.has_option(constants.INISECT_INS, name)):
9941 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9943 if einfo.has_section(constants.INISECT_OSP):
9944 # use the parameters, without overriding
9945 for name, value in einfo.items(constants.INISECT_OSP):
9946 if name not in self.op.osparams:
9947 self.op.osparams[name] = value
9949 def _RevertToDefaults(self, cluster):
9950 """Revert the instance parameters to the default values.
9954 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9955 for name in self.op.hvparams.keys():
9956 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9957 del self.op.hvparams[name]
9959 be_defs = cluster.SimpleFillBE({})
9960 for name in self.op.beparams.keys():
9961 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9962 del self.op.beparams[name]
9964 nic_defs = cluster.SimpleFillNIC({})
9965 for nic in self.op.nics:
9966 for name in constants.NICS_PARAMETERS:
9967 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9970 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9971 for name in self.op.osparams.keys():
9972 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9973 del self.op.osparams[name]
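# Editor's note (illustrative): parameters whose value merely repeats the
# current default are dropped from the opcode so the instance keeps inheriting
# that default; e.g. if the cluster default for a hypervisor parameter is
# kernel_path=/boot/vmlinuz and the user passed the same value, the entry is
# removed and a later change of the cluster default will also apply to this
# instance.  This pass only runs when identify_defaults is set (see
# CheckPrereq below).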
9975 def _CalculateFileStorageDir(self):
9976 """Calculate final instance file storage dir.
9979 # file storage dir calculation/check
9980 self.instance_file_storage_dir = None
9981 if self.op.disk_template in constants.DTS_FILEBASED:
9982 # build the full file storage dir path
9985 if self.op.disk_template == constants.DT_SHARED_FILE:
9986 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9988 get_fsd_fn = self.cfg.GetFileStorageDir
9990 cfg_storagedir = get_fsd_fn()
9991 if not cfg_storagedir:
9992 raise errors.OpPrereqError("Cluster file storage dir not defined",
9994 joinargs.append(cfg_storagedir)
9996 if self.op.file_storage_dir is not None:
9997 joinargs.append(self.op.file_storage_dir)
9999 joinargs.append(self.op.instance_name)
10001 # pylint: disable=W0142
10002 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
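# Illustrative example (editor's addition, paths are made up): for a DT_FILE
# instance "inst1.example.com" on a cluster whose file storage dir is
# "/srv/ganeti/file-storage" and with no per-instance file_storage_dir given,
# joinargs ends up as ["/srv/ganeti/file-storage", "inst1.example.com"] and
# the result is "/srv/ganeti/file-storage/inst1.example.com"; an explicit
# file_storage_dir, when given, is inserted between the two components.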
10004 def CheckPrereq(self): # pylint: disable=R0914
10005 """Check prerequisites.
10008 self._CalculateFileStorageDir()
10010 if self.op.mode == constants.INSTANCE_IMPORT:
10011 export_info = self._ReadExportInfo()
10012 self._ReadExportParams(export_info)
10013 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10015 self._old_instance_name = None
10017 if (not self.cfg.GetVGName() and
10018 self.op.disk_template not in constants.DTS_NOT_LVM):
10019 raise errors.OpPrereqError("Cluster does not support lvm-based"
10020 " instances", errors.ECODE_STATE)
10022 if (self.op.hypervisor is None or
10023 self.op.hypervisor == constants.VALUE_AUTO):
10024 self.op.hypervisor = self.cfg.GetHypervisorType()
10026 cluster = self.cfg.GetClusterInfo()
10027 enabled_hvs = cluster.enabled_hypervisors
10028 if self.op.hypervisor not in enabled_hvs:
10029 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10031 (self.op.hypervisor, ",".join(enabled_hvs)),
10032 errors.ECODE_STATE)
10034 # Check tag validity
10035 for tag in self.op.tags:
10036 objects.TaggableObject.ValidateTag(tag)
10038 # check hypervisor parameter syntax (locally)
10039 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10040 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10042 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
10043 hv_type.CheckParameterSyntax(filled_hvp)
10044 self.hv_full = filled_hvp
10045 # check that we don't specify global parameters on an instance
10046 _CheckGlobalHvParams(self.op.hvparams)
10048 # fill and remember the beparams dict
10049 self.be_full = _ComputeFullBeParams(self.op, cluster)
10051 # build os parameters
10052 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10054 # now that hvp/bep are in final format, let's reset to defaults,
10056 if self.op.identify_defaults:
10057 self._RevertToDefaults(cluster)
10060 self.nics = _ComputeNics(self.op, cluster, self.hostname1.ip, self.cfg,
10063 # disk checks/pre-build
10064 default_vg = self.cfg.GetVGName()
10065 self.disks = _ComputeDisks(self.op, default_vg)
10067 if self.op.mode == constants.INSTANCE_IMPORT:
10069 for idx in range(len(self.disks)):
10070 option = "disk%d_dump" % idx
10071 if export_info.has_option(constants.INISECT_INS, option):
10072 # FIXME: are the old os-es, disk sizes, etc. useful?
10073 export_name = export_info.get(constants.INISECT_INS, option)
10074 image = utils.PathJoin(self.op.src_path, export_name)
10075 disk_images.append(image)
10077 disk_images.append(False)
10079 self.src_images = disk_images
10081 if self.op.instance_name == self._old_instance_name:
10082 for idx, nic in enumerate(self.nics):
10083 if nic.mac == constants.VALUE_AUTO:
10084 nic_mac_ini = "nic%d_mac" % idx
10085 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10087 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10089 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10090 if self.op.ip_check:
10091 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10092 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10093 (self.check_ip, self.op.instance_name),
10094 errors.ECODE_NOTUNIQUE)
10096 #### mac address generation
10097 # By generating here the mac address both the allocator and the hooks get
10098 # the real final mac address rather than the 'auto' or 'generate' value.
10099 # There is a race condition between the generation and the instance object
10100 # creation, which means that we know the mac is valid now, but we're not
10101 # sure it will be when we actually add the instance. If things go bad
10102 # adding the instance will abort because of a duplicate mac, and the
10103 # creation job will fail.
10104 for nic in self.nics:
10105 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10106 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
10110 if self.op.iallocator is not None:
10111 self._RunAllocator()
10113 # Release all unneeded node locks
10114 _ReleaseLocks(self, locking.LEVEL_NODE,
10115 keep=filter(None, [self.op.pnode, self.op.snode,
10116 self.op.src_node]))
10117 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
10118 keep=filter(None, [self.op.pnode, self.op.snode,
10119 self.op.src_node]))
10121 #### node related checks
10123 # check primary node
10124 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10125 assert self.pnode is not None, \
10126 "Cannot retrieve locked node %s" % self.op.pnode
10127 if pnode.offline:
10128 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10129 pnode.name, errors.ECODE_STATE)
10130 if pnode.drained:
10131 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10132 pnode.name, errors.ECODE_STATE)
10133 if not pnode.vm_capable:
10134 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10135 " '%s'" % pnode.name, errors.ECODE_STATE)
10137 self.secondaries = []
10139 # mirror node verification
10140 if self.op.disk_template in constants.DTS_INT_MIRROR:
10141 if self.op.snode == pnode.name:
10142 raise errors.OpPrereqError("The secondary node cannot be the"
10143 " primary node", errors.ECODE_INVAL)
10144 _CheckNodeOnline(self, self.op.snode)
10145 _CheckNodeNotDrained(self, self.op.snode)
10146 _CheckNodeVmCapable(self, self.op.snode)
10147 self.secondaries.append(self.op.snode)
10149 snode = self.cfg.GetNodeInfo(self.op.snode)
10150 if pnode.group != snode.group:
10151 self.LogWarning("The primary and secondary nodes are in two"
10152 " different node groups; the disk parameters"
10153 " from the first disk's node group will be"
10156 nodenames = [pnode.name] + self.secondaries
10158 # Verify instance specs
10159 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10161 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10162 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10163 constants.ISPEC_DISK_COUNT: len(self.disks),
10164 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10165 constants.ISPEC_NIC_COUNT: len(self.nics),
10166 constants.ISPEC_SPINDLE_USE: spindle_use,
10169 group_info = self.cfg.GetNodeGroup(pnode.group)
10170 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10171 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10172 if not self.op.ignore_ipolicy and res:
10173 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10174 (pnode.group, group_info.name, utils.CommaJoin(res)))
10175 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10177 if not self.adopt_disks:
10178 if self.op.disk_template == constants.DT_RBD:
10179 # _CheckRADOSFreeSpace() is just a placeholder.
10180 # Any function that checks prerequisites can be placed here.
10181 # Check if there is enough space on the RADOS cluster.
10182 _CheckRADOSFreeSpace()
10184 # Check lv size requirements, if not adopting
10185 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10186 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10188 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10189 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10190 disk[constants.IDISK_ADOPT])
10191 for disk in self.disks])
10192 if len(all_lvs) != len(self.disks):
10193 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10194 errors.ECODE_INVAL)
10195 for lv_name in all_lvs:
10196 try:
10197 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
10198 # to ReserveLV uses the same syntax
10199 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10200 except errors.ReservationError:
10201 raise errors.OpPrereqError("LV named %s used by another instance" %
10202 lv_name, errors.ECODE_NOTUNIQUE)
10204 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10205 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10207 node_lvs = self.rpc.call_lv_list([pnode.name],
10208 vg_names.payload.keys())[pnode.name]
10209 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10210 node_lvs = node_lvs.payload
10212 delta = all_lvs.difference(node_lvs.keys())
10213 if delta:
10214 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10215 utils.CommaJoin(delta),
10216 errors.ECODE_INVAL)
10217 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10218 if online_lvs:
10219 raise errors.OpPrereqError("Online logical volumes found, cannot"
10220 " adopt: %s" % utils.CommaJoin(online_lvs),
10221 errors.ECODE_STATE)
10222 # update the size of disk based on what is found
10223 for dsk in self.disks:
10224 dsk[constants.IDISK_SIZE] = \
10225 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10226 dsk[constants.IDISK_ADOPT])][0]))
10228 elif self.op.disk_template == constants.DT_BLOCK:
10229 # Normalize and de-duplicate device paths
10230 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10231 for disk in self.disks])
10232 if len(all_disks) != len(self.disks):
10233 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10234 errors.ECODE_INVAL)
10235 baddisks = [d for d in all_disks
10236 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10237 if baddisks:
10238 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10239 " cannot be adopted" %
10240 (", ".join(baddisks),
10241 constants.ADOPTABLE_BLOCKDEV_ROOT),
10242 errors.ECODE_INVAL)
10244 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10245 list(all_disks))[pnode.name]
10246 node_disks.Raise("Cannot get block device information from node %s" %
10248 node_disks = node_disks.payload
10249 delta = all_disks.difference(node_disks.keys())
10250 if delta:
10251 raise errors.OpPrereqError("Missing block device(s): %s" %
10252 utils.CommaJoin(delta),
10253 errors.ECODE_INVAL)
10254 for dsk in self.disks:
10255 dsk[constants.IDISK_SIZE] = \
10256 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10258 # Verify instance specs
10259 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10261 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10262 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10263 constants.ISPEC_DISK_COUNT: len(self.disks),
10264 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10265 for disk in self.disks],
10266 constants.ISPEC_NIC_COUNT: len(self.nics),
10267 constants.ISPEC_SPINDLE_USE: spindle_use,
10270 group_info = self.cfg.GetNodeGroup(pnode.group)
10271 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10272 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10273 if not self.op.ignore_ipolicy and res:
10274 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10275 " policy: %s") % (pnode.group,
10276 utils.CommaJoin(res)),
10277 errors.ECODE_INVAL)
10279 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10281 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10282 # check OS parameters (remotely)
10283 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10285 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10287 # memory check on primary node
10288 #TODO(dynmem): use MINMEM for checking
10290 _CheckNodeFreeMemory(self, self.pnode.name,
10291 "creating instance %s" % self.op.instance_name,
10292 self.be_full[constants.BE_MAXMEM],
10293 self.op.hypervisor)
10295 self.dry_run_result = list(nodenames)
10297 def Exec(self, feedback_fn):
10298 """Create and add the instance to the cluster.
10301 instance = self.op.instance_name
10302 pnode_name = self.pnode.name
10304 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10305 self.owned_locks(locking.LEVEL_NODE)), \
10306 "Node locks differ from node resource locks"
10308 ht_kind = self.op.hypervisor
10309 if ht_kind in constants.HTS_REQ_PORT:
10310 network_port = self.cfg.AllocatePort()
10312 network_port = None
10314 # This is ugly but we got a chicken-egg problem here
10315 # We can only take the group disk parameters, as the instance
10316 # has no disks yet (we are generating them right here).
10317 node = self.cfg.GetNodeInfo(pnode_name)
10318 nodegroup = self.cfg.GetNodeGroup(node.group)
10319 disks = _GenerateDiskTemplate(self,
10320 self.op.disk_template,
10321 instance, pnode_name,
10324 self.instance_file_storage_dir,
10325 self.op.file_driver,
10328 self.cfg.GetGroupDiskParams(nodegroup))
10330 iobj = objects.Instance(name=instance, os=self.op.os_type,
10331 primary_node=pnode_name,
10332 nics=self.nics, disks=disks,
10333 disk_template=self.op.disk_template,
10334 admin_state=constants.ADMINST_DOWN,
10335 network_port=network_port,
10336 beparams=self.op.beparams,
10337 hvparams=self.op.hvparams,
10338 hypervisor=self.op.hypervisor,
10339 osparams=self.op.osparams,
10343 for tag in self.op.tags:
10346 if self.adopt_disks:
10347 if self.op.disk_template == constants.DT_PLAIN:
10348 # rename LVs to the newly-generated names; we need to construct
10349 # 'fake' LV disks with the old data, plus the new unique_id
10350 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10352 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10353 rename_to.append(t_dsk.logical_id)
10354 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10355 self.cfg.SetDiskID(t_dsk, pnode_name)
10356 result = self.rpc.call_blockdev_rename(pnode_name,
10357 zip(tmp_disks, rename_to))
10358 result.Raise("Failed to rename adopted LVs")
10360 feedback_fn("* creating instance disks...")
10361 try:
10362 _CreateDisks(self, iobj)
10363 except errors.OpExecError:
10364 self.LogWarning("Device creation failed, reverting...")
10365 try:
10366 _RemoveDisks(self, iobj)
10367 finally:
10368 self.cfg.ReleaseDRBDMinors(instance)
10369 raise
10371 feedback_fn("adding instance %s to cluster config" % instance)
10373 self.cfg.AddInstance(iobj, self.proc.GetECId())
10375 # Declare that we don't want to remove the instance lock anymore, as we've
10376 # added the instance to the config
10377 del self.remove_locks[locking.LEVEL_INSTANCE]
10379 if self.op.mode == constants.INSTANCE_IMPORT:
10380 # Release unused nodes
10381 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10383 # Release all nodes
10384 _ReleaseLocks(self, locking.LEVEL_NODE)
10386 disk_abort = False
10387 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10388 feedback_fn("* wiping instance disks...")
10389 try:
10390 _WipeDisks(self, iobj)
10391 except errors.OpExecError, err:
10392 logging.exception("Wiping disks failed")
10393 self.LogWarning("Wiping instance disks failed (%s)", err)
10394 disk_abort = True
10396 if disk_abort:
10397 # Something is already wrong with the disks, don't do anything else
10398 pass
10399 elif self.op.wait_for_sync:
10400 disk_abort = not _WaitForSync(self, iobj)
10401 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10402 # make sure the disks are not degraded (still sync-ing is ok)
10403 feedback_fn("* checking mirrors status")
10404 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10408 if disk_abort:
10409 _RemoveDisks(self, iobj)
10410 self.cfg.RemoveInstance(iobj.name)
10411 # Make sure the instance lock gets removed
10412 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10413 raise errors.OpExecError("There are some degraded disks for"
10414 " this instance")
10416 # Release all node resource locks
10417 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10419 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10420 # we need to set the disks ID to the primary node, since the
10421 # preceding code might or might have not done it, depending on
10422 # disk template and other options
10423 for disk in iobj.disks:
10424 self.cfg.SetDiskID(disk, pnode_name)
10425 if self.op.mode == constants.INSTANCE_CREATE:
10426 if not self.op.no_install:
10427 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10428 not self.op.wait_for_sync)
10430 feedback_fn("* pausing disk sync to install instance OS")
10431 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10434 for idx, success in enumerate(result.payload):
10436 logging.warn("pause-sync of instance %s for disk %d failed",
10439 feedback_fn("* running the instance OS create scripts...")
10440 # FIXME: pass debug option from opcode to backend
10441 os_add_result = \
10442 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10443 self.op.debug_level)
10445 feedback_fn("* resuming disk sync")
10446 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10449 for idx, success in enumerate(result.payload):
10451 logging.warn("resume-sync of instance %s for disk %d failed",
10454 os_add_result.Raise("Could not add os for instance %s"
10455 " on node %s" % (instance, pnode_name))
10458 if self.op.mode == constants.INSTANCE_IMPORT:
10459 feedback_fn("* running the instance OS import scripts...")
10463 for idx, image in enumerate(self.src_images):
10467 # FIXME: pass debug option from opcode to backend
10468 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10469 constants.IEIO_FILE, (image, ),
10470 constants.IEIO_SCRIPT,
10471 (iobj.disks[idx], idx),
10473 transfers.append(dt)
10476 masterd.instance.TransferInstanceData(self, feedback_fn,
10477 self.op.src_node, pnode_name,
10478 self.pnode.secondary_ip,
10480 if not compat.all(import_result):
10481 self.LogWarning("Some disks for instance %s on node %s were not"
10482 " imported successfully" % (instance, pnode_name))
10484 rename_from = self._old_instance_name
10486 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10487 feedback_fn("* preparing remote import...")
10488 # The source cluster will stop the instance before attempting to make
10489 # a connection. In some cases stopping an instance can take a long
10490 # time, hence the shutdown timeout is added to the connection
10492 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10493 self.op.source_shutdown_timeout)
10494 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10496 assert iobj.primary_node == self.pnode.name
10497 disk_results = \
10498 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10499 self.source_x509_ca,
10500 self._cds, timeouts)
10501 if not compat.all(disk_results):
10502 # TODO: Should the instance still be started, even if some disks
10503 # failed to import (valid for local imports, too)?
10504 self.LogWarning("Some disks for instance %s on node %s were not"
10505 " imported successfully" % (instance, pnode_name))
10507 rename_from = self.source_instance_name
10510 # also checked in the prereq part
10511 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10514 # Run rename script on newly imported instance
10515 assert iobj.name == instance
10516 feedback_fn("Running rename script for %s" % instance)
10517 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10519 self.op.debug_level)
10520 if result.fail_msg:
10521 self.LogWarning("Failed to run rename script for %s on node"
10522 " %s: %s" % (instance, pnode_name, result.fail_msg))
10524 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10526 if self.op.start:
10527 iobj.admin_state = constants.ADMINST_UP
10528 self.cfg.Update(iobj, feedback_fn)
10529 logging.info("Starting instance %s on node %s", instance, pnode_name)
10530 feedback_fn("* starting instance...")
10531 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10533 result.Raise("Could not start instance")
10535 return list(iobj.all_nodes)
10538 class LUInstanceMultiAlloc(NoHooksLU):
10539 """Allocates multiple instances at the same time.
10544 def CheckArguments(self):
10545 """Check arguments.
10548 nodes = []
10549 for inst in self.op.instances:
10550 if inst.iallocator is not None:
10551 raise errors.OpPrereqError("iallocator are not allowed to be set on"
10552 " instance objects", errors.ECODE_INVAL)
10553 nodes.append(bool(inst.pnode))
10554 if inst.disk_template in constants.DTS_INT_MIRROR:
10555 nodes.append(bool(inst.snode))
10557 has_nodes = compat.any(nodes)
10558 if compat.all(nodes) ^ has_nodes:
10559 raise errors.OpPrereqError("There are instance objects providing"
10560 " pnode/snode while others do not",
10561 errors.ECODE_INVAL)
10563 if self.op.iallocator is None:
10564 default_iallocator = self.cfg.GetDefaultIAllocator()
10565 if default_iallocator and has_nodes:
10566 self.op.iallocator = default_iallocator
10568 raise errors.OpPrereqError("No iallocator or nodes on the instances"
10569 " given and no cluster-wide default"
10570 " iallocator found; please specify either"
10571 " an iallocator or nodes on the instances"
10572 " or set a cluster-wide default iallocator",
10573 errors.ECODE_INVAL)
10575 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
10576 if dups:
10577 raise errors.OpPrereqError("There are duplicate instance names: %s" %
10578 utils.CommaJoin(dups), errors.ECODE_INVAL)
10580 def ExpandNames(self):
10581 """Calculate the locks.
10584 self.share_locks = _ShareAll()
10585 self.needed_locks = {}
10587 if self.op.iallocator:
10588 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10589 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
10590 else:
10591 nodeslist = []
10592 for inst in self.op.instances:
10593 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
10594 nodeslist.append(inst.pnode)
10595 if inst.snode is not None:
10596 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
10597 nodeslist.append(inst.snode)
10599 self.needed_locks[locking.LEVEL_NODE] = nodeslist
10600 # Lock resources of instance's primary and secondary nodes (copy to
10601 # prevent accidental modification)
10602 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
10604 def CheckPrereq(self):
10605 """Check prerequisite.
10608 cluster = self.cfg.GetClusterInfo()
10609 default_vg = self.cfg.GetVGName()
10610 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
10611 _ComputeNics(op, cluster, None,
10612 self.cfg, self.proc),
10613 _ComputeFullBeParams(op, cluster))
10614 for op in self.op.instances]
10615 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
10616 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10618 ial.Run(self.op.iallocator)
10620 if not ial.success:
10621 raise errors.OpPrereqError("Can't compute nodes using"
10622 " iallocator '%s': %s" %
10623 (self.op.iallocator, ial.info),
10624 errors.ECODE_NORES)
10626 self.ia_result = ial.result
10628 if self.op.dry_run:
10629 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
10630 constants.JOB_IDS_KEY: [],
10631 })
10633 def _ConstructPartialResult(self):
10634 """Constructs the partial result.
10637 (allocatable, failed) = self.ia_result
10638 return {
10639 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
10640 map(compat.fst, allocatable),
10641 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
10642 }
10644 def Exec(self, feedback_fn):
10645 """Executes the opcode.
10648 op2inst = dict((op.instance_name, op) for op in self.op.instances)
10649 (allocatable, failed) = self.ia_result
10651 jobs = []
10652 for (name, nodes) in allocatable:
10653 op = op2inst.pop(name)
10655 if len(nodes) > 1:
10656 (op.pnode, op.snode) = nodes
10657 else:
10658 (op.pnode,) = nodes
10660 jobs.append([op])
10662 missing = set(op2inst.keys()) - set(failed)
10663 assert not missing, \
10664 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
10666 return ResultWithJobs(jobs, **self._ConstructPartialResult())
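# Editor's note: every allocatable instance is wrapped into its own
# single-opcode job ([op]) above, so the ResultWithJobs returned here makes
# mcpu submit one creation job per instance and report the resulting job IDs
# together with the allocatable/failed split from _ConstructPartialResult().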
10669 def _CheckRADOSFreeSpace():
10670 """Compute disk size requirements inside the RADOS cluster.
10673 # For the RADOS cluster we assume there is always enough space.
10677 class LUInstanceConsole(NoHooksLU):
10678 """Connect to an instance's console.
10680 This is somewhat special in that it returns the command line that
10681 you need to run on the master node in order to connect to the
10687 def ExpandNames(self):
10688 self.share_locks = _ShareAll()
10689 self._ExpandAndLockInstance()
10691 def CheckPrereq(self):
10692 """Check prerequisites.
10694 This checks that the instance is in the cluster.
10697 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10698 assert self.instance is not None, \
10699 "Cannot retrieve locked instance %s" % self.op.instance_name
10700 _CheckNodeOnline(self, self.instance.primary_node)
10702 def Exec(self, feedback_fn):
10703 """Connect to the console of an instance
10706 instance = self.instance
10707 node = instance.primary_node
10709 node_insts = self.rpc.call_instance_list([node],
10710 [instance.hypervisor])[node]
10711 node_insts.Raise("Can't get node information from %s" % node)
10713 if instance.name not in node_insts.payload:
10714 if instance.admin_state == constants.ADMINST_UP:
10715 state = constants.INSTST_ERRORDOWN
10716 elif instance.admin_state == constants.ADMINST_DOWN:
10717 state = constants.INSTST_ADMINDOWN
10719 state = constants.INSTST_ADMINOFFLINE
10720 raise errors.OpExecError("Instance %s is not running (state %s)" %
10721 (instance.name, state))
10723 logging.debug("Connecting to console of %s on %s", instance.name, node)
10725 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10728 def _GetInstanceConsole(cluster, instance):
10729 """Returns console information for an instance.
10731 @type cluster: L{objects.Cluster}
10732 @type instance: L{objects.Instance}
10736 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10737 # beparams and hvparams are passed separately, to avoid editing the
10738 # instance and then saving the defaults in the instance itself.
10739 hvparams = cluster.FillHV(instance)
10740 beparams = cluster.FillBE(instance)
10741 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10743 assert console.instance == instance.name
10744 assert console.Validate()
10746 return console.ToDict()
10749 class LUInstanceReplaceDisks(LogicalUnit):
10750 """Replace the disks of an instance.
10753 HPATH = "mirrors-replace"
10754 HTYPE = constants.HTYPE_INSTANCE
10757 def CheckArguments(self):
10758 """Check arguments.
10761 remote_node = self.op.remote_node
10762 ialloc = self.op.iallocator
10763 if self.op.mode == constants.REPLACE_DISK_CHG:
10764 if remote_node is None and ialloc is None:
10765 raise errors.OpPrereqError("When changing the secondary either an"
10766 " iallocator script must be used or the"
10767 " new node given", errors.ECODE_INVAL)
10769 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10771 elif remote_node is not None or ialloc is not None:
10772 # Not replacing the secondary
10773 raise errors.OpPrereqError("The iallocator and new node options can"
10774 " only be used when changing the"
10775 " secondary node", errors.ECODE_INVAL)
10777 def ExpandNames(self):
10778 self._ExpandAndLockInstance()
10780 assert locking.LEVEL_NODE not in self.needed_locks
10781 assert locking.LEVEL_NODE_RES not in self.needed_locks
10782 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10784 assert self.op.iallocator is None or self.op.remote_node is None, \
10785 "Conflicting options"
10787 if self.op.remote_node is not None:
10788 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10790 # Warning: do not remove the locking of the new secondary here
10791 # unless DRBD8.AddChildren is changed to work in parallel;
10792 # currently it doesn't since parallel invocations of
10793 # FindUnusedMinor will conflict
10794 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10795 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10797 self.needed_locks[locking.LEVEL_NODE] = []
10798 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10800 if self.op.iallocator is not None:
10801 # iallocator will select a new node in the same group
10802 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10804 self.needed_locks[locking.LEVEL_NODE_RES] = []
10806 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10807 self.op.iallocator, self.op.remote_node,
10808 self.op.disks, False, self.op.early_release,
10809 self.op.ignore_ipolicy)
10811 self.tasklets = [self.replacer]
10813 def DeclareLocks(self, level):
10814 if level == locking.LEVEL_NODEGROUP:
10815 assert self.op.remote_node is None
10816 assert self.op.iallocator is not None
10817 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10819 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10820 # Lock all groups used by instance optimistically; this requires going
10821 # via the node before it's locked, requiring verification later on
10822 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10823 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10825 elif level == locking.LEVEL_NODE:
10826 if self.op.iallocator is not None:
10827 assert self.op.remote_node is None
10828 assert not self.needed_locks[locking.LEVEL_NODE]
10830 # Lock member nodes of all locked groups
10831 self.needed_locks[locking.LEVEL_NODE] = \
10832 [node_name
10833 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10834 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10836 self._LockInstancesNodes()
10837 elif level == locking.LEVEL_NODE_RES:
10839 self.needed_locks[locking.LEVEL_NODE_RES] = \
10840 self.needed_locks[locking.LEVEL_NODE]
10842 def BuildHooksEnv(self):
10843 """Build hooks env.
10845 This runs on the master, the primary and all the secondaries.
10848 instance = self.replacer.instance
10849 env = {
10850 "MODE": self.op.mode,
10851 "NEW_SECONDARY": self.op.remote_node,
10852 "OLD_SECONDARY": instance.secondary_nodes[0],
10853 }
10854 env.update(_BuildInstanceHookEnvByObject(self, instance))
10856 return env
10857 def BuildHooksNodes(self):
10858 """Build hooks nodes.
10861 instance = self.replacer.instance
10862 nl = [
10863 self.cfg.GetMasterNode(),
10864 instance.primary_node,
10865 ]
10866 if self.op.remote_node is not None:
10867 nl.append(self.op.remote_node)
10869 return nl, nl
10870 def CheckPrereq(self):
10871 """Check prerequisites.
10874 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10875 self.op.iallocator is None)
10877 # Verify if node group locks are still correct
10878 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10880 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10882 return LogicalUnit.CheckPrereq(self)
10885 class TLReplaceDisks(Tasklet):
10886 """Replaces disks for an instance.
10888 Note: Locking is not within the scope of this class.
10891 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10892 disks, delay_iallocator, early_release, ignore_ipolicy):
10893 """Initializes this class.
10896 Tasklet.__init__(self, lu)
10899 self.instance_name = instance_name
10900 self.mode = mode
10901 self.iallocator_name = iallocator_name
10902 self.remote_node = remote_node
10903 self.disks = disks
10904 self.delay_iallocator = delay_iallocator
10905 self.early_release = early_release
10906 self.ignore_ipolicy = ignore_ipolicy
10909 self.instance = None
10910 self.new_node = None
10911 self.target_node = None
10912 self.other_node = None
10913 self.remote_node_info = None
10914 self.node_secondary_ip = None
10917 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10918 """Compute a new secondary node using an IAllocator.
10921 req = iallocator.IAReqRelocate(name=instance_name,
10922 relocate_from=list(relocate_from))
10923 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
10925 ial.Run(iallocator_name)
10927 if not ial.success:
10928 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10929 " %s" % (iallocator_name, ial.info),
10930 errors.ECODE_NORES)
10932 remote_node_name = ial.result[0]
10934 lu.LogInfo("Selected new secondary for instance '%s': %s",
10935 instance_name, remote_node_name)
10937 return remote_node_name
10939 def _FindFaultyDisks(self, node_name):
10940 """Wrapper for L{_FindFaultyInstanceDisks}.
10943 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10944 node_name)
10946 def _CheckDisksActivated(self, instance):
10947 """Checks if the instance disks are activated.
10949 @param instance: The instance to check disks
10950 @return: True if they are activated, False otherwise
10953 nodes = instance.all_nodes
10955 for idx, dev in enumerate(instance.disks):
10956 for node in nodes:
10957 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10958 self.cfg.SetDiskID(dev, node)
10960 result = _BlockdevFind(self, node, dev, instance)
10962 if result.offline:
10963 continue
10964 elif result.fail_msg or not result.payload:
10965 return False
10968 return True
10969 def CheckPrereq(self):
10970 """Check prerequisites.
10972 This checks that the instance is in the cluster.
10975 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10976 assert instance is not None, \
10977 "Cannot retrieve locked instance %s" % self.instance_name
10979 if instance.disk_template != constants.DT_DRBD8:
10980 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10981 " instances", errors.ECODE_INVAL)
10983 if len(instance.secondary_nodes) != 1:
10984 raise errors.OpPrereqError("The instance has a strange layout,"
10985 " expected one secondary but found %d" %
10986 len(instance.secondary_nodes),
10987 errors.ECODE_FAULT)
10989 if not self.delay_iallocator:
10990 self._CheckPrereq2()
10992 def _CheckPrereq2(self):
10993 """Check prerequisites, second part.
10995 This function should always be part of CheckPrereq. It was separated and is
10996 now called from Exec because during node evacuation iallocator was only
10997 called with an unmodified cluster model, not taking planned changes into
11001 instance = self.instance
11002 secondary_node = instance.secondary_nodes[0]
11004 if self.iallocator_name is None:
11005 remote_node = self.remote_node
11006 else:
11007 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11008 instance.name, instance.secondary_nodes)
11010 if remote_node is None:
11011 self.remote_node_info = None
11012 else:
11013 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11014 "Remote node '%s' is not locked" % remote_node
11016 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11017 assert self.remote_node_info is not None, \
11018 "Cannot retrieve locked node %s" % remote_node
11020 if remote_node == self.instance.primary_node:
11021 raise errors.OpPrereqError("The specified node is the primary node of"
11022 " the instance", errors.ECODE_INVAL)
11024 if remote_node == secondary_node:
11025 raise errors.OpPrereqError("The specified node is already the"
11026 " secondary node of the instance",
11027 errors.ECODE_INVAL)
11029 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11030 constants.REPLACE_DISK_CHG):
11031 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11032 errors.ECODE_INVAL)
11034 if self.mode == constants.REPLACE_DISK_AUTO:
11035 if not self._CheckDisksActivated(instance):
11036 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11037 " first" % self.instance_name,
11038 errors.ECODE_STATE)
11039 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11040 faulty_secondary = self._FindFaultyDisks(secondary_node)
11042 if faulty_primary and faulty_secondary:
11043 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11044 " one node and can not be repaired"
11045 " automatically" % self.instance_name,
11046 errors.ECODE_STATE)
11049 self.disks = faulty_primary
11050 self.target_node = instance.primary_node
11051 self.other_node = secondary_node
11052 check_nodes = [self.target_node, self.other_node]
11053 elif faulty_secondary:
11054 self.disks = faulty_secondary
11055 self.target_node = secondary_node
11056 self.other_node = instance.primary_node
11057 check_nodes = [self.target_node, self.other_node]
11063 # Non-automatic modes
11064 if self.mode == constants.REPLACE_DISK_PRI:
11065 self.target_node = instance.primary_node
11066 self.other_node = secondary_node
11067 check_nodes = [self.target_node, self.other_node]
11069 elif self.mode == constants.REPLACE_DISK_SEC:
11070 self.target_node = secondary_node
11071 self.other_node = instance.primary_node
11072 check_nodes = [self.target_node, self.other_node]
11074 elif self.mode == constants.REPLACE_DISK_CHG:
11075 self.new_node = remote_node
11076 self.other_node = instance.primary_node
11077 self.target_node = secondary_node
11078 check_nodes = [self.new_node, self.other_node]
11080 _CheckNodeNotDrained(self.lu, remote_node)
11081 _CheckNodeVmCapable(self.lu, remote_node)
11083 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11084 assert old_node_info is not None
11085 if old_node_info.offline and not self.early_release:
11086 # doesn't make sense to delay the release
11087 self.early_release = True
11088 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11089 " early-release mode", secondary_node)
11092 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11095 # If not specified all disks should be replaced
11097 self.disks = range(len(self.instance.disks))
11099 # TODO: This is ugly, but right now we can't distinguish between an
11100 # internally submitted opcode and an external one. We should fix that.
11101 if self.remote_node_info:
11102 # We change the node, let's verify it still meets instance policy
11103 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11104 cluster = self.cfg.GetClusterInfo()
11105 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11107 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11108 ignore=self.ignore_ipolicy)
11110 for node in check_nodes:
11111 _CheckNodeOnline(self.lu, node)
11113 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11116 if node_name is not None)
11118 # Release unneeded node and node resource locks
11119 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11120 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11122 # Release any owned node group
11123 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
11124 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11126 # Check whether disks are valid
11127 for disk_idx in self.disks:
11128 instance.FindDisk(disk_idx)
11130 # Get secondary node IP addresses
11131 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11132 in self.cfg.GetMultiNodeInfo(touched_nodes))
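# Summary of the state computed above (descriptive comment, added): for
# REPLACE_DISK_PRI the target node is the primary and other_node the secondary,
# for REPLACE_DISK_SEC the roles are swapped, for REPLACE_DISK_CHG the old
# secondary becomes target_node while new_node holds the freshly selected
# secondary, and for REPLACE_DISK_AUTO the node that actually has faulty disks
# is chosen as the target; node_secondary_ip maps every touched node name to
# its secondary IP for the later DRBD network operations.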
11134 def Exec(self, feedback_fn):
11135 """Execute disk replacement.
11137 This dispatches the disk replacement to the appropriate handler.
11140 if self.delay_iallocator:
11141 self._CheckPrereq2()
11144 # Verify owned locks before starting operation
11145 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11146 assert set(owned_nodes) == set(self.node_secondary_ip), \
11147 ("Incorrect node locks, owning %s, expected %s" %
11148 (owned_nodes, self.node_secondary_ip.keys()))
11149 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11150 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11152 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11153 assert list(owned_instances) == [self.instance_name], \
11154 "Instance '%s' not locked" % self.instance_name
11156 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11157 "Should not own any node group lock at this point"
11160 feedback_fn("No disks need replacement for instance '%s'" %
11161 self.instance.name)
11164 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11165 (utils.CommaJoin(self.disks), self.instance.name))
11166 feedback_fn("Current primary node: %s", self.instance.primary_node)
11167 feedback_fn("Current seconary node: %s",
11168 utils.CommaJoin(self.instance.secondary_nodes))
11170 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11172 # Activate the instance disks if we're replacing them on a down instance
11174 _StartInstanceDisks(self.lu, self.instance, True)
11177 # Should we replace the secondary node?
11178 if self.new_node is not None:
11179 fn = self._ExecDrbd8Secondary
11181 fn = self._ExecDrbd8DiskOnly
11183 result = fn(feedback_fn)
11185 # Deactivate the instance disks if we're replacing them on a
11188 _SafeShutdownInstanceDisks(self.lu, self.instance)
11190 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11193 # Verify owned locks
11194 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11195 nodes = frozenset(self.node_secondary_ip)
11196 assert ((self.early_release and not owned_nodes) or
11197 (not self.early_release and not (set(owned_nodes) - nodes))), \
11198 ("Not owning the correct locks, early_release=%s, owned=%r,"
11199 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11203 def _CheckVolumeGroup(self, nodes):
11204 self.lu.LogInfo("Checking volume groups")
11206 vgname = self.cfg.GetVGName()
11208 # Make sure volume group exists on all involved nodes
11209 results = self.rpc.call_vg_list(nodes)
11211 raise errors.OpExecError("Can't list volume groups on the nodes")
11214 res = results[node]
11215 res.Raise("Error checking node %s" % node)
11216 if vgname not in res.payload:
11217 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11220 def _CheckDisksExistence(self, nodes):
11221 # Check disk existence
11222 for idx, dev in enumerate(self.instance.disks):
11223 if idx not in self.disks:
11227 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
11228 self.cfg.SetDiskID(dev, node)
11230 result = _BlockdevFind(self, node, dev, self.instance)
11232 msg = result.fail_msg
11233 if msg or not result.payload:
11235 msg = "disk not found"
11236 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11239 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11240 for idx, dev in enumerate(self.instance.disks):
11241 if idx not in self.disks:
11244 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11247 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11248 on_primary, ldisk=ldisk):
11249 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11250 " replace disks for instance %s" %
11251 (node_name, self.instance.name))
11253 def _CreateNewStorage(self, node_name):
11254 """Create new storage on the primary or secondary node.
11256 This is only used for same-node replaces, not for changing the
11257 secondary node, hence we don't want to modify the existing disk.
11262 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11263 for idx, dev in enumerate(disks):
11264 if idx not in self.disks:
11267 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
11269 self.cfg.SetDiskID(dev, node_name)
11271 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11272 names = _GenerateUniqueNames(self.lu, lv_names)
11274 (data_disk, meta_disk) = dev.children
11275 vg_data = data_disk.logical_id[0]
11276 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11277 logical_id=(vg_data, names[0]),
11278 params=data_disk.params)
11279 vg_meta = meta_disk.logical_id[0]
11280 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11281 size=constants.DRBD_META_SIZE,
11282 logical_id=(vg_meta, names[1]),
11283 params=meta_disk.params)
11285 new_lvs = [lv_data, lv_meta]
11286 old_lvs = [child.Copy() for child in dev.children]
11287 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11289 # we pass force_create=True to force the LVM creation
11290 for new_lv in new_lvs:
11291 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11292 _GetInstanceInfoText(self.instance), False)
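# Descriptive note (added): iv_names built above maps each device's iv_name to a
# (dev, old_lvs, new_lvs) tuple; old_lvs are copies of the current children and
# new_lvs the freshly created data/meta volumes, and this mapping later drives
# the detach/rename/attach sequence as well as the removal of the old storage.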
11296 def _CheckDevices(self, node_name, iv_names):
11297 for name, (dev, _, _) in iv_names.iteritems():
11298 self.cfg.SetDiskID(dev, node_name)
11300 result = _BlockdevFind(self, node_name, dev, self.instance)
11302 msg = result.fail_msg
11303 if msg or not result.payload:
11305 msg = "disk not found"
11306 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11309 if result.payload.is_degraded:
11310 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11312 def _RemoveOldStorage(self, node_name, iv_names):
11313 for name, (_, old_lvs, _) in iv_names.iteritems():
11314 self.lu.LogInfo("Remove logical volumes for %s" % name)
11317 self.cfg.SetDiskID(lv, node_name)
11319 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11321 self.lu.LogWarning("Can't remove old LV: %s" % msg,
11322 hint="remove unused LVs manually")
11324 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11325 """Replace a disk on the primary or secondary for DRBD 8.
11327 The algorithm for replace is quite complicated:
11329 1. for each disk to be replaced:
11331 1. create new LVs on the target node with unique names
11332 1. detach old LVs from the drbd device
11333 1. rename old LVs to name_replaced.<time_t>
11334 1. rename new LVs to old LVs
11335 1. attach the new LVs (with the old names now) to the drbd device
11337 1. wait for sync across all devices
11339 1. for each modified disk:
11341 1. remove old LVs (which have the name name_replaced.<time_t>)
11343 Failures are not very well handled.
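# Rough sketch of the per-disk rename dance performed below (added comment; the
# concrete LV names are generated at runtime):
#   1. the existing data/meta LVs are renamed with a "_replaced-<time_t>" suffix,
#   2. the newly created LVs are renamed to the original LV names,
#   3. the renamed new LVs are re-attached to the DRBD device,
# so after a successful sync only the "_replaced" LVs remain to be removed.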
11348 # Step: check device activation
11349 self.lu.LogStep(1, steps_total, "Check device existence")
11350 self._CheckDisksExistence([self.other_node, self.target_node])
11351 self._CheckVolumeGroup([self.target_node, self.other_node])
11353 # Step: check other node consistency
11354 self.lu.LogStep(2, steps_total, "Check peer consistency")
11355 self._CheckDisksConsistency(self.other_node,
11356 self.other_node == self.instance.primary_node,
11359 # Step: create new storage
11360 self.lu.LogStep(3, steps_total, "Allocate new storage")
11361 iv_names = self._CreateNewStorage(self.target_node)
11363 # Step: for each lv, detach+rename*2+attach
11364 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11365 for dev, old_lvs, new_lvs in iv_names.itervalues():
11366 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11368 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11370 result.Raise("Can't detach drbd from local storage on node"
11371 " %s for device %s" % (self.target_node, dev.iv_name))
11373 #cfg.Update(instance)
11375 # ok, we created the new LVs, so now we know we have the needed
11376 # storage; as such, we proceed on the target node to rename
11377 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11378 # using the assumption that logical_id == physical_id (which in
11379 # turn is the unique_id on that node)
11381 # FIXME(iustin): use a better name for the replaced LVs
11382 temp_suffix = int(time.time())
11383 ren_fn = lambda d, suff: (d.physical_id[0],
11384 d.physical_id[1] + "_replaced-%s" % suff)
11386 # Build the rename list based on what LVs exist on the node
11387 rename_old_to_new = []
11388 for to_ren in old_lvs:
11389 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11390 if not result.fail_msg and result.payload:
11392 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11394 self.lu.LogInfo("Renaming the old LVs on the target node")
11395 result = self.rpc.call_blockdev_rename(self.target_node,
11397 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11399 # Now we rename the new LVs to the old LVs
11400 self.lu.LogInfo("Renaming the new LVs on the target node")
11401 rename_new_to_old = [(new, old.physical_id)
11402 for old, new in zip(old_lvs, new_lvs)]
11403 result = self.rpc.call_blockdev_rename(self.target_node,
11405 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11407 # Intermediate steps of in memory modifications
11408 for old, new in zip(old_lvs, new_lvs):
11409 new.logical_id = old.logical_id
11410 self.cfg.SetDiskID(new, self.target_node)
11412 # We need to modify old_lvs so that removal later removes the
11413 # right LVs, not the newly added ones; note that old_lvs is a copy here
11415 for disk in old_lvs:
11416 disk.logical_id = ren_fn(disk, temp_suffix)
11417 self.cfg.SetDiskID(disk, self.target_node)
11419 # Now that the new lvs have the old name, we can add them to the device
11420 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11421 result = self.rpc.call_blockdev_addchildren(self.target_node,
11422 (dev, self.instance), new_lvs)
11423 msg = result.fail_msg
11425 for new_lv in new_lvs:
11426 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11429 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11430 hint=("cleanup manually the unused logical"
11432 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11434 cstep = itertools.count(5)
11436 if self.early_release:
11437 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11438 self._RemoveOldStorage(self.target_node, iv_names)
11439 # TODO: Check if releasing locks early still makes sense
11440 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11442 # Release all resource locks except those used by the instance
11443 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11444 keep=self.node_secondary_ip.keys())
11446 # Release all node locks while waiting for sync
11447 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11449 # TODO: Can the instance lock be downgraded here? Take the optional disk
11450 # shutdown in the caller into consideration.
11453 # This can fail as the old devices are degraded and _WaitForSync
11454 # does a combined result over all disks, so we don't check its return value
11455 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11456 _WaitForSync(self.lu, self.instance)
11458 # Check all devices manually
11459 self._CheckDevices(self.instance.primary_node, iv_names)
11461 # Step: remove old storage
11462 if not self.early_release:
11463 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11464 self._RemoveOldStorage(self.target_node, iv_names)
11466 def _ExecDrbd8Secondary(self, feedback_fn):
11467 """Replace the secondary node for DRBD 8.
11469 The algorithm for replace is quite complicated:
11470 - for all disks of the instance:
11471 - create new LVs on the new node with same names
11472 - shutdown the drbd device on the old secondary
11473 - disconnect the drbd network on the primary
11474 - create the drbd device on the new secondary
11475 - network attach the drbd on the primary, using an artifice:
11476 the drbd code for Attach() will connect to the network if it
11477 finds a device which is connected to the good local disks but
11478 not network enabled
11479 - wait for sync across all devices
11480 - remove all disks from the old secondary
11482 Failures are not very well handled.
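# Descriptive note (added): the key trick below is that the DRBD devices on the
# new secondary are first created with a logical_id lacking the network port
# (new_alone_id), so they come up standalone; only after the primary has been
# disconnected from the old peer and the configuration updated are both sides
# re-attached using the full new_net_id, letting drbd_attach_net reconnect them
# to the correct new peer.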
11487 pnode = self.instance.primary_node
11489 # Step: check device activation
11490 self.lu.LogStep(1, steps_total, "Check device existence")
11491 self._CheckDisksExistence([self.instance.primary_node])
11492 self._CheckVolumeGroup([self.instance.primary_node])
11494 # Step: check other node consistency
11495 self.lu.LogStep(2, steps_total, "Check peer consistency")
11496 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11498 # Step: create new storage
11499 self.lu.LogStep(3, steps_total, "Allocate new storage")
11500 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11501 for idx, dev in enumerate(disks):
11502 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11503 (self.new_node, idx))
11504 # we pass force_create=True to force LVM creation
11505 for new_lv in dev.children:
11506 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11507 True, _GetInstanceInfoText(self.instance), False)
11509 # Step 4: drbd minors and drbd setup changes
11510 # after this, we must manually remove the drbd minors on both the
11511 # error and the success paths
11512 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11513 minors = self.cfg.AllocateDRBDMinor([self.new_node
11514 for dev in self.instance.disks],
11515 self.instance.name)
11516 logging.debug("Allocated minors %r", minors)
11519 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11520 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11521 (self.new_node, idx))
11522 # create new devices on new_node; note that we create two IDs:
11523 # one without port, so the drbd will be activated without
11524 # networking information on the new node at this stage, and one
11525 # with network, for the later activation in step 4
11526 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11527 if self.instance.primary_node == o_node1:
11530 assert self.instance.primary_node == o_node2, "Three-node instance?"
11533 new_alone_id = (self.instance.primary_node, self.new_node, None,
11534 p_minor, new_minor, o_secret)
11535 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11536 p_minor, new_minor, o_secret)
11538 iv_names[idx] = (dev, dev.children, new_net_id)
11539 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11541 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11542 logical_id=new_alone_id,
11543 children=dev.children,
11546 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11549 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11551 _GetInstanceInfoText(self.instance), False)
11552 except errors.GenericError:
11553 self.cfg.ReleaseDRBDMinors(self.instance.name)
11556 # We have new devices, shutdown the drbd on the old secondary
11557 for idx, dev in enumerate(self.instance.disks):
11558 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11559 self.cfg.SetDiskID(dev, self.target_node)
11560 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11561 (dev, self.instance)).fail_msg
11563 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11564 "node: %s" % (idx, msg),
11565 hint=("Please cleanup this device manually as"
11566 " soon as possible"))
11568 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11569 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11570 self.instance.disks)[pnode]
11572 msg = result.fail_msg
11574 # detaches didn't succeed (unlikely)
11575 self.cfg.ReleaseDRBDMinors(self.instance.name)
11576 raise errors.OpExecError("Can't detach the disks from the network on"
11577 " old node: %s" % (msg,))
11579 # if we managed to detach at least one, we update all the disks of
11580 # the instance to point to the new secondary
11581 self.lu.LogInfo("Updating instance configuration")
11582 for dev, _, new_logical_id in iv_names.itervalues():
11583 dev.logical_id = new_logical_id
11584 self.cfg.SetDiskID(dev, self.instance.primary_node)
11586 self.cfg.Update(self.instance, feedback_fn)
11588 # Release all node locks (the configuration has been updated)
11589 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11591 # and now perform the drbd attach
11592 self.lu.LogInfo("Attaching primary drbds to new secondary"
11593 " (standalone => connected)")
11594 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11596 self.node_secondary_ip,
11597 (self.instance.disks, self.instance),
11598 self.instance.name,
11600 for to_node, to_result in result.items():
11601 msg = to_result.fail_msg
11603 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11605 hint=("please do a gnt-instance info to see the"
11606 " status of disks"))
11608 cstep = itertools.count(5)
11610 if self.early_release:
11611 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11612 self._RemoveOldStorage(self.target_node, iv_names)
11613 # TODO: Check if releasing locks early still makes sense
11614 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11616 # Release all resource locks except those used by the instance
11617 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11618 keep=self.node_secondary_ip.keys())
11620 # TODO: Can the instance lock be downgraded here? Take the optional disk
11621 # shutdown in the caller into consideration.
11624 # This can fail as the old devices are degraded and _WaitForSync
11625 # does a combined result over all disks, so we don't check its return value
11626 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11627 _WaitForSync(self.lu, self.instance)
11629 # Check all devices manually
11630 self._CheckDevices(self.instance.primary_node, iv_names)
11632 # Step: remove old storage
11633 if not self.early_release:
11634 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11635 self._RemoveOldStorage(self.target_node, iv_names)
11638 class LURepairNodeStorage(NoHooksLU):
11639 """Repairs the volume group on a node.
11644 def CheckArguments(self):
11645 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11647 storage_type = self.op.storage_type
11649 if (constants.SO_FIX_CONSISTENCY not in
11650 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11651 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11652 " repaired" % storage_type,
11653 errors.ECODE_INVAL)
11655 def ExpandNames(self):
11656 self.needed_locks = {
11657 locking.LEVEL_NODE: [self.op.node_name],
11660 def _CheckFaultyDisks(self, instance, node_name):
11661 """Ensure faulty disks abort the opcode or at least warn."""
11663 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11665 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11666 " node '%s'" % (instance.name, node_name),
11667 errors.ECODE_STATE)
11668 except errors.OpPrereqError, err:
11669 if self.op.ignore_consistency:
11670 self.proc.LogWarning(str(err.args[0]))
11674 def CheckPrereq(self):
11675 """Check prerequisites.
11678 # Check whether any instance on this node has faulty disks
11679 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11680 if inst.admin_state != constants.ADMINST_UP:
11682 check_nodes = set(inst.all_nodes)
11683 check_nodes.discard(self.op.node_name)
11684 for inst_node_name in check_nodes:
11685 self._CheckFaultyDisks(inst, inst_node_name)
11687 def Exec(self, feedback_fn):
11688 feedback_fn("Repairing storage unit '%s' on %s ..." %
11689 (self.op.name, self.op.node_name))
11691 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11692 result = self.rpc.call_storage_execute(self.op.node_name,
11693 self.op.storage_type, st_args,
11695 constants.SO_FIX_CONSISTENCY)
11696 result.Raise("Failed to repair storage unit '%s' on %s" %
11697 (self.op.name, self.op.node_name))
11700 class LUNodeEvacuate(NoHooksLU):
11701 """Evacuates instances off a list of nodes.
11706 _MODE2IALLOCATOR = {
11707 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11708 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11709 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11711 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11712 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11713 constants.IALLOCATOR_NEVAC_MODES)
11715 def CheckArguments(self):
11716 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11718 def ExpandNames(self):
11719 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11721 if self.op.remote_node is not None:
11722 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11723 assert self.op.remote_node
11725 if self.op.remote_node == self.op.node_name:
11726 raise errors.OpPrereqError("Can not use evacuated node as a new"
11727 " secondary node", errors.ECODE_INVAL)
11729 if self.op.mode != constants.NODE_EVAC_SEC:
11730 raise errors.OpPrereqError("Without the use of an iallocator only"
11731 " secondary instances can be evacuated",
11732 errors.ECODE_INVAL)
11735 self.share_locks = _ShareAll()
11736 self.needed_locks = {
11737 locking.LEVEL_INSTANCE: [],
11738 locking.LEVEL_NODEGROUP: [],
11739 locking.LEVEL_NODE: [],
11742 # Determine nodes (via group) optimistically, needs verification once locks
11743 # have been acquired
11744 self.lock_nodes = self._DetermineNodes()
11746 def _DetermineNodes(self):
11747 """Gets the list of nodes to operate on.
11750 if self.op.remote_node is None:
11751 # Iallocator will choose any node(s) in the same group
11752 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11754 group_nodes = frozenset([self.op.remote_node])
11756 # Determine nodes to be locked
11757 return set([self.op.node_name]) | group_nodes
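# Example (added, hypothetical node names): evacuating "node1" without an
# explicit remote node returns node1 plus every member of node1's node group(s),
# since the iallocator may pick any of them as a target; with
# remote_node="node9" only {"node1", "node9"} needs to be locked.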
11759 def _DetermineInstances(self):
11760 """Builds list of instances to operate on.
11763 assert self.op.mode in constants.NODE_EVAC_MODES
11765 if self.op.mode == constants.NODE_EVAC_PRI:
11766 # Primary instances only
11767 inst_fn = _GetNodePrimaryInstances
11768 assert self.op.remote_node is None, \
11769 "Evacuating primary instances requires iallocator"
11770 elif self.op.mode == constants.NODE_EVAC_SEC:
11771 # Secondary instances only
11772 inst_fn = _GetNodeSecondaryInstances
11775 assert self.op.mode == constants.NODE_EVAC_ALL
11776 inst_fn = _GetNodeInstances
11777 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11779 raise errors.OpPrereqError("Due to an issue with the iallocator"
11780 " interface it is not possible to evacuate"
11781 " all instances at once; specify explicitly"
11782 " whether to evacuate primary or secondary"
11784 errors.ECODE_INVAL)
11786 return inst_fn(self.cfg, self.op.node_name)
11788 def DeclareLocks(self, level):
11789 if level == locking.LEVEL_INSTANCE:
11790 # Lock instances optimistically, needs verification once node and group
11791 # locks have been acquired
11792 self.needed_locks[locking.LEVEL_INSTANCE] = \
11793 set(i.name for i in self._DetermineInstances())
11795 elif level == locking.LEVEL_NODEGROUP:
11796 # Lock node groups for all potential target nodes optimistically, needs
11797 # verification once nodes have been acquired
11798 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11799 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11801 elif level == locking.LEVEL_NODE:
11802 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11804 def CheckPrereq(self):
11806 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11807 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11808 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11810 need_nodes = self._DetermineNodes()
11812 if not owned_nodes.issuperset(need_nodes):
11813 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11814 " locks were acquired, current nodes are"
11815 " are '%s', used to be '%s'; retry the"
11817 (self.op.node_name,
11818 utils.CommaJoin(need_nodes),
11819 utils.CommaJoin(owned_nodes)),
11820 errors.ECODE_STATE)
11822 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11823 if owned_groups != wanted_groups:
11824 raise errors.OpExecError("Node groups changed since locks were acquired,"
11825 " current groups are '%s', used to be '%s';"
11826 " retry the operation" %
11827 (utils.CommaJoin(wanted_groups),
11828 utils.CommaJoin(owned_groups)))
11830 # Determine affected instances
11831 self.instances = self._DetermineInstances()
11832 self.instance_names = [i.name for i in self.instances]
11834 if set(self.instance_names) != owned_instances:
11835 raise errors.OpExecError("Instances on node '%s' changed since locks"
11836 " were acquired, current instances are '%s',"
11837 " used to be '%s'; retry the operation" %
11838 (self.op.node_name,
11839 utils.CommaJoin(self.instance_names),
11840 utils.CommaJoin(owned_instances)))
11842 if self.instance_names:
11843 self.LogInfo("Evacuating instances from node '%s': %s",
11845 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11847 self.LogInfo("No instances to evacuate from node '%s'",
11850 if self.op.remote_node is not None:
11851 for i in self.instances:
11852 if i.primary_node == self.op.remote_node:
11853 raise errors.OpPrereqError("Node %s is the primary node of"
11854 " instance %s, cannot use it as"
11856 (self.op.remote_node, i.name),
11857 errors.ECODE_INVAL)
11859 def Exec(self, feedback_fn):
11860 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11862 if not self.instance_names:
11863 # No instances to evacuate
11866 elif self.op.iallocator is not None:
11867 # TODO: Implement relocation to other group
11868 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
11869 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
11870 instances=list(self.instance_names))
11871 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11873 ial.Run(self.op.iallocator)
11875 if not ial.success:
11876 raise errors.OpPrereqError("Can't compute node evacuation using"
11877 " iallocator '%s': %s" %
11878 (self.op.iallocator, ial.info),
11879 errors.ECODE_NORES)
11881 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11883 elif self.op.remote_node is not None:
11884 assert self.op.mode == constants.NODE_EVAC_SEC
11886 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11887 remote_node=self.op.remote_node,
11889 mode=constants.REPLACE_DISK_CHG,
11890 early_release=self.op.early_release)]
11891 for instance_name in self.instance_names
11895 raise errors.ProgrammerError("No iallocator or remote node")
11897 return ResultWithJobs(jobs)
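# Descriptive note (added): in the remote_node branch above, "jobs" is one
# single-opcode job per evacuated instance, i.e. a list of lists such as
# (hypothetically) [[OpInstanceReplaceDisks(instance_name="inst1", ...)],
# [OpInstanceReplaceDisks(instance_name="inst2", ...)]]; the iallocator branch
# obtains the same shape from _LoadNodeEvacResult instead.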
11900 def _SetOpEarlyRelease(early_release, op):
11901 """Sets C{early_release} flag on opcodes if available.
11905 op.early_release = early_release
11906 except AttributeError:
11907 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11912 def _NodeEvacDest(use_nodes, group, nodes):
11913 """Returns group or nodes depending on caller's choice.
11917 return utils.CommaJoin(nodes)
11922 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11923 """Unpacks the result of change-group and node-evacuate iallocator requests.
11925 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11926 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11928 @type lu: L{LogicalUnit}
11929 @param lu: Logical unit instance
11930 @type alloc_result: tuple/list
11931 @param alloc_result: Result from iallocator
11932 @type early_release: bool
11933 @param early_release: Whether to release locks early if possible
11934 @type use_nodes: bool
11935 @param use_nodes: Whether to display node names instead of groups
11938 (moved, failed, jobs) = alloc_result
11941 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11942 for (name, reason) in failed)
11943 lu.LogWarning("Unable to evacuate instances %s", failreason)
11944 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11947 lu.LogInfo("Instances to be moved: %s",
11948 utils.CommaJoin("%s (to %s)" %
11949 (name, _NodeEvacDest(use_nodes, group, nodes))
11950 for (name, group, nodes) in moved))
11952 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11953 map(opcodes.OpCode.LoadOpCode, ops))
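# Worked example (added, hypothetical data): with
#   alloc_result = ([("inst1", "group1", ["nodeA"])], [], [[serialized_op]])
# nothing has failed, "inst1 (to nodeA)" is logged (use_nodes=True), and the
# return value is a single job containing the opcode rebuilt via
# opcodes.OpCode.LoadOpCode with the requested early_release flag applied.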
11957 def _DiskSizeInBytesToMebibytes(lu, size):
11958 """Converts a disk size in bytes to mebibytes.
11960 Warns and rounds up if the size isn't an even multiple of 1 MiB.
11963 (mib, remainder) = divmod(size, 1024 * 1024)
11966 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
11967 " to not overwrite existing data (%s bytes will not be"
11968 " wiped)", (1024 * 1024) - remainder)
11974 class LUInstanceGrowDisk(LogicalUnit):
11975 """Grow a disk of an instance.
11978 HPATH = "disk-grow"
11979 HTYPE = constants.HTYPE_INSTANCE
11982 def ExpandNames(self):
11983 self._ExpandAndLockInstance()
11984 self.needed_locks[locking.LEVEL_NODE] = []
11985 self.needed_locks[locking.LEVEL_NODE_RES] = []
11986 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11987 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11989 def DeclareLocks(self, level):
11990 if level == locking.LEVEL_NODE:
11991 self._LockInstancesNodes()
11992 elif level == locking.LEVEL_NODE_RES:
11994 self.needed_locks[locking.LEVEL_NODE_RES] = \
11995 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
11997 def BuildHooksEnv(self):
11998 """Build hooks env.
12000 This runs on the master, the primary and all the secondaries.
12004 "DISK": self.op.disk,
12005 "AMOUNT": self.op.amount,
12006 "ABSOLUTE": self.op.absolute,
12008 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12011 def BuildHooksNodes(self):
12012 """Build hooks nodes.
12015 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12018 def CheckPrereq(self):
12019 """Check prerequisites.
12021 This checks that the instance is in the cluster.
12024 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12025 assert instance is not None, \
12026 "Cannot retrieve locked instance %s" % self.op.instance_name
12027 nodenames = list(instance.all_nodes)
12028 for node in nodenames:
12029 _CheckNodeOnline(self, node)
12031 self.instance = instance
12033 if instance.disk_template not in constants.DTS_GROWABLE:
12034 raise errors.OpPrereqError("Instance's disk layout does not support"
12035 " growing", errors.ECODE_INVAL)
12037 self.disk = instance.FindDisk(self.op.disk)
12039 if self.op.absolute:
12040 self.target = self.op.amount
12041 self.delta = self.target - self.disk.size
12043 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12044 "current disk size (%s)" %
12045 (utils.FormatUnit(self.target, "h"),
12046 utils.FormatUnit(self.disk.size, "h")),
12047 errors.ECODE_STATE)
12049 self.delta = self.op.amount
12050 self.target = self.disk.size + self.delta
12052 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12053 utils.FormatUnit(self.delta, "h"),
12054 errors.ECODE_INVAL)
12056 if instance.disk_template not in (constants.DT_FILE,
12057 constants.DT_SHARED_FILE,
12059 # TODO: check the free disk space for file, when that feature will be implemented
12061 _CheckNodesFreeDiskPerVG(self, nodenames,
12062 self.disk.ComputeGrowth(self.delta))
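# Example of the two growth modes handled above (added, hypothetical numbers):
# for a 10240 MiB disk, an absolute request of amount=12288 yields delta=2048
# and target=12288, while a relative request of amount=2048 yields the same
# delta and target; a shrink (absolute target below the current size) or a
# negative increment is rejected with an OpPrereqError.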
12064 def Exec(self, feedback_fn):
12065 """Execute disk grow.
12068 instance = self.instance
12071 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12072 assert (self.owned_locks(locking.LEVEL_NODE) ==
12073 self.owned_locks(locking.LEVEL_NODE_RES))
12075 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12077 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12079 raise errors.OpExecError("Cannot activate block device to grow")
12081 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12082 (self.op.disk, instance.name,
12083 utils.FormatUnit(self.delta, "h"),
12084 utils.FormatUnit(self.target, "h")))
12086 # First run all grow ops in dry-run mode
12087 for node in instance.all_nodes:
12088 self.cfg.SetDiskID(disk, node)
12089 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12091 result.Raise("Dry-run grow request failed to node %s" % node)
12094 # Get disk size from primary node for wiping
12095 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12096 result.Raise("Failed to retrieve disk size from node '%s'" %
12097 instance.primary_node)
12099 (disk_size_in_bytes, ) = result.payload
12101 if disk_size_in_bytes is None:
12102 raise errors.OpExecError("Failed to retrieve disk size from primary"
12103 " node '%s'" % instance.primary_node)
12105 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12107 assert old_disk_size >= disk.size, \
12108 ("Retrieved disk size too small (got %s, should be at least %s)" %
12109 (old_disk_size, disk.size))
12111 old_disk_size = None
12113 # We know that (as far as we can test) operations across different
12114 # nodes will succeed, time to run it for real on the backing storage
12115 for node in instance.all_nodes:
12116 self.cfg.SetDiskID(disk, node)
12117 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12119 result.Raise("Grow request failed to node %s" % node)
12121 # And now execute it for logical storage, on the primary node
12122 node = instance.primary_node
12123 self.cfg.SetDiskID(disk, node)
12124 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12126 result.Raise("Grow request failed to node %s" % node)
12128 disk.RecordGrow(self.delta)
12129 self.cfg.Update(instance, feedback_fn)
12131 # Changes have been recorded, release node lock
12132 _ReleaseLocks(self, locking.LEVEL_NODE)
12134 # Downgrade lock while waiting for sync
12135 self.glm.downgrade(locking.LEVEL_INSTANCE)
12137 assert wipe_disks ^ (old_disk_size is None)
12140 assert instance.disks[self.op.disk] == disk
12142 # Wipe newly added disk space
12143 _WipeDisks(self, instance,
12144 disks=[(self.op.disk, disk, old_disk_size)])
12146 if self.op.wait_for_sync:
12147 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12149 self.proc.LogWarning("Disk sync-ing has not returned a good"
12150 " status; please check the instance")
12151 if instance.admin_state != constants.ADMINST_UP:
12152 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12153 elif instance.admin_state != constants.ADMINST_UP:
12154 self.proc.LogWarning("Not shutting down the disk even if the instance is"
12155 " not supposed to be running because no wait for"
12156 " sync mode was requested")
12158 assert self.owned_locks(locking.LEVEL_NODE_RES)
12159 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12162 class LUInstanceQueryData(NoHooksLU):
12163 """Query runtime instance data.
12168 def ExpandNames(self):
12169 self.needed_locks = {}
12171 # Use locking if requested or when non-static information is wanted
12172 if not (self.op.static or self.op.use_locking):
12173 self.LogWarning("Non-static data requested, locks need to be acquired")
12174 self.op.use_locking = True
12176 if self.op.instances or not self.op.use_locking:
12177 # Expand instance names right here
12178 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12180 # Will use acquired locks
12181 self.wanted_names = None
12183 if self.op.use_locking:
12184 self.share_locks = _ShareAll()
12186 if self.wanted_names is None:
12187 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12189 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12191 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12192 self.needed_locks[locking.LEVEL_NODE] = []
12193 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12195 def DeclareLocks(self, level):
12196 if self.op.use_locking:
12197 if level == locking.LEVEL_NODEGROUP:
12198 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12200 # Lock all groups used by instances optimistically; this requires going
12201 # via the node before it's locked, requiring verification later on
12202 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12203 frozenset(group_uuid
12204 for instance_name in owned_instances
12206 self.cfg.GetInstanceNodeGroups(instance_name))
12208 elif level == locking.LEVEL_NODE:
12209 self._LockInstancesNodes()
12211 def CheckPrereq(self):
12212 """Check prerequisites.
12214 This only checks the optional instance list against the existing names.
12217 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12218 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12219 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12221 if self.wanted_names is None:
12222 assert self.op.use_locking, "Locking was not used"
12223 self.wanted_names = owned_instances
12225 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12227 if self.op.use_locking:
12228 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12231 assert not (owned_instances or owned_groups or owned_nodes)
12233 self.wanted_instances = instances.values()
12235 def _ComputeBlockdevStatus(self, node, instance, dev):
12236 """Returns the status of a block device
12239 if self.op.static or not node:
12242 self.cfg.SetDiskID(dev, node)
12244 result = self.rpc.call_blockdev_find(node, dev)
12248 result.Raise("Can't compute disk status for %s" % instance.name)
12250 status = result.payload
12254 return (status.dev_path, status.major, status.minor,
12255 status.sync_percent, status.estimated_time,
12256 status.is_degraded, status.ldisk_status)
12258 def _ComputeDiskStatus(self, instance, snode, dev):
12259 """Compute block device status.
12262 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12264 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12266 def _ComputeDiskStatusInner(self, instance, snode, dev):
12267 """Compute block device status.
12269 @attention: The device has to be annotated already.
12272 if dev.dev_type in constants.LDS_DRBD:
12273 # we change the snode then (otherwise we use the one passed in)
12274 if dev.logical_id[0] == instance.primary_node:
12275 snode = dev.logical_id[1]
12277 snode = dev.logical_id[0]
12279 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12281 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12284 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12291 "iv_name": dev.iv_name,
12292 "dev_type": dev.dev_type,
12293 "logical_id": dev.logical_id,
12294 "physical_id": dev.physical_id,
12295 "pstatus": dev_pstatus,
12296 "sstatus": dev_sstatus,
12297 "children": dev_children,
12302 def Exec(self, feedback_fn):
12303 """Gather and return data"""
12306 cluster = self.cfg.GetClusterInfo()
12308 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12309 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12311 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12312 for node in nodes.values()))
12314 group2name_fn = lambda uuid: groups[uuid].name
12316 for instance in self.wanted_instances:
12317 pnode = nodes[instance.primary_node]
12319 if self.op.static or pnode.offline:
12320 remote_state = None
12322 self.LogWarning("Primary node %s is marked offline, returning static"
12323 " information only for instance %s" %
12324 (pnode.name, instance.name))
12326 remote_info = self.rpc.call_instance_info(instance.primary_node,
12328 instance.hypervisor)
12329 remote_info.Raise("Error checking node %s" % instance.primary_node)
12330 remote_info = remote_info.payload
12331 if remote_info and "state" in remote_info:
12332 remote_state = "up"
12334 if instance.admin_state == constants.ADMINST_UP:
12335 remote_state = "down"
12337 remote_state = instance.admin_state
12339 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12342 snodes_group_uuids = [nodes[snode_name].group
12343 for snode_name in instance.secondary_nodes]
12345 result[instance.name] = {
12346 "name": instance.name,
12347 "config_state": instance.admin_state,
12348 "run_state": remote_state,
12349 "pnode": instance.primary_node,
12350 "pnode_group_uuid": pnode.group,
12351 "pnode_group_name": group2name_fn(pnode.group),
12352 "snodes": instance.secondary_nodes,
12353 "snodes_group_uuids": snodes_group_uuids,
12354 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12356 # this happens to be the same format used for hooks
12357 "nics": _NICListToTuple(self, instance.nics),
12358 "disk_template": instance.disk_template,
12360 "hypervisor": instance.hypervisor,
12361 "network_port": instance.network_port,
12362 "hv_instance": instance.hvparams,
12363 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12364 "be_instance": instance.beparams,
12365 "be_actual": cluster.FillBE(instance),
12366 "os_instance": instance.osparams,
12367 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12368 "serial_no": instance.serial_no,
12369 "mtime": instance.mtime,
12370 "ctime": instance.ctime,
12371 "uuid": instance.uuid,
12377 def PrepareContainerMods(mods, private_fn):
12378 """Prepares a list of container modifications by adding a private data field.
12380 @type mods: list of tuples; (operation, index, parameters)
12381 @param mods: List of modifications
12382 @type private_fn: callable or None
12383 @param private_fn: Callable for constructing a private data field for a
12388 if private_fn is None:
12393 return [(op, idx, params, fn()) for (op, idx, params) in mods]
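# Example (added, hypothetical values): with
#   mods = [(constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024})]
# and private_fn=None, each tuple simply gains a trailing private slot
# (presumably None in that case), giving
#   [(constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024}, None)];
# LUInstanceSetParams passes _InstNicModPrivate for NIC modifications so every
# NIC entry carries its own private data object instead.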
12396 #: Type description for changes as returned by L{ApplyContainerMods}'s
12398 _TApplyContModsCbChanges = \
12399 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12400 ht.TNonEmptyString,
12405 def ApplyContainerMods(kind, container, chgdesc, mods,
12406 create_fn, modify_fn, remove_fn):
12407 """Applies descriptions in C{mods} to C{container}.
12410 @param kind: One-word item description
12411 @type container: list
12412 @param container: Container to modify
12413 @type chgdesc: None or list
12414 @param chgdesc: List of applied changes
12416 @param mods: Modifications as returned by L{PrepareContainerMods}
12417 @type create_fn: callable
12418 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12419 receives absolute item index, parameters and private data object as added
12420 by L{PrepareContainerMods}, returns tuple containing new item and changes
12422 @type modify_fn: callable
12423 @param modify_fn: Callback for modifying an existing item
12424 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12425 and private data object as added by L{PrepareContainerMods}, returns
12427 @type remove_fn: callable
12428 @param remove_fn: Callback on removing item; receives absolute item index,
12429 item and private data object as added by L{PrepareContainerMods}
12432 for (op, idx, params, private) in mods:
12435 absidx = len(container) - 1
12437 raise IndexError("Not accepting negative indices other than -1")
12438 elif idx > len(container):
12439 raise IndexError("Got %s index %s, but there are only %s" %
12440 (kind, idx, len(container)))
12446 if op == constants.DDM_ADD:
12447 # Calculate where item will be added
12449 addidx = len(container)
12453 if create_fn is None:
12456 (item, changes) = create_fn(addidx, params, private)
12459 container.append(item)
12462 assert idx <= len(container)
12463 # list.insert does so before the specified index
12464 container.insert(idx, item)
12466 # Retrieve existing item
12468 item = container[absidx]
12470 raise IndexError("Invalid %s index %s" % (kind, idx))
12472 if op == constants.DDM_REMOVE:
12475 if remove_fn is not None:
12476 remove_fn(absidx, item, private)
12478 changes = [("%s/%s" % (kind, absidx), "remove")]
12480 assert container[absidx] == item
12481 del container[absidx]
12482 elif op == constants.DDM_MODIFY:
12483 if modify_fn is not None:
12484 changes = modify_fn(absidx, item, params, private)
12486 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12488 assert _TApplyContModsCbChanges(changes)
12490 if not (chgdesc is None or changes is None):
12491 chgdesc.extend(changes)
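# Example flow (added, hypothetical modifications): a
#   (constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024})
# entry appends the item returned by create_fn and records its changes, while a
#   (constants.DDM_REMOVE, 0, {})
# entry calls remove_fn for the item at index 0, deletes it from the container
# and appends ("disk/0", "remove") to chgdesc (for kind == "disk").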
12494 def _UpdateIvNames(base_index, disks):
12495 """Updates the C{iv_name} attribute of disks.
12497 @type disks: list of L{objects.Disk}
12500 for (idx, disk) in enumerate(disks):
12501 disk.iv_name = "disk/%s" % (base_index + idx, )
12504 class _InstNicModPrivate:
12505 """Data structure for network interface modifications.
12507 Used by L{LUInstanceSetParams}.
12510 def __init__(self):
12515 class LUInstanceSetParams(LogicalUnit):
12516 """Modifies an instances's parameters.
12519 HPATH = "instance-modify"
12520 HTYPE = constants.HTYPE_INSTANCE
12524 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12525 assert ht.TList(mods)
12526 assert not mods or len(mods[0]) in (2, 3)
12528 if mods and len(mods[0]) == 2:
12532 for op, params in mods:
12533 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12534 result.append((op, -1, params))
12538 raise errors.OpPrereqError("Only one %s add or remove operation is"
12539 " supported at a time" % kind,
12540 errors.ECODE_INVAL)
12542 result.append((constants.DDM_MODIFY, op, params))
12544 assert verify_fn(result)
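# Example of the legacy-format upgrade above (added, hypothetical input): an old
# two-element entry (constants.DDM_ADD, {constants.IDISK_SIZE: 1024}) becomes
# (constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024}), while an old
# (index, params) pair such as (0, {constants.IDISK_MODE: "ro"}) is turned into
# (constants.DDM_MODIFY, 0, {constants.IDISK_MODE: "ro"}).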
12551 def _CheckMods(kind, mods, key_types, item_fn):
12552 """Ensures requested disk/NIC modifications are valid.
12555 for (op, _, params) in mods:
12556 assert ht.TDict(params)
12558 utils.ForceDictType(params, key_types)
12560 if op == constants.DDM_REMOVE:
12562 raise errors.OpPrereqError("No settings should be passed when"
12563 " removing a %s" % kind,
12564 errors.ECODE_INVAL)
12565 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12566 item_fn(op, params)
12568 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12571 def _VerifyDiskModification(op, params):
12572 """Verifies a disk modification.
12575 if op == constants.DDM_ADD:
12576 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12577 if mode not in constants.DISK_ACCESS_SET:
12578 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12579 errors.ECODE_INVAL)
12581 size = params.get(constants.IDISK_SIZE, None)
12583 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12584 constants.IDISK_SIZE, errors.ECODE_INVAL)
12588 except (TypeError, ValueError), err:
12589 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12590 errors.ECODE_INVAL)
12592 params[constants.IDISK_SIZE] = size
12594 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12595 raise errors.OpPrereqError("Disk size change not possible, use"
12596 " grow-disk", errors.ECODE_INVAL)
12599 def _VerifyNicModification(op, params):
12600 """Verifies a network interface modification.
12603 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12604 ip = params.get(constants.INIC_IP, None)
12607 elif ip.lower() == constants.VALUE_NONE:
12608 params[constants.INIC_IP] = None
12609 elif not netutils.IPAddress.IsValid(ip):
12610 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12611 errors.ECODE_INVAL)
12613 bridge = params.get("bridge", None)
12614 link = params.get(constants.INIC_LINK, None)
12615 if bridge and link:
12616 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12617 " at the same time", errors.ECODE_INVAL)
12618 elif bridge and bridge.lower() == constants.VALUE_NONE:
12619 params["bridge"] = None
12620 elif link and link.lower() == constants.VALUE_NONE:
12621 params[constants.INIC_LINK] = None
12623 if op == constants.DDM_ADD:
12624 macaddr = params.get(constants.INIC_MAC, None)
12625 if macaddr is None:
12626 params[constants.INIC_MAC] = constants.VALUE_AUTO
12628 if constants.INIC_MAC in params:
12629 macaddr = params[constants.INIC_MAC]
12630 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12631 macaddr = utils.NormalizeAndValidateMac(macaddr)
12633 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12634 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12635 " modifying an existing NIC",
12636 errors.ECODE_INVAL)
12638 def CheckArguments(self):
12639 if not (self.op.nics or self.op.disks or self.op.disk_template or
12640 self.op.hvparams or self.op.beparams or self.op.os_name or
12641 self.op.offline is not None or self.op.runtime_mem):
12642 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12644 if self.op.hvparams:
12645 _CheckGlobalHvParams(self.op.hvparams)
12647 self.op.disks = self._UpgradeDiskNicMods(
12648 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12649 self.op.nics = self._UpgradeDiskNicMods(
12650 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12652 # Check disk modifications
12653 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12654 self._VerifyDiskModification)
12656 if self.op.disks and self.op.disk_template is not None:
12657 raise errors.OpPrereqError("Disk template conversion and other disk"
12658 " changes not supported at the same time",
12659 errors.ECODE_INVAL)
12661 if (self.op.disk_template and
12662 self.op.disk_template in constants.DTS_INT_MIRROR and
12663 self.op.remote_node is None):
12664 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12665 " one requires specifying a secondary node",
12666 errors.ECODE_INVAL)
12668 # Check NIC modifications
12669 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12670 self._VerifyNicModification)
12672 def ExpandNames(self):
12673 self._ExpandAndLockInstance()
12674 # Can't even acquire node locks in shared mode as upcoming changes in
12675 # Ganeti 2.6 will start to modify the node object on disk conversion
12676 self.needed_locks[locking.LEVEL_NODE] = []
12677 self.needed_locks[locking.LEVEL_NODE_RES] = []
12678 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12680 def DeclareLocks(self, level):
12681 # TODO: Acquire group lock in shared mode (disk parameters)
12682 if level == locking.LEVEL_NODE:
12683 self._LockInstancesNodes()
12684 if self.op.disk_template and self.op.remote_node:
12685 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12686 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12687 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12689 self.needed_locks[locking.LEVEL_NODE_RES] = \
12690 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12692 def BuildHooksEnv(self):
12693 """Build hooks env.
12695 This runs on the master, primary and secondaries.
12699 if constants.BE_MINMEM in self.be_new:
12700 args["minmem"] = self.be_new[constants.BE_MINMEM]
12701 if constants.BE_MAXMEM in self.be_new:
12702 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12703 if constants.BE_VCPUS in self.be_new:
12704 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12705 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12706 # information at all.
12708 if self._new_nics is not None:
12711 for nic in self._new_nics:
12712 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12713 mode = nicparams[constants.NIC_MODE]
12714 link = nicparams[constants.NIC_LINK]
12715 nics.append((nic.ip, nic.mac, mode, link))
12717 args["nics"] = nics
12719 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12720 if self.op.disk_template:
12721 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12722 if self.op.runtime_mem:
12723 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12727 def BuildHooksNodes(self):
12728 """Build hooks nodes.
12731 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12734 def _PrepareNicModification(self, params, private, old_ip, old_params,
12736 update_params_dict = dict([(key, params[key])
12737 for key in constants.NICS_PARAMETERS
12740 if "bridge" in params:
12741 update_params_dict[constants.NIC_LINK] = params["bridge"]
12743 new_params = _GetUpdatedParams(old_params, update_params_dict)
12744 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12746 new_filled_params = cluster.SimpleFillNIC(new_params)
12747 objects.NIC.CheckParameterSyntax(new_filled_params)
12749 new_mode = new_filled_params[constants.NIC_MODE]
12750 if new_mode == constants.NIC_MODE_BRIDGED:
12751 bridge = new_filled_params[constants.NIC_LINK]
12752 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12754 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12756 self.warn.append(msg)
12758 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12760 elif new_mode == constants.NIC_MODE_ROUTED:
12761 ip = params.get(constants.INIC_IP, old_ip)
12763 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12764 " on a routed NIC", errors.ECODE_INVAL)
12766 if constants.INIC_MAC in params:
12767 mac = params[constants.INIC_MAC]
12769 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12770 errors.ECODE_INVAL)
12771 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12772 # otherwise generate the MAC address
12773 params[constants.INIC_MAC] = \
12774 self.cfg.GenerateMAC(self.proc.GetECId())
12776 # or validate/reserve the current one
12778 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12779 except errors.ReservationError:
12780 raise errors.OpPrereqError("MAC address '%s' already in use"
12781 " in cluster" % mac,
12782 errors.ECODE_NOTUNIQUE)
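# Remember the validated parameters on the private container; they are
# picked up later by _CreateNewNic/_ApplyNicMods when the NIC objects are
# actually built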
12784 private.params = new_params
12785 private.filled = new_filled_params
12787 def CheckPrereq(self):
12788 """Check prerequisites.
12790 This only checks the instance list against the existing names.
12793 # checking the new params on the primary/secondary nodes
12795 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12796 cluster = self.cluster = self.cfg.GetClusterInfo()
12797 assert self.instance is not None, \
12798 "Cannot retrieve locked instance %s" % self.op.instance_name
12799 pnode = instance.primary_node
12800 nodelist = list(instance.all_nodes)
12801 pnode_info = self.cfg.GetNodeInfo(pnode)
12802 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12804 # Prepare disk/NIC modifications
12805 self.diskmod = PrepareContainerMods(self.op.disks, None)
12806 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12809 if self.op.os_name and not self.op.force:
12810 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12811 self.op.force_variant)
12812 instance_os = self.op.os_name
12814 instance_os = instance.os
12816 assert not (self.op.disk_template and self.op.disks), \
12817 "Can't modify disk template and apply disk changes at the same time"
12819 if self.op.disk_template:
12820 if instance.disk_template == self.op.disk_template:
12821 raise errors.OpPrereqError("Instance already has disk template %s" %
12822 instance.disk_template, errors.ECODE_INVAL)
12824 if (instance.disk_template,
12825 self.op.disk_template) not in self._DISK_CONVERSIONS:
12826 raise errors.OpPrereqError("Unsupported disk template conversion from"
12827 " %s to %s" % (instance.disk_template,
12828 self.op.disk_template),
12829 errors.ECODE_INVAL)
12830 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12831 msg="cannot change disk template")
12832 if self.op.disk_template in constants.DTS_INT_MIRROR:
12833 if self.op.remote_node == pnode:
12834 raise errors.OpPrereqError("Given new secondary node %s is the same"
12835 " as the primary node of the instance" %
12836 self.op.remote_node, errors.ECODE_STATE)
12837 _CheckNodeOnline(self, self.op.remote_node)
12838 _CheckNodeNotDrained(self, self.op.remote_node)
12839 # FIXME: here we assume that the old disk template is DT_PLAIN
12840 assert instance.disk_template == constants.DT_PLAIN
12841 disks = [{constants.IDISK_SIZE: d.size,
12842 constants.IDISK_VG: d.logical_id[0]}
12843 for d in instance.disks]
12844 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12845 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12847 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12848 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12849 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
12851 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12852 ignore=self.op.ignore_ipolicy)
12853 if pnode_info.group != snode_info.group:
12854 self.LogWarning("The primary and secondary nodes are in two"
12855 " different node groups; the disk parameters"
12856 " from the first disk's node group will be"
12859 # hvparams processing
12860 if self.op.hvparams:
12861 hv_type = instance.hypervisor
12862 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12863 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12864 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12867 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12868 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12869 self.hv_proposed = self.hv_new = hv_new # the new actual values
12870 self.hv_inst = i_hvdict # the new dict (without defaults)
12872 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12874 self.hv_new = self.hv_inst = {}
12876 # beparams processing
12877 if self.op.beparams:
12878 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12880 objects.UpgradeBeParams(i_bedict)
12881 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12882 be_new = cluster.SimpleFillBE(i_bedict)
12883 self.be_proposed = self.be_new = be_new # the new actual values
12884 self.be_inst = i_bedict # the new dict (without defaults)
12886 self.be_new = self.be_inst = {}
12887 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12888 be_old = cluster.FillBE(instance)
12890 # CPU param validation -- checking every time a parameter is
12891 # changed to cover all cases where either CPU mask or vcpus have
12893 if (constants.BE_VCPUS in self.be_proposed and
12894 constants.HV_CPU_MASK in self.hv_proposed):
12896 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12897 # Verify mask is consistent with number of vCPUs. Can skip this
12898 # test if only 1 entry in the CPU mask, which means same mask
12899 # is applied to all vCPUs.
12900 if (len(cpu_list) > 1 and
12901 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12902 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12904 (self.be_proposed[constants.BE_VCPUS],
12905 self.hv_proposed[constants.HV_CPU_MASK]),
12906 errors.ECODE_INVAL)
12908 # Only perform this test if a new CPU mask is given
12909 if constants.HV_CPU_MASK in self.hv_new:
12910 # Calculate the largest CPU number requested
12911 max_requested_cpu = max(map(max, cpu_list))
12912 # Check that all of the instance's nodes have enough physical CPUs to
12913 # satisfy the requested CPU mask
12914 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12915 max_requested_cpu + 1, instance.hypervisor)
12917 # osparams processing
12918 if self.op.osparams:
12919 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12920 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12921 self.os_inst = i_osdict # the new dict (without defaults)
12927 #TODO(dynmem): do the appropriate check involving MINMEM
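# Increasing the configured maximum memory can make the instance
# unstartable, so check free memory on the primary node (and, when
# auto_balance is set, on the secondaries) unless the operation is forced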
12928 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12929 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12930 mem_check_list = [pnode]
12931 if be_new[constants.BE_AUTO_BALANCE]:
12932 # either we changed auto_balance to yes or it was from before
12933 mem_check_list.extend(instance.secondary_nodes)
12934 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12935 instance.hypervisor)
12936 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12937 [instance.hypervisor])
12938 pninfo = nodeinfo[pnode]
12939 msg = pninfo.fail_msg
12941 # Assume the primary node is unreachable and go ahead
12942 self.warn.append("Can't get info from primary node %s: %s" %
12945 (_, _, (pnhvinfo, )) = pninfo.payload
12946 if not isinstance(pnhvinfo.get("memory_free", None), int):
12947 self.warn.append("Node data from primary node %s doesn't contain"
12948 " free memory information" % pnode)
12949 elif instance_info.fail_msg:
12950 self.warn.append("Can't get instance runtime information: %s" %
12951 instance_info.fail_msg)
12953 if instance_info.payload:
12954 current_mem = int(instance_info.payload["memory"])
12956 # Assume instance not running
12957 # (there is a slight race condition here, but it's not very
12958 # probable, and we have no other way to check)
12959 # TODO: Describe race condition
12961 #TODO(dynmem): do the appropriate check involving MINMEM
12962 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12963 pnhvinfo["memory_free"])
12965 raise errors.OpPrereqError("This change will prevent the instance"
12966 " from starting, due to %d MB of memory"
12967 " missing on its primary node" %
12968 miss_mem, errors.ECODE_NORES)
12970 if be_new[constants.BE_AUTO_BALANCE]:
12971 for node, nres in nodeinfo.items():
12972 if node not in instance.secondary_nodes:
12974 nres.Raise("Can't get info from secondary node %s" % node,
12975 prereq=True, ecode=errors.ECODE_STATE)
12976 (_, _, (nhvinfo, )) = nres.payload
12977 if not isinstance(nhvinfo.get("memory_free", None), int):
12978 raise errors.OpPrereqError("Secondary node %s didn't return free"
12979 " memory information" % node,
12980 errors.ECODE_STATE)
12981 #TODO(dynmem): do the appropriate check involving MINMEM
12982 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12983 raise errors.OpPrereqError("This change will prevent the instance"
12984 " from failover to its secondary node"
12985 " %s, due to not enough memory" % node,
12986 errors.ECODE_STATE)
12988 if self.op.runtime_mem:
12989 remote_info = self.rpc.call_instance_info(instance.primary_node,
12991 instance.hypervisor)
12992 remote_info.Raise("Error checking node %s" % instance.primary_node)
12993 if not remote_info.payload: # not running already
12994 raise errors.OpPrereqError("Instance %s is not running" %
12995 instance.name, errors.ECODE_STATE)
12997 current_memory = remote_info.payload["memory"]
12998 if (not self.op.force and
12999 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13000 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13001 raise errors.OpPrereqError("Instance %s must have memory between %d"
13002 " and %d MB of memory unless --force is"
13005 self.be_proposed[constants.BE_MINMEM],
13006 self.be_proposed[constants.BE_MAXMEM]),
13007 errors.ECODE_INVAL)
13009 delta = self.op.runtime_mem - current_memory
13011 _CheckNodeFreeMemory(self, instance.primary_node,
13012 "ballooning memory for instance %s" %
13013 instance.name, delta, instance.hypervisor)
13015 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13016 raise errors.OpPrereqError("Disk operations not supported for"
13017 " diskless instances", errors.ECODE_INVAL)
13019 def _PrepareNicCreate(_, params, private):
13020 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
13021 return (None, None)
13023 def _PrepareNicMod(_, nic, params, private):
13024 self._PrepareNicModification(params, private, nic.ip,
13025 nic.nicparams, cluster, pnode)
13028 # Verify NIC changes (operating on copy)
13029 nics = instance.nics[:]
13030 ApplyContainerMods("NIC", nics, None, self.nicmod,
13031 _PrepareNicCreate, _PrepareNicMod, None)
13032 if len(nics) > constants.MAX_NICS:
13033 raise errors.OpPrereqError("Instance has too many network interfaces"
13034 " (%d), cannot add more" % constants.MAX_NICS,
13035 errors.ECODE_STATE)
13037 # Verify disk changes (operating on a copy)
13038 disks = instance.disks[:]
13039 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13040 if len(disks) > constants.MAX_DISKS:
13041 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13042 " more" % constants.MAX_DISKS,
13043 errors.ECODE_STATE)
13045 if self.op.offline is not None:
13046 if self.op.offline:
13047 msg = "can't change to offline"
13049 msg = "can't change to online"
13050 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
13052 # Pre-compute NIC changes (necessary to use result in hooks)
13053 self._nic_chgdesc = []
13055 # Operate on copies as this is still in prereq
13056 nics = [nic.Copy() for nic in instance.nics]
13057 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13058 self._CreateNewNic, self._ApplyNicMods, None)
13059 self._new_nics = nics
13061 self._new_nics = None
13063 def _ConvertPlainToDrbd(self, feedback_fn):
13064 """Converts an instance from plain to drbd.
13067 feedback_fn("Converting template to drbd")
13068 instance = self.instance
13069 pnode = instance.primary_node
13070 snode = self.op.remote_node
13072 assert instance.disk_template == constants.DT_PLAIN
13074 # create a fake disk info for _GenerateDiskTemplate
13075 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13076 constants.IDISK_VG: d.logical_id[0]}
13077 for d in instance.disks]
13078 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13079 instance.name, pnode, [snode],
13080 disk_info, None, None, 0, feedback_fn,
13082 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13084 info = _GetInstanceInfoText(instance)
13085 feedback_fn("Creating additional volumes...")
13086 # first, create the missing data and meta devices
13087 for disk in anno_disks:
13088 # unfortunately this is... not too nice
13089 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13091 for child in disk.children:
13092 _CreateSingleBlockDev(self, snode, instance, child, info, True)
13093 # at this stage, all new LVs have been created, we can rename the
13095 feedback_fn("Renaming original volumes...")
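# The original plain LVs become the data children (children[0]) of the new
# DRBD disks, so rename them to the logical IDs generated for those children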
13096 rename_list = [(o, n.children[0].logical_id)
13097 for (o, n) in zip(instance.disks, new_disks)]
13098 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13099 result.Raise("Failed to rename original LVs")
13101 feedback_fn("Initializing DRBD devices...")
13102 # all child devices are in place, we can now create the DRBD devices
13103 for disk in anno_disks:
13104 for node in [pnode, snode]:
13105 f_create = node == pnode
13106 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
13108 # at this point, the instance has been modified
13109 instance.disk_template = constants.DT_DRBD8
13110 instance.disks = new_disks
13111 self.cfg.Update(instance, feedback_fn)
13113 # Release node locks while waiting for sync
13114 _ReleaseLocks(self, locking.LEVEL_NODE)
13116 # disks are created, waiting for sync
13117 disk_abort = not _WaitForSync(self, instance,
13118 oneshot=not self.op.wait_for_sync)
13120 raise errors.OpExecError("There are some degraded disks for"
13121 " this instance, please cleanup manually")
13123 # Node resource locks will be released by caller
13125 def _ConvertDrbdToPlain(self, feedback_fn):
13126 """Converts an instance from drbd to plain.
13129 instance = self.instance
13131 assert len(instance.secondary_nodes) == 1
13132 assert instance.disk_template == constants.DT_DRBD8
13134 pnode = instance.primary_node
13135 snode = instance.secondary_nodes[0]
13136 feedback_fn("Converting template to plain")
13138 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
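# For DRBD8 the first child is the data LV, which simply becomes the plain
# disk; the meta LVs (children[1]) are removed further down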
13139 new_disks = [d.children[0] for d in instance.disks]
13141 # copy over size and mode
13142 for parent, child in zip(old_disks, new_disks):
13143 child.size = parent.size
13144 child.mode = parent.mode
13146 # this is a DRBD disk, return its port to the pool
13147 # NOTE: this must be done right before the call to cfg.Update!
13148 for disk in old_disks:
13149 tcp_port = disk.logical_id[2]
13150 self.cfg.AddTcpUdpPort(tcp_port)
13152 # update instance structure
13153 instance.disks = new_disks
13154 instance.disk_template = constants.DT_PLAIN
13155 self.cfg.Update(instance, feedback_fn)
13157 # Release locks in case removing disks takes a while
13158 _ReleaseLocks(self, locking.LEVEL_NODE)
13160 feedback_fn("Removing volumes on the secondary node...")
13161 for disk in old_disks:
13162 self.cfg.SetDiskID(disk, snode)
13163 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
13165 self.LogWarning("Could not remove block device %s on node %s,"
13166 " continuing anyway: %s", disk.iv_name, snode, msg)
13168 feedback_fn("Removing unneeded volumes on the primary node...")
13169 for idx, disk in enumerate(old_disks):
13170 meta = disk.children[1]
13171 self.cfg.SetDiskID(meta, pnode)
13172 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
13174 self.LogWarning("Could not remove metadata for disk %d on node %s,"
13175 " continuing anyway: %s", idx, pnode, msg)
13177 def _CreateNewDisk(self, idx, params, _):
13178 """Creates a new disk.
13181 instance = self.instance
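# File-based disks must live in the same directory as the instance's
# existing disks, so reuse the driver and directory of disk 0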
13184 if instance.disk_template in constants.DTS_FILEBASED:
13185 (file_driver, file_path) = instance.disks[0].logical_id
13186 file_path = os.path.dirname(file_path)
13188 file_driver = file_path = None
13191 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13192 instance.primary_node, instance.secondary_nodes,
13193 [params], file_path, file_driver, idx,
13194 self.Log, self.diskparams)[0]
13196 info = _GetInstanceInfoText(instance)
13198 logging.info("Creating volume %s for instance %s",
13199 disk.iv_name, instance.name)
13200 # Note: this needs to be kept in sync with _CreateDisks
13202 for node in instance.all_nodes:
13203 f_create = (node == instance.primary_node)
13205 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13206 except errors.OpExecError, err:
13207 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13208 disk.iv_name, disk, node, err)
13211 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13215 def _ModifyDisk(idx, disk, params, _):
13216 """Modifies a disk.
13219 disk.mode = params[constants.IDISK_MODE]
13222 ("disk.mode/%d" % idx, disk.mode),
13225 def _RemoveDisk(self, idx, root, _):
13229 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
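# ComputeNodeTree yields every (node, device) pair that makes up this disk,
# so all of its components (e.g. DRBD data and meta LVs) are removed on
# every node involved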
13230 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13231 self.cfg.SetDiskID(disk, node)
13232 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13234 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13235 " continuing anyway", idx, node, msg)
13237 # if this is a DRBD disk, return its port to the pool
13238 if root.dev_type in constants.LDS_DRBD:
13239 self.cfg.AddTcpUdpPort(root.logical_id[2])
13242 def _CreateNewNic(idx, params, private):
13243 """Creates data structure for a new network interface.
13246 mac = params[constants.INIC_MAC]
13247 ip = params.get(constants.INIC_IP, None)
13248 nicparams = private.params
13250 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
13252 "add:mac=%s,ip=%s,mode=%s,link=%s" %
13253 (mac, ip, private.filled[constants.NIC_MODE],
13254 private.filled[constants.NIC_LINK])),
13258 def _ApplyNicMods(idx, nic, params, private):
13259 """Modifies a network interface.
13264 for key in [constants.INIC_MAC, constants.INIC_IP]:
13266 changes.append(("nic.%s/%d" % (key, idx), params[key]))
13267 setattr(nic, key, params[key])
13270 nic.nicparams = private.params
13272 for (key, val) in params.items():
13273 changes.append(("nic.%s/%d" % (key, idx), val))
13277 def Exec(self, feedback_fn):
13278 """Modifies an instance.
13280 All parameters take effect only at the next restart of the instance.
13283 # Process here the warnings from CheckPrereq, as we don't have a
13284 # feedback_fn there.
13285 # TODO: Replace with self.LogWarning
13286 for warn in self.warn:
13287 feedback_fn("WARNING: %s" % warn)
13289 assert ((self.op.disk_template is None) ^
13290 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13291 "Not owning any node resource locks"
13294 instance = self.instance
13297 if self.op.runtime_mem:
13298 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13300 self.op.runtime_mem)
13301 rpcres.Raise("Cannot modify instance runtime memory")
13302 result.append(("runtime_memory", self.op.runtime_mem))
13304 # Apply disk changes
13305 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13306 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
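# Additions and removals may have changed the disk list, so renumber the
# iv_names ("disk/0", "disk/1", ...) to match the new order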
13307 _UpdateIvNames(0, instance.disks)
13309 if self.op.disk_template:
13311 check_nodes = set(instance.all_nodes)
13312 if self.op.remote_node:
13313 check_nodes.add(self.op.remote_node)
13314 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13315 owned = self.owned_locks(level)
13316 assert not (check_nodes - owned), \
13317 ("Not owning the correct locks, owning %r, expected at least %r" %
13318 (owned, check_nodes))
13320 r_shut = _ShutdownInstanceDisks(self, instance)
13322 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13323 " proceed with disk template conversion")
13324 mode = (instance.disk_template, self.op.disk_template)
13326 self._DISK_CONVERSIONS[mode](self, feedback_fn)
13328 self.cfg.ReleaseDRBDMinors(instance.name)
13330 result.append(("disk_template", self.op.disk_template))
13332 assert instance.disk_template == self.op.disk_template, \
13333 ("Expected disk template '%s', found '%s'" %
13334 (self.op.disk_template, instance.disk_template))
13336 # Release node and resource locks if there are any (they might already have
13337 # been released during disk conversion)
13338 _ReleaseLocks(self, locking.LEVEL_NODE)
13339 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13341 # Apply NIC changes
13342 if self._new_nics is not None:
13343 instance.nics = self._new_nics
13344 result.extend(self._nic_chgdesc)
13347 if self.op.hvparams:
13348 instance.hvparams = self.hv_inst
13349 for key, val in self.op.hvparams.iteritems():
13350 result.append(("hv/%s" % key, val))
13353 if self.op.beparams:
13354 instance.beparams = self.be_inst
13355 for key, val in self.op.beparams.iteritems():
13356 result.append(("be/%s" % key, val))
13359 if self.op.os_name:
13360 instance.os = self.op.os_name
13363 if self.op.osparams:
13364 instance.osparams = self.os_inst
13365 for key, val in self.op.osparams.iteritems():
13366 result.append(("os/%s" % key, val))
13368 if self.op.offline is None:
13371 elif self.op.offline:
13372 # Mark instance as offline
13373 self.cfg.MarkInstanceOffline(instance.name)
13374 result.append(("admin_state", constants.ADMINST_OFFLINE))
13376 # Mark instance as online, but stopped
13377 self.cfg.MarkInstanceDown(instance.name)
13378 result.append(("admin_state", constants.ADMINST_DOWN))
13380 self.cfg.Update(instance, feedback_fn)
13382 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13383 self.owned_locks(locking.LEVEL_NODE)), \
13384 "All node locks should have been released by now"
13388 _DISK_CONVERSIONS = {
13389 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13390 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13394 class LUInstanceChangeGroup(LogicalUnit):
13395 HPATH = "instance-change-group"
13396 HTYPE = constants.HTYPE_INSTANCE
13399 def ExpandNames(self):
13400 self.share_locks = _ShareAll()
13401 self.needed_locks = {
13402 locking.LEVEL_NODEGROUP: [],
13403 locking.LEVEL_NODE: [],
13406 self._ExpandAndLockInstance()
13408 if self.op.target_groups:
13409 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13410 self.op.target_groups)
13412 self.req_target_uuids = None
13414 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13416 def DeclareLocks(self, level):
13417 if level == locking.LEVEL_NODEGROUP:
13418 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13420 if self.req_target_uuids:
13421 lock_groups = set(self.req_target_uuids)
13423 # Lock all groups used by instance optimistically; this requires going
13424 # via the node before it's locked, requiring verification later on
13425 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13426 lock_groups.update(instance_groups)
13428 # No target groups, need to lock all of them
13429 lock_groups = locking.ALL_SET
13431 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13433 elif level == locking.LEVEL_NODE:
13434 if self.req_target_uuids:
13435 # Lock all nodes used by instances
13436 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13437 self._LockInstancesNodes()
13439 # Lock all nodes in all potential target groups
13440 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13441 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13442 member_nodes = [node_name
13443 for group in lock_groups
13444 for node_name in self.cfg.GetNodeGroup(group).members]
13445 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13447 # Lock all nodes as all groups are potential targets
13448 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13450 def CheckPrereq(self):
13451 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13452 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13453 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13455 assert (self.req_target_uuids is None or
13456 owned_groups.issuperset(self.req_target_uuids))
13457 assert owned_instances == set([self.op.instance_name])
13459 # Get instance information
13460 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13462 # Check if node groups for locked instance are still correct
13463 assert owned_nodes.issuperset(self.instance.all_nodes), \
13464 ("Instance %s's nodes changed while we kept the lock" %
13465 self.op.instance_name)
13467 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13470 if self.req_target_uuids:
13471 # User requested specific target groups
13472 self.target_uuids = frozenset(self.req_target_uuids)
13474 # All groups except those used by the instance are potential targets
13475 self.target_uuids = owned_groups - inst_groups
13477 conflicting_groups = self.target_uuids & inst_groups
13478 if conflicting_groups:
13479 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13480 " used by the instance '%s'" %
13481 (utils.CommaJoin(conflicting_groups),
13482 self.op.instance_name),
13483 errors.ECODE_INVAL)
13485 if not self.target_uuids:
13486 raise errors.OpPrereqError("There are no possible target groups",
13487 errors.ECODE_INVAL)
13489 def BuildHooksEnv(self):
13490 """Build hooks env.
13493 assert self.target_uuids
13496 "TARGET_GROUPS": " ".join(self.target_uuids),
13499 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13503 def BuildHooksNodes(self):
13504 """Build hooks nodes.
13507 mn = self.cfg.GetMasterNode()
13508 return ([mn], [mn])
13510 def Exec(self, feedback_fn):
13511 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13513 assert instances == [self.op.instance_name], "Instance not locked"
13515 req = iallocator.IAReqGroupChange(instances=instances,
13516 target_groups=list(self.target_uuids))
13517 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
13519 ial.Run(self.op.iallocator)
13521 if not ial.success:
13522 raise errors.OpPrereqError("Can't compute solution for changing group of"
13523 " instance '%s' using iallocator '%s': %s" %
13524 (self.op.instance_name, self.op.iallocator,
13525 ial.info), errors.ECODE_NORES)
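# Turn the iallocator's relocation plan into follow-up jobs; they are
# returned to the caller rather than executed directly by this LU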
13527 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13529 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13530 " instance '%s'", len(jobs), self.op.instance_name)
13532 return ResultWithJobs(jobs)
13535 class LUBackupQuery(NoHooksLU):
13536 """Query the exports list
13541 def CheckArguments(self):
13542 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13543 ["node", "export"], self.op.use_locking)
13545 def ExpandNames(self):
13546 self.expq.ExpandNames(self)
13548 def DeclareLocks(self, level):
13549 self.expq.DeclareLocks(self, level)
13551 def Exec(self, feedback_fn):
13554 for (node, expname) in self.expq.OldStyleQuery(self):
13555 if expname is None:
13556 result[node] = False
13558 result.setdefault(node, []).append(expname)
13563 class _ExportQuery(_QueryBase):
13564 FIELDS = query.EXPORT_FIELDS
13566 #: The node name is not a unique key for this query
13567 SORT_FIELD = "node"
13569 def ExpandNames(self, lu):
13570 lu.needed_locks = {}
13572 # The following variables interact with _QueryBase._GetNames
13574 self.wanted = _GetWantedNodes(lu, self.names)
13576 self.wanted = locking.ALL_SET
13578 self.do_locking = self.use_locking
13580 if self.do_locking:
13581 lu.share_locks = _ShareAll()
13582 lu.needed_locks = {
13583 locking.LEVEL_NODE: self.wanted,
13586 def DeclareLocks(self, lu, level):
13589 def _GetQueryData(self, lu):
13590 """Computes the list of nodes and their attributes.
13593 # Locking is not used
13595 assert not (compat.any(lu.glm.is_owned(level)
13596 for level in locking.LEVELS
13597 if level != locking.LEVEL_CLUSTER) or
13598 self.do_locking or self.use_locking)
13600 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
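# Ask every selected node for its export list; nodes that fail to answer are
# reported with a None entry rather than being silently skipped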
13604 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13606 result.append((node, None))
13608 result.extend((node, expname) for expname in nres.payload)
13613 class LUBackupPrepare(NoHooksLU):
13614 """Prepares an instance for an export and returns useful information.
13619 def ExpandNames(self):
13620 self._ExpandAndLockInstance()
13622 def CheckPrereq(self):
13623 """Check prerequisites.
13626 instance_name = self.op.instance_name
13628 self.instance = self.cfg.GetInstanceInfo(instance_name)
13629 assert self.instance is not None, \
13630 "Cannot retrieve locked instance %s" % self.op.instance_name
13631 _CheckNodeOnline(self, self.instance.primary_node)
13633 self._cds = _GetClusterDomainSecret()
13635 def Exec(self, feedback_fn):
13636 """Prepares an instance for an export.
13639 instance = self.instance
13641 if self.op.mode == constants.EXPORT_MODE_REMOTE:
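# The generated key name and CA are signed with the cluster domain secret;
# LUBackupExport verifies those signatures before trusting the values that
# are passed back in the export opcode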
13642 salt = utils.GenerateSecret(8)
13644 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13645 result = self.rpc.call_x509_cert_create(instance.primary_node,
13646 constants.RIE_CERT_VALIDITY)
13647 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13649 (name, cert_pem) = result.payload
13651 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13655 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13656 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13658 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13664 class LUBackupExport(LogicalUnit):
13665 """Export an instance to an image in the cluster.
13668 HPATH = "instance-export"
13669 HTYPE = constants.HTYPE_INSTANCE
13672 def CheckArguments(self):
13673 """Check the arguments.
13676 self.x509_key_name = self.op.x509_key_name
13677 self.dest_x509_ca_pem = self.op.destination_x509_ca
13679 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13680 if not self.x509_key_name:
13681 raise errors.OpPrereqError("Missing X509 key name for encryption",
13682 errors.ECODE_INVAL)
13684 if not self.dest_x509_ca_pem:
13685 raise errors.OpPrereqError("Missing destination X509 CA",
13686 errors.ECODE_INVAL)
13688 def ExpandNames(self):
13689 self._ExpandAndLockInstance()
13691 # Lock all nodes for local exports
13692 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13693 # FIXME: lock only instance primary and destination node
13695 # Sad but true, for now we have to lock all nodes, as we don't know where
13696 # the previous export might be, and in this LU we search for it and
13697 # remove it from its current node. In the future we could fix this by:
13698 # - making a tasklet to search (share-lock all), then create the
13699 # new one, then one to remove, after
13700 # - removing the removal operation altogether
13701 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13703 def DeclareLocks(self, level):
13704 """Last minute lock declaration."""
13705 # All nodes are locked anyway, so nothing to do here.
13707 def BuildHooksEnv(self):
13708 """Build hooks env.
13710 This will run on the master, primary node and target node.
13714 "EXPORT_MODE": self.op.mode,
13715 "EXPORT_NODE": self.op.target_node,
13716 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13717 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13718 # TODO: Generic function for boolean env variables
13719 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13722 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13726 def BuildHooksNodes(self):
13727 """Build hooks nodes.
13730 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13732 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13733 nl.append(self.op.target_node)
13737 def CheckPrereq(self):
13738 """Check prerequisites.
13740 This checks that the instance and node names are valid.
13743 instance_name = self.op.instance_name
13745 self.instance = self.cfg.GetInstanceInfo(instance_name)
13746 assert self.instance is not None, \
13747 "Cannot retrieve locked instance %s" % self.op.instance_name
13748 _CheckNodeOnline(self, self.instance.primary_node)
13750 if (self.op.remove_instance and
13751 self.instance.admin_state == constants.ADMINST_UP and
13752 not self.op.shutdown):
13753 raise errors.OpPrereqError("Can not remove instance without shutting it"
13754 " down before", errors.ECODE_STATE)
13756 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13757 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13758 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13759 assert self.dst_node is not None
13761 _CheckNodeOnline(self, self.dst_node.name)
13762 _CheckNodeNotDrained(self, self.dst_node.name)
13765 self.dest_disk_info = None
13766 self.dest_x509_ca = None
13768 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13769 self.dst_node = None
13771 if len(self.op.target_node) != len(self.instance.disks):
13772 raise errors.OpPrereqError(("Received destination information for %s"
13773 " disks, but instance %s has %s disks") %
13774 (len(self.op.target_node), instance_name,
13775 len(self.instance.disks)),
13776 errors.ECODE_INVAL)
13778 cds = _GetClusterDomainSecret()
13780 # Check X509 key name
13782 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13783 except (TypeError, ValueError), err:
13784 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
13785 errors.ECODE_INVAL)
13787 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13788 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13789 errors.ECODE_INVAL)
13791 # Load and verify CA
13793 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13794 except OpenSSL.crypto.Error, err:
13795 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13796 (err, ), errors.ECODE_INVAL)
13798 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13799 if errcode is not None:
13800 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13801 (msg, ), errors.ECODE_INVAL)
13803 self.dest_x509_ca = cert
13805 # Verify target information
13807 for idx, disk_data in enumerate(self.op.target_node):
13809 (host, port, magic) = \
13810 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13811 except errors.GenericError, err:
13812 raise errors.OpPrereqError("Target info for disk %s: %s" %
13813 (idx, err), errors.ECODE_INVAL)
13815 disk_info.append((host, port, magic))
13817 assert len(disk_info) == len(self.op.target_node)
13818 self.dest_disk_info = disk_info
13821 raise errors.ProgrammerError("Unhandled export mode %r" %
13824 # instance disk type verification
13825 # TODO: Implement export support for file-based disks
13826 for disk in self.instance.disks:
13827 if disk.dev_type == constants.LD_FILE:
13828 raise errors.OpPrereqError("Export not supported for instances with"
13829 " file-based disks", errors.ECODE_INVAL)
13831 def _CleanupExports(self, feedback_fn):
13832 """Removes exports of current instance from all other nodes.
13834 If an instance in a cluster with nodes A..D was exported to node C, its
13835 exports will be removed from the nodes A, B and D.
13838 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13840 nodelist = self.cfg.GetNodeList()
13841 nodelist.remove(self.dst_node.name)
13843 # on one-node clusters nodelist will be empty after the removal
13844 # if we proceed the backup would be removed because OpBackupQuery
13845 # substitutes an empty list with the full cluster node list.
13846 iname = self.instance.name
13848 feedback_fn("Removing old exports for instance %s" % iname)
13849 exportlist = self.rpc.call_export_list(nodelist)
13850 for node in exportlist:
13851 if exportlist[node].fail_msg:
13853 if iname in exportlist[node].payload:
13854 msg = self.rpc.call_export_remove(node, iname).fail_msg
13856 self.LogWarning("Could not remove older export for instance %s"
13857 " on node %s: %s", iname, node, msg)
13859 def Exec(self, feedback_fn):
13860 """Export an instance to an image in the cluster.
13863 assert self.op.mode in constants.EXPORT_MODES
13865 instance = self.instance
13866 src_node = instance.primary_node
13868 if self.op.shutdown:
13869 # shutdown the instance, but not the disks
13870 feedback_fn("Shutting down instance %s" % instance.name)
13871 result = self.rpc.call_instance_shutdown(src_node, instance,
13872 self.op.shutdown_timeout)
13873 # TODO: Maybe ignore failures if ignore_remove_failures is set
13874 result.Raise("Could not shutdown instance %s on"
13875 " node %s" % (instance.name, src_node))
13877 # set the disks ID correctly since call_instance_start needs the
13878 # correct drbd minor to create the symlinks
13879 for disk in instance.disks:
13880 self.cfg.SetDiskID(disk, src_node)
13882 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13885 # Activate the instance disks if we're exporting a stopped instance
13886 feedback_fn("Activating disks for %s" % instance.name)
13887 _StartInstanceDisks(self, instance, None)
13890 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13893 helper.CreateSnapshots()
13895 if (self.op.shutdown and
13896 instance.admin_state == constants.ADMINST_UP and
13897 not self.op.remove_instance):
13898 assert not activate_disks
13899 feedback_fn("Starting instance %s" % instance.name)
13900 result = self.rpc.call_instance_start(src_node,
13901 (instance, None, None), False)
13902 msg = result.fail_msg
13904 feedback_fn("Failed to start instance: %s" % msg)
13905 _ShutdownInstanceDisks(self, instance)
13906 raise errors.OpExecError("Could not start instance: %s" % msg)
13908 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13909 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13910 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13911 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13912 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13914 (key_name, _, _) = self.x509_key_name
13917 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13920 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13921 key_name, dest_ca_pem,
13926 # Check for backwards compatibility
13927 assert len(dresults) == len(instance.disks)
13928 assert compat.all(isinstance(i, bool) for i in dresults), \
13929 "Not all results are boolean: %r" % dresults
13933 feedback_fn("Deactivating disks for %s" % instance.name)
13934 _ShutdownInstanceDisks(self, instance)
13936 if not (compat.all(dresults) and fin_resu):
13939 failures.append("export finalization")
13940 if not compat.all(dresults):
13941 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13943 failures.append("disk export: disk(s) %s" % fdsk)
13945 raise errors.OpExecError("Export failed, errors in %s" %
13946 utils.CommaJoin(failures))
13948 # At this point, the export was successful, we can cleanup/finish
13950 # Remove instance if requested
13951 if self.op.remove_instance:
13952 feedback_fn("Removing instance %s" % instance.name)
13953 _RemoveInstance(self, feedback_fn, instance,
13954 self.op.ignore_remove_failures)
13956 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13957 self._CleanupExports(feedback_fn)
13959 return fin_resu, dresults
13962 class LUBackupRemove(NoHooksLU):
13963 """Remove exports related to the named instance.
13968 def ExpandNames(self):
13969 self.needed_locks = {}
13970 # We need all nodes to be locked in order for RemoveExport to work, but we
13971 # don't need to lock the instance itself, as nothing will happen to it (and
13972 # we can remove exports also for a removed instance)
13973 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13975 def Exec(self, feedback_fn):
13976 """Remove any export.
13979 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13980 # If the instance was not found we'll try with the name that was passed in.
13981 # This will only work if it was an FQDN, though.
13983 if not instance_name:
13985 instance_name = self.op.instance_name
13987 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13988 exportlist = self.rpc.call_export_list(locked_nodes)
13990 for node in exportlist:
13991 msg = exportlist[node].fail_msg
13993 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13995 if instance_name in exportlist[node].payload:
13997 result = self.rpc.call_export_remove(node, instance_name)
13998 msg = result.fail_msg
14000 logging.error("Could not remove export for instance %s"
14001 " on node %s: %s", instance_name, node, msg)
14003 if fqdn_warn and not found:
14004 feedback_fn("Export not found. If trying to remove an export belonging"
14005 " to a deleted instance please use its Fully Qualified"
14009 class LUGroupAdd(LogicalUnit):
14010 """Logical unit for creating node groups.
14013 HPATH = "group-add"
14014 HTYPE = constants.HTYPE_GROUP
14017 def ExpandNames(self):
14018 # We need the new group's UUID here so that we can create and acquire the
14019 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14020 # that it should not check whether the UUID exists in the configuration.
14021 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14022 self.needed_locks = {}
14023 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14025 def CheckPrereq(self):
14026 """Check prerequisites.
14028 This checks that the given group name is not an existing node group
14033 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14034 except errors.OpPrereqError:
14037 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14038 " node group (UUID: %s)" %
14039 (self.op.group_name, existing_uuid),
14040 errors.ECODE_EXISTS)
14042 if self.op.ndparams:
14043 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14045 if self.op.hv_state:
14046 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14048 self.new_hv_state = None
14050 if self.op.disk_state:
14051 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14053 self.new_disk_state = None
14055 if self.op.diskparams:
14056 for templ in constants.DISK_TEMPLATES:
14057 if templ in self.op.diskparams:
14058 utils.ForceDictType(self.op.diskparams[templ],
14059 constants.DISK_DT_TYPES)
14060 self.new_diskparams = self.op.diskparams
14062 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14063 except errors.OpPrereqError, err:
14064 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14065 errors.ECODE_INVAL)
14067 self.new_diskparams = {}
14069 if self.op.ipolicy:
14070 cluster = self.cfg.GetClusterInfo()
14071 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14073 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14074 except errors.ConfigurationError, err:
14075 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14076 errors.ECODE_INVAL)
14078 def BuildHooksEnv(self):
14079 """Build hooks env.
14083 "GROUP_NAME": self.op.group_name,
14086 def BuildHooksNodes(self):
14087 """Build hooks nodes.
14090 mn = self.cfg.GetMasterNode()
14091 return ([mn], [mn])
14093 def Exec(self, feedback_fn):
14094 """Add the node group to the cluster.
14097 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14098 uuid=self.group_uuid,
14099 alloc_policy=self.op.alloc_policy,
14100 ndparams=self.op.ndparams,
14101 diskparams=self.new_diskparams,
14102 ipolicy=self.op.ipolicy,
14103 hv_state_static=self.new_hv_state,
14104 disk_state_static=self.new_disk_state)
14106 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14107 del self.remove_locks[locking.LEVEL_NODEGROUP]
14110 class LUGroupAssignNodes(NoHooksLU):
14111 """Logical unit for assigning nodes to groups.
14116 def ExpandNames(self):
14117 # These raise errors.OpPrereqError on their own:
14118 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14119 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14121 # We want to lock all the affected nodes and groups. We have readily
14122 # available the list of nodes, and the *destination* group. To gather the
14123 # list of "source" groups, we need to fetch node information later on.
14124 self.needed_locks = {
14125 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14126 locking.LEVEL_NODE: self.op.nodes,
14129 def DeclareLocks(self, level):
14130 if level == locking.LEVEL_NODEGROUP:
14131 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14133 # Try to get all affected nodes' groups without having the group or node
14134 # lock yet. Needs verification later in the code flow.
14135 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14137 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14139 def CheckPrereq(self):
14140 """Check prerequisites.
14143 assert self.needed_locks[locking.LEVEL_NODEGROUP]
14144 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
14145 frozenset(self.op.nodes))
14147 expected_locks = (set([self.group_uuid]) |
14148 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
14149 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
14150 if actual_locks != expected_locks:
14151 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
14152 " current groups are '%s', used to be '%s'" %
14153 (utils.CommaJoin(expected_locks),
14154 utils.CommaJoin(actual_locks)))
14156 self.node_data = self.cfg.GetAllNodesInfo()
14157 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14158 instance_data = self.cfg.GetAllInstancesInfo()
14160 if self.group is None:
14161 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14162 (self.op.group_name, self.group_uuid))
14164 (new_splits, previous_splits) = \
14165 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
14166 for node in self.op.nodes],
14167 self.node_data, instance_data)
14170 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
14172 if not self.op.force:
14173 raise errors.OpExecError("The following instances get split by this"
14174 " change and --force was not given: %s" %
14177 self.LogWarning("This operation will split the following instances: %s",
14180 if previous_splits:
14181 self.LogWarning("In addition, these already-split instances continue"
14182 " to be split across groups: %s",
14183 utils.CommaJoin(utils.NiceSort(previous_splits)))
14185 def Exec(self, feedback_fn):
14186 """Assign nodes to a new group.
14189 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
14191 self.cfg.AssignGroupNodes(mods)
14194 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
14195 """Check for split instances after a node assignment.
14197 This method considers a series of node assignments as an atomic operation,
14198 and returns information about split instances after applying the set of
14201 In particular, it returns information about newly split instances, and
14202 instances that were already split, and remain so after the change.
14204 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
14207 @type changes: list of (node_name, new_group_uuid) pairs.
14208 @param changes: list of node assignments to consider.
14209 @param node_data: a dict with data for all nodes
14210 @param instance_data: a dict with all instances to consider
14211 @rtype: a two-tuple
14212 @return: a list of instances that were previously okay and become split as a
14213 consequence of this change, and a list of instances that were previously
14214 split and that this change does not fix.
14217 changed_nodes = dict((node, group) for node, group in changes
14218 if node_data[node].group != group)
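# For every mirrored instance, record both whether it is already split across
# groups (previously_split_instances) and whether it would be split under the
# new assignment (all_split_instances)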
14220 all_split_instances = set()
14221 previously_split_instances = set()
14223 def InstanceNodes(instance):
14224 return [instance.primary_node] + list(instance.secondary_nodes)
14226 for inst in instance_data.values():
14227 if inst.disk_template not in constants.DTS_INT_MIRROR:
14230 instance_nodes = InstanceNodes(inst)
14232 if len(set(node_data[node].group for node in instance_nodes)) > 1:
14233 previously_split_instances.add(inst.name)
14235 if len(set(changed_nodes.get(node, node_data[node].group)
14236 for node in instance_nodes)) > 1:
14237 all_split_instances.add(inst.name)
14239 return (list(all_split_instances - previously_split_instances),
14240 list(previously_split_instances & all_split_instances))
14243 class _GroupQuery(_QueryBase):
14244 FIELDS = query.GROUP_FIELDS
14246 def ExpandNames(self, lu):
14247 lu.needed_locks = {}
14249 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
14250 self._cluster = lu.cfg.GetClusterInfo()
14251 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
14254 self.wanted = [name_to_uuid[name]
14255 for name in utils.NiceSort(name_to_uuid.keys())]
14257 # Accept names to be either names or UUIDs.
14260 all_uuid = frozenset(self._all_groups.keys())
14262 for name in self.names:
14263 if name in all_uuid:
14264 self.wanted.append(name)
14265 elif name in name_to_uuid:
14266 self.wanted.append(name_to_uuid[name])
14268 missing.append(name)
14271 raise errors.OpPrereqError("Some groups do not exist: %s" %
14272 utils.CommaJoin(missing),
14273 errors.ECODE_NOENT)
14275 def DeclareLocks(self, lu, level):
14278 def _GetQueryData(self, lu):
14279 """Computes the list of node groups and their attributes.
14282 do_nodes = query.GQ_NODE in self.requested_data
14283 do_instances = query.GQ_INST in self.requested_data
14285 group_to_nodes = None
14286 group_to_instances = None
14288 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
14289 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
14290 # latter GetAllInstancesInfo() is not enough, for we have to go through
14291 # instance->node. Hence, we will need to process nodes even if we only need
14292 # instance information.
14293 if do_nodes or do_instances:
14294 all_nodes = lu.cfg.GetAllNodesInfo()
14295 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
14298 for node in all_nodes.values():
14299 if node.group in group_to_nodes:
14300 group_to_nodes[node.group].append(node.name)
14301 node_to_group[node.name] = node.group
14304 all_instances = lu.cfg.GetAllInstancesInfo()
14305 group_to_instances = dict((uuid, []) for uuid in self.wanted)
14307 for instance in all_instances.values():
14308 node = instance.primary_node
14309 if node in node_to_group:
14310 group_to_instances[node_to_group[node]].append(instance.name)
14313 # Do not pass on node information if it was not requested.
14314 group_to_nodes = None
14316 return query.GroupQueryData(self._cluster,
14317 [self._all_groups[uuid]
14318 for uuid in self.wanted],
14319 group_to_nodes, group_to_instances,
14320 query.GQ_DISKPARAMS in self.requested_data)
14323 class LUGroupQuery(NoHooksLU):
14324 """Logical unit for querying node groups.
14329 def CheckArguments(self):
14330 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14331 self.op.output_fields, False)
14333 def ExpandNames(self):
14334 self.gq.ExpandNames(self)
14336 def DeclareLocks(self, level):
14337 self.gq.DeclareLocks(self, level)
14339 def Exec(self, feedback_fn):
14340 return self.gq.OldStyleQuery(self)
14343 class LUGroupSetParams(LogicalUnit):
14344 """Modifies the parameters of a node group.
14347 HPATH = "group-modify"
14348 HTYPE = constants.HTYPE_GROUP
14351 def CheckArguments(self):
14354 self.op.diskparams,
14355 self.op.alloc_policy,
14357 self.op.disk_state,
14361 if all_changes.count(None) == len(all_changes):
14362 raise errors.OpPrereqError("Please pass at least one modification",
14363 errors.ECODE_INVAL)
14365 def ExpandNames(self):
14366 # This raises errors.OpPrereqError on its own:
14367 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14369 self.needed_locks = {
14370 locking.LEVEL_INSTANCE: [],
14371 locking.LEVEL_NODEGROUP: [self.group_uuid],
14374 self.share_locks[locking.LEVEL_INSTANCE] = 1
14376 def DeclareLocks(self, level):
14377 if level == locking.LEVEL_INSTANCE:
14378 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14380 # Lock instances optimistically, needs verification once group lock has
14382 self.needed_locks[locking.LEVEL_INSTANCE] = \
14383 self.cfg.GetNodeGroupInstances(self.group_uuid)
14386 def _UpdateAndVerifyDiskParams(old, new):
14387 """Updates and verifies disk parameters.
14390 new_params = _GetUpdatedParams(old, new)
14391 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14394 def CheckPrereq(self):
14395 """Check prerequisites.
14398 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14400 # Check if locked instances are still correct
14401 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14403 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14404 cluster = self.cfg.GetClusterInfo()
14406 if self.group is None:
14407 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14408 (self.op.group_name, self.group_uuid))
14410 if self.op.ndparams:
14411 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14412 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14413 self.new_ndparams = new_ndparams
14415 if self.op.diskparams:
14416 diskparams = self.group.diskparams
14417 uavdp = self._UpdateAndVerifyDiskParams
14418 # For each disktemplate subdict update and verify the values
14419 new_diskparams = dict((dt,
14420 uavdp(diskparams.get(dt, {}),
14421 self.op.diskparams[dt]))
14422 for dt in constants.DISK_TEMPLATES
14423 if dt in self.op.diskparams)
14424 # Now that all per-template subdicts have been updated, merge them into
14425 # the actual diskparams dict
14426 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14428 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14429 except errors.OpPrereqError, err:
14430 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14431 errors.ECODE_INVAL)
14433 if self.op.hv_state:
14434 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14435 self.group.hv_state_static)
14437 if self.op.disk_state:
14438 self.new_disk_state = \
14439 _MergeAndVerifyDiskState(self.op.disk_state,
14440 self.group.disk_state_static)
14442 if self.op.ipolicy:
14443 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14447 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14448 inst_filter = lambda inst: inst.name in owned_instances
14449 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14450 gmi = ganeti.masterd.instance
14452 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14454 new_ipolicy, instances)
14457 self.LogWarning("After the ipolicy change the following instances"
14458 " violate them: %s",
14459 utils.CommaJoin(violations))
14461 def BuildHooksEnv(self):
14462 """Build hooks env.
14466 "GROUP_NAME": self.op.group_name,
14467 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14470 def BuildHooksNodes(self):
14471 """Build hooks nodes.
14474 mn = self.cfg.GetMasterNode()
14475 return ([mn], [mn])
14477 def Exec(self, feedback_fn):
14478 """Modifies the node group.
14483 if self.op.ndparams:
14484 self.group.ndparams = self.new_ndparams
14485 result.append(("ndparams", str(self.group.ndparams)))
14487 if self.op.diskparams:
14488 self.group.diskparams = self.new_diskparams
14489 result.append(("diskparams", str(self.group.diskparams)))
14491 if self.op.alloc_policy:
14492 self.group.alloc_policy = self.op.alloc_policy
14494 if self.op.hv_state:
14495 self.group.hv_state_static = self.new_hv_state
14497 if self.op.disk_state:
14498 self.group.disk_state_static = self.new_disk_state
14500 if self.op.ipolicy:
14501 self.group.ipolicy = self.new_ipolicy
14503 self.cfg.Update(self.group, feedback_fn)
14507 class LUGroupRemove(LogicalUnit):
14508 HPATH = "group-remove"
14509 HTYPE = constants.HTYPE_GROUP
14512 def ExpandNames(self):
14513 # This raises errors.OpPrereqError on its own:
14514 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14515 self.needed_locks = {
14516 locking.LEVEL_NODEGROUP: [self.group_uuid],
14519 def CheckPrereq(self):
14520 """Check prerequisites.
14522 This checks that the given group name exists as a node group, that it is
14523 empty (i.e., contains no nodes), and that it is not the last group of the
14527 # Verify that the group is empty.
14528 group_nodes = [node.name
14529 for node in self.cfg.GetAllNodesInfo().values()
14530 if node.group == self.group_uuid]
14533 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14535 (self.op.group_name,
14536 utils.CommaJoin(utils.NiceSort(group_nodes))),
14537 errors.ECODE_STATE)
14539 # Verify the cluster would not be left group-less.
14540 if len(self.cfg.GetNodeGroupList()) == 1:
14541 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
14542 " removed" % self.op.group_name,
14543 errors.ECODE_STATE)
14545 def BuildHooksEnv(self):
14546 """Build hooks env.
14550 "GROUP_NAME": self.op.group_name,
14553 def BuildHooksNodes(self):
14554 """Build hooks nodes.
14557 mn = self.cfg.GetMasterNode()
14558 return ([mn], [mn])
14560 def Exec(self, feedback_fn):
14561 """Remove the node group.
14564 try:
14565 self.cfg.RemoveNodeGroup(self.group_uuid)
14566 except errors.ConfigurationError:
14567 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14568 (self.op.group_name, self.group_uuid))
14570 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
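# Descriptive note: recording the group UUID in remove_locks asks the lock
# manager to drop and delete the node group lock once this LU finishes,
# since the group no longer exists in the configuration.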
14573 class LUGroupRename(LogicalUnit):
14574 HPATH = "group-rename"
14575 HTYPE = constants.HTYPE_GROUP
14578 def ExpandNames(self):
14579 # This raises errors.OpPrereqError on its own:
14580 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14582 self.needed_locks = {
14583 locking.LEVEL_NODEGROUP: [self.group_uuid],
14586 def CheckPrereq(self):
14587 """Check prerequisites.
14589 Ensures requested new name is not yet used.
14592 try:
14593 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14594 except errors.OpPrereqError:
14595 pass
14596 else:
14597 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14598 " node group (UUID: %s)" %
14599 (self.op.new_name, new_name_uuid),
14600 errors.ECODE_EXISTS)
14602 def BuildHooksEnv(self):
14603 """Build hooks env.
14607 "OLD_NAME": self.op.group_name,
14608 "NEW_NAME": self.op.new_name,
14611 def BuildHooksNodes(self):
14612 """Build hooks nodes.
14615 mn = self.cfg.GetMasterNode()
14617 all_nodes = self.cfg.GetAllNodesInfo()
14618 all_nodes.pop(mn, None)
14620 run_nodes = [mn]
14621 run_nodes.extend(node.name for node in all_nodes.values()
14622 if node.group == self.group_uuid)
14624 return (run_nodes, run_nodes)
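# Descriptive note: the master node is removed from the full node list and
# placed at the front of run_nodes, so the rename hooks run once on the
# master plus on every node of the renamed group, with no duplicates.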
14626 def Exec(self, feedback_fn):
14627 """Rename the node group.
14630 group = self.cfg.GetNodeGroup(self.group_uuid)
14632 if group is None:
14633 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14634 (self.op.group_name, self.group_uuid))
14636 group.name = self.op.new_name
14637 self.cfg.Update(group, feedback_fn)
14639 return self.op.new_name
14642 class LUGroupEvacuate(LogicalUnit):
14643 HPATH = "group-evacuate"
14644 HTYPE = constants.HTYPE_GROUP
14647 def ExpandNames(self):
14648 # This raises errors.OpPrereqError on its own:
14649 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14651 if self.op.target_groups:
14652 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14653 self.op.target_groups)
14654 else:
14655 self.req_target_uuids = []
14657 if self.group_uuid in self.req_target_uuids:
14658 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14659 " as a target group (targets are %s)" %
14660 (self.op.group_name,
14661 utils.CommaJoin(self.req_target_uuids)),
14662 errors.ECODE_INVAL)
14664 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14666 self.share_locks = _ShareAll()
14667 self.needed_locks = {
14668 locking.LEVEL_INSTANCE: [],
14669 locking.LEVEL_NODEGROUP: [],
14670 locking.LEVEL_NODE: [],
14673 def DeclareLocks(self, level):
14674 if level == locking.LEVEL_INSTANCE:
14675 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14677 # Lock instances optimistically, needs verification once node and group
14678 # locks have been acquired
14679 self.needed_locks[locking.LEVEL_INSTANCE] = \
14680 self.cfg.GetNodeGroupInstances(self.group_uuid)
14682 elif level == locking.LEVEL_NODEGROUP:
14683 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14685 if self.req_target_uuids:
14686 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14688 # Lock all groups used by instances optimistically; this requires going
14689 # via the node before it's locked, requiring verification later on
14690 lock_groups.update(group_uuid
14691 for instance_name in
14692 self.owned_locks(locking.LEVEL_INSTANCE)
14693 for group_uuid in
14694 self.cfg.GetInstanceNodeGroups(instance_name))
14695 else:
14696 # No target groups, need to lock all of them
14697 lock_groups = locking.ALL_SET
14699 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14701 elif level == locking.LEVEL_NODE:
14702 # This will only lock the nodes in the group to be evacuated which
14703 # contain actual instances
14704 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14705 self._LockInstancesNodes()
14707 # Lock all nodes in group to be evacuated and target groups
14708 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14709 assert self.group_uuid in owned_groups
14710 member_nodes = [node_name
14711 for group in owned_groups
14712 for node_name in self.cfg.GetNodeGroup(group).members]
14713 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
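# Descriptive note: the instance and group lock sets above are computed
# "optimistically", i.e. before the corresponding locks are actually held;
# CheckPrereq below re-checks the instance list and the node/group
# assignments once the locks have been acquired and aborts if they changed.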
14715 def CheckPrereq(self):
14716 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14717 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14718 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14720 assert owned_groups.issuperset(self.req_target_uuids)
14721 assert self.group_uuid in owned_groups
14723 # Check if locked instances are still correct
14724 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14726 # Get instance information
14727 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14729 # Check if node groups for locked instances are still correct
14730 _CheckInstancesNodeGroups(self.cfg, self.instances,
14731 owned_groups, owned_nodes, self.group_uuid)
14733 if self.req_target_uuids:
14734 # User requested specific target groups
14735 self.target_uuids = self.req_target_uuids
14737 # All groups except the one to be evacuated are potential targets
14738 self.target_uuids = [group_uuid for group_uuid in owned_groups
14739 if group_uuid != self.group_uuid]
14741 if not self.target_uuids:
14742 raise errors.OpPrereqError("There are no possible target groups",
14743 errors.ECODE_INVAL)
14745 def BuildHooksEnv(self):
14746 """Build hooks env.
14750 "GROUP_NAME": self.op.group_name,
14751 "TARGET_GROUPS": " ".join(self.target_uuids),
14754 def BuildHooksNodes(self):
14755 """Build hooks nodes.
14758 mn = self.cfg.GetMasterNode()
14760 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14762 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14764 return (run_nodes, run_nodes)
14766 def Exec(self, feedback_fn):
14767 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14769 assert self.group_uuid not in self.target_uuids
14771 req = iallocator.IAReqGroupChange(instances=instances,
14772 target_groups=self.target_uuids)
14773 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14775 ial.Run(self.op.iallocator)
14777 if not ial.success:
14778 raise errors.OpPrereqError("Can't compute group evacuation using"
14779 " iallocator '%s': %s" %
14780 (self.op.iallocator, ial.info),
14781 errors.ECODE_NORES)
14783 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14785 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14786 len(jobs), self.op.group_name)
14788 return ResultWithJobs(jobs)
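# Descriptive note: _LoadNodeEvacResult translates the iallocator's
# per-instance answer into lists of opcodes (one list per job), which are
# then returned wrapped in ResultWithJobs.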
14791 class TagsLU(NoHooksLU): # pylint: disable=W0223
14792 """Generic tags LU.
14794 This is an abstract class which is the parent of all the other tags LUs.
14797 def ExpandNames(self):
14798 self.group_uuid = None
14799 self.needed_locks = {}
14801 if self.op.kind == constants.TAG_NODE:
14802 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14803 lock_level = locking.LEVEL_NODE
14804 lock_name = self.op.name
14805 elif self.op.kind == constants.TAG_INSTANCE:
14806 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14807 lock_level = locking.LEVEL_INSTANCE
14808 lock_name = self.op.name
14809 elif self.op.kind == constants.TAG_NODEGROUP:
14810 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14811 lock_level = locking.LEVEL_NODEGROUP
14812 lock_name = self.group_uuid
14813 else:
14814 lock_level = None
14815 lock_name = None
14817 if lock_level and getattr(self.op, "use_locking", True):
14818 self.needed_locks[lock_level] = lock_name
14820 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14821 # not possible to acquire the BGL based on opcode parameters)
14823 def CheckPrereq(self):
14824 """Check prerequisites.
14827 if self.op.kind == constants.TAG_CLUSTER:
14828 self.target = self.cfg.GetClusterInfo()
14829 elif self.op.kind == constants.TAG_NODE:
14830 self.target = self.cfg.GetNodeInfo(self.op.name)
14831 elif self.op.kind == constants.TAG_INSTANCE:
14832 self.target = self.cfg.GetInstanceInfo(self.op.name)
14833 elif self.op.kind == constants.TAG_NODEGROUP:
14834 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14836 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14837 str(self.op.kind), errors.ECODE_INVAL)
14840 class LUTagsGet(TagsLU):
14841 """Returns the tags of a given object.
14846 def ExpandNames(self):
14847 TagsLU.ExpandNames(self)
14849 # Share locks as this is only a read operation
14850 self.share_locks = _ShareAll()
14852 def Exec(self, feedback_fn):
14853 """Returns the tag list.
14856 return list(self.target.GetTags())
14859 class LUTagsSearch(NoHooksLU):
14860 """Searches the tags for a given pattern.
14865 def ExpandNames(self):
14866 self.needed_locks = {}
14868 def CheckPrereq(self):
14869 """Check prerequisites.
14871 This checks the pattern passed for validity by compiling it.
14874 try:
14875 self.re = re.compile(self.op.pattern)
14876 except re.error, err:
14877 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14878 (self.op.pattern, err), errors.ECODE_INVAL)
14880 def Exec(self, feedback_fn):
14881 """Returns the tag list.
14884 cfg = self.cfg
14885 tgts = [("/cluster", cfg.GetClusterInfo())]
14886 ilist = cfg.GetAllInstancesInfo().values()
14887 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14888 nlist = cfg.GetAllNodesInfo().values()
14889 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14890 tgts.extend(("/nodegroup/%s" % n.name, n)
14891 for n in cfg.GetAllNodeGroupsInfo().values())
14892 results = []
14893 for path, target in tgts:
14894 for tag in target.GetTags():
14895 if self.re.search(tag):
14896 results.append((path, tag))
14897 return results
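# Illustrative sketch (assumed tag values, not from this module): searching
# for the pattern "^dept:" would return tuples such as
#   ("/instances/web1.example.com", "dept:marketing")
#   ("/nodes/node2.example.com", "dept:marketing")
# i.e. the object path together with every matching tag.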
14900 class LUTagsSet(TagsLU):
14901 """Sets a tag on a given object.
14906 def CheckPrereq(self):
14907 """Check prerequisites.
14909 This checks the type and length of the tag name and value.
14912 TagsLU.CheckPrereq(self)
14913 for tag in self.op.tags:
14914 objects.TaggableObject.ValidateTag(tag)
14916 def Exec(self, feedback_fn):
14920 try:
14921 for tag in self.op.tags:
14922 self.target.AddTag(tag)
14923 except errors.TagError, err:
14924 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14925 self.cfg.Update(self.target, feedback_fn)
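# Usage sketch (assumed example values): tags are normally added through an
# OpTagsSet opcode, e.g.
#   opcodes.OpTagsSet(kind=constants.TAG_INSTANCE,
#                     name="web1.example.com", tags=["dept:marketing"])
# which CheckPrereq validates with TaggableObject.ValidateTag before Exec
# stores them in the configuration.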
14928 class LUTagsDel(TagsLU):
14929 """Delete a list of tags from a given object.
14934 def CheckPrereq(self):
14935 """Check prerequisites.
14937 This checks that we have the given tag.
14940 TagsLU.CheckPrereq(self)
14941 for tag in self.op.tags:
14942 objects.TaggableObject.ValidateTag(tag)
14943 del_tags = frozenset(self.op.tags)
14944 cur_tags = self.target.GetTags()
14946 diff_tags = del_tags - cur_tags
14947 if diff_tags:
14948 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14949 raise errors.OpPrereqError("Tag(s) %s not found" %
14950 (utils.CommaJoin(diff_names), ),
14951 errors.ECODE_NOENT)
14953 def Exec(self, feedback_fn):
14954 """Remove the tag from the object.
14957 for tag in self.op.tags:
14958 self.target.RemoveTag(tag)
14959 self.cfg.Update(self.target, feedback_fn)
14962 class LUTestDelay(NoHooksLU):
14963 """Sleep for a specified amount of time.
14965 This LU sleeps on the master and/or nodes for a specified amount of
14966 time.
14971 def ExpandNames(self):
14972 """Expand names and set required locks.
14974 This expands the node list, if any.
14977 self.needed_locks = {}
14978 if self.op.on_nodes:
14979 # _GetWantedNodes can be used here, but is not always appropriate to use
14980 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14981 # more information.
14982 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14983 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14985 def _TestDelay(self):
14986 """Do the actual sleep.
14989 if self.op.on_master:
14990 if not utils.TestDelay(self.op.duration):
14991 raise errors.OpExecError("Error during master delay test")
14992 if self.op.on_nodes:
14993 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14994 for node, node_result in result.items():
14995 node_result.Raise("Failure during rpc call to node %s" % node)
14997 def Exec(self, feedback_fn):
14998 """Execute the test delay opcode, with the wanted repetitions.
15001 if self.op.repeat == 0:
15002 self._TestDelay()
15003 else:
15004 top_value = self.op.repeat - 1
15005 for i in range(self.op.repeat):
15006 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
15007 self._TestDelay()
15010 class LURestrictedCommand(NoHooksLU):
15011 """Logical unit for executing restricted commands.
15016 def ExpandNames(self):
15017 if self.op.nodes:
15018 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15020 self.needed_locks = {
15021 locking.LEVEL_NODE: self.op.nodes,
15023 self.share_locks = {
15024 locking.LEVEL_NODE: not self.op.use_locking,
15027 def CheckPrereq(self):
15028 """Check prerequisites.
15032 def Exec(self, feedback_fn):
15033 """Execute restricted command and return output.
15036 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15038 # Check if correct locks are held
15039 assert set(self.op.nodes).issubset(owned_nodes)
15041 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15043 result = []
15045 for node_name in self.op.nodes:
15046 nres = rpcres[node_name]
15047 if nres.fail_msg:
15048 msg = ("Command '%s' on node '%s' failed: %s" %
15049 (self.op.command, node_name, nres.fail_msg))
15050 result.append((False, msg))
15051 else:
15052 result.append((True, nres.payload))
15054 return result
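# Descriptive note: the return value is a list with one (success, data)
# tuple per requested node, in self.op.nodes order; data is the command
# output on success and an error message on failure.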
15057 class LUTestJqueue(NoHooksLU):
15058 """Utility LU to test some aspects of the job queue.
15063 # Must be lower than default timeout for WaitForJobChange to see whether it
15064 # notices changed jobs
15065 _CLIENT_CONNECT_TIMEOUT = 20.0
15066 _CLIENT_CONFIRM_TIMEOUT = 60.0
15068 @classmethod
15069 def _NotifyUsingSocket(cls, cb, errcls):
15070 """Opens a Unix socket and waits for another program to connect.
15073 @param cb: Callback to send socket name to client
15074 @type errcls: class
15075 @param errcls: Exception class to use for errors
15078 # Using a temporary directory as there's no easy way to create temporary
15079 # sockets without writing a custom loop around tempfile.mktemp and
15080 # socket.bind
15081 tmpdir = tempfile.mkdtemp()
15082 try:
15083 tmpsock = utils.PathJoin(tmpdir, "sock")
15085 logging.debug("Creating temporary socket at %s", tmpsock)
15086 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15087 try:
15088 sock.bind(tmpsock)
15089 sock.listen(1)
15091 # Send details to client
15092 cb(tmpsock)
15094 # Wait for client to connect before continuing
15095 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15096 try:
15097 (conn, _) = sock.accept()
15098 except socket.error, err:
15099 raise errcls("Client didn't connect in time (%s)" % err)
15100 finally:
15101 sock.close()
15102 finally:
15103 # Remove as soon as client is connected
15104 shutil.rmtree(tmpdir)
15106 # Wait for client to close
15107 try:
15108 try:
15109 # pylint: disable=E1101
15110 # Instance of '_socketobject' has no ... member
15111 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15112 conn.recv(1)
15113 except socket.error, err:
15114 raise errcls("Client failed to confirm notification (%s)" % err)
15115 finally:
15116 conn.close()
15118 def _SendNotification(self, test, arg, sockname):
15119 """Sends a notification to the client.
15122 @param test: Test name
15123 @param arg: Test argument (depends on test)
15124 @type sockname: string
15125 @param sockname: Socket path
15128 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15130 def _Notify(self, prereq, test, arg):
15131 """Notifies the client of a test.
15134 @param prereq: Whether this is a prereq-phase test
15136 @param test: Test name
15137 @param arg: Test argument (depends on test)
15140 if prereq:
15141 errcls = errors.OpPrereqError
15142 else:
15143 errcls = errors.OpExecError
15145 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
15146 test, arg),
15147 errcls)
15149 def CheckArguments(self):
15150 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
15151 self.expandnames_calls = 0
15153 def ExpandNames(self):
15154 checkargs_calls = getattr(self, "checkargs_calls", 0)
15155 if checkargs_calls < 1:
15156 raise errors.ProgrammerError("CheckArguments was not called")
15158 self.expandnames_calls += 1
15160 if self.op.notify_waitlock:
15161 self._Notify(True, constants.JQT_EXPANDNAMES, None)
15163 self.LogInfo("Expanding names")
15165 # Get lock on master node (just to get a lock, not for a particular reason)
15166 self.needed_locks = {
15167 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
15170 def Exec(self, feedback_fn):
15171 if self.expandnames_calls < 1:
15172 raise errors.ProgrammerError("ExpandNames was not called")
15174 if self.op.notify_exec:
15175 self._Notify(False, constants.JQT_EXEC, None)
15177 self.LogInfo("Executing")
15179 if self.op.log_messages:
15180 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
15181 for idx, msg in enumerate(self.op.log_messages):
15182 self.LogInfo("Sending log message %s", idx + 1)
15183 feedback_fn(constants.JQT_MSGPREFIX + msg)
15184 # Report how many test messages have been sent
15185 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
15188 raise errors.OpExecError("Opcode failure was requested")
15193 class LUTestAllocator(NoHooksLU):
15194 """Run allocator tests.
15196 This LU runs the allocator tests
15199 def CheckPrereq(self):
15200 """Check prerequisites.
15202 This checks the opcode parameters depending on the direction and mode of
15203 the test.
15205 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
15206 constants.IALLOCATOR_MODE_MULTI_ALLOC):
15207 for attr in ["memory", "disks", "disk_template",
15208 "os", "tags", "nics", "vcpus"]:
15209 if not hasattr(self.op, attr):
15210 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15211 attr, errors.ECODE_INVAL)
15212 iname = self.cfg.ExpandInstanceName(self.op.name)
15213 if iname is not None:
15214 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15215 iname, errors.ECODE_EXISTS)
15216 if not isinstance(self.op.nics, list):
15217 raise errors.OpPrereqError("Invalid parameter 'nics'",
15218 errors.ECODE_INVAL)
15219 if not isinstance(self.op.disks, list):
15220 raise errors.OpPrereqError("Invalid parameter 'disks'",
15221 errors.ECODE_INVAL)
15222 for row in self.op.disks:
15223 if (not isinstance(row, dict) or
15224 constants.IDISK_SIZE not in row or
15225 not isinstance(row[constants.IDISK_SIZE], int) or
15226 constants.IDISK_MODE not in row or
15227 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15228 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15229 " parameter", errors.ECODE_INVAL)
15230 if self.op.hypervisor is None:
15231 self.op.hypervisor = self.cfg.GetHypervisorType()
15232 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15233 fname = _ExpandInstanceName(self.cfg, self.op.name)
15234 self.op.name = fname
15235 self.relocate_from = \
15236 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15237 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15238 constants.IALLOCATOR_MODE_NODE_EVAC):
15239 if not self.op.instances:
15240 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15241 self.op.instances = _GetWantedInstances(self, self.op.instances)
15243 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15244 self.op.mode, errors.ECODE_INVAL)
15246 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15247 if self.op.allocator is None:
15248 raise errors.OpPrereqError("Missing allocator name",
15249 errors.ECODE_INVAL)
15250 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15251 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15252 self.op.direction, errors.ECODE_INVAL)
15254 def Exec(self, feedback_fn):
15255 """Run the allocator test.
15258 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15259 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
15260 memory=self.op.memory,
15261 disks=self.op.disks,
15262 disk_template=self.op.disk_template,
15263 os=self.op.os,
15264 tags=self.op.tags,
15265 nics=self.op.nics,
15266 vcpus=self.op.vcpus,
15267 spindle_use=self.op.spindle_use,
15268 hypervisor=self.op.hypervisor)
15269 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15270 req = iallocator.IAReqRelocate(name=self.op.name,
15271 relocate_from=list(self.relocate_from))
15272 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15273 req = iallocator.IAReqGroupChange(instances=self.op.instances,
15274 target_groups=self.op.target_groups)
15275 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15276 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
15277 evac_mode=self.op.evac_mode)
15278 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
15279 disk_template = self.op.disk_template
15280 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
15281 memory=self.op.memory,
15282 disks=self.op.disks,
15283 disk_template=disk_template,
15284 os=self.op.os,
15285 tags=self.op.tags,
15286 nics=self.op.nics,
15287 vcpus=self.op.vcpus,
15288 spindle_use=self.op.spindle_use,
15289 hypervisor=self.op.hypervisor)
15290 for idx in range(self.op.count)]
15291 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
15292 else:
15293 raise errors.ProgrammerError("Uncaught mode %s in"
15294 " LUTestAllocator.Exec", self.op.mode)
15296 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15297 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15298 result = ial.in_text
15299 else:
15300 ial.Run(self.op.allocator, validate=False)
15301 result = ial.out_text
15302 return result
15305 #: Query type implementations
15306 _QUERY_IMPL = {
15307 constants.QR_CLUSTER: _ClusterQuery,
15308 constants.QR_INSTANCE: _InstanceQuery,
15309 constants.QR_NODE: _NodeQuery,
15310 constants.QR_GROUP: _GroupQuery,
15311 constants.QR_OS: _OsQuery,
15312 constants.QR_EXPORT: _ExportQuery,
15313 }
15315 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15318 def _GetQueryImplementation(name):
15319 """Returns the implemtnation for a query type.
15321 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15325 return _QUERY_IMPL[name]
15327 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15328 errors.ECODE_INVAL)
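# Usage sketch (assumed call site for illustration): query LUs look up the
# implementation class for the resource they were asked about, e.g.
#   impl = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
# and an unknown resource name is reported back as an invalid parameter.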