# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
import re
import logging
import copy
import itertools

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import rpc
from ganeti import runtime
from ganeti import pathutils
from ganeti import vcluster
from ganeti.masterd import iallocator

import ganeti.masterd.instance # pylint: disable=W0611
INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))
81 """Data container for LU results with jobs.
83 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
84 by L{mcpu._ProcessResult}. The latter will then submit the jobs
85 contained in the C{jobs} attribute and include the job IDs in the opcode
89 def __init__(self, jobs, **kwargs):
90 """Initializes this class.
92 Additional return values can be specified as keyword arguments.
94 @type jobs: list of lists of L{opcode.OpCode}
95 @param jobs: A list of lists of opcode objects
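
# Example (illustrative sketch, not part of the original module): an LU's
# Exec method can queue follow-up jobs while also returning extra values;
# "op1"/"op2"/"op3" stand for arbitrary opcode instances:
#
#   def Exec(self, feedback_fn):
#     ...
#     return ResultWithJobs([[op1, op2], [op3]], other_value=42)
#
# mcpu then submits [op1, op2] and [op3] as two separate jobs and includes
# their job IDs in the opcode result.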
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True
  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()
  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level, omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError
  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """
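
  # Illustrative example (not part of the original class): a typical
  # DeclareLocks override in an instance LU defers node locking until the
  # instance locks are held, using the _LockInstancesNodes helper defined
  # further below:
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()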
  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass
  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError
  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"; that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError
  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. "No nodes" should be expressed as
      an empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the 'unused argument' and 'could
    # be a function' warnings
    # pylint: disable=W0613,R0201
    return lu_result
  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
443 """Tasklet base class.
445 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
446 they can mix legacy code with tasklets. Locking needs to be done in the LU,
447 tasklets know nothing about locks.
449 Subclasses must follow these rules:
450 - Implement CheckPrereq
454 def __init__(self, lu):
461 def CheckPrereq(self):
462 """Check prerequisites for this tasklets.
464 This method should check whether the prerequisites for the execution of
465 this tasklet are fulfilled. It can do internode communication, but it
466 should be idempotent - no cluster or system changes are allowed.
468 The method should raise errors.OpPrereqError in case something is not
469 fulfilled. Its return value is ignored.
471 This method should also update all parameters to their canonical form if it
472 hasn't been done before.
477 def Exec(self, feedback_fn):
478 """Execute the tasklet.
480 This method should implement the actual work. It should raise
481 errors.OpExecError for failures that are somewhat dealt with in code, or
485 raise NotImplementedError
489 """Base for query utility classes.
492 #: Attribute holding field definitions
498 def __init__(self, qfilter, fields, use_locking):
499 """Initializes this class.
502 self.use_locking = use_locking
504 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
505 namefield=self.SORT_FIELD)
506 self.requested_data = self.query.RequestedData()
507 self.names = self.query.RequestedNames()
509 # Sort only if no names were requested
510 self.sort_by_name = not self.names
512 self.do_locking = None
515 def _GetNames(self, lu, all_names, lock_level):
516 """Helper function to determine names asked for in the query.
520 names = lu.owned_locks(lock_level)
524 if self.wanted == locking.ALL_SET:
525 assert not self.names
526 # caller didn't specify names, so ordering is not important
527 return utils.NiceSort(names)
529 # caller specified names and we must keep the same order
531 assert not self.do_locking or lu.glm.is_owned(lock_level)
533 missing = set(self.wanted).difference(names)
535 raise errors.OpExecError("Some items were removed before retrieving"
536 " their data: %s" % missing)
538 # Return expanded names
541 def ExpandNames(self, lu):
542 """Expand names for this query.
544 See L{LogicalUnit.ExpandNames}.
547 raise NotImplementedError()
549 def DeclareLocks(self, lu, level):
550 """Declare locks for this query.
552 See L{LogicalUnit.DeclareLocks}.
555 raise NotImplementedError()
557 def _GetQueryData(self, lu):
558 """Collects all data for this query.
560 @return: Query data object
563 raise NotImplementedError()
565 def NewStyleQuery(self, lu):
566 """Collect data and execute query.
569 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
570 sort_by_name=self.sort_by_name)
572 def OldStyleQuery(self, lu):
573 """Collect data and execute query.
576 return self.query.OldStyleQuery(self._GetQueryData(lu),
577 sort_by_name=self.sort_by_name)
581 """Returns a dict declaring all lock levels shared.
584 return dict.fromkeys(locking.LEVELS, 1)
def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of its children!)
  @param cfg: The config object
  @returns: The annotated disk copies
  @see L{rpc.AnnotateDiskParams}

  """
  return rpc.AnnotateDiskParams(instance.disk_template, devs,
                                cfg.GetInstanceDiskParams(instance))
def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
                              cur_group_uuid):
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  """
  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
                             primary_only=False):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  @type primary_only: boolean
  @param primary_only: Whether to check node groups for only the primary node

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups"
                               " are '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances
def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
def _CopyLockList(names):
  """Makes a copy of a list of lock names.

  Handles L{locking.ALL_SET} correctly.

  """
  if names == locking.ALL_SET:
    return locking.ALL_SET
  else:
    return names[:]
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())
def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted
def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
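
# Illustrative example (not part of the original module) of the merge
# semantics implemented above:
#
#   >>> old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/sda1"}
#   >>> upd = {"root_path": constants.VALUE_DEFAULT, "serial_console": True}
#   >>> _GetUpdatedParams(old, upd)
#   {'kernel_path': '/boot/vmlinuz', 'serial_console': True}
#
# VALUE_DEFAULT deletes the key (so the cluster default applies again), any
# other value overrides or extends the old dict, and old_params itself is
# left unmodified thanks to the deepcopy.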
def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  """
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
                                 errors.ECODE_INVAL)
    if key in constants.IPOLICY_ISPECS:
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_none=use_none,
                                       use_default=use_default)
    else:
      if (not value or value == [constants.VALUE_DEFAULT] or
          value == constants.VALUE_DEFAULT):
        if group_policy:
          del ipolicy[key]
        else:
          raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster" % key,
                                     errors.ECODE_INVAL)
      else:
        if key in constants.IPOLICY_PARAMETERS:
          # FIXME: we assume all such values are float
          try:
            ipolicy[key] = float(value)
          except (TypeError, ValueError), err:
            raise errors.OpPrereqError("Invalid value for attribute"
                                       " '%s': '%s', error: %s" %
                                       (key, value, err), errors.ECODE_INVAL)
        else:
          # FIXME: we assume all others are lists; this should be redone
          ipolicy[key] = list(value)
  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
  except errors.ConfigurationError, err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                               errors.ECODE_INVAL)
  return ipolicy
def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values

  """
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret
def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None
def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None
def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
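
# Illustrative usage (not part of the original module; attribute name
# hypothetical): after a failover an LU might keep only the node lock it
# still needs and release the rest, e.g.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.target_node])
#
# which releases every owned node lock except the one named in "keep".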
def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name
    as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
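
# Illustrative example (names hypothetical, not from the original module):
# two instances, each with one LV on its primary node, map to
#
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
#    ("node2.example.com", "xenvg/disk0"): "inst2.example.com"}
#
# allowing callers to attribute a volume seen on a node to the instance
# owning it.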
def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except Exception, err: # pylint: disable=W0703
    lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)
def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    if not lu.cfg.GetNodeInfo(pnode).offline:
      ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
      ins_l.Raise("Can't contact node %s for instance information" % pnode,
                  prereq=True, ecode=errors.ECODE_ENVIRON)
      if instance.name in ins_l.payload:
        raise errors.OpPrereqError("Instance %s is running, %s" %
                                   (instance.name, msg), errors.ECODE_STATE)
    else:
      lu.LogWarning("Primary node offline, ignoring check that instance"
                    " is down")
def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
      not just 'disk')
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or element not meeting the criteria

  """
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    if qualifier:
      fqn = "%s/%s" % (name, qualifier)
    else:
      fqn = name
    return ("%s value %s is not in range [%s, %s]" %
            (fqn, value, min_v, max_v))
  return None
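
# Illustrative example (not part of the original module; assumes the usual
# value "memory-size" for constants.ISPEC_MEM_SIZE): a policy allowing
# 128..32768 MB of memory rejects a 64 MB value:
#
#   >>> ipolicy = {constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
#   ...            constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 32768}}
#   >>> _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, ipolicy, 64)
#   'memory-size value 64 is not in range [128, 32768]'
#
# None and constants.VALUE_AUTO always pass the check.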
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]

  return filter(None,
                (_compute_fn(name, qualifier, ipolicy, value)
                 for (name, qualifier, value) in test_settings))
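
# Illustrative usage (not part of the original module): checking a candidate
# instance shape directly against a policy; a non-empty result lists the
# human-readable violations:
#
#   violations = _ComputeIPolicySpecViolation(ipolicy, mem_size=512,
#                                             cpu_count=2, disk_count=1,
#                                             nic_count=1, disk_sizes=[1024],
#                                             spindle_use=1)
#   if violations:
#     raise errors.OpPrereqError("Instance violates policy: %s" %
#                                utils.CommaJoin(violations),
#                                errors.ECODE_INVAL)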
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)
def _ComputeIPolicyInstanceSpecViolation(
    ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meet the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)
def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)
def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violate the new ipolicy but
      did not before

  """
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))
def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
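
# Illustrative example (names hypothetical): expansion turns a shortened
# name into the canonical one stored in the configuration, or fails:
#
#   >>> _ExpandInstanceName(cfg, "web1")
#   'web1.example.com'
#   >>> _ExpandInstanceName(cfg, "nosuch")
#   Traceback (most recent call last):
#     ...
#   OpPrereqError: Instance 'nosuch' not known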
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
    }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
    }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))
def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances that violate a given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
                                                                instance))
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
  then the LU's opcode's iallocator slot is filled with the cluster-wide
  default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  ialloc = getattr(lu.op, iallocator_slot, None)

  if node == []:
    node = None

  if node is not None and ialloc is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif ((node is None and ialloc is None) or
        ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)
def _GetDefaultIAllocator(cfg, ialloc):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type ialloc: string or None
  @param ialloc: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not ialloc:
    # Use default iallocator
    ialloc = cfg.GetDefaultIAllocator()

  if not ialloc:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return ialloc
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name
def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data
class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors: # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if cond:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
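
# Illustrative example (not part of the original class; assumes a CV_*
# constant of the shape ("cluster", "ECLUSTERCFG", ...)): with
# op.error_codes set, _Error renders a machine-parseable line like
#
#   ERROR:ECLUSTERCFG:cluster:None:Configuration mismatch
#
# whereas the default human-readable form is
#
#   ERROR: cluster: Configuration mismatch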
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend(
      [opcodes.OpClusterVerifyGroup(group_name=group,
                                    ignore_errors=self.op.ignore_errors,
                                    depends=depends_fn())]
      for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
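
# Illustrative result (not part of the original module): on a cluster with
# two node groups and no group_name restriction, Exec queues one
# OpClusterVerifyConfig job plus one OpClusterVerifyGroup job per group;
# each group job carries a relative dependency (-len(jobs), []) pointing
# back at the global configuration check, so it only runs after that check
# has completed.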
1938 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1939 """Verifies the cluster config.
1944 def _VerifyHVP(self, hvp_data):
1945 """Verifies locally the syntax of the hypervisor parameters.
1948 for item, hv_name, hv_params in hvp_data:
1949 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1952 hv_class = hypervisor.GetHypervisor(hv_name)
1953 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1954 hv_class.CheckParameterSyntax(hv_params)
1955 except errors.GenericError, err:
1956 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1958 def ExpandNames(self):
1959 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1960 self.share_locks = _ShareAll()
1962 def CheckPrereq(self):
1963 """Check prerequisites.
1966 # Retrieve all information
1967 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1968 self.all_node_info = self.cfg.GetAllNodesInfo()
1969 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1971 def Exec(self, feedback_fn):
1972 """Verify integrity of cluster, performing various test on nodes.
1976 self._feedback_fn = feedback_fn
1978 feedback_fn("* Verifying cluster config")
1980 for msg in self.cfg.VerifyConfig():
1981 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1983 feedback_fn("* Verifying cluster certificate files")
1985 for cert_filename in pathutils.ALL_CERT_FILES:
1986 (errcode, msg) = _VerifyCertificate(cert_filename)
1987 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1989 feedback_fn("* Verifying hypervisor parameters")
1991 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1992 self.all_inst_info.values()))
1994 feedback_fn("* Verifying all nodes belong to an existing group")
1996 # We do this verification here because, should this bogus circumstance
1997 # occur, it would never be caught by VerifyGroup, which only acts on
1998 # nodes/instances reachable from existing node groups.
2000 dangling_nodes = set(node.name for node in self.all_node_info.values()
2001 if node.group not in self.all_group_info)
2003 dangling_instances = {}
2004 no_node_instances = []
2006 for inst in self.all_inst_info.values():
2007 if inst.primary_node in dangling_nodes:
2008 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2009 elif inst.primary_node not in self.all_node_info:
2010 no_node_instances.append(inst.name)
2015 utils.CommaJoin(dangling_instances.get(node.name,
2017 for node in dangling_nodes]
2019 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2021 "the following nodes (and their instances) belong to a non"
2022 " existing group: %s", utils.CommaJoin(pretty_dangling))
2024 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2026 "the following instances have a non-existing primary-node:"
2027 " %s", utils.CommaJoin(no_node_instances))
2032 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2033 """Verifies the status of a node group.
2036 HPATH = "cluster-verify"
2037 HTYPE = constants.HTYPE_CLUSTER
2040 _HOOKS_INDENT_RE = re.compile("^", re.M)
2042 class NodeImage(object):
2043 """A class representing the logical and physical status of a node.
2046 @ivar name: the node name to which this object refers
2047 @ivar volumes: a structure as returned from
2048 L{ganeti.backend.GetVolumeList} (runtime)
2049 @ivar instances: a list of running instances (runtime)
2050 @ivar pinst: list of configured primary instances (config)
2051 @ivar sinst: list of configured secondary instances (config)
2052 @ivar sbp: dictionary of {primary-node: list of instances} for all
2053 instances for which this node is secondary (config)
2054 @ivar mfree: free memory, as reported by hypervisor (runtime)
2055 @ivar dfree: free disk, as reported by the node (runtime)
2056 @ivar offline: the offline status (config)
2057 @type rpc_fail: boolean
2058 @ivar rpc_fail: whether the RPC verify call failed (overall,
2059 not whether the individual keys were correct) (runtime)
2060 @type lvm_fail: boolean
2061 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2062 @type hyp_fail: boolean
2063 @ivar hyp_fail: whether the RPC call didn't return the instance list
2064 @type ghost: boolean
2065 @ivar ghost: whether this is a known node or not (config)
2066 @type os_fail: boolean
2067 @ivar os_fail: whether the RPC call didn't return valid OS data
2069 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2070 @type vm_capable: boolean
2071 @ivar vm_capable: whether the node can host instances
2074 def __init__(self, offline=False, name=None, vm_capable=True):
2083 self.offline = offline
2084 self.vm_capable = vm_capable
2085 self.rpc_fail = False
2086 self.lvm_fail = False
2087 self.hyp_fail = False
2089 self.os_fail = False
2092 def ExpandNames(self):
2093 # This raises errors.OpPrereqError on its own:
2094 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2096 # Get instances in node group; this is unsafe and needs verification later
2098 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2100 self.needed_locks = {
2101 locking.LEVEL_INSTANCE: inst_names,
2102 locking.LEVEL_NODEGROUP: [self.group_uuid],
2103 locking.LEVEL_NODE: [],
2106 self.share_locks = _ShareAll()
2108 def DeclareLocks(self, level):
2109 if level == locking.LEVEL_NODE:
2110 # Get members of node group; this is unsafe and needs verification later
2111 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2113 all_inst_info = self.cfg.GetAllInstancesInfo()
2115 # In Exec(), we warn about mirrored instances that have primary and
2116 # secondary living in separate node groups. To fully verify that
2117 # volumes for these instances are healthy, we will need to do an
2118 extra call to their secondaries. We ensure here those nodes will be locked.
2120 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2121 # Important: access only the instances whose lock is owned
2122 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2123 nodes.update(all_inst_info[inst].secondary_nodes)
2125 self.needed_locks[locking.LEVEL_NODE] = nodes
2127 def CheckPrereq(self):
2128 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2129 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2131 group_nodes = set(self.group_info.members)
2133 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2136 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2138 unlocked_instances = \
2139 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2142 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2143 utils.CommaJoin(unlocked_nodes),
2146 if unlocked_instances:
2147 raise errors.OpPrereqError("Missing lock for instances: %s" %
2148 utils.CommaJoin(unlocked_instances),
2151 self.all_node_info = self.cfg.GetAllNodesInfo()
2152 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2154 self.my_node_names = utils.NiceSort(group_nodes)
2155 self.my_inst_names = utils.NiceSort(group_instances)
2157 self.my_node_info = dict((name, self.all_node_info[name])
2158 for name in self.my_node_names)
2160 self.my_inst_info = dict((name, self.all_inst_info[name])
2161 for name in self.my_inst_names)
2163 # We detect here the nodes that will need the extra RPC calls for verifying
2164 # split LV volumes; they should be locked.
2165 extra_lv_nodes = set()
2167 for inst in self.my_inst_info.values():
2168 if inst.disk_template in constants.DTS_INT_MIRROR:
2169 for nname in inst.all_nodes:
2170 if self.all_node_info[nname].group != self.group_uuid:
2171 extra_lv_nodes.add(nname)
2173 unlocked_lv_nodes = \
2174 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2176 if unlocked_lv_nodes:
2177 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2178 utils.CommaJoin(unlocked_lv_nodes),
2180 self.extra_lv_nodes = list(extra_lv_nodes)
2182 def _VerifyNode(self, ninfo, nresult):
2183 """Perform some basic validation on data returned from a node.
2185 - check the result data structure is well formed and has all the mandatory fields
2187 - check ganeti version
2189 @type ninfo: L{objects.Node}
2190 @param ninfo: the node to check
2191 @param nresult: the results from the node
2193 @return: whether overall this call was successful (and we can expect
2194 reasonable values in the response)
2198 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2200 # main result, nresult should be a non-empty dict
2201 test = not nresult or not isinstance(nresult, dict)
2202 _ErrorIf(test, constants.CV_ENODERPC, node,
2203 "unable to verify node: no data returned")
2207 # compares ganeti version
2208 local_version = constants.PROTOCOL_VERSION
2209 remote_version = nresult.get("version", None)
2210 test = not (remote_version and
2211 isinstance(remote_version, (list, tuple)) and
2212 len(remote_version) == 2)
2213 _ErrorIf(test, constants.CV_ENODERPC, node,
2214 "connection to node returned invalid data")
2218 test = local_version != remote_version[0]
2219 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2220 "incompatible protocol versions: master %s,"
2221 " node %s", local_version, remote_version[0])
2225 # node seems compatible, we can actually try to look into its results
2227 # full package version
2228 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2229 constants.CV_ENODEVERSION, node,
2230 "software version mismatch: master %s, node %s",
2231 constants.RELEASE_VERSION, remote_version[1],
2232 code=self.ETYPE_WARNING)
2234 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2235 if ninfo.vm_capable and isinstance(hyp_result, dict):
2236 for hv_name, hv_result in hyp_result.iteritems():
2237 test = hv_result is not None
2238 _ErrorIf(test, constants.CV_ENODEHV, node,
2239 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2241 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2242 if ninfo.vm_capable and isinstance(hvp_result, list):
2243 for item, hv_name, hv_result in hvp_result:
2244 _ErrorIf(True, constants.CV_ENODEHV, node,
2245 "hypervisor %s parameter verify failure (source %s): %s",
2246 hv_name, item, hv_result)
2248 test = nresult.get(constants.NV_NODESETUP,
2249 ["Missing NODESETUP results"])
2250 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2255 def _VerifyNodeTime(self, ninfo, nresult,
2256 nvinfo_starttime, nvinfo_endtime):
2257 """Check the node time.
2259 @type ninfo: L{objects.Node}
2260 @param ninfo: the node to check
2261 @param nresult: the remote results for the node
2262 @param nvinfo_starttime: the start time of the RPC call
2263 @param nvinfo_endtime: the end time of the RPC call
2267 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2269 ntime = nresult.get(constants.NV_TIME, None)
2271 ntime_merged = utils.MergeTime(ntime)
2272 except (ValueError, TypeError):
2273 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2276 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2277 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2278 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2279 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2283 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2284 "Node time diverges by at least %s from master node time",
2287 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2288 """Check the node LVM results.
2290 @type ninfo: L{objects.Node}
2291 @param ninfo: the node to check
2292 @param nresult: the remote results for the node
2293 @param vg_name: the configured VG name
2300 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2302 # checks vg existence and size > 20G
2303 vglist = nresult.get(constants.NV_VGLIST, None)
2305 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2307 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2308 constants.MIN_VG_SIZE)
2309 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2312 pvlist = nresult.get(constants.NV_PVLIST, None)
2313 test = pvlist is None
2314 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2316 # check that ':' is not present in PV names, since it's a
2317 # special character for lvcreate (denotes the range of PEs to use on the PV)
2319 for _, pvname, owner_vg in pvlist:
2320 test = ":" in pvname
2321 _ErrorIf(test, constants.CV_ENODELVM, node,
2322 "Invalid character ':' in PV '%s' of VG '%s'",
2325 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2326 """Check the node bridges.
2328 @type ninfo: L{objects.Node}
2329 @param ninfo: the node to check
2330 @param nresult: the remote results for the node
2331 @param bridges: the expected list of bridges
2338 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2340 missing = nresult.get(constants.NV_BRIDGES, None)
2341 test = not isinstance(missing, list)
2342 _ErrorIf(test, constants.CV_ENODENET, node,
2343 "did not return valid bridge information")
2345 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2346 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2348 def _VerifyNodeUserScripts(self, ninfo, nresult):
2349 """Check the results of user scripts presence and executability on the node
2351 @type ninfo: L{objects.Node}
2352 @param ninfo: the node to check
2353 @param nresult: the remote results for the node
2358 test = constants.NV_USERSCRIPTS not in nresult
2359 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2360 "did not return user scripts information")
2362 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2364 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2365 "user scripts not present or not executable: %s" %
2366 utils.CommaJoin(sorted(broken_scripts)))
2368 def _VerifyNodeNetwork(self, ninfo, nresult):
2369 """Check the node network connectivity results.
2371 @type ninfo: L{objects.Node}
2372 @param ninfo: the node to check
2373 @param nresult: the remote results for the node
2377 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2379 test = constants.NV_NODELIST not in nresult
2380 _ErrorIf(test, constants.CV_ENODESSH, node,
2381 "node hasn't returned node ssh connectivity data")
2383 if nresult[constants.NV_NODELIST]:
2384 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2385 _ErrorIf(True, constants.CV_ENODESSH, node,
2386 "ssh communication with node '%s': %s", a_node, a_msg)
2388 test = constants.NV_NODENETTEST not in nresult
2389 _ErrorIf(test, constants.CV_ENODENET, node,
2390 "node hasn't returned node tcp connectivity data")
2392 if nresult[constants.NV_NODENETTEST]:
2393 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2395 _ErrorIf(True, constants.CV_ENODENET, node,
2396 "tcp communication with node '%s': %s",
2397 anode, nresult[constants.NV_NODENETTEST][anode])
2399 test = constants.NV_MASTERIP not in nresult
2400 _ErrorIf(test, constants.CV_ENODENET, node,
2401 "node hasn't returned node master IP reachability data")
2403 if not nresult[constants.NV_MASTERIP]:
2404 if node == self.master_node:
2405 msg = "the master node cannot reach the master IP (not configured?)"
2407 msg = "cannot reach the master IP"
2408 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2410 def _VerifyInstance(self, instance, instanceconfig, node_image,
2412 """Verify an instance.
2414 This function checks whether the required block devices are
2415 available on the instance's nodes.
2418 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2419 node_current = instanceconfig.primary_node
2421 node_vol_should = {}
2422 instanceconfig.MapLVsByNode(node_vol_should)
2424 cluster = self.cfg.GetClusterInfo()
2425 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2427 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2428 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2430 for node in node_vol_should:
2431 n_img = node_image[node]
2432 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2433 # ignore missing volumes on offline or broken nodes
2435 for volume in node_vol_should[node]:
2436 test = volume not in n_img.volumes
2437 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2438 "volume %s missing on node %s", volume, node)
2440 if instanceconfig.admin_state == constants.ADMINST_UP:
2441 pri_img = node_image[node_current]
2442 test = instance not in pri_img.instances and not pri_img.offline
2443 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2444 "instance not running on its primary node %s",
2447 diskdata = [(nname, success, status, idx)
2448 for (nname, disks) in diskstatus.items()
2449 for idx, (success, status) in enumerate(disks)]
2451 for nname, success, bdev_status, idx in diskdata:
2452 # the 'ghost node' construction in Exec() ensures that we have a node here
2454 snode = node_image[nname]
2455 bad_snode = snode.ghost or snode.offline
2456 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2457 not success and not bad_snode,
2458 constants.CV_EINSTANCEFAULTYDISK, instance,
2459 "couldn't retrieve status for disk/%s on %s: %s",
2460 idx, nname, bdev_status)
2461 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2462 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2463 constants.CV_EINSTANCEFAULTYDISK, instance,
2464 "disk/%s on %s is faulty", idx, nname)
2466 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2467 """Verify if there are any unknown volumes in the cluster.
2469 The .os, .swap and backup volumes are ignored. All other volumes are
2470 reported as unknown.
2472 @type reserved: L{ganeti.utils.FieldSet}
2473 @param reserved: a FieldSet of reserved volume names
2476 for node, n_img in node_image.items():
2477 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2478 self.all_node_info[node].group != self.group_uuid):
2479 # skip non-healthy nodes
2481 for volume in n_img.volumes:
2482 test = ((node not in node_vol_should or
2483 volume not in node_vol_should[node]) and
2484 not reserved.Matches(volume))
2485 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2486 "volume %s is unknown", volume)
2488 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2489 """Verify N+1 Memory Resilience.
2491 Check that if one single node dies we can still start all the
2492 instances it was primary for.
2495 cluster_info = self.cfg.GetClusterInfo()
2496 for node, n_img in node_image.items():
2497 # This code checks that every node which is now listed as
2498 # secondary has enough memory to host all instances it is
2499 # supposed to host, should a single other node in the cluster fail.
2500 # FIXME: not ready for failover to an arbitrary node
2501 # FIXME: does not support file-backed instances
2502 # WARNING: we currently take into account down instances as well
2503 # as up ones, considering that even if they're down someone
2504 # might want to start them even in the event of a node failure.
2505 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2506 # we're skipping nodes marked offline and nodes in other groups from
2507 # the N+1 warning, since most likely we don't have good memory
2508 # information from them; we already list instances living on such
2509 # nodes, and that's enough warning
2511 #TODO(dynmem): also consider ballooning out other instances
2512 for prinode, instances in n_img.sbp.items():
2514 for instance in instances:
2515 bep = cluster_info.FillBE(instance_cfg[instance])
2516 if bep[constants.BE_AUTO_BALANCE]:
2517 needed_mem += bep[constants.BE_MINMEM]
2518 test = n_img.mfree < needed_mem
2519 self._ErrorIf(test, constants.CV_ENODEN1, node,
2520 "not enough memory to accomodate instance failovers"
2521 " should node %s fail (%dMiB needed, %dMiB available)",
2522 prinode, needed_mem, n_img.mfree)
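# Sketch of the N+1 computation above with plain data: for every
# (primary node -> instances) entry in a node's secondary map, sum the
# minimum memory of the auto-balanced instances and compare the total
# with the node's free memory. All names are hypothetical stand-ins.
def _ExampleNPlusOne(mfree, sbp, inst_minmem, inst_auto_balance):
  failures = []
  for (prinode, instances) in sbp.items():
    needed = sum(inst_minmem[inst] for inst in instances
                 if inst_auto_balance.get(inst, True))
    if mfree < needed:
      failures.append((prinode, needed, mfree))
  return failures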
2525 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2526 (files_all, files_opt, files_mc, files_vm)):
2527 """Verifies file checksums collected from all nodes.
2529 @param errorif: Callback for reporting errors
2530 @param nodeinfo: List of L{objects.Node} objects
2531 @param master_node: Name of master node
2532 @param all_nvinfo: RPC results
2535 # Define functions determining which nodes to consider for a file
2538 (files_mc, lambda node: (node.master_candidate or
2539 node.name == master_node)),
2540 (files_vm, lambda node: node.vm_capable),
2543 # Build mapping from filename to list of nodes which should have the file
2545 for (files, fn) in files2nodefn:
2547 filenodes = nodeinfo
2549 filenodes = filter(fn, nodeinfo)
2550 nodefiles.update((filename,
2551 frozenset(map(operator.attrgetter("name"), filenodes)))
2552 for filename in files)
2554 assert set(nodefiles) == (files_all | files_mc | files_vm)
2556 fileinfo = dict((filename, {}) for filename in nodefiles)
2557 ignore_nodes = set()
2559 for node in nodeinfo:
2561 ignore_nodes.add(node.name)
2564 nresult = all_nvinfo[node.name]
2566 if nresult.fail_msg or not nresult.payload:
2569 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2570 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2571 for (key, value) in fingerprints.items())
2574 test = not (node_files and isinstance(node_files, dict))
2575 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2576 "Node did not return file checksum data")
2578 ignore_nodes.add(node.name)
2581 # Build per-checksum mapping from filename to nodes having it
2582 for (filename, checksum) in node_files.items():
2583 assert filename in nodefiles
2584 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2586 for (filename, checksums) in fileinfo.items():
2587 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2589 # Nodes having the file
2590 with_file = frozenset(node_name
2591 for nodes in fileinfo[filename].values()
2592 for node_name in nodes) - ignore_nodes
2594 expected_nodes = nodefiles[filename] - ignore_nodes
2596 # Nodes missing file
2597 missing_file = expected_nodes - with_file
2599 if filename in files_opt:
2601 errorif(missing_file and missing_file != expected_nodes,
2602 constants.CV_ECLUSTERFILECHECK, None,
2603 "File %s is optional, but it must exist on all or no"
2604 " nodes (not found on %s)",
2605 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2607 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2608 "File %s is missing from node(s) %s", filename,
2609 utils.CommaJoin(utils.NiceSort(missing_file)))
2611 # Warn if a node has a file it shouldn't
2612 unexpected = with_file - expected_nodes
2614 constants.CV_ECLUSTERFILECHECK, None,
2615 "File %s should not exist on node(s) %s",
2616 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2618 # See if there are multiple versions of the file
2619 test = len(checksums) > 1
2621 variants = ["variant %s on %s" %
2622 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2623 for (idx, (checksum, nodes)) in
2624 enumerate(sorted(checksums.items()))]
2628 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2629 "File %s found with %s different checksums (%s)",
2630 filename, len(checksums), "; ".join(variants))
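# Sketch of the per-file consensus check above: given a mapping of
# filename -> {checksum -> set(nodes)} and the expected node set per
# file, classify nodes as missing the file, holding an unexpected copy,
# or disagreeing on content. The optional-file special case is omitted.
def _ExampleFileConsensus(fileinfo, expected_nodes):
  problems = []
  for (filename, checksums) in fileinfo.items():
    with_file = set()
    for nodes in checksums.values():
      with_file.update(nodes)
    expected = expected_nodes[filename]
    if expected - with_file:
      problems.append((filename, "missing", expected - with_file))
    if with_file - expected:
      problems.append((filename, "unexpected", with_file - expected))
    if len(checksums) > 1:
      problems.append((filename, "variants", len(checksums)))
  return problems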
2632 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2634 """Verifies and the node DRBD status.
2636 @type ninfo: L{objects.Node}
2637 @param ninfo: the node to check
2638 @param nresult: the remote results for the node
2639 @param instanceinfo: the dict of instances
2640 @param drbd_helper: the configured DRBD usermode helper
2641 @param drbd_map: the DRBD map as returned by
2642 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2646 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2649 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2650 test = (helper_result is None)
2651 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2652 "no drbd usermode helper returned")
2654 status, payload = helper_result
2656 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2657 "drbd usermode helper check unsuccessful: %s", payload)
2658 test = status and (payload != drbd_helper)
2659 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2660 "wrong drbd usermode helper: %s", payload)
2662 # compute the DRBD minors
2664 for minor, instance in drbd_map[node].items():
2665 test = instance not in instanceinfo
2666 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2667 "ghost instance '%s' in temporary DRBD map", instance)
2668 # ghost instance should not be running, but otherwise we
2669 # don't give double warnings (both ghost instance and
2670 # unallocated minor in use)
2672 node_drbd[minor] = (instance, False)
2674 instance = instanceinfo[instance]
2675 node_drbd[minor] = (instance.name,
2676 instance.admin_state == constants.ADMINST_UP)
2678 # and now check them
2679 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2680 test = not isinstance(used_minors, (tuple, list))
2681 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2682 "cannot parse drbd status file: %s", str(used_minors))
2684 # we cannot check drbd status
2687 for minor, (iname, must_exist) in node_drbd.items():
2688 test = minor not in used_minors and must_exist
2689 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2690 "drbd minor %d of instance %s is not active", minor, iname)
2691 for minor in used_minors:
2692 test = minor not in node_drbd
2693 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2694 "unallocated drbd minor %d is in use", minor)
2696 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2697 """Builds the node OS structures.
2699 @type ninfo: L{objects.Node}
2700 @param ninfo: the node to check
2701 @param nresult: the remote results for the node
2702 @param nimg: the node image object
2706 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2708 remote_os = nresult.get(constants.NV_OSLIST, None)
2709 test = (not isinstance(remote_os, list) or
2710 not compat.all(isinstance(v, list) and len(v) == 7
2711 for v in remote_os))
2713 _ErrorIf(test, constants.CV_ENODEOS, node,
2714 "node hasn't returned valid OS data")
2723 for (name, os_path, status, diagnose,
2724 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2726 if name not in os_dict:
2729 # parameters is a list of lists instead of list of tuples due to
2730 # JSON lacking a real tuple type, fix it:
2731 parameters = [tuple(v) for v in parameters]
2732 os_dict[name].append((os_path, status, diagnose,
2733 set(variants), set(parameters), set(api_ver)))
2735 nimg.oslist = os_dict
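# Sketch of the OS-list normalisation above: each remote entry is a
# 7-element list; entries are grouped by OS name and the parameter
# lists (JSON has no tuple type) are converted back to tuples.
def _ExampleBuildOsDict(remote_os):
  os_dict = {}
  for (name, path, status, diag, variants, params, api_ver) in remote_os:
    entry = (path, status, diag, set(variants),
             set(tuple(v) for v in params), set(api_ver))
    os_dict.setdefault(name, []).append(entry)
  return os_dict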
2737 def _VerifyNodeOS(self, ninfo, nimg, base):
2738 """Verifies the node OS list.
2740 @type ninfo: L{objects.Node}
2741 @param ninfo: the node to check
2742 @param nimg: the node image object
2743 @param base: the 'template' node we match against (e.g. from the master)
2747 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2749 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2751 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2752 for os_name, os_data in nimg.oslist.items():
2753 assert os_data, "Empty OS status for OS %s?!" % os_name
2754 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2755 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2756 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2757 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2758 "OS '%s' has multiple entries (first one shadows the rest): %s",
2759 os_name, utils.CommaJoin([v[0] for v in os_data]))
2760 # comparisons with the 'base' image
2761 test = os_name not in base.oslist
2762 _ErrorIf(test, constants.CV_ENODEOS, node,
2763 "Extra OS %s not present on reference node (%s)",
2767 assert base.oslist[os_name], "Base node has empty OS status?"
2768 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2770 # base OS is invalid, skipping
2772 for kind, a, b in [("API version", f_api, b_api),
2773 ("variants list", f_var, b_var),
2774 ("parameters", beautify_params(f_param),
2775 beautify_params(b_param))]:
2776 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2777 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2778 kind, os_name, base.name,
2779 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2781 # check any missing OSes
2782 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2783 _ErrorIf(missing, constants.CV_ENODEOS, node,
2784 "OSes present on reference node %s but missing on this node: %s",
2785 base.name, utils.CommaJoin(missing))
2787 def _VerifyOob(self, ninfo, nresult):
2788 """Verifies out of band functionality of a node.
2790 @type ninfo: L{objects.Node}
2791 @param ninfo: the node to check
2792 @param nresult: the remote results for the node
2796 # We just have to verify the paths on master and/or master candidates
2797 # as the oob helper is invoked on the master
2798 if ((ninfo.master_candidate or ninfo.master_capable) and
2799 constants.NV_OOB_PATHS in nresult):
2800 for path_result in nresult[constants.NV_OOB_PATHS]:
2801 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2803 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2804 """Verifies and updates the node volume data.
2806 This function will update a L{NodeImage}'s internal structures
2807 with data from the remote call.
2809 @type ninfo: L{objects.Node}
2810 @param ninfo: the node to check
2811 @param nresult: the remote results for the node
2812 @param nimg: the node image object
2813 @param vg_name: the configured VG name
2817 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2819 nimg.lvm_fail = True
2820 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2823 elif isinstance(lvdata, basestring):
2824 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2825 utils.SafeEncode(lvdata))
2826 elif not isinstance(lvdata, dict):
2827 _ErrorIf(True, constants.CV_ENODELVM, node,
2828 "rpc call to node failed (lvlist)")
2830 nimg.volumes = lvdata
2831 nimg.lvm_fail = False
2833 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2834 """Verifies and updates the node instance list.
2836 If the listing was successful, then updates this node's instance
2837 list. Otherwise, it marks the RPC call as failed for the instance list.
2840 @type ninfo: L{objects.Node}
2841 @param ninfo: the node to check
2842 @param nresult: the remote results for the node
2843 @param nimg: the node image object
2846 idata = nresult.get(constants.NV_INSTANCELIST, None)
2847 test = not isinstance(idata, list)
2848 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2849 "rpc call to node failed (instancelist): %s",
2850 utils.SafeEncode(str(idata)))
2852 nimg.hyp_fail = True
2854 nimg.instances = idata
2856 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2857 """Verifies and computes a node information map
2859 @type ninfo: L{objects.Node}
2860 @param ninfo: the node to check
2861 @param nresult: the remote results for the node
2862 @param nimg: the node image object
2863 @param vg_name: the configured VG name
2867 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2869 # try to read free memory (from the hypervisor)
2870 hv_info = nresult.get(constants.NV_HVINFO, None)
2871 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2872 _ErrorIf(test, constants.CV_ENODEHV, node,
2873 "rpc call to node failed (hvinfo)")
2876 nimg.mfree = int(hv_info["memory_free"])
2877 except (ValueError, TypeError):
2878 _ErrorIf(True, constants.CV_ENODERPC, node,
2879 "node returned invalid nodeinfo, check hypervisor")
2881 # FIXME: devise a free space model for file based instances as well
2882 if vg_name is not None:
2883 test = (constants.NV_VGLIST not in nresult or
2884 vg_name not in nresult[constants.NV_VGLIST])
2885 _ErrorIf(test, constants.CV_ENODELVM, node,
2886 "node didn't return data for the volume group '%s'"
2887 " - it is either missing or broken", vg_name)
2890 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2891 except (ValueError, TypeError):
2892 _ErrorIf(True, constants.CV_ENODERPC, node,
2893 "node returned invalid LVM info, check LVM status")
2895 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2896 """Gets per-disk status information for all instances.
2898 @type nodelist: list of strings
2899 @param nodelist: Node names
2900 @type node_image: dict of (name, L{NodeImage})
2901 @param node_image: Node image objects
2902 @type instanceinfo: dict of (name, L{objects.Instance})
2903 @param instanceinfo: Instance objects
2904 @rtype: {instance: {node: [(success, payload)]}}
2905 @return: a dictionary of per-instance dictionaries with nodes as
2906 keys and disk information as values; the disk information is a
2907 list of tuples (success, payload)
2910 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2913 node_disks_devonly = {}
2914 diskless_instances = set()
2915 diskless = constants.DT_DISKLESS
2917 for nname in nodelist:
2918 node_instances = list(itertools.chain(node_image[nname].pinst,
2919 node_image[nname].sinst))
2920 diskless_instances.update(inst for inst in node_instances
2921 if instanceinfo[inst].disk_template == diskless)
2922 disks = [(inst, disk)
2923 for inst in node_instances
2924 for disk in instanceinfo[inst].disks]
2927 # No need to collect data
2930 node_disks[nname] = disks
2932 # _AnnotateDiskParams already makes copies of the disks
2934 for (inst, dev) in disks:
2935 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2936 self.cfg.SetDiskID(anno_disk, nname)
2937 devonly.append(anno_disk)
2939 node_disks_devonly[nname] = devonly
2941 assert len(node_disks) == len(node_disks_devonly)
2943 # Collect data from all nodes with disks
2944 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2947 assert len(result) == len(node_disks)
2951 for (nname, nres) in result.items():
2952 disks = node_disks[nname]
2955 # No data from this node
2956 data = len(disks) * [(False, "node offline")]
2959 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2960 "while getting disk information: %s", msg)
2962 # No data from this node
2963 data = len(disks) * [(False, msg)]
2966 for idx, i in enumerate(nres.payload):
2967 if isinstance(i, (tuple, list)) and len(i) == 2:
2970 logging.warning("Invalid result from node %s, entry %d: %s",
2972 data.append((False, "Invalid result from the remote node"))
2974 for ((inst, _), status) in zip(disks, data):
2975 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2977 # Add empty entries for diskless instances.
2978 for inst in diskless_instances:
2979 assert inst not in instdisk
2982 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2983 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2984 compat.all(isinstance(s, (tuple, list)) and
2985 len(s) == 2 for s in statuses)
2986 for inst, nnames in instdisk.items()
2987 for nname, statuses in nnames.items())
2988 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2993 def _SshNodeSelector(group_uuid, all_nodes):
2994 """Create endless iterators for all potential SSH check hosts.
2997 nodes = [node for node in all_nodes
2998 if (node.group != group_uuid and
3000 keyfunc = operator.attrgetter("group")
3002 return map(itertools.cycle,
3003 [sorted(map(operator.attrgetter("name"), names))
3004 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3008 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3009 """Choose which nodes should talk to which other nodes.
3011 We will make nodes contact all nodes in their group, and one node from every other group.
3014 @warning: This algorithm has a known issue if one node group is much
3015 smaller than others (e.g. just one node). In such a case all other
3016 nodes will talk to the single node.
3019 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3020 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3022 return (online_nodes,
3023 dict((name, sorted([i.next() for i in sel]))
3024 for name in online_nodes))
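# Sketch of the selector pair above: nodes outside the verified group
# are bucketed by group and each bucket becomes an endless round-robin
# iterator, so successive picks spread the SSH checks evenly across
# every foreign group. The helper name and inputs are hypothetical.
def _ExampleSshSelector(all_nodes, group_uuid):
  import itertools
  import operator
  others = sorted((group, name) for (name, group) in all_nodes
                  if group != group_uuid)
  return [itertools.cycle([name for (_, name) in members])
          for (_, members) in itertools.groupby(others,
                                                operator.itemgetter(0))]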
3026 def BuildHooksEnv(self):
3029 Cluster-Verify hooks are run in the post phase only; their failure is
3030 logged in the verify output and makes the verification fail.
3034 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3037 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3038 for node in self.my_node_info.values())
3042 def BuildHooksNodes(self):
3043 """Build hooks nodes.
3046 return ([], self.my_node_names)
3048 def Exec(self, feedback_fn):
3049 """Verify integrity of the node group, performing various test on nodes.
3052 # This method has too many local variables. pylint: disable=R0914
3053 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3055 if not self.my_node_names:
3057 feedback_fn("* Empty node group, skipping verification")
3061 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3062 verbose = self.op.verbose
3063 self._feedback_fn = feedback_fn
3065 vg_name = self.cfg.GetVGName()
3066 drbd_helper = self.cfg.GetDRBDHelper()
3067 cluster = self.cfg.GetClusterInfo()
3068 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3069 hypervisors = cluster.enabled_hypervisors
3070 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3072 i_non_redundant = [] # Non redundant instances
3073 i_non_a_balanced = [] # Non auto-balanced instances
3074 i_offline = 0 # Count of offline instances
3075 n_offline = 0 # Count of offline nodes
3076 n_drained = 0 # Count of nodes being drained
3077 node_vol_should = {}
3079 # FIXME: verify OS list
3082 filemap = _ComputeAncillaryFiles(cluster, False)
3084 # do local checksums
3085 master_node = self.master_node = self.cfg.GetMasterNode()
3086 master_ip = self.cfg.GetMasterIP()
3088 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3091 if self.cfg.GetUseExternalMipScript():
3092 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3094 node_verify_param = {
3095 constants.NV_FILELIST:
3096 map(vcluster.MakeVirtualPath,
3097 utils.UniqueSequence(filename
3098 for files in filemap
3099 for filename in files)),
3100 constants.NV_NODELIST:
3101 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3102 self.all_node_info.values()),
3103 constants.NV_HYPERVISOR: hypervisors,
3104 constants.NV_HVPARAMS:
3105 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3106 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3107 for node in node_data_list
3108 if not node.offline],
3109 constants.NV_INSTANCELIST: hypervisors,
3110 constants.NV_VERSION: None,
3111 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3112 constants.NV_NODESETUP: None,
3113 constants.NV_TIME: None,
3114 constants.NV_MASTERIP: (master_node, master_ip),
3115 constants.NV_OSLIST: None,
3116 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3117 constants.NV_USERSCRIPTS: user_scripts,
3120 if vg_name is not None:
3121 node_verify_param[constants.NV_VGLIST] = None
3122 node_verify_param[constants.NV_LVLIST] = vg_name
3123 node_verify_param[constants.NV_PVLIST] = [vg_name]
3126 node_verify_param[constants.NV_DRBDLIST] = None
3127 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3130 # FIXME: this needs to be changed per node-group, not cluster-wide
3132 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3133 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3134 bridges.add(default_nicpp[constants.NIC_LINK])
3135 for instance in self.my_inst_info.values():
3136 for nic in instance.nics:
3137 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3138 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3139 bridges.add(full_nic[constants.NIC_LINK])
3142 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3144 # Build our expected cluster state
3145 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3147 vm_capable=node.vm_capable))
3148 for node in node_data_list)
3152 for node in self.all_node_info.values():
3153 path = _SupportsOob(self.cfg, node)
3154 if path and path not in oob_paths:
3155 oob_paths.append(path)
3158 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3160 for instance in self.my_inst_names:
3161 inst_config = self.my_inst_info[instance]
3162 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3165 for nname in inst_config.all_nodes:
3166 if nname not in node_image:
3167 gnode = self.NodeImage(name=nname)
3168 gnode.ghost = (nname not in self.all_node_info)
3169 node_image[nname] = gnode
3171 inst_config.MapLVsByNode(node_vol_should)
3173 pnode = inst_config.primary_node
3174 node_image[pnode].pinst.append(instance)
3176 for snode in inst_config.secondary_nodes:
3177 nimg = node_image[snode]
3178 nimg.sinst.append(instance)
3179 if pnode not in nimg.sbp:
3180 nimg.sbp[pnode] = []
3181 nimg.sbp[pnode].append(instance)
3183 # At this point, we have the in-memory data structures complete,
3184 # except for the runtime information, which we'll gather next
3186 # Due to the way our RPC system works, exact response times cannot be
3187 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3188 # time before and after executing the request, we can at least have a time window
3190 nvinfo_starttime = time.time()
3191 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3193 self.cfg.GetClusterName())
3194 nvinfo_endtime = time.time()
3196 if self.extra_lv_nodes and vg_name is not None:
3198 self.rpc.call_node_verify(self.extra_lv_nodes,
3199 {constants.NV_LVLIST: vg_name},
3200 self.cfg.GetClusterName())
3202 extra_lv_nvinfo = {}
3204 all_drbd_map = self.cfg.ComputeDRBDMap()
3206 feedback_fn("* Gathering disk information (%s nodes)" %
3207 len(self.my_node_names))
3208 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3211 feedback_fn("* Verifying configuration file consistency")
3213 # If not all nodes are being checked, we need to make sure the master node
3214 # and a non-checked vm_capable node are in the list.
3215 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3217 vf_nvinfo = all_nvinfo.copy()
3218 vf_node_info = list(self.my_node_info.values())
3219 additional_nodes = []
3220 if master_node not in self.my_node_info:
3221 additional_nodes.append(master_node)
3222 vf_node_info.append(self.all_node_info[master_node])
3223 # Add the first vm_capable node we find which is not included,
3224 # excluding the master node (which we already have)
3225 for node in absent_nodes:
3226 nodeinfo = self.all_node_info[node]
3227 if (nodeinfo.vm_capable and not nodeinfo.offline and
3228 node != master_node):
3229 additional_nodes.append(node)
3230 vf_node_info.append(self.all_node_info[node])
3232 key = constants.NV_FILELIST
3233 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3234 {key: node_verify_param[key]},
3235 self.cfg.GetClusterName()))
3237 vf_nvinfo = all_nvinfo
3238 vf_node_info = self.my_node_info.values()
3240 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3242 feedback_fn("* Verifying node status")
3246 for node_i in node_data_list:
3248 nimg = node_image[node]
3252 feedback_fn("* Skipping offline node %s" % (node,))
3256 if node == master_node:
3258 elif node_i.master_candidate:
3259 ntype = "master candidate"
3260 elif node_i.drained:
3266 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3268 msg = all_nvinfo[node].fail_msg
3269 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3272 nimg.rpc_fail = True
3275 nresult = all_nvinfo[node].payload
3277 nimg.call_ok = self._VerifyNode(node_i, nresult)
3278 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3279 self._VerifyNodeNetwork(node_i, nresult)
3280 self._VerifyNodeUserScripts(node_i, nresult)
3281 self._VerifyOob(node_i, nresult)
3284 self._VerifyNodeLVM(node_i, nresult, vg_name)
3285 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3288 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3289 self._UpdateNodeInstances(node_i, nresult, nimg)
3290 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3291 self._UpdateNodeOS(node_i, nresult, nimg)
3293 if not nimg.os_fail:
3294 if refos_img is None:
3296 self._VerifyNodeOS(node_i, nimg, refos_img)
3297 self._VerifyNodeBridges(node_i, nresult, bridges)
3299 # Check whether all running instances are primary for the node. (This
3300 # can no longer be done from _VerifyInstance below, since some of the
3301 # wrong instances could be from other node groups.)
3302 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3304 for inst in non_primary_inst:
3305 test = inst in self.all_inst_info
3306 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3307 "instance should not run on node %s", node_i.name)
3308 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3309 "node is running unknown instance %s", inst)
3311 for node, result in extra_lv_nvinfo.items():
3312 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3313 node_image[node], vg_name)
3315 feedback_fn("* Verifying instance status")
3316 for instance in self.my_inst_names:
3318 feedback_fn("* Verifying instance %s" % instance)
3319 inst_config = self.my_inst_info[instance]
3320 self._VerifyInstance(instance, inst_config, node_image,
3322 inst_nodes_offline = []
3324 pnode = inst_config.primary_node
3325 pnode_img = node_image[pnode]
3326 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3327 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3328 " primary node failed", instance)
3330 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3332 constants.CV_EINSTANCEBADNODE, instance,
3333 "instance is marked as running and lives on offline node %s",
3334 inst_config.primary_node)
3336 # If the instance is non-redundant we cannot survive losing its primary
3337 # node, so we are not N+1 compliant. On the other hand we have no disk
3338 # templates with more than one secondary so that situation is not well supported currently.
3340 # FIXME: does not support file-backed instances
3341 if not inst_config.secondary_nodes:
3342 i_non_redundant.append(instance)
3344 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3345 constants.CV_EINSTANCELAYOUT,
3346 instance, "instance has multiple secondary nodes: %s",
3347 utils.CommaJoin(inst_config.secondary_nodes),
3348 code=self.ETYPE_WARNING)
3350 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3351 pnode = inst_config.primary_node
3352 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3353 instance_groups = {}
3355 for node in instance_nodes:
3356 instance_groups.setdefault(self.all_node_info[node].group,
3360 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3361 # Sort so that we always list the primary node first.
3362 for group, nodes in sorted(instance_groups.items(),
3363 key=lambda (_, nodes): pnode in nodes,
3366 self._ErrorIf(len(instance_groups) > 1,
3367 constants.CV_EINSTANCESPLITGROUPS,
3368 instance, "instance has primary and secondary nodes in"
3369 " different groups: %s", utils.CommaJoin(pretty_list),
3370 code=self.ETYPE_WARNING)
3372 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3373 i_non_a_balanced.append(instance)
3375 for snode in inst_config.secondary_nodes:
3376 s_img = node_image[snode]
3377 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3378 snode, "instance %s, connection to secondary node failed",
3382 inst_nodes_offline.append(snode)
3384 # warn that the instance lives on offline nodes
3385 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3386 "instance has offline secondary node(s) %s",
3387 utils.CommaJoin(inst_nodes_offline))
3388 # ... or ghost/non-vm_capable nodes
3389 for node in inst_config.all_nodes:
3390 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3391 instance, "instance lives on ghost node %s", node)
3392 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3393 instance, "instance lives on non-vm_capable node %s", node)
3395 feedback_fn("* Verifying orphan volumes")
3396 reserved = utils.FieldSet(*cluster.reserved_lvs)
3398 # We will get spurious "unknown volume" warnings if any node of this group
3399 # is secondary for an instance whose primary is in another group. To avoid
3400 # them, we find these instances and add their volumes to node_vol_should.
3401 for inst in self.all_inst_info.values():
3402 for secondary in inst.secondary_nodes:
3403 if (secondary in self.my_node_info
3404 and inst.name not in self.my_inst_info):
3405 inst.MapLVsByNode(node_vol_should)
3408 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3410 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3411 feedback_fn("* Verifying N+1 Memory redundancy")
3412 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3414 feedback_fn("* Other Notes")
3416 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3417 % len(i_non_redundant))
3419 if i_non_a_balanced:
3420 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3421 % len(i_non_a_balanced))
3424 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3427 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3430 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3434 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3435 """Analyze the post-hooks' result
3437 This method analyses the hook result, handles it, and sends some
3438 nicely-formatted feedback back to the user.
3440 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3441 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3442 @param hooks_results: the results of the multi-node hooks rpc call
3443 @param feedback_fn: function used to send feedback back to the caller
3444 @param lu_result: previous Exec result
3445 @return: the new Exec result, based on the previous result
3449 # We only really run POST phase hooks, only for non-empty groups,
3450 # and are only interested in their results
3451 if not self.my_node_names:
3454 elif phase == constants.HOOKS_PHASE_POST:
3455 # Used to change hooks' output to proper indentation
3456 feedback_fn("* Hooks Results")
3457 assert hooks_results, "invalid result from hooks"
3459 for node_name in hooks_results:
3460 res = hooks_results[node_name]
3462 test = msg and not res.offline
3463 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3464 "Communication failure in hooks execution: %s", msg)
3465 if res.offline or msg:
3466 # No need to investigate payload if node is offline or gave
3469 for script, hkr, output in res.payload:
3470 test = hkr == constants.HKR_FAIL
3471 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3472 "Script %s failed, output:", script)
3474 output = self._HOOKS_INDENT_RE.sub(" ", output)
3475 feedback_fn("%s" % output)
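# Sketch of the indentation helper used above: _HOOKS_INDENT_RE matches
# the start of every line (re.M), so a single sub() call prefixes each
# line of a hook's output uniformly. The prefix width is illustrative.
def _ExampleIndentHookOutput(output, prefix="      "):
  import re
  return re.compile("^", re.M).sub(prefix, output)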
3481 class LUClusterVerifyDisks(NoHooksLU):
3482 """Verifies the cluster disks status.
3487 def ExpandNames(self):
3488 self.share_locks = _ShareAll()
3489 self.needed_locks = {
3490 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3493 def Exec(self, feedback_fn):
3494 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3496 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3497 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3498 for group in group_names])
3501 class LUGroupVerifyDisks(NoHooksLU):
3502 """Verifies the status of all disks in a node group.
3507 def ExpandNames(self):
3508 # Raises errors.OpPrereqError on its own if group can't be found
3509 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3511 self.share_locks = _ShareAll()
3512 self.needed_locks = {
3513 locking.LEVEL_INSTANCE: [],
3514 locking.LEVEL_NODEGROUP: [],
3515 locking.LEVEL_NODE: [],
3518 def DeclareLocks(self, level):
3519 if level == locking.LEVEL_INSTANCE:
3520 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3522 # Lock instances optimistically, needs verification once node and group
3523 # locks have been acquired
3524 self.needed_locks[locking.LEVEL_INSTANCE] = \
3525 self.cfg.GetNodeGroupInstances(self.group_uuid)
3527 elif level == locking.LEVEL_NODEGROUP:
3528 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3530 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3531 set([self.group_uuid] +
3532 # Lock all groups used by instances optimistically; this requires
3533 # going via the node before it's locked, requiring verification later on
3536 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3537 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3539 elif level == locking.LEVEL_NODE:
3540 # This will only lock the nodes in the group to be verified which contain actual instances
3542 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3543 self._LockInstancesNodes()
3545 # Lock all nodes in group to be verified
3546 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3547 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3548 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3550 def CheckPrereq(self):
3551 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3552 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3553 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3555 assert self.group_uuid in owned_groups
3557 # Check if locked instances are still correct
3558 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3560 # Get instance information
3561 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3563 # Check if node groups for locked instances are still correct
3564 _CheckInstancesNodeGroups(self.cfg, self.instances,
3565 owned_groups, owned_nodes, self.group_uuid)
3567 def Exec(self, feedback_fn):
3568 """Verify integrity of cluster disks.
3570 @rtype: tuple of three items
3571 @return: a tuple of (dict of node-to-node_error, list of instances
3572 which need activate-disks, dict of instance: (node, volume) for
3577 res_instances = set()
3580 nv_dict = _MapInstanceDisksToNodes(
3581 [inst for inst in self.instances.values()
3582 if inst.admin_state == constants.ADMINST_UP])
3585 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3586 set(self.cfg.GetVmCapableNodeList()))
3588 node_lvs = self.rpc.call_lv_list(nodes, [])
3590 for (node, node_res) in node_lvs.items():
3591 if node_res.offline:
3594 msg = node_res.fail_msg
3596 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3597 res_nodes[node] = msg
3600 for lv_name, (_, _, lv_online) in node_res.payload.items():
3601 inst = nv_dict.pop((node, lv_name), None)
3602 if not (lv_online or inst is None):
3603 res_instances.add(inst)
3605 # any leftover items in nv_dict are missing LVs, let's arrange the data
3607 for key, inst in nv_dict.iteritems():
3608 res_missing.setdefault(inst, []).append(list(key))
3610 return (res_nodes, list(res_instances), res_missing)
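# Sketch of the LV classification in Exec above, on plain data: entries
# reported by a node are popped from nv_dict, an offline LV flags its
# owning instance, and whatever remains in nv_dict is a missing LV.
def _ExampleClassifyLvs(nv_dict, node_lvs):
  """nv_dict: (node, lv name) -> instance; node_lvs: node -> {lv: online}."""
  res_instances = set()
  res_missing = {}
  for (node, lvs) in node_lvs.items():
    for (lv_name, lv_online) in lvs.items():
      inst = nv_dict.pop((node, lv_name), None)
      if inst is not None and not lv_online:
        res_instances.add(inst)
  for ((node, lv_name), inst) in nv_dict.items():
    res_missing.setdefault(inst, []).append([node, lv_name])
  return (res_instances, res_missing)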
3613 class LUClusterRepairDiskSizes(NoHooksLU):
3614 """Verifies the cluster disks sizes.
3619 def ExpandNames(self):
3620 if self.op.instances:
3621 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3622 self.needed_locks = {
3623 locking.LEVEL_NODE_RES: [],
3624 locking.LEVEL_INSTANCE: self.wanted_names,
3626 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3628 self.wanted_names = None
3629 self.needed_locks = {
3630 locking.LEVEL_NODE_RES: locking.ALL_SET,
3631 locking.LEVEL_INSTANCE: locking.ALL_SET,
3633 self.share_locks = {
3634 locking.LEVEL_NODE_RES: 1,
3635 locking.LEVEL_INSTANCE: 0,
3638 def DeclareLocks(self, level):
3639 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3640 self._LockInstancesNodes(primary_only=True, level=level)
3642 def CheckPrereq(self):
3643 """Check prerequisites.
3645 This only checks the optional instance list against the existing names.
3648 if self.wanted_names is None:
3649 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3651 self.wanted_instances = \
3652 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3654 def _EnsureChildSizes(self, disk):
3655 """Ensure children of the disk have the needed disk size.
3657 This is valid mainly for DRBD8 and fixes an issue where the
3658 children have a smaller disk size.
3660 @param disk: an L{ganeti.objects.Disk} object
3663 if disk.dev_type == constants.LD_DRBD8:
3664 assert disk.children, "Empty children for DRBD8?"
3665 fchild = disk.children[0]
3666 mismatch = fchild.size < disk.size
3668 self.LogInfo("Child disk has size %d, parent %d, fixing",
3669 fchild.size, disk.size)
3670 fchild.size = disk.size
3672 # and we recurse on this child only, not on the metadev
3673 return self._EnsureChildSizes(fchild) or mismatch

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    assert not (frozenset(per_node_disks.keys()) -
                self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20  # the RPC returns bytes, the config stores MiB
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master_params.name)
      except ValueError:
        pass
      _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
    finally:
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family, errors.ECODE_INVAL)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                               (netmask), errors.ECODE_INVAL)
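

# Illustrative example (editor's note, not part of the original code): the
# netmask is a CIDR prefix length, so on an IPv4 cluster a call such as
# "_ValidateNetmask(cfg, 24)" passes while "_ValidateNetmask(cfg, 99)" raises
# errors.OpPrereqError; "cfg" is any L{config.ConfigWriter} instance.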


class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

    if self.op.master_netmask is not None:
      _ValidateNetmask(self.cfg, self.op.master_netmask)

    if self.op.diskparams:
      for dt_params in self.op.diskparams.values():
        utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
      try:
        utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
      except errors.OpPrereqError, err:
        raise errors.OpPrereqError("While verifying diskparams options: %s" %
                                   err, errors.ECODE_INVAL)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }
    self.share_locks = {
      locking.LEVEL_NODE: 1,
      locking.LEVEL_INSTANCE: 1,
      locking.LEVEL_NODEGROUP: 1,
      }

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.owned_locks(locking.LEVEL_NODE)

    # if vg_name is not None, check the given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # check the given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      objects.UpgradeBeParams(self.op.beparams)
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
          constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.hv_state:
      new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                            self.cluster.hv_state_static)
      self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
                               for hv, values in new_hv_state.items())

    if self.op.disk_state:
      new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
                                                self.cluster.disk_state_static)
      self.new_disk_state = \
        dict((storage, dict((name, cluster.SimpleFillDiskState(values))
                            for name, values in svalues.items()))
             for storage, svalues in new_disk_state.items())

    if self.op.ipolicy:
      self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
                                            group_policy=False)

      all_instances = self.cfg.GetAllInstancesInfo().values()
      violations = set()
      for group in self.cfg.GetAllNodeGroupsInfo().values():
        instances = frozenset([inst for inst in all_instances
                               if compat.any(node in group.members
                                             for node in inst.all_nodes)])
        new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
        ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
        new = _ComputeNewInstanceViolations(ipol,
                                            new_ipolicy, instances)
        if new:
          violations.update(new)

      if violations:
        self.LogWarning("After the ipolicy change the following instances"
                        " violate them: %s",
                        utils.CommaJoin(utils.NiceSort(violations)))

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors), errors.ECODE_INVAL)

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # disk template parameters
    self.new_diskparams = objects.FillDict(cluster.diskparams, {})
    if self.op.diskparams:
      for dt_name, dt_params in self.op.diskparams.items():
        # note: the membership test must be against the merged dict, not
        # against self.op.diskparams (which would always succeed here)
        if dt_name not in self.new_diskparams:
          self.new_diskparams[dt_name] = dt_params
        else:
          self.new_diskparams[dt_name].update(dt_params)
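
    # Example of the merge semantics above (editor's note, values are
    # hypothetical): if the cluster has diskparams
    # {"drbd": {"resync-rate": 1024}} and the opcode passes
    # {"drbd": {"metavg": "xenvg"}}, the merged result keeps both keys:
    # {"drbd": {"resync-rate": 1024, "metavg": "xenvg"}}.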

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)
        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.ipolicy:
      self.cluster.ipolicy = self.new_ipolicy
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams
    if self.op.diskparams:
      self.cluster.diskparams = self.new_diskparams
    if self.op.hv_state:
      self.cluster.hv_state_static = self.new_hv_state
    if self.op.disk_state:
      self.cluster.disk_state_static = self.new_disk_state

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      if self.op.maintain_node_health and not constants.ENABLE_CONFD:
        feedback_fn("Note: CONFD was disabled at build time, node health"
                    " maintenance is not useful (still enabling it)")
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    if self.op.use_external_mip_script is not None:
      self.cluster.use_external_mip_script = self.op.use_external_mip_script

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      ems = self.cfg.GetUseExternalMipScript()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params, ems)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (master_params.netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    if self.op.master_netmask:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
      result = self.rpc.call_node_change_master_netmask(master_params.name,
                                                        master_params.netmask,
                                                        self.op.master_netmask,
                                                        master_params.ip,
                                                        master_params.netdev)
      if result.fail_msg:
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
        feedback_fn(msg)

      self.cluster.master_netmask = self.op.master_netmask

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      ems = self.cfg.GetUseExternalMipScript()
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)
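

# Illustrative usage (editor's note, not part of the original code): callers
# invoke this as, e.g.,
# "_UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)"; a locally
# missing file is skipped and per-node copy failures only warn, so one
# unreachable node does not abort the calling LU.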


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    pathutils.SSH_KNOWN_HOSTS_FILE,
    pathutils.CONFD_HMAC_KEY,
    pathutils.CLUSTER_DOMAIN_SECRET_FILE,
    pathutils.SPICE_CERT_FILE,
    pathutils.SPICE_CACERT_FILE,
    pathutils.RAPI_USERS_FILE,
    ])

  if redist:
    # we need to ship at least the RAPI certificate
    files_all.add(pathutils.RAPI_CERT_FILE)
  else:
    files_all.update(pathutils.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())

  if cluster.modify_etc_hosts:
    files_all.add(pathutils.ETC_HOSTS)

  if cluster.use_external_mip_script:
    files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    pathutils.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()

  if not redist:
    files_mc.add(pathutils.CLUSTER_CONF_FILE)

  # File storage
  if (not redist and
      (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
    files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
    files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(
    filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(
    filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          sum(map(len, [files_all, files_mc, files_vm]))), \
    "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
    "Optional file not in a different required list"

  # This one file should never ever be re-distributed via RPC
  assert not (redist and
              pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)

  return (files_all, files_opt, files_mc, files_vm)
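

# Illustrative usage (editor's note, not part of the original code): the
# redistribution path below consumes the returned tuple roughly as
#
#   (files_all, files_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(cluster, True)
#
# where files_mc is expected to be empty for redist=True, since the cluster
# configuration file is shipped by ConfigWriter rather than by this code.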


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  online_set = frozenset(online_nodes)
  vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (pathutils.CLUSTER_CONF_FILE in files_all or
              pathutils.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)
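

# Illustrative usage (editor's note, not part of the original code): after a
# cluster-wide change an LU typically refreshes all nodes with
# "_RedistributeAncillaryFiles(self)", while node addition passes the new
# node explicitly, e.g.
# "_RedistributeAncillaryFiles(self, additional_nodes=[node])".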


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                   master_params, ems)
    result.Raise("Could not activate the master IP")


class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not deactivate the master IP")


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10  # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    retries = 0
    rstats = rstats.payload
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
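

# Illustrative usage (editor's note, not part of the original code): after
# creating or replacing DRBD disks an LU typically blocks with
# "_WaitForSync(self, instance)" and treats a False return value (disks still
# degraded after the retries above) as an error, while oneshot=True is used
# for a single status poll without waiting for completion.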


def _BlockdevFind(lu, node, dev, instance):
  """Wrapper around call_blockdev_find to annotate diskparams.

  @param lu: A reference to the lu object
  @param node: The node to call out
  @param dev: The device to find
  @param instance: The instance object the device belongs to
  @returns The result of the rpc call

  """
  (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
  return lu.rpc.call_blockdev_find(node, disk)


def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
  """Wrapper around L{_CheckDiskConsistencyInner}.

  """
  (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
  return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
                                    ldisk=ldisk)


def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
                               ldisk=False):
  """Check that mirrors are not degraded.

  @attention: The device has to be annotated already.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistencyInner(lu, instance, child,
                                                     node, on_primary)

  return result
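

# Illustrative example (editor's note, not part of the original code): during
# a DRBD check a caller might use
# "_CheckDiskConsistency(lu, instance, dev, node, False, ldisk=True)" to
# require LDS_OKAY local storage on the secondary, whereas the default
# ldisk=False form only rejects devices whose overall mirror is degraded.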


class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported for the given command

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does"
                               " not match actual power state (%s)"),
                              node.powered, node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
      constants.OOB_POWER_ON,
      constants.OOB_POWER_OFF,
      constants.OOB_POWER_CYCLE,
      ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))
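
  # Illustrative payload shapes accepted by _CheckPayload (editor's note,
  # values are hypothetical): "health" expects a list of (item, status)
  # pairs, e.g. [["disk0", "OK"], ["fan1", "WARNING"]]; "power-status"
  # expects a dict such as {"powered": True}; power-on/off/cycle expect no
  # payload at all (None).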


class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]


class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter
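
  # Illustrative example (editor's note, values are hypothetical): for
  # fields=["name"] and names=["debootstrap"] the method above returns
  # roughly
  #   [OP_AND,
  #    [OP_OR, [OP_EQUAL, "name", "debootstrap"]],
  #    [OP_AND, [OP_NOT, [OP_TRUE, "hidden"]],
  #             [OP_NOT, [OP_TRUE, "blacklisted"]],
  #             [OP_TRUE, "valid"]]]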

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      pass
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
                                        [lu.cfg.GetHypervisorType()])
      live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
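
  # Illustrative output (editor's note, values are hypothetical): with
  # output_fields=["node", "name", "size"] a node with two volumes might
  # yield
  #   [["node1.example.com", "disk0", "10240"],
  #    ["node1.example.com", "disk1", "2048"]]
  # since every value is stringified by the loop above.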


class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      gmi = ganeti.masterd.instance
      disk_usage = dict((inst.name,
                         gmi.ComputeDiskSize(inst.disk_template,
                                             [{constants.IDISK_SIZE: disk.size}
                                              for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)


class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)


class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)
5557 def CheckPrereq(self):
5558 """Check prerequisites.
5561 - the new node is not already in the config
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                              source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group, ndparams={})

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)

    if self.op.disk_state:
      self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)

    # TODO: If we need to have multiple DnsOnlyRunner we probably should make
    # it a property on the base class.
    result = rpc.DnsOnlyRunner().call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpPrereqError("Version mismatch master version %s,"
                                 " node version %s" %
                                 (constants.PROTOCOL_VERSION, result.payload),
                                 errors.ECODE_ENVIRON)
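
  # Illustrative sketch of the homed-ness rule enforced above (hypothetical
  # addresses): a node is "single-homed" exactly when its secondary IP equals
  # its primary IP, and the new node must match the master:
  #
  #   master: primary 192.0.2.1,  secondary 192.0.2.1      -> single-homed
  #   newbie: primary 192.0.2.10, secondary 198.51.100.10  -> dual-homed
  #
  # This combination fails with "The master has no secondary ip but the new
  # node has one".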

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    if self.op.hv_state:
      new_node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      new_node.disk_state_static = self.new_disk_state

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
      }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
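

# Example (illustrative, not part of the original module): LUNodeAdd backs
# "gnt-node add"; adding a node, and re-adding a broken one with the flag
# resets performed in Exec above, would look roughly like this on the master
# (hostnames are placeholders):
#
#   gnt-node add node2.example.com
#   gnt-node add --readd node2.example.com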


class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]
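
  # Illustrative round-trip through the mappings above (plain Python; the
  # tuple order is (master_candidate, drained, offline)):
  #
  #   _F2R[(True, False, False)]  == _ROLE_CANDIDATE
  #   _R2F[_ROLE_CANDIDATE]       == (True, False, False)
  #   dict(zip(_FLAGS, _R2F[_ROLE_DRAINED]))
  #   # -> {"master_candidate": False, "drained": True, "offline": False}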

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
                self.op.disk_state]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate is False or
                         self.op.offline is True or
                         self.op.drained is True or
                         self.op.master_capable is False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def _InstanceFilter(self, instance):
    """Filter for getting affected instances.

    """
    return (instance.disk_template in constants.DTS_INT_MIRROR and
            self.op.node_name in instance.all_nodes)

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    # Since modifying a node can have severe effects on currently running
    # operations the resource lock is at least acquired in shared mode
    self.needed_locks[locking.LEVEL_NODE_RES] = \
      self.needed_locks[locking.LEVEL_NODE]

    # Get node resource and instance locks in shared mode; they are not used
    # for anything but read-only access
    self.share_locks[locking.LEVEL_NODE_RES] = 1
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if self.lock_instances:
      affected_instances = \
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)

      # Verify instance locks
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
      wanted_instances = frozenset(affected_instances.keys())
      if wanted_instances - owned_instances:
        raise errors.OpPrereqError("Instances affected by changing node %s's"
                                   " secondary IP address have changed since"
                                   " locks were acquired, wanted '%s', have"
                                   " '%s'; retry the operation" %
                                   (self.op.node_name,
                                    utils.CommaJoin(wanted_instances),
                                    utils.CommaJoin(owned_instances)),
                                   errors.ECODE_STATE)
    else:
      affected_instances = None

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable is False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
        self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion (--auto-promote or RAPI"
                                   " auto_promote=True)", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) is False and getattr(node, attr) is False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered is True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name, errors.ECODE_STATE)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name,
                                 errors.ECODE_STATE)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained is False or self.op.offline is False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable is False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    # When changing the secondary ip, verify if this is a single-homed to
    # multi-homed transition or vice versa, and apply the relevant
    # restrictions.
    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip != node.primary_ip:
        if self.op.force and node.name == master.name:
          self.LogWarning("Transitioning from single-homed to multi-homed"
                          " cluster. All nodes will require a secondary ip.")
        else:
          raise errors.OpPrereqError("Changing the secondary ip on a"
                                     " single-homed cluster requires the"
                                     " --force option to be passed, and the"
                                     " target node to be the master",
                                     errors.ECODE_INVAL)
      elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
        if self.op.force and node.name == master.name:
          self.LogWarning("Transitioning from multi-homed to single-homed"
                          " cluster. Secondary IPs will have to be removed.")
        else:
          raise errors.OpPrereqError("Cannot set the secondary IP to be the"
                                     " same as the primary IP on a multi-homed"
                                     " cluster, unless the --force option is"
                                     " passed, and the target node is the"
                                     " master", errors.ECODE_INVAL)

      assert not (frozenset(affected_instances) -
                  self.owned_locks(locking.LEVEL_INSTANCE))

      if node.offline:
        if affected_instances:
          msg = ("Cannot change secondary IP address: offline node has"
                 " instances (%s) configured to use it" %
                 utils.CommaJoin(affected_instances.keys()))
          raise errors.OpPrereqError(msg, errors.ECODE_STATE)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in affected_instances.values():
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
                              msg="cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.node.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.node.disk_state_static)

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    if self.op.hv_state:
      node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      node.disk_state_static = self.new_disk_state

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
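

# Example (illustrative, not part of the original module): LUNodeSetParams
# backs "gnt-node modify"; draining a node and later promoting it again could
# look like this (the node name is a placeholder):
#
#   gnt-node modify --drained=yes node4.example.com
#   gnt-node modify --drained=no --master-candidate=yes node4.example.com
#
# The "result" list returned by Exec above would then contain entries such as
# ("drained", "True").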


class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload
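

# Example (illustrative): powercycling the master node requires the force
# parameter checked in CheckArguments above; on the command line (node names
# are placeholders):
#
#   gnt-node powercycle node5.example.com
#   gnt-node powercycle --force master.example.com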


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": runtime.GetArchInfo(),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.primary_hypervisor,
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "ipolicy": cluster.ipolicy,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "diskparams": cluster.diskparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "use_external_mip_script": cluster.use_external_mip_script,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.cq = _ClusterQuery(None, self.op.output_fields, False)

  def ExpandNames(self):
    self.cq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.cq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    result = self.cq.OldStyleQuery(self)

    assert len(result) == 1

    return result[0]


class _ClusterQuery(_QueryBase):
  FIELDS = query.CLUSTER_FIELDS

  #: Do not sort (there is only one item)
  SORT_FIELD = None

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    self.wanted = locking.ALL_SET
    self.do_locking = self.use_locking

    if self.do_locking:
      raise errors.OpPrereqError("Can not use locking for cluster queries",
                                 errors.ECODE_INVAL)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the cluster data.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    if query.CQ_CONFIG in self.requested_data:
      cluster = lu.cfg.GetClusterInfo()
    else:
      cluster = NotImplemented

    if query.CQ_QUEUE_DRAINED in self.requested_data:
      drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
    else:
      drain_flag = NotImplemented

    if query.CQ_WATCHER_PAUSE in self.requested_data:
      watcher_pause = utils.ReadWatcherPauseFile(pathutils.WATCHER_PAUSEFILE)
    else:
      watcher_pause = NotImplemented

    return query.ClusterQueryData(cluster, drain_flag, watcher_pause)


class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    if self.op.wait_for_sync:
      if not _WaitForSync(self, self.instance):
        raise errors.OpExecError("Some disks of the instance are degraded!")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
                                             False, idx)
      msg = result.fail_msg
      if msg:
        is_offline_secondary = (node in instance.secondary_nodes and
                                result.offline)
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not (ignore_secondaries or is_offline_secondary):
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
                                             True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
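

# Illustrative sketch of consuming the return value above; "lu" and "inst"
# are assumed to be a LogicalUnit and an objects.Instance already at hand:
#
#   disks_ok, dev_info = _AssembleInstanceDisks(lu, inst)
#   if not disks_ok:
#     ...  # at least one device could not be assembled
#   for node, iv_name, dev_path in dev_info:
#     ...  # e.g. ("node1", "disk/0", "/dev/drbd0")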


def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list.

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node are taken into
  account (the function returns False); if it is true, they are ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False

  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @rtype: integer
  @return: node current free memory
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  (_, _, (hv_info, )) = nodeinfo[node].payload

  free_mem = hv_info.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)

  return free_mem
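

# Illustrative call (hypothetical node and hypervisor names): make sure node1
# has 1024 MiB free before starting an instance under "kvm"; on success the
# node's current free memory is returned, otherwise an errors.OpPrereqError
# is raised:
#
#   free = _CheckNodeFreeMemory(self, "node1.example.com",
#                               "starting instance inst1", 1024, "kvm")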


def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
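

# Illustrative shape of req_sizes (hypothetical volume group names and
# sizes): free space is checked per volume group, in MiB:
#
#   _CheckNodesFreeDiskPerVG(self, ["node1", "node2"],
#                            {"xenvg": 10240, "datavg": 2048})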


def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    (_, (vg_info, ), _) = info.payload
    vg_free = vg_info.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Checks if nodes have enough physical CPUs.

  This function checks if all given nodes have the needed number of
  physical CPUs. In case any node has less CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    (_, _, (hv_info, )) = info.payload
    num_cpus = hv_info.get("cpu_total", None)
    if not isinstance(num_cpus, int):
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
                                 " on node %s, result was '%s'" %
                                 (node, num_cpus), errors.ECODE_ENVIRON)
    if requested > num_cpus:
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are"
                                 " required" % (node, num_cpus, requested),
                                 errors.ECODE_NORES)


class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      objects.UpgradeBeParams(self.op.beparams)
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE_RES:
      self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)
      bep.update(self.op.beparams)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MINMEM], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = \
        self.rpc.call_instance_start(node_current,
                                     (instance, self.op.hvparams,
                                      self.op.beparams),
                                     self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)
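

# Sketch of the reboot types handled above: INSTANCE_REBOOT_SOFT and
# INSTANCE_REBOOT_HARD are delegated to the hypervisor through
# call_instance_reboot, while a full reboot (or any reboot of an instance
# that is not running) takes the else branch: shutdown if needed, then a
# disk and instance restart. A hypothetical opcode for a full reboot:
#
#   op = opcodes.OpInstanceReboot(instance_name="inst1.example.com",
#                                 reboot_type=constants.INSTANCE_REBOOT_FULL)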


class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node,
                                             (inst, self.os_inst), True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  _MODIFYABLE = frozenset([
    constants.IDISK_SIZE,
    constants.IDISK_MODE,
    ])

  # New or changed disk parameters may have different semantics
  assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
    constants.IDISK_ADOPT,

    # TODO: Implement support for changing the VG while recreating
    constants.IDISK_VG,
    constants.IDISK_METAVG,
    ]))

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    be_full = self.cfg.GetClusterInfo().FillBE(self.instance)

    # FIXME
    # The allocator should actually run in "relocate" mode, but current
    # allocators don't support relocating all the nodes of an instance at
    # the same time. As a workaround we use "allocate" mode, but this is
    # suboptimal for two reasons:
    # - The instance name passed to the allocator is present in the list of
    #   existing instances, so there could be a conflict within the
    #   internal structures of the allocator. This doesn't happen with the
    #   current allocators, but it's a liability.
    # - The allocator counts the resources used by the instance twice: once
    #   because the instance exists already, and once because it tries to
    #   allocate a new instance.
    # The allocator could choose some of the nodes on which the instance is
    # running, but that's not a problem. If the instance nodes are broken,
    # they should already be marked as drained or offline, and hence
    # skipped by the allocator. If instance disks have been lost for other
    # reasons, then recreating the disks on the same nodes should be fine.
    disk_template = self.instance.disk_template
    spindle_use = be_full[constants.BE_SPINDLE_USE]
    req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
                                        disk_template=disk_template,
                                        tags=list(self.instance.GetTags()),
                                        os=self.instance.os,
                                        nics=[{}],
                                        vcpus=be_full[constants.BE_VCPUS],
                                        memory=be_full[constants.BE_MAXMEM],
                                        spindle_use=spindle_use,
                                        disks=[{constants.IDISK_SIZE: d.size,
                                                constants.IDISK_MODE: d.mode}
                                               for d in self.instance.disks],
                                        hypervisor=self.instance.hypervisor)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    assert req.RequiredNodes() == len(self.instance.all_nodes)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    self.op.nodes = ial.result
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))

  def CheckArguments(self):
    if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
      # Normalize and convert deprecated list of disk indices
      self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]

    duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
    if duplicates:
      raise errors.OpPrereqError("Some disks have been specified more than"
                                 " once: %s" % utils.CommaJoin(duplicates),
                                 errors.ECODE_INVAL)

    # We don't want _CheckIAllocatorOrNode selecting the default iallocator
    # when neither iallocator nor nodes are specified
    if self.op.iallocator or self.op.nodes:
      _CheckIAllocatorOrNode(self, "iallocator", "nodes")

    for (idx, params) in self.op.disks:
      utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
      unsupported = frozenset(params.keys()) - self._MODIFYABLE
      if unsupported:
        raise errors.OpPrereqError("Parameters for disk %s try to change"
                                   " unmodifiable parameter(s): %s" %
                                   (idx, utils.CommaJoin(unsupported)),
                                   errors.ECODE_INVAL)
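
  # Illustrative forms of self.op.disks (hypothetical values): the deprecated
  # flat index list is normalized above into (index, params) pairs, so these
  # are equivalent:
  #
  #   [0, 2]
  #   [(0, {}), (2, {})]
  #
  # while [(1, {constants.IDISK_MODE: "ro"})] would also adjust a modifiable
  # parameter of disk 1.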

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      if self.op.iallocator:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.iallocator is not None
      assert not self.op.nodes
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock the primary group used by the instance optimistically; this
      # requires going via the node before it's locked, requiring
      # verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)

    elif level == locking.LEVEL_NODE:
      # If an allocator is used, then we lock all the nodes in the current
      # instance group, as we don't know yet which ones will be selected;
      # if we replace the nodes without using an allocator, locks are
      # already declared in ExpandNames; otherwise, we need to lock all the
      # instance nodes for disk re-creation
      if self.op.iallocator:
        assert not self.op.nodes
        assert not self.needed_locks[locking.LEVEL_NODE]
        assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1

        # Lock member nodes of the group of the primary node
        for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
          self.needed_locks[locking.LEVEL_NODE].extend(
            self.cfg.GetNodeGroup(group_uuid).members)
      elif not self.op.nodes:
        self._LockInstancesNodes(primary_only=False)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    if not self.op.iallocator:
      _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      # Node group locks are acquired only for the primary node (and only
      # when the allocator is used)
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
                               primary_only=True)

    # if we replace nodes *and* the old primary is offline, we don't
    # check the instance state
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                          msg="cannot recreate disks")

    if self.op.disks:
      self.disks = dict(self.op.disks)
    else:
      self.disks = dict((idx, {}) for idx in range(len(instance.disks)))

    maxidx = max(self.disks.keys())
    if maxidx >= len(instance.disks):
      raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
                                 errors.ECODE_INVAL)

    if ((self.op.nodes or self.op.iallocator) and
        sorted(self.disks.keys()) != range(len(instance.disks))):
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)

    self.instance = instance

    if self.op.iallocator:
      self._RunAllocator()
      # Release unneeded node and node resource locks
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
      _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    instance = self.instance

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    to_skip = []
    mods = [] # keeps track of needed changes

    for idx, disk in enumerate(instance.disks):
      try:
        changes = self.disks[idx]
      except KeyError:
        # Disk should not be recreated
        to_skip.append(idx)
        continue

      # update secondaries for disks, if needed
      if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
        # need to update the nodes and minors
        assert len(self.op.nodes) == 2
        assert len(disk.logical_id) == 6 # otherwise disk internals
                                         # have changed
        (_, _, old_port, _, _, old_secret) = disk.logical_id
        new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
        new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                  new_minors[0], new_minors[1], old_secret)
        assert len(disk.logical_id) == len(new_id)
      else:
        new_id = None

      mods.append((idx, new_id, changes))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id, changes in mods:
      disk = instance.disks[idx]
      if new_id is not None:
        assert disk.dev_type == constants.LD_DRBD8
        disk.logical_id = new_id
      if changes:
        disk.Update(size=changes.get(constants.IDISK_SIZE, None),
                    mode=changes.get(constants.IDISK_MODE, None))

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    # All touched nodes must be locked
    mylocks = self.owned_locks(locking.LEVEL_NODE)
    assert mylocks.issuperset(frozenset(instance.all_nodes))
    _CreateDisks(self, instance, to_skip=to_skip)
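
  # Illustrative shape of a DRBD8 logical_id as rewritten above (hypothetical
  # values):
  #
  #   ("node1", "node2",   # the two nodes holding the device
  #    11000,              # DRBD port, kept across the recreate
  #    0, 1,               # per-node minors, freshly allocated
  #    "shared-secret")    # DRBD secret, kept across the recreate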
7378 class LUInstanceRename(LogicalUnit):
7379 """Rename an instance.
7382 HPATH = "instance-rename"
7383 HTYPE = constants.HTYPE_INSTANCE
7385 def CheckArguments(self):
7389 if self.op.ip_check and not self.op.name_check:
7390 # TODO: make the ip check more flexible and not depend on the name check
7391 raise errors.OpPrereqError("IP address check requires a name check",
7394 def BuildHooksEnv(self):
7397 This runs on master, primary and secondary nodes of the instance.
7400 env = _BuildInstanceHookEnvByObject(self, self.instance)
7401 env["INSTANCE_NEW_NAME"] = self.op.new_name
7404 def BuildHooksNodes(self):
7405 """Build hooks nodes.
7408 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7411 def CheckPrereq(self):
7412 """Check prerequisites.
7414 This checks that the instance is in the cluster and is not running.
7417 self.op.instance_name = _ExpandInstanceName(self.cfg,
7418 self.op.instance_name)
7419 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7420 assert instance is not None
7421 _CheckNodeOnline(self, instance.primary_node)
7422 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7423 msg="cannot rename")
7424 self.instance = instance
7426 new_name = self.op.new_name
7427 if self.op.name_check:
7428 hostname = netutils.GetHostname(name=new_name)
7429 if hostname.name != new_name:
7430 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7432 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7433 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7434 " same as given hostname '%s'") %
7435 (hostname.name, self.op.new_name),
7437 new_name = self.op.new_name = hostname.name
7438 if (self.op.ip_check and
7439 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7440 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7441 (hostname.ip, new_name),
7442 errors.ECODE_NOTUNIQUE)
7444 instance_list = self.cfg.GetInstanceList()
7445 if new_name in instance_list and new_name != instance.name:
7446 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7447 new_name, errors.ECODE_EXISTS)
7449 def Exec(self, feedback_fn):
7450 """Rename the instance.
7453 inst = self.instance
7454 old_name = inst.name
7456 rename_file_storage = False
7457 if (inst.disk_template in constants.DTS_FILEBASED and
7458 self.op.new_name != inst.name):
7459 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7460 rename_file_storage = True
7462 self.cfg.RenameInstance(inst.name, self.op.new_name)
7463 # Change the instance lock. This is definitely safe while we hold the BGL.
7464 # Otherwise the new lock would have to be added in acquired mode.
7466 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7467 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7469 # re-read the instance from the configuration after rename
7470 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7472 if rename_file_storage:
7473 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7474 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7475 old_file_storage_dir,
7476 new_file_storage_dir)
7477 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7478 " (but the instance has been renamed in Ganeti)" %
7479 (inst.primary_node, old_file_storage_dir,
7480 new_file_storage_dir))
7482 _StartInstanceDisks(self, inst, None)
7483 # update info on disks
7484 info = _GetInstanceInfoText(inst)
7485 for (idx, disk) in enumerate(inst.disks):
7486 for node in inst.all_nodes:
7487 self.cfg.SetDiskID(disk, node)
7488 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7489 if result.fail_msg:
7490 self.LogWarning("Error setting info on node %s for disk %s: %s",
7491 node, idx, result.fail_msg)
7493 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7494 old_name, self.op.debug_level)
7495 msg = result.fail_msg
7496 if msg:
7497 msg = ("Could not run OS rename script for instance %s on node %s"
7498 " (but the instance has been renamed in Ganeti): %s" %
7499 (inst.name, inst.primary_node, msg))
7500 self.proc.LogWarning(msg)
7502 _ShutdownInstanceDisks(self, inst)
7504 return inst.name
7507 class LUInstanceRemove(LogicalUnit):
7508 """Remove an instance.
7511 HPATH = "instance-remove"
7512 HTYPE = constants.HTYPE_INSTANCE
7515 def ExpandNames(self):
7516 self._ExpandAndLockInstance()
7517 self.needed_locks[locking.LEVEL_NODE] = []
7518 self.needed_locks[locking.LEVEL_NODE_RES] = []
7519 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7521 def DeclareLocks(self, level):
7522 if level == locking.LEVEL_NODE:
7523 self._LockInstancesNodes()
7524 elif level == locking.LEVEL_NODE_RES:
7526 self.needed_locks[locking.LEVEL_NODE_RES] = \
7527 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7529 def BuildHooksEnv(self):
7532 This runs on master, primary and secondary nodes of the instance.
7535 env = _BuildInstanceHookEnvByObject(self, self.instance)
7536 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7537 return env
7539 def BuildHooksNodes(self):
7540 """Build hooks nodes.
7543 nl = [self.cfg.GetMasterNode()]
7544 nl_post = list(self.instance.all_nodes) + nl
7545 return (nl, nl_post)
7547 def CheckPrereq(self):
7548 """Check prerequisites.
7550 This checks that the instance is in the cluster.
7553 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7554 assert self.instance is not None, \
7555 "Cannot retrieve locked instance %s" % self.op.instance_name
7557 def Exec(self, feedback_fn):
7558 """Remove the instance.
7561 instance = self.instance
7562 logging.info("Shutting down instance %s on node %s",
7563 instance.name, instance.primary_node)
7565 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7566 self.op.shutdown_timeout)
7567 msg = result.fail_msg
7568 if msg:
7569 if self.op.ignore_failures:
7570 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7571 else:
7572 raise errors.OpExecError("Could not shutdown instance %s on"
7573 " node %s: %s" %
7574 (instance.name, instance.primary_node, msg))
7576 assert (self.owned_locks(locking.LEVEL_NODE) ==
7577 self.owned_locks(locking.LEVEL_NODE_RES))
7578 assert not (set(instance.all_nodes) -
7579 self.owned_locks(locking.LEVEL_NODE)), \
7580 "Not owning correct locks"
7582 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7585 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7586 """Utility function to remove an instance.
7589 logging.info("Removing block devices for instance %s", instance.name)
7591 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7592 if not ignore_failures:
7593 raise errors.OpExecError("Can't remove instance's disks")
7594 feedback_fn("Warning: can't remove instance's disks")
7596 logging.info("Removing instance %s out of cluster config", instance.name)
7598 lu.cfg.RemoveInstance(instance.name)
7600 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7601 "Instance lock removal conflict"
7603 # Remove lock for the instance
7604 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7607 class LUInstanceQuery(NoHooksLU):
7608 """Logical unit for querying instances.
7611 # pylint: disable=W0142
7614 def CheckArguments(self):
7615 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7616 self.op.output_fields, self.op.use_locking)
7618 def ExpandNames(self):
7619 self.iq.ExpandNames(self)
7621 def DeclareLocks(self, level):
7622 self.iq.DeclareLocks(self, level)
7624 def Exec(self, feedback_fn):
7625 return self.iq.OldStyleQuery(self)
7628 class LUInstanceFailover(LogicalUnit):
7629 """Failover an instance.
7632 HPATH = "instance-failover"
7633 HTYPE = constants.HTYPE_INSTANCE
7636 def CheckArguments(self):
7637 """Check the arguments.
7640 self.iallocator = getattr(self.op, "iallocator", None)
7641 self.target_node = getattr(self.op, "target_node", None)
7643 def ExpandNames(self):
7644 self._ExpandAndLockInstance()
7646 if self.op.target_node is not None:
7647 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7649 self.needed_locks[locking.LEVEL_NODE] = []
7650 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7652 self.needed_locks[locking.LEVEL_NODE_RES] = []
7653 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7655 ignore_consistency = self.op.ignore_consistency
7656 shutdown_timeout = self.op.shutdown_timeout
7657 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7658 cleanup=False,
7659 failover=True,
7660 ignore_consistency=ignore_consistency,
7661 shutdown_timeout=shutdown_timeout,
7662 ignore_ipolicy=self.op.ignore_ipolicy)
7663 self.tasklets = [self._migrater]
7665 def DeclareLocks(self, level):
7666 if level == locking.LEVEL_NODE:
7667 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7668 if instance.disk_template in constants.DTS_EXT_MIRROR:
7669 if self.op.target_node is None:
7670 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7671 else:
7672 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7673 self.op.target_node]
7674 del self.recalculate_locks[locking.LEVEL_NODE]
7675 else:
7676 self._LockInstancesNodes()
7677 elif level == locking.LEVEL_NODE_RES:
7679 self.needed_locks[locking.LEVEL_NODE_RES] = \
7680 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7682 def BuildHooksEnv(self):
7685 This runs on master, primary and secondary nodes of the instance.
7688 instance = self._migrater.instance
7689 source_node = instance.primary_node
7690 target_node = self.op.target_node
7691 env = {
7692 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7693 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7694 "OLD_PRIMARY": source_node,
7695 "NEW_PRIMARY": target_node,
7698 if instance.disk_template in constants.DTS_INT_MIRROR:
7699 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7700 env["NEW_SECONDARY"] = source_node
7702 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7704 env.update(_BuildInstanceHookEnvByObject(self, instance))
7706 return env
7708 def BuildHooksNodes(self):
7709 """Build hooks nodes.
7712 instance = self._migrater.instance
7713 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7714 return (nl, nl + [instance.primary_node])
7717 class LUInstanceMigrate(LogicalUnit):
7718 """Migrate an instance.
7720 This is migration without shutting down, compared to the failover,
7721 which is done with shutdown.
7724 HPATH = "instance-migrate"
7725 HTYPE = constants.HTYPE_INSTANCE
7728 def ExpandNames(self):
7729 self._ExpandAndLockInstance()
7731 if self.op.target_node is not None:
7732 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7734 self.needed_locks[locking.LEVEL_NODE] = []
7735 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7737 self.needed_locks[locking.LEVEL_NODE_RES] = []
7738 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7740 self._migrater = \
7741 TLMigrateInstance(self, self.op.instance_name,
7742 cleanup=self.op.cleanup,
7743 failover=False,
7744 fallback=self.op.allow_failover,
7745 allow_runtime_changes=self.op.allow_runtime_changes,
7746 ignore_ipolicy=self.op.ignore_ipolicy)
7747 self.tasklets = [self._migrater]
7749 def DeclareLocks(self, level):
7750 if level == locking.LEVEL_NODE:
7751 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7752 if instance.disk_template in constants.DTS_EXT_MIRROR:
7753 if self.op.target_node is None:
7754 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7755 else:
7756 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7757 self.op.target_node]
7758 del self.recalculate_locks[locking.LEVEL_NODE]
7759 else:
7760 self._LockInstancesNodes()
7761 elif level == locking.LEVEL_NODE_RES:
7763 self.needed_locks[locking.LEVEL_NODE_RES] = \
7764 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7766 def BuildHooksEnv(self):
7769 This runs on master, primary and secondary nodes of the instance.
7772 instance = self._migrater.instance
7773 source_node = instance.primary_node
7774 target_node = self.op.target_node
7775 env = _BuildInstanceHookEnvByObject(self, instance)
7776 env.update({
7777 "MIGRATE_LIVE": self._migrater.live,
7778 "MIGRATE_CLEANUP": self.op.cleanup,
7779 "OLD_PRIMARY": source_node,
7780 "NEW_PRIMARY": target_node,
7781 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7784 if instance.disk_template in constants.DTS_INT_MIRROR:
7785 env["OLD_SECONDARY"] = target_node
7786 env["NEW_SECONDARY"] = source_node
7788 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7792 def BuildHooksNodes(self):
7793 """Build hooks nodes.
7796 instance = self._migrater.instance
7797 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7798 return (nl, nl + [instance.primary_node])
7801 class LUInstanceMove(LogicalUnit):
7802 """Move an instance by data-copying.
7805 HPATH = "instance-move"
7806 HTYPE = constants.HTYPE_INSTANCE
7809 def ExpandNames(self):
7810 self._ExpandAndLockInstance()
7811 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7812 self.op.target_node = target_node
7813 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7814 self.needed_locks[locking.LEVEL_NODE_RES] = []
7815 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7817 def DeclareLocks(self, level):
7818 if level == locking.LEVEL_NODE:
7819 self._LockInstancesNodes(primary_only=True)
7820 elif level == locking.LEVEL_NODE_RES:
7822 self.needed_locks[locking.LEVEL_NODE_RES] = \
7823 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7825 def BuildHooksEnv(self):
7828 This runs on master, primary and secondary nodes of the instance.
7832 "TARGET_NODE": self.op.target_node,
7833 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7835 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7838 def BuildHooksNodes(self):
7839 """Build hooks nodes.
7842 nl = [
7843 self.cfg.GetMasterNode(),
7844 self.instance.primary_node,
7845 self.op.target_node,
7846 ]
7847 return (nl, nl)
7849 def CheckPrereq(self):
7850 """Check prerequisites.
7852 This checks that the instance is in the cluster.
7855 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7856 assert self.instance is not None, \
7857 "Cannot retrieve locked instance %s" % self.op.instance_name
7859 node = self.cfg.GetNodeInfo(self.op.target_node)
7860 assert node is not None, \
7861 "Cannot retrieve locked node %s" % self.op.target_node
7863 self.target_node = target_node = node.name
7865 if target_node == instance.primary_node:
7866 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7867 (instance.name, target_node),
7868 errors.ECODE_STATE)
7870 bep = self.cfg.GetClusterInfo().FillBE(instance)
7872 for idx, dsk in enumerate(instance.disks):
7873 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7874 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7875 " cannot copy" % idx, errors.ECODE_STATE)
7877 _CheckNodeOnline(self, target_node)
7878 _CheckNodeNotDrained(self, target_node)
7879 _CheckNodeVmCapable(self, target_node)
7880 cluster = self.cfg.GetClusterInfo()
7881 group_info = self.cfg.GetNodeGroup(node.group)
7882 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
7883 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7884 ignore=self.op.ignore_ipolicy)
7886 if instance.admin_state == constants.ADMINST_UP:
7887 # check memory requirements on the secondary node
7888 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7889 instance.name, bep[constants.BE_MAXMEM],
7890 instance.hypervisor)
7891 else:
7892 self.LogInfo("Not checking memory on the secondary node as"
7893 " instance will not be started")
7895 # check bridge existance
7896 _CheckInstanceBridgesExist(self, instance, node=target_node)
7898 def Exec(self, feedback_fn):
7899 """Move an instance.
7901 The move is done by shutting it down on its present node, copying
7902 the data over (slow) and starting it on the new node.
7905 instance = self.instance
7907 source_node = instance.primary_node
7908 target_node = self.target_node
7910 self.LogInfo("Shutting down instance %s on source node %s",
7911 instance.name, source_node)
7913 assert (self.owned_locks(locking.LEVEL_NODE) ==
7914 self.owned_locks(locking.LEVEL_NODE_RES))
7916 result = self.rpc.call_instance_shutdown(source_node, instance,
7917 self.op.shutdown_timeout)
7918 msg = result.fail_msg
7919 if msg:
7920 if self.op.ignore_consistency:
7921 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7922 " Proceeding anyway. Please make sure node"
7923 " %s is down. Error details: %s",
7924 instance.name, source_node, source_node, msg)
7925 else:
7926 raise errors.OpExecError("Could not shutdown instance %s on"
7927 " node %s: %s" %
7928 (instance.name, source_node, msg))
7930 # create the target disks
7931 try:
7932 _CreateDisks(self, instance, target_node=target_node)
7933 except errors.OpExecError:
7934 self.LogWarning("Device creation failed, reverting...")
7935 try:
7936 _RemoveDisks(self, instance, target_node=target_node)
7937 finally:
7938 self.cfg.ReleaseDRBDMinors(instance.name)
7939 raise
7941 cluster_name = self.cfg.GetClusterInfo().cluster_name
7943 errs = []
7944 # activate, get path, copy the data over
7945 for idx, disk in enumerate(instance.disks):
7946 self.LogInfo("Copying data for disk %d", idx)
7947 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7948 instance.name, True, idx)
7949 if result.fail_msg:
7950 self.LogWarning("Can't assemble newly created disk %d: %s",
7951 idx, result.fail_msg)
7952 errs.append(result.fail_msg)
7953 break
7954 dev_path = result.payload
7955 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7956 target_node, dev_path,
7957 cluster_name)
7958 if result.fail_msg:
7959 self.LogWarning("Can't copy data over for disk %d: %s",
7960 idx, result.fail_msg)
7961 errs.append(result.fail_msg)
7962 break
7965 self.LogWarning("Some disks failed to copy, aborting")
7967 _RemoveDisks(self, instance, target_node=target_node)
7969 self.cfg.ReleaseDRBDMinors(instance.name)
7970 raise errors.OpExecError("Errors during disk copy: %s" %
7973 instance.primary_node = target_node
7974 self.cfg.Update(instance, feedback_fn)
7976 self.LogInfo("Removing the disks on the original node")
7977 _RemoveDisks(self, instance, target_node=source_node)
7979 # Only start the instance if it's marked as up
7980 if instance.admin_state == constants.ADMINST_UP:
7981 self.LogInfo("Starting instance %s on node %s",
7982 instance.name, target_node)
7984 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7985 ignore_secondaries=True)
7986 if not disks_ok:
7987 _ShutdownInstanceDisks(self, instance)
7988 raise errors.OpExecError("Can't activate the instance's disks")
7990 result = self.rpc.call_instance_start(target_node,
7991 (instance, None, None), False)
7992 msg = result.fail_msg
7993 if msg:
7994 _ShutdownInstanceDisks(self, instance)
7995 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7996 (instance.name, target_node, msg))
7999 class LUNodeMigrate(LogicalUnit):
8000 """Migrate all instances from a node.
8003 HPATH = "node-migrate"
8004 HTYPE = constants.HTYPE_NODE
8007 def CheckArguments(self):
8008 pass
8010 def ExpandNames(self):
8011 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8013 self.share_locks = _ShareAll()
8014 self.needed_locks = {
8015 locking.LEVEL_NODE: [self.op.node_name],
8016 }
8018 def BuildHooksEnv(self):
8021 This runs on the master, the primary and all the secondaries.
8025 "NODE_NAME": self.op.node_name,
8026 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8029 def BuildHooksNodes(self):
8030 """Build hooks nodes.
8033 nl = [self.cfg.GetMasterNode()]
8034 return (nl, nl)
8036 def CheckPrereq(self):
8037 pass
8039 def Exec(self, feedback_fn):
8040 # Prepare jobs for migration instances
8041 allow_runtime_changes = self.op.allow_runtime_changes
8042 jobs = [
8043 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8044 mode=self.op.mode,
8045 live=self.op.live,
8046 iallocator=self.op.iallocator,
8047 target_node=self.op.target_node,
8048 allow_runtime_changes=allow_runtime_changes,
8049 ignore_ipolicy=self.op.ignore_ipolicy)]
8050 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
8051 ]
8053 # TODO: Run iallocator in this opcode and pass correct placement options to
8054 # OpInstanceMigrate. Since other jobs can modify the cluster between
8055 # running the iallocator and the actual migration, a good consistency model
8056 # will have to be found.
8058 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8059 frozenset([self.op.node_name]))
8061 return ResultWithJobs(jobs)
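# Editor's note: "jobs" above is a list of single-opcode job definitions, e.g.
# [[OpInstanceMigrate(instance_name="inst1", ...)],
#  [OpInstanceMigrate(instance_name="inst2", ...)]], so each instance is
# migrated in its own job and the job IDs are handed back to the caller
# through ResultWithJobs.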
8064 class TLMigrateInstance(Tasklet):
8065 """Tasklet class for instance migration.
8068 @ivar live: whether the migration will be done live or non-live;
8069 this variable is initialized only after CheckPrereq has run
8070 @type cleanup: boolean
8071 @ivar cleanup: Whether we clean up after a failed migration
8072 @type iallocator: string
8073 @ivar iallocator: The iallocator used to determine target_node
8074 @type target_node: string
8075 @ivar target_node: If given, the target_node to reallocate the instance to
8076 @type failover: boolean
8077 @ivar failover: Whether operation results in failover or migration
8078 @type fallback: boolean
8079 @ivar fallback: Whether fallback to failover is allowed if migration is not
8080 possible
8081 @type ignore_consistency: boolean
8082 @ivar ignore_consistency: Whether we should ignore consistency between source
8083 and target nodes
8084 @type shutdown_timeout: int
8085 @ivar shutdown_timeout: In case of failover timeout of the shutdown
8086 @type ignore_ipolicy: bool
8087 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8092 _MIGRATION_POLL_INTERVAL = 1 # seconds
8093 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8095 def __init__(self, lu, instance_name, cleanup=False,
8096 failover=False, fallback=False,
8097 ignore_consistency=False,
8098 allow_runtime_changes=True,
8099 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
8100 ignore_ipolicy=False):
8101 """Initializes this class.
8104 Tasklet.__init__(self, lu)
8107 self.instance_name = instance_name
8108 self.cleanup = cleanup
8109 self.live = False # will be overridden later
8110 self.failover = failover
8111 self.fallback = fallback
8112 self.ignore_consistency = ignore_consistency
8113 self.shutdown_timeout = shutdown_timeout
8114 self.ignore_ipolicy = ignore_ipolicy
8115 self.allow_runtime_changes = allow_runtime_changes
8117 def CheckPrereq(self):
8118 """Check prerequisites.
8120 This checks that the instance is in the cluster.
8123 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8124 instance = self.cfg.GetInstanceInfo(instance_name)
8125 assert instance is not None
8126 self.instance = instance
8127 cluster = self.cfg.GetClusterInfo()
8129 if (not self.cleanup and
8130 not instance.admin_state == constants.ADMINST_UP and
8131 not self.failover and self.fallback):
8132 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8133 " switching to failover")
8134 self.failover = True
8136 if instance.disk_template not in constants.DTS_MIRRORED:
8137 if self.failover:
8138 text = "failover"
8139 else:
8140 text = "migration"
8141 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8142 " %s" % (instance.disk_template, text),
8143 errors.ECODE_STATE)
8145 if instance.disk_template in constants.DTS_EXT_MIRROR:
8146 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8148 if self.lu.op.iallocator:
8149 self._RunAllocator()
8150 else:
8151 # We set self.target_node as it is required by
8152 # BuildHooksEnv
8153 self.target_node = self.lu.op.target_node
8155 # Check that the target node is correct in terms of instance policy
8156 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8157 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8158 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8159 group_info)
8160 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8161 ignore=self.ignore_ipolicy)
8163 # self.target_node is already populated, either directly or by the
8165 target_node = self.target_node
8166 if self.target_node == instance.primary_node:
8167 raise errors.OpPrereqError("Cannot migrate instance %s"
8168 " to its primary (%s)" %
8169 (instance.name, instance.primary_node),
8170 errors.ECODE_INVAL)
8172 if len(self.lu.tasklets) == 1:
8173 # It is safe to release locks only when we're the only tasklet
8175 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8176 keep=[instance.primary_node, self.target_node])
8178 else:
8179 secondary_nodes = instance.secondary_nodes
8180 if not secondary_nodes:
8181 raise errors.ConfigurationError("No secondary node but using"
8182 " %s disk template" %
8183 instance.disk_template)
8184 target_node = secondary_nodes[0]
8185 if self.lu.op.iallocator or (self.lu.op.target_node and
8186 self.lu.op.target_node != target_node):
8188 text = "failed over"
8191 raise errors.OpPrereqError("Instances with disk template %s cannot"
8192 " be %s to arbitrary nodes"
8193 " (neither an iallocator nor a target"
8194 " node can be passed)" %
8195 (instance.disk_template, text),
8196 errors.ECODE_INVAL)
8197 nodeinfo = self.cfg.GetNodeInfo(target_node)
8198 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8199 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8200 group_info)
8201 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8202 ignore=self.ignore_ipolicy)
8204 i_be = cluster.FillBE(instance)
8206 # check memory requirements on the secondary node
8207 if (not self.cleanup and
8208 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8209 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8210 "migrating instance %s" %
8212 i_be[constants.BE_MINMEM],
8213 instance.hypervisor)
8214 else:
8215 self.lu.LogInfo("Not checking memory on the secondary node as"
8216 " instance will not be started")
8218 # check if failover must be forced instead of migration
8219 if (not self.cleanup and not self.failover and
8220 i_be[constants.BE_ALWAYS_FAILOVER]):
8221 self.lu.LogInfo("Instance configured to always failover; fallback"
8223 self.failover = True
8225 # check bridge existance
8226 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8228 if not self.cleanup:
8229 _CheckNodeNotDrained(self.lu, target_node)
8230 if not self.failover:
8231 result = self.rpc.call_instance_migratable(instance.primary_node,
8232 instance)
8233 if result.fail_msg and self.fallback:
8234 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8235 " failover")
8236 self.failover = True
8237 else:
8238 result.Raise("Can't migrate, please use failover",
8239 prereq=True, ecode=errors.ECODE_STATE)
8241 assert not (self.failover and self.cleanup)
8243 if not self.failover:
8244 if self.lu.op.live is not None and self.lu.op.mode is not None:
8245 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8246 " parameters are accepted",
8248 if self.lu.op.live is not None:
8250 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8252 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8253 # reset the 'live' parameter to None so that repeated
8254 # invocations of CheckPrereq do not raise an exception
8255 self.lu.op.live = None
8256 elif self.lu.op.mode is None:
8257 # read the default value from the hypervisor
8258 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8259 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8261 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8262 else:
8263 # Failover is never live
8264 self.live = False
8266 if not (self.failover or self.cleanup):
8267 remote_info = self.rpc.call_instance_info(instance.primary_node,
8268 instance.name,
8269 instance.hypervisor)
8270 remote_info.Raise("Error checking instance on node %s" %
8271 instance.primary_node)
8272 instance_running = bool(remote_info.payload)
8273 if instance_running:
8274 self.current_mem = int(remote_info.payload["memory"])
8276 def _RunAllocator(self):
8277 """Run the allocator based on input opcode.
8280 # FIXME: add a self.ignore_ipolicy option
8281 req = iallocator.IAReqRelocate(name=self.instance_name,
8282 relocate_from=[self.instance.primary_node])
8283 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8285 ial.Run(self.lu.op.iallocator)
8287 if not ial.success:
8288 raise errors.OpPrereqError("Can't compute nodes using"
8289 " iallocator '%s': %s" %
8290 (self.lu.op.iallocator, ial.info),
8291 errors.ECODE_NORES)
8292 self.target_node = ial.result[0]
8293 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8294 self.instance_name, self.lu.op.iallocator,
8295 utils.CommaJoin(ial.result))
8297 def _WaitUntilSync(self):
8298 """Poll with custom rpc for disk sync.
8300 This uses our own step-based rpc call.
8303 self.feedback_fn("* wait until resync is done")
8307 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8309 (self.instance.disks,
8312 for node, nres in result.items():
8313 nres.Raise("Cannot resync disks on node %s" % node)
8314 node_done, node_percent = nres.payload
8315 all_done = all_done and node_done
8316 if node_percent is not None:
8317 min_percent = min(min_percent, node_percent)
8319 if min_percent < 100:
8320 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8323 def _EnsureSecondary(self, node):
8324 """Demote a node to secondary.
8327 self.feedback_fn("* switching node %s to secondary mode" % node)
8329 for dev in self.instance.disks:
8330 self.cfg.SetDiskID(dev, node)
8332 result = self.rpc.call_blockdev_close(node, self.instance.name,
8333 self.instance.disks)
8334 result.Raise("Cannot change disk to secondary on node %s" % node)
8336 def _GoStandalone(self):
8337 """Disconnect from the network.
8340 self.feedback_fn("* changing into standalone mode")
8341 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8342 self.instance.disks)
8343 for node, nres in result.items():
8344 nres.Raise("Cannot disconnect disks node %s" % node)
8346 def _GoReconnect(self, multimaster):
8347 """Reconnect to the network.
8353 msg = "single-master"
8354 self.feedback_fn("* changing disks into %s mode" % msg)
8355 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8356 (self.instance.disks, self.instance),
8357 self.instance.name, multimaster)
8358 for node, nres in result.items():
8359 nres.Raise("Cannot change disks config on node %s" % node)
8361 def _ExecCleanup(self):
8362 """Try to cleanup after a failed migration.
8364 The cleanup is done by:
8365 - check that the instance is running only on one node
8366 (and update the config if needed)
8367 - change disks on its secondary node to secondary
8368 - wait until disks are fully synchronized
8369 - disconnect from the network
8370 - change disks into single-master mode
8371 - wait again until disks are fully synchronized
8374 instance = self.instance
8375 target_node = self.target_node
8376 source_node = self.source_node
8378 # check running on only one node
8379 self.feedback_fn("* checking where the instance actually runs"
8380 " (if this hangs, the hypervisor might be in"
8382 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8383 for node, result in ins_l.items():
8384 result.Raise("Can't contact node %s" % node)
8386 runningon_source = instance.name in ins_l[source_node].payload
8387 runningon_target = instance.name in ins_l[target_node].payload
8389 if runningon_source and runningon_target:
8390 raise errors.OpExecError("Instance seems to be running on two nodes,"
8391 " or the hypervisor is confused; you will have"
8392 " to ensure manually that it runs only on one"
8393 " and restart this operation")
8395 if not (runningon_source or runningon_target):
8396 raise errors.OpExecError("Instance does not seem to be running at all;"
8397 " in this case it's safer to repair by"
8398 " running 'gnt-instance stop' to ensure disk"
8399 " shutdown, and then restarting it")
8401 if runningon_target:
8402 # the migration has actually succeeded, we need to update the config
8403 self.feedback_fn("* instance running on secondary node (%s),"
8404 " updating config" % target_node)
8405 instance.primary_node = target_node
8406 self.cfg.Update(instance, self.feedback_fn)
8407 demoted_node = source_node
8409 self.feedback_fn("* instance confirmed to be running on its"
8410 " primary node (%s)" % source_node)
8411 demoted_node = target_node
8413 if instance.disk_template in constants.DTS_INT_MIRROR:
8414 self._EnsureSecondary(demoted_node)
8415 try:
8416 self._WaitUntilSync()
8417 except errors.OpExecError:
8418 # we ignore errors here, since if the device is standalone, it
8419 # won't be able to sync
8420 pass
8421 self._GoStandalone()
8422 self._GoReconnect(False)
8423 self._WaitUntilSync()
8425 self.feedback_fn("* done")
8427 def _RevertDiskStatus(self):
8428 """Try to revert the disk status after a failed migration.
8431 target_node = self.target_node
8432 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8433 return
8435 try:
8436 self._EnsureSecondary(target_node)
8437 self._GoStandalone()
8438 self._GoReconnect(False)
8439 self._WaitUntilSync()
8440 except errors.OpExecError, err:
8441 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8442 " please try to recover the instance manually;"
8443 " error '%s'" % str(err))
8445 def _AbortMigration(self):
8446 """Call the hypervisor code to abort a started migration.
8449 instance = self.instance
8450 target_node = self.target_node
8451 source_node = self.source_node
8452 migration_info = self.migration_info
8454 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8455 instance,
8456 migration_info,
8457 False)
8458 abort_msg = abort_result.fail_msg
8459 if abort_msg:
8460 logging.error("Aborting migration failed on target node %s: %s",
8461 target_node, abort_msg)
8462 # Don't raise an exception here, as we still have to try to revert the
8463 # disk status, even if this step failed.
8465 abort_result = self.rpc.call_instance_finalize_migration_src(
8466 source_node, instance, False, self.live)
8467 abort_msg = abort_result.fail_msg
8468 if abort_msg:
8469 logging.error("Aborting migration failed on source node %s: %s",
8470 source_node, abort_msg)
8472 def _ExecMigration(self):
8473 """Migrate an instance.
8475 The migrate is done by:
8476 - change the disks into dual-master mode
8477 - wait until disks are fully synchronized again
8478 - migrate the instance
8479 - change disks on the new secondary node (the old primary) to secondary
8480 - wait until disks are fully synchronized
8481 - change disks into single-master mode
8484 instance = self.instance
8485 target_node = self.target_node
8486 source_node = self.source_node
8488 # Check for hypervisor version mismatch and warn the user.
8489 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8490 None, [self.instance.hypervisor])
8491 for ninfo in nodeinfo.values():
8492 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8493 ninfo.node)
8494 (_, _, (src_info, )) = nodeinfo[source_node].payload
8495 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8497 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8498 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8499 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8500 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8501 if src_version != dst_version:
8502 self.feedback_fn("* warning: hypervisor version mismatch between"
8503 " source (%s) and target (%s) node" %
8504 (src_version, dst_version))
8506 self.feedback_fn("* checking disk consistency between source and target")
8507 for (idx, dev) in enumerate(instance.disks):
8508 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8509 raise errors.OpExecError("Disk %s is degraded or not fully"
8510 " synchronized on target node,"
8511 " aborting migration" % idx)
8513 if self.current_mem > self.tgt_free_mem:
8514 if not self.allow_runtime_changes:
8515 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8516 " free memory to fit instance %s on target"
8517 " node %s (have %dMB, need %dMB)" %
8518 (instance.name, target_node,
8519 self.tgt_free_mem, self.current_mem))
8520 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8521 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8522 instance,
8523 self.tgt_free_mem)
8524 rpcres.Raise("Cannot modify instance runtime memory")
8526 # First get the migration information from the remote node
8527 result = self.rpc.call_migration_info(source_node, instance)
8528 msg = result.fail_msg
8529 if msg:
8530 log_err = ("Failed fetching source migration information from %s: %s" %
8531 (source_node, msg))
8532 logging.error(log_err)
8533 raise errors.OpExecError(log_err)
8535 self.migration_info = migration_info = result.payload
8537 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8538 # Then switch the disks to master/master mode
8539 self._EnsureSecondary(target_node)
8540 self._GoStandalone()
8541 self._GoReconnect(True)
8542 self._WaitUntilSync()
8544 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8545 result = self.rpc.call_accept_instance(target_node,
8546 instance,
8547 migration_info,
8548 self.nodes_ip[target_node])
8550 msg = result.fail_msg
8551 if msg:
8552 logging.error("Instance pre-migration failed, trying to revert"
8553 " disk status: %s", msg)
8554 self.feedback_fn("Pre-migration failed, aborting")
8555 self._AbortMigration()
8556 self._RevertDiskStatus()
8557 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8558 (instance.name, msg))
8560 self.feedback_fn("* migrating instance to %s" % target_node)
8561 result = self.rpc.call_instance_migrate(source_node, instance,
8562 self.nodes_ip[target_node],
8563 self.live)
8564 msg = result.fail_msg
8565 if msg:
8566 logging.error("Instance migration failed, trying to revert"
8567 " disk status: %s", msg)
8568 self.feedback_fn("Migration failed, aborting")
8569 self._AbortMigration()
8570 self._RevertDiskStatus()
8571 raise errors.OpExecError("Could not migrate instance %s: %s" %
8572 (instance.name, msg))
8574 self.feedback_fn("* starting memory transfer")
8575 last_feedback = time.time()
8576 while True:
8577 result = self.rpc.call_instance_get_migration_status(source_node,
8578 instance)
8579 msg = result.fail_msg
8580 ms = result.payload # MigrationStatus instance
8581 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8582 logging.error("Instance migration failed, trying to revert"
8583 " disk status: %s", msg)
8584 self.feedback_fn("Migration failed, aborting")
8585 self._AbortMigration()
8586 self._RevertDiskStatus()
8587 raise errors.OpExecError("Could not migrate instance %s: %s" %
8588 (instance.name, msg))
8590 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8591 self.feedback_fn("* memory transfer complete")
8594 if (utils.TimeoutExpired(last_feedback,
8595 self._MIGRATION_FEEDBACK_INTERVAL) and
8596 ms.transferred_ram is not None):
8597 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8598 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8599 last_feedback = time.time()
8601 time.sleep(self._MIGRATION_POLL_INTERVAL)
8603 result = self.rpc.call_instance_finalize_migration_src(source_node,
8604 instance,
8605 True,
8606 self.live)
8607 msg = result.fail_msg
8608 if msg:
8609 logging.error("Instance migration succeeded, but finalization failed"
8610 " on the source node: %s", msg)
8611 raise errors.OpExecError("Could not finalize instance migration: %s" %
8612 msg)
8614 instance.primary_node = target_node
8616 # distribute new instance config to the other nodes
8617 self.cfg.Update(instance, self.feedback_fn)
8619 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8620 instance,
8621 migration_info,
8622 True)
8623 msg = result.fail_msg
8624 if msg:
8625 logging.error("Instance migration succeeded, but finalization failed"
8626 " on the target node: %s", msg)
8627 raise errors.OpExecError("Could not finalize instance migration: %s" %
8628 msg)
8630 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8631 self._EnsureSecondary(source_node)
8632 self._WaitUntilSync()
8633 self._GoStandalone()
8634 self._GoReconnect(False)
8635 self._WaitUntilSync()
8637 # If the instance's disk template is `rbd' and there was a successful
8638 # migration, unmap the device from the source node.
8639 if self.instance.disk_template == constants.DT_RBD:
8640 disks = _ExpandCheckDisks(instance, instance.disks)
8641 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8642 for disk in disks:
8643 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8644 msg = result.fail_msg
8645 if msg:
8646 logging.error("Migration was successful, but couldn't unmap the"
8647 " block device %s on source node %s: %s",
8648 disk.iv_name, source_node, msg)
8649 logging.error("You need to unmap the device %s manually on %s",
8650 disk.iv_name, source_node)
8652 self.feedback_fn("* done")
8654 def _ExecFailover(self):
8655 """Failover an instance.
8657 The failover is done by shutting it down on its present node and
8658 starting it on the secondary.
8661 instance = self.instance
8662 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8664 source_node = instance.primary_node
8665 target_node = self.target_node
8667 if instance.admin_state == constants.ADMINST_UP:
8668 self.feedback_fn("* checking disk consistency between source and target")
8669 for (idx, dev) in enumerate(instance.disks):
8670 # for drbd, these are drbd over lvm
8671 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8672 False):
8673 if primary_node.offline:
8674 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8675 " target node %s" %
8676 (primary_node.name, idx, target_node))
8677 elif not self.ignore_consistency:
8678 raise errors.OpExecError("Disk %s is degraded on target node,"
8679 " aborting failover" % idx)
8681 self.feedback_fn("* not checking disk consistency as instance is not"
8684 self.feedback_fn("* shutting down instance on source node")
8685 logging.info("Shutting down instance %s on node %s",
8686 instance.name, source_node)
8688 result = self.rpc.call_instance_shutdown(source_node, instance,
8689 self.shutdown_timeout)
8690 msg = result.fail_msg
8691 if msg:
8692 if self.ignore_consistency or primary_node.offline:
8693 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8694 " proceeding anyway; please make sure node"
8695 " %s is down; error details: %s",
8696 instance.name, source_node, source_node, msg)
8697 else:
8698 raise errors.OpExecError("Could not shutdown instance %s on"
8699 " node %s: %s" %
8700 (instance.name, source_node, msg))
8702 self.feedback_fn("* deactivating the instance's disks on source node")
8703 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8704 raise errors.OpExecError("Can't shut down the instance's disks")
8706 instance.primary_node = target_node
8707 # distribute new instance config to the other nodes
8708 self.cfg.Update(instance, self.feedback_fn)
8710 # Only start the instance if it's marked as up
8711 if instance.admin_state == constants.ADMINST_UP:
8712 self.feedback_fn("* activating the instance's disks on target node %s" %
8714 logging.info("Starting instance %s on node %s",
8715 instance.name, target_node)
8717 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8718 ignore_secondaries=True)
8720 _ShutdownInstanceDisks(self.lu, instance)
8721 raise errors.OpExecError("Can't activate the instance's disks")
8723 self.feedback_fn("* starting the instance on the target node %s" %
8725 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8727 msg = result.fail_msg
8729 _ShutdownInstanceDisks(self.lu, instance)
8730 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8731 (instance.name, target_node, msg))
8733 def Exec(self, feedback_fn):
8734 """Perform the migration.
8737 self.feedback_fn = feedback_fn
8738 self.source_node = self.instance.primary_node
8740 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8741 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8742 self.target_node = self.instance.secondary_nodes[0]
8743 # Otherwise self.target_node has been populated either
8744 # directly, or through an iallocator.
8746 self.all_nodes = [self.source_node, self.target_node]
8747 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8748 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8751 feedback_fn("Failover instance %s" % self.instance.name)
8752 self._ExecFailover()
8754 feedback_fn("Migrating instance %s" % self.instance.name)
8757 return self._ExecCleanup()
8759 return self._ExecMigration()
8762 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8763 force_open):
8764 """Wrapper around L{_CreateBlockDevInner}.
8766 This method annotates the root device first.
8769 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8770 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8771 force_open)
8774 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8775 force_open):
8776 """Create a tree of block devices on a given node.
8778 If this device type has to be created on secondaries, create it and
8779 all its children.
8781 If not, just recurse to children keeping the same 'force' value.
8783 @attention: The device has to be annotated already.
8785 @param lu: the lu on whose behalf we execute
8786 @param node: the node on which to create the device
8787 @type instance: L{objects.Instance}
8788 @param instance: the instance which owns the device
8789 @type device: L{objects.Disk}
8790 @param device: the device to create
8791 @type force_create: boolean
8792 @param force_create: whether to force creation of this device; this
8793 will be changed to True whenever we find a device which has
8794 the CreateOnSecondary() attribute
8795 @param info: the extra 'metadata' we should attach to the device
8796 (this will be represented as a LVM tag)
8797 @type force_open: boolean
8798 @param force_open: this parameter will be passed to the
8799 L{backend.BlockdevCreate} function where it specifies
8800 whether we run on primary or not, and it affects both
8801 the child assembly and the device's own Open() execution
8804 if device.CreateOnSecondary():
8805 force_create = True
8807 if device.children:
8808 for child in device.children:
8809 _CreateBlockDevInner(lu, node, instance, child, force_create,
8810 info, force_open)
8812 if not force_create:
8813 return
8815 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8818 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8819 """Create a single block device on a given node.
8821 This will not recurse over children of the device, so they must be
8822 created in advance.
8824 @param lu: the lu on whose behalf we execute
8825 @param node: the node on which to create the device
8826 @type instance: L{objects.Instance}
8827 @param instance: the instance which owns the device
8828 @type device: L{objects.Disk}
8829 @param device: the device to create
8830 @param info: the extra 'metadata' we should attach to the device
8831 (this will be represented as a LVM tag)
8832 @type force_open: boolean
8833 @param force_open: this parameter will be passed to the
8834 L{backend.BlockdevCreate} function where it specifies
8835 whether we run on primary or not, and it affects both
8836 the child assembly and the device's own Open() execution
8839 lu.cfg.SetDiskID(device, node)
8840 result = lu.rpc.call_blockdev_create(node, device, device.size,
8841 instance.name, force_open, info)
8842 result.Raise("Can't create block device %s on"
8843 " node %s for instance %s" % (device, node, instance.name))
8844 if device.physical_id is None:
8845 device.physical_id = result.payload
8848 def _GenerateUniqueNames(lu, exts):
8849 """Generate a suitable LV name.
8851 This will generate a logical volume name for the given instance.
8854 results = []
8855 for val in exts:
8856 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8857 results.append("%s%s" % (new_id, val))
8859 return results
8861 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8862 iv_name, p_minor, s_minor):
8863 """Generate a drbd8 device complete with its children.
8866 assert len(vgnames) == len(names) == 2
8867 port = lu.cfg.AllocatePort()
8868 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8870 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8871 logical_id=(vgnames[0], names[0]),
8872 params={})
8873 dev_meta = objects.Disk(dev_type=constants.LD_LV,
8874 size=constants.DRBD_META_SIZE,
8875 logical_id=(vgnames[1], names[1]),
8876 params={})
8877 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8878 logical_id=(primary, secondary, port,
8879 p_minor, s_minor,
8880 shared_secret),
8881 children=[dev_data, dev_meta],
8882 iv_name=iv_name, params={})
8883 return drbd_dev
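# Editor's sketch (hypothetical values): called with minors (0, 1), the
# function returns a LD_DRBD8 disk whose logical_id is
# (primary, secondary, port, 0, 1, shared_secret) and whose children are the
# data LV and a constants.DRBD_META_SIZE metadata LV built from "names".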
8886 _DISK_TEMPLATE_NAME_PREFIX = {
8887 constants.DT_PLAIN: "",
8888 constants.DT_RBD: ".rbd",
8889 }
8892 _DISK_TEMPLATE_DEVICE_TYPE = {
8893 constants.DT_PLAIN: constants.LD_LV,
8894 constants.DT_FILE: constants.LD_FILE,
8895 constants.DT_SHARED_FILE: constants.LD_FILE,
8896 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8897 constants.DT_RBD: constants.LD_RBD,
8898 }
8901 def _GenerateDiskTemplate(
8902 lu, template_name, instance_name, primary_node, secondary_nodes,
8903 disk_info, file_storage_dir, file_driver, base_index,
8904 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8905 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8906 """Generate the entire disk layout for a given template type.
8909 #TODO: compute space requirements
8911 vgname = lu.cfg.GetVGName()
8912 disk_count = len(disk_info)
8913 disks = []
8915 if template_name == constants.DT_DISKLESS:
8916 pass
8917 elif template_name == constants.DT_DRBD8:
8918 if len(secondary_nodes) != 1:
8919 raise errors.ProgrammerError("Wrong template configuration")
8920 remote_node = secondary_nodes[0]
8921 minors = lu.cfg.AllocateDRBDMinor(
8922 [primary_node, remote_node] * len(disk_info), instance_name)
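# Editor's note: because the node pair is repeated once per disk, the minors
# list is laid out as [disk0_primary, disk0_secondary, disk1_primary, ...];
# the minors[idx * 2] / minors[idx * 2 + 1] indexing further down relies on
# exactly this layout.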
8924 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8925 full_disk_params)
8926 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8928 names = []
8929 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8930 for i in range(disk_count)]):
8931 names.append(lv_prefix + "_data")
8932 names.append(lv_prefix + "_meta")
8933 for idx, disk in enumerate(disk_info):
8934 disk_index = idx + base_index
8935 data_vg = disk.get(constants.IDISK_VG, vgname)
8936 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8937 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8938 disk[constants.IDISK_SIZE],
8940 names[idx * 2:idx * 2 + 2],
8941 "disk/%d" % disk_index,
8942 minors[idx * 2], minors[idx * 2 + 1])
8943 disk_dev.mode = disk[constants.IDISK_MODE]
8944 disks.append(disk_dev)
8945 else:
8946 if secondary_nodes:
8947 raise errors.ProgrammerError("Wrong template configuration")
8949 if template_name == constants.DT_FILE:
8950 _req_file_storage()
8951 elif template_name == constants.DT_SHARED_FILE:
8952 _req_shr_file_storage()
8954 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8955 if name_prefix is None:
8956 names = None
8957 else:
8958 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8959 (name_prefix, base_index + i)
8960 for i in range(disk_count)])
8962 if template_name == constants.DT_PLAIN:
8963 def logical_id_fn(idx, _, disk):
8964 vg = disk.get(constants.IDISK_VG, vgname)
8965 return (vg, names[idx])
8966 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8967 logical_id_fn = \
8968 lambda _, disk_index, disk: (file_driver,
8969 "%s/disk%d" % (file_storage_dir,
8970 disk_index))
8971 elif template_name == constants.DT_BLOCK:
8972 logical_id_fn = \
8973 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8974 disk[constants.IDISK_ADOPT])
8975 elif template_name == constants.DT_RBD:
8976 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8978 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8980 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8982 for idx, disk in enumerate(disk_info):
8983 disk_index = idx + base_index
8984 size = disk[constants.IDISK_SIZE]
8985 feedback_fn("* disk %s, size %s" %
8986 (disk_index, utils.FormatUnit(size, "h")))
8987 disks.append(objects.Disk(dev_type=dev_type, size=size,
8988 logical_id=logical_id_fn(idx, disk_index, disk),
8989 iv_name="disk/%d" % disk_index,
8990 mode=disk[constants.IDISK_MODE],
8991 params={}))
8993 return disks
8996 def _GetInstanceInfoText(instance):
8997 """Compute that text that should be added to the disk's metadata.
9000 return "originstname+%s" % instance.name
9003 def _CalcEta(time_taken, written, total_size):
9004 """Calculates the ETA based on size written and total size.
9006 @param time_taken: The time taken so far
9007 @param written: amount written so far
9008 @param total_size: The total size of data to be written
9009 @return: The remaining time in seconds
9012 avg_time = time_taken / float(written)
9013 return (total_size - written) * avg_time
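# Editor's worked example: after 60s spent writing 1024MB of a 4096MB disk,
# _CalcEta(60, 1024, 4096) == (4096 - 1024) * (60 / 1024.0) == 180.0, i.e.
# three more minutes at the average rate observed so far.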
9016 def _WipeDisks(lu, instance, disks=None):
9017 """Wipes instance disks.
9019 @type lu: L{LogicalUnit}
9020 @param lu: the logical unit on whose behalf we execute
9021 @type instance: L{objects.Instance}
9022 @param instance: the instance whose disks we should create
9023 @return: the success of the wipe
9026 node = instance.primary_node
9028 if disks is None:
9029 disks = [(idx, disk, 0)
9030 for (idx, disk) in enumerate(instance.disks)]
9032 for (_, device, _) in disks:
9033 lu.cfg.SetDiskID(device, node)
9035 logging.info("Pausing synchronization of disks of instance '%s'",
9037 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9038 (map(compat.snd, disks),
9041 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9043 for idx, success in enumerate(result.payload):
9044 if not success:
9045 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9046 " failed", idx, instance.name)
9048 try:
9049 for (idx, device, offset) in disks:
9050 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9051 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9052 wipe_chunk_size = \
9053 int(min(constants.MAX_WIPE_CHUNK,
9054 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
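# Editor's worked example (assuming MAX_WIPE_CHUNK = 1024MB and
# MIN_WIPE_CHUNK_PERCENT = 10): a 51200MB disk yields
# min(1024, 51200 / 100.0 * 10) = 1024MB chunks, while a 5000MB disk
# yields min(1024, 500) = 500MB chunks.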
9056 size = device.size
9057 last_output = 0
9058 start_time = time.time()
9060 if offset == 0:
9061 info_text = ""
9062 else:
9063 info_text = (" (from %s to %s)" %
9064 (utils.FormatUnit(offset, "h"),
9065 utils.FormatUnit(size, "h")))
9067 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9069 logging.info("Wiping disk %d for instance %s on node %s using"
9070 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9072 while offset < size:
9073 wipe_size = min(wipe_chunk_size, size - offset)
9075 logging.debug("Wiping disk %d, offset %s, chunk %s",
9076 idx, offset, wipe_size)
9078 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9079 wipe_size)
9080 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9081 (idx, offset, wipe_size))
9083 now = time.time()
9084 offset += wipe_size
9085 if now - last_output >= 60:
9086 eta = _CalcEta(now - start_time, offset, size)
9087 lu.LogInfo(" - done: %.1f%% ETA: %s",
9088 offset / float(size) * 100, utils.FormatSeconds(eta))
9089 last_output = now
9091 logging.info("Resuming synchronization of disks for instance '%s'",
9094 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9095 (map(compat.snd, disks),
9100 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9101 node, result.fail_msg)
9103 for idx, success in enumerate(result.payload):
9105 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9106 " failed", idx, instance.name)
9109 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9110 """Create all disks for an instance.
9112 This abstracts away some work from AddInstance.
9114 @type lu: L{LogicalUnit}
9115 @param lu: the logical unit on whose behalf we execute
9116 @type instance: L{objects.Instance}
9117 @param instance: the instance whose disks we should create
9119 @param to_skip: list of indices to skip
9120 @type target_node: string
9121 @param target_node: if passed, overrides the target node for creation
9123 @return: the success of the creation
9126 info = _GetInstanceInfoText(instance)
9127 if target_node is None:
9128 pnode = instance.primary_node
9129 all_nodes = instance.all_nodes
9130 else:
9131 pnode = target_node
9132 all_nodes = [pnode]
9134 if instance.disk_template in constants.DTS_FILEBASED:
9135 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9136 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9138 result.Raise("Failed to create directory '%s' on"
9139 " node %s" % (file_storage_dir, pnode))
9141 # Note: this needs to be kept in sync with adding of disks in
9142 # LUInstanceSetParams
9143 for idx, device in enumerate(instance.disks):
9144 if to_skip and idx in to_skip:
9145 continue
9146 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9148 for node in all_nodes:
9149 f_create = node == pnode
9150 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9153 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9154 """Remove all disks for an instance.
9156 This abstracts away some work from `AddInstance()` and
9157 `RemoveInstance()`. Note that in case some of the devices couldn't
9158 be removed, the removal will continue with the other ones (compare
9159 with `_CreateDisks()`).
9161 @type lu: L{LogicalUnit}
9162 @param lu: the logical unit on whose behalf we execute
9163 @type instance: L{objects.Instance}
9164 @param instance: the instance whose disks we should remove
9165 @type target_node: string
9166 @param target_node: used to override the node on which to remove the disks
9168 @return: the success of the removal
9171 logging.info("Removing block devices for instance %s", instance.name)
9174 ports_to_release = set()
9175 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9176 for (idx, device) in enumerate(anno_disks):
9177 if target_node:
9178 edata = [(target_node, device)]
9179 else:
9180 edata = device.ComputeNodeTree(instance.primary_node)
9181 for node, disk in edata:
9182 lu.cfg.SetDiskID(disk, node)
9183 result = lu.rpc.call_blockdev_remove(node, disk)
9184 if result.fail_msg:
9185 lu.LogWarning("Could not remove disk %s on node %s,"
9186 " continuing anyway: %s", idx, node, result.fail_msg)
9187 if not (result.offline and node != instance.primary_node):
9188 all_result = False
9190 # if this is a DRBD disk, return its port to the pool
9191 if device.dev_type in constants.LDS_DRBD:
9192 ports_to_release.add(device.logical_id[2])
9194 if all_result or ignore_failures:
9195 for port in ports_to_release:
9196 lu.cfg.AddTcpUdpPort(port)
9198 if instance.disk_template == constants.DT_FILE:
9199 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9200 if target_node:
9201 tgt = target_node
9202 else:
9203 tgt = instance.primary_node
9204 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9205 if result.fail_msg:
9206 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9207 file_storage_dir, instance.primary_node, result.fail_msg)
9208 all_result = False
9210 return all_result
9213 def _ComputeDiskSizePerVG(disk_template, disks):
9214 """Compute disk size requirements in the volume group
9217 def _compute(disks, payload):
9218 """Universal algorithm.
9221 vgs = {}
9222 for disk in disks:
9223 vgs[disk[constants.IDISK_VG]] = \
9224 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9226 return vgs
9228 # Required free disk space as a function of disk and swap space
9229 req_size_dict = {
9230 constants.DT_DISKLESS: {},
9231 constants.DT_PLAIN: _compute(disks, 0),
9232 # 128 MB are added for drbd metadata for each disk
9233 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9234 constants.DT_FILE: {},
9235 constants.DT_SHARED_FILE: {},
9236 }
9238 if disk_template not in req_size_dict:
9239 raise errors.ProgrammerError("Disk template '%s' size requirement"
9240 " is unknown" % disk_template)
9242 return req_size_dict[disk_template]
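# Editor's worked example (hypothetical input): for two 1024MB disks in
# volume group "xenvg", _ComputeDiskSizePerVG(constants.DT_DRBD8, disks)
# returns {"xenvg": 2 * (1024 + constants.DRBD_META_SIZE)}, since every DRBD
# disk also reserves 128MB of metadata in its VG.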
9245 def _FilterVmNodes(lu, nodenames):
9246 """Filters out non-vm_capable nodes from a list.
9248 @type lu: L{LogicalUnit}
9249 @param lu: the logical unit for which we check
9250 @type nodenames: list
9251 @param nodenames: the list of nodes on which we should check
9253 @return: the list of vm-capable nodes
9256 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9257 return [name for name in nodenames if name not in vm_nodes]
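# Editor's note: despite its name, "vm_nodes" above holds the set of
# *non*-vm-capable nodes; the list comprehension keeps exactly the names
# that are absent from that set.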
9260 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9261 """Hypervisor parameter validation.
9263 This function abstracts the hypervisor parameter validation to be
9264 used in both instance create and instance modify.
9266 @type lu: L{LogicalUnit}
9267 @param lu: the logical unit for which we check
9268 @type nodenames: list
9269 @param nodenames: the list of nodes on which we should check
9270 @type hvname: string
9271 @param hvname: the name of the hypervisor we should use
9272 @type hvparams: dict
9273 @param hvparams: the parameters which we need to check
9274 @raise errors.OpPrereqError: if the parameters are not valid
9277 nodenames = _FilterVmNodes(lu, nodenames)
9279 cluster = lu.cfg.GetClusterInfo()
9280 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9282 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9283 for node in nodenames:
9287 info.Raise("Hypervisor parameter validation failed on node %s" % node)
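

# Example (illustrative; the parameter value is made up): validating a KVM
# override before an instance modification, with the per-opcode values
# layered on top of the cluster-level defaults before the RPC call:
#
#   _CheckHVParams(self, [pnode.name], constants.HT_KVM,
#                  {"kernel_path": "/boot/vmlinuz-custom"})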


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should check
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


def _CreateInstanceAllocRequest(op, disks, nics, beparams):
  """Wrapper around IAReqInstanceAlloc.

  @param op: The instance opcode
  @param disks: The computed disks
  @param nics: The computed nics
  @param beparams: The fully filled beparams

  @returns: A filled L{iallocator.IAReqInstanceAlloc}

  """
  spindle_use = beparams[constants.BE_SPINDLE_USE]
  return iallocator.IAReqInstanceAlloc(name=op.instance_name,
                                       disk_template=op.disk_template,
                                       tags=op.tags,
                                       os=op.os_type,
                                       vcpus=beparams[constants.BE_VCPUS],
                                       memory=beparams[constants.BE_MAXMEM],
                                       spindle_use=spindle_use,
                                       disks=disks,
                                       nics=[n.ToDict() for n in nics],
                                       hypervisor=op.hypervisor)
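

# Example: LUInstanceCreate._RunAllocator below builds its request exactly
# this way, from the already-computed disks, nics and filled beparams:
#
#   req = _CreateInstanceAllocRequest(self.op, self.disks, self.nics,
#                                     self.be_full)
#   ial = iallocator.IAllocator(self.cfg, self.rpc, req)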


def _ComputeNics(op, cluster, default_ip, cfg, proc):
  """Computes the nics.

  @param op: The instance opcode
  @param cluster: Cluster configuration object
  @param default_ip: The default ip to assign
  @param cfg: An instance of the configuration object
  @param proc: The executor instance

  @returns: The built-up nics

  """
  nics = []
  for idx, nic in enumerate(op.nics):
    nic_mode_req = nic.get(constants.INIC_MODE, None)
    nic_mode = nic_mode_req
    if nic_mode is None or nic_mode == constants.VALUE_AUTO:
      nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

    # in routed mode, for the first nic, the default ip is 'auto'
    if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
      default_ip_mode = constants.VALUE_AUTO
    else:
      default_ip_mode = constants.VALUE_NONE

    # ip validity checks
    ip = nic.get(constants.INIC_IP, default_ip_mode)
    if ip is None or ip.lower() == constants.VALUE_NONE:
      nic_ip = None
    elif ip.lower() == constants.VALUE_AUTO:
      if not op.name_check:
        raise errors.OpPrereqError("IP address set to auto but name checks"
                                   " have been skipped",
                                   errors.ECODE_INVAL)
      nic_ip = default_ip
    else:
      if not netutils.IPAddress.IsValid(ip):
        raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                   errors.ECODE_INVAL)
      nic_ip = ip

    # TODO: check the ip address for uniqueness
    if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
      raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                 errors.ECODE_INVAL)

    # MAC address verification
    mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
    if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
      mac = utils.NormalizeAndValidateMac(mac)

      try:
        # TODO: We need to factor this out
        cfg.ReserveMAC(mac, proc.GetECId())
      except errors.ReservationError:
        raise errors.OpPrereqError("MAC address %s already in use"
                                   " in cluster" % mac,
                                   errors.ECODE_NOTUNIQUE)

    # Build nic parameters
    link = nic.get(constants.INIC_LINK, None)
    if link == constants.VALUE_AUTO:
      link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]

    nicparams = {}
    if nic_mode_req:
      nicparams[constants.NIC_MODE] = nic_mode
    if link:
      nicparams[constants.NIC_LINK] = link

    check_params = cluster.SimpleFillNIC(nicparams)
    objects.NIC.CheckParameterSyntax(check_params)
    nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

  return nics
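

# Example (illustrative): a single routed NIC with everything set to "auto";
# assuming name checks ran, the IP falls back to default_ip, while the MAC
# stays VALUE_AUTO until LUInstanceCreate.CheckPrereq generates the real one:
#
#   op.nics = [{constants.INIC_MODE: constants.NIC_MODE_ROUTED,
#               constants.INIC_IP: constants.VALUE_AUTO}]
#   nics = _ComputeNics(op, cluster, "192.0.2.10", cfg, proc)
#   => nics[0].ip == "192.0.2.10"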


def _ComputeDisks(op, default_vg):
  """Computes the instance disks.

  @param op: The instance opcode
  @param default_vg: The default_vg to assume

  @return: The computed disks

  """
  disks = []
  for disk in op.disks:
    mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
    if mode not in constants.DISK_ACCESS_SET:
      raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                 mode, errors.ECODE_INVAL)
    size = disk.get(constants.IDISK_SIZE, None)
    if size is None:
      raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
    try:
      size = int(size)
    except (TypeError, ValueError):
      raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                 errors.ECODE_INVAL)

    data_vg = disk.get(constants.IDISK_VG, default_vg)
    new_disk = {
      constants.IDISK_SIZE: size,
      constants.IDISK_MODE: mode,
      constants.IDISK_VG: data_vg,
      }
    if constants.IDISK_METAVG in disk:
      new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
    if constants.IDISK_ADOPT in disk:
      new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
    disks.append(new_disk)

  return disks
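

# Example (illustrative): a bare size is normalized to a full disk dict,
# inheriting the default volume group and the read-write access mode:
#
#   op.disks = [{constants.IDISK_SIZE: "10240"}]
#   _ComputeDisks(op, "xenvg")
#   => [{constants.IDISK_SIZE: 10240,
#        constants.IDISK_MODE: constants.DISK_RDWR,
#        constants.IDISK_VG: "xenvg"}]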


def _ComputeFullBeParams(op, cluster):
  """Computes the full beparams.

  @param op: The instance opcode
  @param cluster: The cluster config object

  @return: The fully filled beparams

  """
  default_beparams = cluster.beparams[constants.PP_DEFAULT]
  for param, value in op.beparams.iteritems():
    if value == constants.VALUE_AUTO:
      op.beparams[param] = default_beparams[param]
  objects.UpgradeBeParams(op.beparams)
  utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
  return cluster.SimpleFillBE(op.beparams)
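

# Example (illustrative): explicit "auto" values are resolved in-place from
# the cluster defaults before filling, so
#
#   op.beparams = {constants.BE_VCPUS: constants.VALUE_AUTO}
#
# first becomes the cluster's default vcpu count, and the returned dict then
# contains every backend parameter with its effective value.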


class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        self.op.file_driver not in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_INVAL)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
        netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
      # specifying a group on instance creation and then selecting nodes from
      # that group
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidental modification)
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(pathutils.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    req = _CreateInstanceAllocRequest(self.op, self.disks,
                                      self.nics, self.be_full)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))

    assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"

    if req.RequiredNodes() == 2:
      self.op.snode = ial.result[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      minmem=self.be_full[constants.BE_MINMEM],
      maxmem=self.be_full[constants.BE_MAXMEM],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
      ))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if int(ei_version) != constants.EXPORT_VERSION:
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)

    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
        if self.op.disk_template not in constants.DISK_TEMPLATES:
          raise errors.OpPrereqError("Disk template specified in configuration"
                                     " file is not one of the allowed values:"
                                     " %s" %
                                     " ".join(constants.DISK_TEMPLATES),
                                     errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      disks = []
      # TODO: import the disk iv_name too
      for idx in range(constants.MAX_DISKS):
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
      self.op.disks = disks
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if not self.op.nics:
      nics = []
      for idx in range(constants.MAX_NICS):
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
          ndict = {}
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
            ndict[name] = v
          nics.append(ndict)
        else:
          break
      self.op.nics = nics

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
        # Compatibility for the old "memory" be param
        if name == constants.BE_MEMORY:
          if constants.BE_MAXMEM not in self.op.beparams:
            self.op.beparams[constants.BE_MAXMEM] = value
          if constants.BE_MINMEM not in self.op.beparams:
            self.op.beparams[constants.BE_MINMEM] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value
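
  # Sketch of the export file layout read above (section names correspond
  # roughly to constants.INISECT_*; values are made up for the example):
  #
  #   [export]
  #   version = 0
  #   os = debootstrap
  #
  #   [instance]
  #   disk_template = drbd
  #   disk0_size = 10240
  #   nic0_mac = aa:00:00:dd:ee:ff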

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]
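
  # Example (illustrative): if the cluster default already has BE_MAXMEM=4096
  # and the opcode explicitly passes the same value, the method above drops
  # it from op.beparams again, so the instance keeps tracking the cluster
  # default instead of pinning its own copy of it.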

  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined",
                                   errors.ECODE_STATE)
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
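
  # Example (illustrative): with a cluster file storage directory of
  # "/srv/ganeti/file-storage", no per-opcode override and an instance named
  # "inst1.example.com", the computed directory would be
  # "/srv/ganeti/file-storage/inst1.example.com".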

  def CheckPrereq(self):  # pylint: disable=R0914
    """Check prerequisites.

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)
      self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
    else:
      self._old_instance_name = None

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if (self.op.hypervisor is None or
        self.op.hypervisor == constants.VALUE_AUTO):
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" %
                                 (self.op.hypervisor, ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    self.be_full = _ComputeFullBeParams(self.op, cluster)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = _ComputeNics(self.op, cluster, self.hostname1.ip, self.cfg,
                             self.proc)

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = _ComputeDisks(self.op, default_vg)

    if self.op.mode == constants.INSTANCE_IMPORT:
      disk_images = []
      for idx in range(len(self.disks)):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      if self.op.instance_name == self._old_instance_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    # Release all unneeded node locks
    _ReleaseLocks(self, locking.LEVEL_NODE,
                  keep=filter(None, [self.op.pnode, self.op.snode,
                                     self.op.src_node]))
    _ReleaseLocks(self, locking.LEVEL_NODE_RES,
                  keep=filter(None, [self.op.pnode, self.op.snode,
                                     self.op.src_node]))

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

      snode = self.cfg.GetNodeInfo(self.op.snode)
      if pnode.group != snode.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"
                        " used")

    nodenames = [pnode.name] + self.secondaries

    # Verify instance specs
    spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
    ispec = {
      constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
      constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
      constants.ISPEC_DISK_COUNT: len(self.disks),
      constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
      constants.ISPEC_NIC_COUNT: len(self.nics),
      constants.ISPEC_SPINDLE_USE: spindle_use,
      }

    group_info = self.cfg.GetNodeGroup(pnode.group)
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
    res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
    if not self.op.ignore_ipolicy and res:
      msg = ("Instance allocation to group %s (%s) violates policy: %s" %
             (pnode.group, group_info.name, utils.CommaJoin(res)))
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

    if not self.adopt_disks:
      if self.op.disk_template == constants.DT_RBD:
        # _CheckRADOSFreeSpace() is just a placeholder.
        # Any function that checks prerequisites can be placed here.
        # Check if there is enough space on the RADOS cluster.
        _CheckRADOSFreeSpace()
      else:
        # Check lv size requirements, if not adopting
        req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
        _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN:  # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    #TODO(dynmem): use MINMEM for checking
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MAXMEM],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Node locks differ from node resource locks"

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    # This is ugly but we got a chicken-egg problem here
    # We can only take the group disk parameters, as the instance
    # has no disks yet (we are generating them right here).
    node = self.cfg.GetNodeInfo(pnode_name)
    nodegroup = self.cfg.GetNodeGroup(node.group)
    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn,
                                  self.cfg.GetGroupDiskParams(nodegroup))

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_state=constants.ADMINST_DOWN,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)

    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    # Release all node resource locks
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      # we need to set the disks ID to the primary node, since the
      # preceding code might or might have not done it, depending on
      # disk template and other options
      for disk in iobj.disks:
        self.cfg.SetDiskID(disk, pnode_name)
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                        not self.op.wait_for_sync)
          if pause_sync:
            feedback_fn("* pausing disk sync to install instance OS")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              (iobj.disks,
                                                               iobj), True)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("pause-sync of instance %s for disk %d failed",
                             instance, idx)

          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          os_add_result = \
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
                                          self.op.debug_level)
          if pause_sync:
            feedback_fn("* resuming disk sync")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              (iobj.disks,
                                                               iobj), False)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("resume-sync of instance %s for disk %d failed",
                             instance, idx)

          os_add_result.Raise("Could not add os for instance %s"
                              " on node %s" % (instance, pnode_name))

      else:
        if self.op.mode == constants.INSTANCE_IMPORT:
          feedback_fn("* running the instance OS import scripts...")

          transfers = []

          for idx, image in enumerate(self.src_images):
            if not image:
              continue

            # FIXME: pass debug option from opcode to backend
            dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                               constants.IEIO_FILE, (image, ),
                                               constants.IEIO_SCRIPT,
                                               (iobj.disks[idx], idx),
                                               None)
            transfers.append(dt)

          import_result = \
            masterd.instance.TransferInstanceData(self, feedback_fn,
                                                  self.op.src_node, pnode_name,
                                                  self.pnode.secondary_ip,
                                                  iobj, transfers)
          if not compat.all(import_result):
            self.LogWarning("Some disks for instance %s on node %s were not"
                            " imported successfully" % (instance, pnode_name))

          rename_from = self._old_instance_name

        elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
          feedback_fn("* preparing remote import...")
          # The source cluster will stop the instance before attempting to make
          # a connection. In some cases stopping an instance can take a long
          # time, hence the shutdown timeout is added to the connection
          # timeout.
          connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                             self.op.source_shutdown_timeout)
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          assert iobj.primary_node == self.pnode.name
          disk_results = \
            masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                          self.source_x509_ca,
                                          self._cds, timeouts)
          if not compat.all(disk_results):
            # TODO: Should the instance still be started, even if some disks
            # failed to import (valid for local imports, too)?
            self.LogWarning("Some disks for instance %s on node %s were not"
                            " imported successfully" % (instance, pnode_name))

          rename_from = self.source_instance_name

        else:
          # also checked in the prereq part
          raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                       % self.op.mode)

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   rename_from,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

    assert not self.owned_locks(locking.LEVEL_NODE_RES)

    if self.op.start:
      iobj.admin_state = constants.ADMINST_UP
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
                                            False)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)
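

# Example (illustrative): LUInstanceCreate backs "gnt-instance add"; a DRBD
# creation such as
#
#   gnt-instance add -t drbd -n node1:node2 -o debootstrap -s 10G \
#     inst1.example.com
#
# arrives here as an OpInstanceCreate opcode with mode INSTANCE_CREATE,
# pnode/snode already named and the iallocator left unset.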


class LUInstanceMultiAlloc(NoHooksLU):
  """Allocates multiple instances at the same time.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    nodes = []
    for inst in self.op.instances:
      if inst.iallocator is not None:
        raise errors.OpPrereqError("iallocator is not allowed to be set on"
                                   " instance objects", errors.ECODE_INVAL)
      nodes.append(bool(inst.pnode))
      if inst.disk_template in constants.DTS_INT_MIRROR:
        nodes.append(bool(inst.snode))

    has_nodes = compat.any(nodes)
    if compat.all(nodes) ^ has_nodes:
      raise errors.OpPrereqError("There are instance objects providing"
                                 " pnode/snode while others do not",
                                 errors.ECODE_INVAL)

    if self.op.iallocator is None:
      default_iallocator = self.cfg.GetDefaultIAllocator()
      if default_iallocator and has_nodes:
        self.op.iallocator = default_iallocator
      else:
        raise errors.OpPrereqError("No iallocator or nodes on the instances"
                                   " given and no cluster-wide default"
                                   " iallocator found; please specify either"
                                   " an iallocator or nodes on the instances"
                                   " or set a cluster-wide default iallocator",
                                   errors.ECODE_INVAL)

    dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
    if dups:
      raise errors.OpPrereqError("There are duplicate instance names: %s" %
                                 utils.CommaJoin(dups), errors.ECODE_INVAL)

  def ExpandNames(self):
    """Calculate the locks.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
    else:
      nodeslist = []
      for inst in self.op.instances:
        inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
        nodeslist.append(inst.pnode)
        if inst.snode is not None:
          inst.snode = _ExpandNodeName(self.cfg, inst.snode)
          nodeslist.append(inst.snode)

      self.needed_locks[locking.LEVEL_NODE] = nodeslist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidental modification)
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)

  def CheckPrereq(self):
    """Check prerequisite.

    """
    cluster = self.cfg.GetClusterInfo()
    default_vg = self.cfg.GetVGName()
    insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
                                         _ComputeNics(op, cluster, None,
                                                      self.cfg, self.proc),
                                         _ComputeFullBeParams(op, cluster))
             for op in self.op.instances]
    req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    self.ia_result = ial.result

    if self.op.dry_run:
      self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
        constants.JOB_IDS_KEY: [],
        })

  def _ConstructPartialResult(self):
    """Constructs the partial result.

    """
    (allocatable, failed) = self.ia_result
    return {
      opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
        map(compat.fst, allocatable),
      opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
      }

  def Exec(self, feedback_fn):
    """Executes the opcode.

    """
    op2inst = dict((op.instance_name, op) for op in self.op.instances)
    (allocatable, failed) = self.ia_result

    jobs = []
    for (name, nodes) in allocatable:
      op = op2inst.pop(name)

      if len(nodes) > 1:
        (op.pnode, op.snode) = nodes
      else:
        (op.pnode,) = nodes

      jobs.append([op])

    missing = set(op2inst.keys()) - set(failed)
    assert not missing, \
      "Iallocator returned an incomplete result: %s" % utils.CommaJoin(missing)

    return ResultWithJobs(jobs, **self._ConstructPartialResult())
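

# Example (illustrative): shape of the value this LU returns to the caller;
# the submitted job IDs are filled in by mcpu._ProcessResult from the
# ResultWithJobs above:
#
#   {
#     opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY: ["inst1", "inst2"],
#     opcodes.OpInstanceMultiAlloc.FAILED_KEY: ["inst3"],
#     constants.JOB_IDS_KEY: [...],
#   }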


def _CheckRADOSFreeSpace():
  """Compute disk size requirements inside the RADOS cluster.

  """
  # For the RADOS cluster we assume there is always enough space.
  pass


class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance.

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_state == constants.ADMINST_UP:
        state = constants.INSTST_ERRORDOWN
      elif instance.admin_state == constants.ADMINST_DOWN:
        state = constants.INSTST_ADMINDOWN
      else:
        state = constants.INSTST_ADMINOFFLINE
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
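

# Example (illustrative): for an instance whose console is reached over SSH,
# the returned dict would look roughly like
#
#   {"instance": "inst1.example.com", "kind": constants.CONS_SSH,
#    "host": "node1.example.com", "command": [...]}
#
# with the exact keys depending on the console kind reported by the
# hypervisor abstraction.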


class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    remote_node = self.op.remote_node
    ialloc = self.op.iallocator
    if self.op.mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and ialloc is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)
      else:
        _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

    elif remote_node is not None or ialloc is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODE_RES not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []

    self.needed_locks[locking.LEVEL_NODE_RES] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release,
                                   self.op.ignore_ipolicy)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock all groups used by instance optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = \
          [node_name
           for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Reuse node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

    return LogicalUnit.CheckPrereq(self)


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release, ignore_ipolicy):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release
    self.ignore_ipolicy = ignore_ipolicy

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    req = iallocator.IAReqRelocate(name=instance_name,
                                   relocate_from=list(relocate_from))
    ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    """Wrapper for L{_FindFaultyInstanceDisks}.

    """
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    for idx, dev in enumerate(instance.disks):
      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = _BlockdevFind(self, node, dev, instance)

        if result.offline:
          continue
        elif result.fail_msg or not result.payload:
          return False

    return True

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()
10922 def _CheckPrereq2(self):
10923 """Check prerequisites, second part.
10925 This function should always be part of CheckPrereq. It was separated and is
10926 now called from Exec because during node evacuation iallocator was only
10927 called with an unmodified cluster model, not taking planned changes into
10931 instance = self.instance
10932 secondary_node = instance.secondary_nodes[0]
10934 if self.iallocator_name is None:
10935 remote_node = self.remote_node
10937 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10938 instance.name, instance.secondary_nodes)
10940 if remote_node is None:
10941 self.remote_node_info = None
10943 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10944 "Remote node '%s' is not locked" % remote_node
10946 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10947 assert self.remote_node_info is not None, \
10948 "Cannot retrieve locked node %s" % remote_node
10950 if remote_node == self.instance.primary_node:
10951 raise errors.OpPrereqError("The specified node is the primary node of"
10952 " the instance", errors.ECODE_INVAL)
10954 if remote_node == secondary_node:
10955 raise errors.OpPrereqError("The specified node is already the"
10956 " secondary node of the instance",
10957 errors.ECODE_INVAL)
10959 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10960 constants.REPLACE_DISK_CHG):
10961 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10962 errors.ECODE_INVAL)
10964 if self.mode == constants.REPLACE_DISK_AUTO:
10965 if not self._CheckDisksActivated(instance):
10966 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10967 " first" % self.instance_name,
10968 errors.ECODE_STATE)
10969 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10970 faulty_secondary = self._FindFaultyDisks(secondary_node)
10972 if faulty_primary and faulty_secondary:
10973 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10974 " one node and can not be repaired"
10975 " automatically" % self.instance_name,
10976 errors.ECODE_STATE)
10979 self.disks = faulty_primary
10980 self.target_node = instance.primary_node
10981 self.other_node = secondary_node
10982 check_nodes = [self.target_node, self.other_node]
10983 elif faulty_secondary:
10984 self.disks = faulty_secondary
10985 self.target_node = secondary_node
10986 self.other_node = instance.primary_node
10987 check_nodes = [self.target_node, self.other_node]
10993 # Non-automatic modes
10994 if self.mode == constants.REPLACE_DISK_PRI:
10995 self.target_node = instance.primary_node
10996 self.other_node = secondary_node
10997 check_nodes = [self.target_node, self.other_node]
10999 elif self.mode == constants.REPLACE_DISK_SEC:
11000 self.target_node = secondary_node
11001 self.other_node = instance.primary_node
11002 check_nodes = [self.target_node, self.other_node]
11004 elif self.mode == constants.REPLACE_DISK_CHG:
11005 self.new_node = remote_node
11006 self.other_node = instance.primary_node
11007 self.target_node = secondary_node
11008 check_nodes = [self.new_node, self.other_node]
11010 _CheckNodeNotDrained(self.lu, remote_node)
11011 _CheckNodeVmCapable(self.lu, remote_node)
11013 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11014 assert old_node_info is not None
11015 if old_node_info.offline and not self.early_release:
11016 # doesn't make sense to delay the release
11017 self.early_release = True
11018 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11019 " early-release mode", secondary_node)
11021 else:
11022 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11023 self.mode)
11025 # If not specified all disks should be replaced
11026 if not self.disks:
11027 self.disks = range(len(self.instance.disks))
11029 # TODO: This is ugly, but right now we can't distinguish between internal
11030 # submitted opcode and external one. We should fix that.
11031 if self.remote_node_info:
11032 # We change the node, lets verify it still meets instance policy
11033 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11034 cluster = self.cfg.GetClusterInfo()
11035 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11036 new_group_info)
11037 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11038 ignore=self.ignore_ipolicy)
11040 for node in check_nodes:
11041 _CheckNodeOnline(self.lu, node)
11043 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11044 self.other_node,
11045 self.target_node]
11046 if node_name is not None)
11048 # Release unneeded node and node resource locks
11049 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11050 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11052 # Release any owned node group
11053 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
11054 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11056 # Check whether disks are valid
11057 for disk_idx in self.disks:
11058 instance.FindDisk(disk_idx)
11060 # Get secondary node IP addresses
11061 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11062 in self.cfg.GetMultiNodeInfo(touched_nodes))
11064 def Exec(self, feedback_fn):
11065 """Execute disk replacement.
11067 This dispatches the disk replacement to the appropriate handler.
11069 """
11070 if self.delay_iallocator:
11071 self._CheckPrereq2()
11074 # Verify owned locks before starting operation
11075 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11076 assert set(owned_nodes) == set(self.node_secondary_ip), \
11077 ("Incorrect node locks, owning %s, expected %s" %
11078 (owned_nodes, self.node_secondary_ip.keys()))
11079 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11080 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11082 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11083 assert list(owned_instances) == [self.instance_name], \
11084 "Instance '%s' not locked" % self.instance_name
11086 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11087 "Should not own any node group lock at this point"
11089 if not self.disks:
11090 feedback_fn("No disks need replacement for instance '%s'" %
11091 self.instance.name)
11092 return
11094 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11095 (utils.CommaJoin(self.disks), self.instance.name))
11096 feedback_fn("Current primary node: %s" % self.instance.primary_node)
11097 feedback_fn("Current secondary node: %s" %
11098 utils.CommaJoin(self.instance.secondary_nodes))
11100 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11102 # Activate the instance disks if we're replacing them on a down instance
11103 if activate_disks:
11104 _StartInstanceDisks(self.lu, self.instance, True)
11106 try:
11107 # Should we replace the secondary node?
11108 if self.new_node is not None:
11109 fn = self._ExecDrbd8Secondary
11110 else:
11111 fn = self._ExecDrbd8DiskOnly
11113 result = fn(feedback_fn)
11114 finally:
11115 # Deactivate the instance disks if we're replacing them on a
11116 # down instance
11117 if activate_disks:
11118 _SafeShutdownInstanceDisks(self.lu, self.instance)
11120 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11122 if __debug__:
11123 # Verify owned locks
11124 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11125 nodes = frozenset(self.node_secondary_ip)
11126 assert ((self.early_release and not owned_nodes) or
11127 (not self.early_release and not (set(owned_nodes) - nodes))), \
11128 ("Not owning the correct locks, early_release=%s, owned=%r,"
11129 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11131 return result
11133 def _CheckVolumeGroup(self, nodes):
11134 self.lu.LogInfo("Checking volume groups")
11136 vgname = self.cfg.GetVGName()
11138 # Make sure volume group exists on all involved nodes
11139 results = self.rpc.call_vg_list(nodes)
11140 if not results:
11141 raise errors.OpExecError("Can't list volume groups on the nodes")
11143 for node in nodes:
11144 res = results[node]
11145 res.Raise("Error checking node %s" % node)
11146 if vgname not in res.payload:
11147 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11148 (vgname, node))
11150 def _CheckDisksExistence(self, nodes):
11151 # Check disk existence
11152 for idx, dev in enumerate(self.instance.disks):
11153 if idx not in self.disks:
11154 continue
11156 for node in nodes:
11157 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
11158 self.cfg.SetDiskID(dev, node)
11160 result = _BlockdevFind(self, node, dev, self.instance)
11162 msg = result.fail_msg
11163 if msg or not result.payload:
11164 if not msg:
11165 msg = "disk not found"
11166 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11167 (idx, node, msg))
11169 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11170 for idx, dev in enumerate(self.instance.disks):
11171 if idx not in self.disks:
11172 continue
11174 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11175 (idx, node_name))
11177 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11178 on_primary, ldisk=ldisk):
11179 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11180 " replace disks for instance %s" %
11181 (node_name, self.instance.name))
11183 def _CreateNewStorage(self, node_name):
11184 """Create new storage on the primary or secondary node.
11186 This is only used for same-node replaces, not for changing the
11187 secondary node, hence we don't want to modify the existing disk.
11189 """
11190 iv_names = {}
11192 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11193 for idx, dev in enumerate(disks):
11194 if idx not in self.disks:
11195 continue
11197 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
11199 self.cfg.SetDiskID(dev, node_name)
11201 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11202 names = _GenerateUniqueNames(self.lu, lv_names)
11204 (data_disk, meta_disk) = dev.children
11205 vg_data = data_disk.logical_id[0]
11206 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11207 logical_id=(vg_data, names[0]),
11208 params=data_disk.params)
11209 vg_meta = meta_disk.logical_id[0]
11210 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11211 size=constants.DRBD_META_SIZE,
11212 logical_id=(vg_meta, names[1]),
11213 params=meta_disk.params)
11215 new_lvs = [lv_data, lv_meta]
11216 old_lvs = [child.Copy() for child in dev.children]
11217 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11219 # we pass force_create=True to force the LVM creation
11220 for new_lv in new_lvs:
11221 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11222 _GetInstanceInfoText(self.instance), False)
11224 return iv_names
11226 def _CheckDevices(self, node_name, iv_names):
11227 for name, (dev, _, _) in iv_names.iteritems():
11228 self.cfg.SetDiskID(dev, node_name)
11230 result = _BlockdevFind(self, node_name, dev, self.instance)
11232 msg = result.fail_msg
11233 if msg or not result.payload:
11234 if not msg:
11235 msg = "disk not found"
11236 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11237 (name, msg))
11239 if result.payload.is_degraded:
11240 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11242 def _RemoveOldStorage(self, node_name, iv_names):
11243 for name, (_, old_lvs, _) in iv_names.iteritems():
11244 self.lu.LogInfo("Remove logical volumes for %s" % name)
11246 for lv in old_lvs:
11247 self.cfg.SetDiskID(lv, node_name)
11249 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11250 if msg:
11251 self.lu.LogWarning("Can't remove old LV: %s" % msg,
11252 hint="remove unused LVs manually")
11254 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11255 """Replace a disk on the primary or secondary for DRBD 8.
11257 The algorithm for replace is quite complicated:
11259 1. for each disk to be replaced:
11261 1. create new LVs on the target node with unique names
11262 1. detach old LVs from the drbd device
11263 1. rename old LVs to name_replaced.<time_t>
11264 1. rename new LVs to old LVs
11265 1. attach the new LVs (with the old names now) to the drbd device
11267 1. wait for sync across all devices
11269 1. for each modified disk:
11271 1. remove old LVs (which have the name name_replaces.<time_t>)
11273 Failures are not very well handled.
11275 """
11276 steps_total = 6
11278 # Step: check device activation
11279 self.lu.LogStep(1, steps_total, "Check device existence")
11280 self._CheckDisksExistence([self.other_node, self.target_node])
11281 self._CheckVolumeGroup([self.target_node, self.other_node])
11283 # Step: check other node consistency
11284 self.lu.LogStep(2, steps_total, "Check peer consistency")
11285 self._CheckDisksConsistency(self.other_node,
11286 self.other_node == self.instance.primary_node,
11287 False)
11289 # Step: create new storage
11290 self.lu.LogStep(3, steps_total, "Allocate new storage")
11291 iv_names = self._CreateNewStorage(self.target_node)
11293 # Step: for each lv, detach+rename*2+attach
11294 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11295 for dev, old_lvs, new_lvs in iv_names.itervalues():
11296 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11298 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11299 old_lvs)
11300 result.Raise("Can't detach drbd from local storage on node"
11301 " %s for device %s" % (self.target_node, dev.iv_name))
11303 #cfg.Update(instance)
11305 # ok, we created the new LVs, so now we know we have the needed
11306 # storage; as such, we proceed on the target node to rename
11307 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11308 # using the assumption that logical_id == physical_id (which in
11309 # turn is the unique_id on that node)
11311 # FIXME(iustin): use a better name for the replaced LVs
11312 temp_suffix = int(time.time())
11313 ren_fn = lambda d, suff: (d.physical_id[0],
11314 d.physical_id[1] + "_replaced-%s" % suff)
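# Illustrative sketch (comment only, not executed): for a hypothetical LV
# whose physical_id is ("xenvg", "uuid.disk0_data") and temp_suffix
# 1234567890, ren_fn returns ("xenvg", "uuid.disk0_data_replaced-1234567890"),
# i.e. the volume group is kept and only the LV name gains the suffix.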
11316 # Build the rename list based on what LVs exist on the node
11317 rename_old_to_new = []
11318 for to_ren in old_lvs:
11319 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11320 if not result.fail_msg and result.payload:
11322 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11324 self.lu.LogInfo("Renaming the old LVs on the target node")
11325 result = self.rpc.call_blockdev_rename(self.target_node,
11326 rename_old_to_new)
11327 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11329 # Now we rename the new LVs to the old LVs
11330 self.lu.LogInfo("Renaming the new LVs on the target node")
11331 rename_new_to_old = [(new, old.physical_id)
11332 for old, new in zip(old_lvs, new_lvs)]
11333 result = self.rpc.call_blockdev_rename(self.target_node,
11334 rename_new_to_old)
11335 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11337 # Intermediate steps of in memory modifications
11338 for old, new in zip(old_lvs, new_lvs):
11339 new.logical_id = old.logical_id
11340 self.cfg.SetDiskID(new, self.target_node)
11342 # We need to modify old_lvs so that removal later removes the
11343 # right LVs, not the newly added ones; note that old_lvs is a
11344 # copy here
11345 for disk in old_lvs:
11346 disk.logical_id = ren_fn(disk, temp_suffix)
11347 self.cfg.SetDiskID(disk, self.target_node)
11349 # Now that the new lvs have the old name, we can add them to the device
11350 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11351 result = self.rpc.call_blockdev_addchildren(self.target_node,
11352 (dev, self.instance), new_lvs)
11353 msg = result.fail_msg
11354 if msg:
11355 for new_lv in new_lvs:
11356 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11357 new_lv).fail_msg
11358 if msg2:
11359 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11360 hint=("cleanup manually the unused logical"
11361 " volumes"))
11362 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11364 cstep = itertools.count(5)
11366 if self.early_release:
11367 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11368 self._RemoveOldStorage(self.target_node, iv_names)
11369 # TODO: Check if releasing locks early still makes sense
11370 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11371 else:
11372 # Release all resource locks except those used by the instance
11373 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11374 keep=self.node_secondary_ip.keys())
11376 # Release all node locks while waiting for sync
11377 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11379 # TODO: Can the instance lock be downgraded here? Take the optional disk
11380 # shutdown in the caller into consideration.
11382 # Wait for sync
11383 # This can fail as the old devices are degraded and _WaitForSync
11384 # does a combined result over all disks, so we don't check its return value
11385 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11386 _WaitForSync(self.lu, self.instance)
11388 # Check all devices manually
11389 self._CheckDevices(self.instance.primary_node, iv_names)
11391 # Step: remove old storage
11392 if not self.early_release:
11393 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11394 self._RemoveOldStorage(self.target_node, iv_names)
11396 def _ExecDrbd8Secondary(self, feedback_fn):
11397 """Replace the secondary node for DRBD 8.
11399 The algorithm for replace is quite complicated:
11400 - for all disks of the instance:
11401 - create new LVs on the new node with same names
11402 - shutdown the drbd device on the old secondary
11403 - disconnect the drbd network on the primary
11404 - create the drbd device on the new secondary
11405 - network attach the drbd on the primary, using an artifice:
11406 the drbd code for Attach() will connect to the network if it
11407 finds a device which is connected to the good local disks but
11408 not network enabled
11409 - wait for sync across all devices
11410 - remove all disks from the old secondary
11412 Failures are not very well handled.
11414 """
11415 steps_total = 6
11417 pnode = self.instance.primary_node
11419 # Step: check device activation
11420 self.lu.LogStep(1, steps_total, "Check device existence")
11421 self._CheckDisksExistence([self.instance.primary_node])
11422 self._CheckVolumeGroup([self.instance.primary_node])
11424 # Step: check other node consistency
11425 self.lu.LogStep(2, steps_total, "Check peer consistency")
11426 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11428 # Step: create new storage
11429 self.lu.LogStep(3, steps_total, "Allocate new storage")
11430 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11431 for idx, dev in enumerate(disks):
11432 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11433 (self.new_node, idx))
11434 # we pass force_create=True to force LVM creation
11435 for new_lv in dev.children:
11436 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11437 True, _GetInstanceInfoText(self.instance), False)
11439 # Step 4: drbd minors and drbd setups changes
11440 # after this, we must manually remove the drbd minors on both the
11441 # error and the success paths
11442 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11443 minors = self.cfg.AllocateDRBDMinor([self.new_node
11444 for dev in self.instance.disks],
11445 self.instance.name)
11446 logging.debug("Allocated minors %r", minors)
11448 iv_names = {}
11449 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11450 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11451 (self.new_node, idx))
11452 # create new devices on new_node; note that we create two IDs:
11453 # one without port, so the drbd will be activated without
11454 # networking information on the new node at this stage, and one
11455 # with network, for the latter activation in step 4
11456 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11457 if self.instance.primary_node == o_node1:
11458 p_minor = o_minor1
11459 else:
11460 assert self.instance.primary_node == o_node2, "Three-node instance?"
11461 p_minor = o_minor2
11463 new_alone_id = (self.instance.primary_node, self.new_node, None,
11464 p_minor, new_minor, o_secret)
11465 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11466 p_minor, new_minor, o_secret)
11468 iv_names[idx] = (dev, dev.children, new_net_id)
11469 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11470 new_net_id)
11471 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11472 logical_id=new_alone_id,
11473 children=dev.children,
11474 size=dev.size,
11475 params={})
11476 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11477 self.cfg)
11478 try:
11479 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11480 anno_new_drbd,
11481 _GetInstanceInfoText(self.instance), False)
11482 except errors.GenericError:
11483 self.cfg.ReleaseDRBDMinors(self.instance.name)
11484 raise
11486 # We have new devices, shutdown the drbd on the old secondary
11487 for idx, dev in enumerate(self.instance.disks):
11488 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11489 self.cfg.SetDiskID(dev, self.target_node)
11490 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11491 (dev, self.instance)).fail_msg
11492 if msg:
11493 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11494 " node: %s" % (idx, msg),
11495 hint=("Please cleanup this device manually as"
11496 " soon as possible"))
11498 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11499 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11500 self.instance.disks)[pnode]
11502 msg = result.fail_msg
11503 if msg:
11504 # detaches didn't succeed (unlikely)
11505 self.cfg.ReleaseDRBDMinors(self.instance.name)
11506 raise errors.OpExecError("Can't detach the disks from the network on"
11507 " old node: %s" % (msg,))
11509 # if we managed to detach at least one, we update all the disks of
11510 # the instance to point to the new secondary
11511 self.lu.LogInfo("Updating instance configuration")
11512 for dev, _, new_logical_id in iv_names.itervalues():
11513 dev.logical_id = new_logical_id
11514 self.cfg.SetDiskID(dev, self.instance.primary_node)
11516 self.cfg.Update(self.instance, feedback_fn)
11518 # Release all node locks (the configuration has been updated)
11519 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11521 # and now perform the drbd attach
11522 self.lu.LogInfo("Attaching primary drbds to new secondary"
11523 " (standalone => connected)")
11524 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11525 self.new_node],
11526 self.node_secondary_ip,
11527 (self.instance.disks, self.instance),
11528 self.instance.name,
11529 False)
11530 for to_node, to_result in result.items():
11531 msg = to_result.fail_msg
11532 if msg:
11533 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11534 to_node, msg,
11535 hint=("please do a gnt-instance info to see the"
11536 " status of disks"))
11538 cstep = itertools.count(5)
11540 if self.early_release:
11541 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11542 self._RemoveOldStorage(self.target_node, iv_names)
11543 # TODO: Check if releasing locks early still makes sense
11544 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11545 else:
11546 # Release all resource locks except those used by the instance
11547 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11548 keep=self.node_secondary_ip.keys())
11550 # TODO: Can the instance lock be downgraded here? Take the optional disk
11551 # shutdown in the caller into consideration.
11553 # Wait for sync
11554 # This can fail as the old devices are degraded and _WaitForSync
11555 # does a combined result over all disks, so we don't check its return value
11556 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11557 _WaitForSync(self.lu, self.instance)
11559 # Check all devices manually
11560 self._CheckDevices(self.instance.primary_node, iv_names)
11562 # Step: remove old storage
11563 if not self.early_release:
11564 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11565 self._RemoveOldStorage(self.target_node, iv_names)
11568 class LURepairNodeStorage(NoHooksLU):
11569 """Repairs the volume group on a node.
11571 """
11572 REQ_BGL = False
11574 def CheckArguments(self):
11575 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11577 storage_type = self.op.storage_type
11579 if (constants.SO_FIX_CONSISTENCY not in
11580 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11581 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11582 " repaired" % storage_type,
11583 errors.ECODE_INVAL)
11585 def ExpandNames(self):
11586 self.needed_locks = {
11587 locking.LEVEL_NODE: [self.op.node_name],
11588 }
11590 def _CheckFaultyDisks(self, instance, node_name):
11591 """Ensure faulty disks abort the opcode or at least warn."""
11592 try:
11593 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11594 node_name, True):
11595 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11596 " node '%s'" % (instance.name, node_name),
11597 errors.ECODE_STATE)
11598 except errors.OpPrereqError, err:
11599 if self.op.ignore_consistency:
11600 self.proc.LogWarning(str(err.args[0]))
11601 else:
11602 raise
11604 def CheckPrereq(self):
11605 """Check prerequisites.
11607 """
11608 # Check whether any instance on this node has faulty disks
11609 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11610 if inst.admin_state != constants.ADMINST_UP:
11611 continue
11612 check_nodes = set(inst.all_nodes)
11613 check_nodes.discard(self.op.node_name)
11614 for inst_node_name in check_nodes:
11615 self._CheckFaultyDisks(inst, inst_node_name)
11617 def Exec(self, feedback_fn):
11618 feedback_fn("Repairing storage unit '%s' on %s ..." %
11619 (self.op.name, self.op.node_name))
11621 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11622 result = self.rpc.call_storage_execute(self.op.node_name,
11623 self.op.storage_type, st_args,
11624 self.op.name,
11625 constants.SO_FIX_CONSISTENCY)
11626 result.Raise("Failed to repair storage unit '%s' on %s" %
11627 (self.op.name, self.op.node_name))
11630 class LUNodeEvacuate(NoHooksLU):
11631 """Evacuates instances off a list of nodes.
11633 """
11634 REQ_BGL = False
11636 _MODE2IALLOCATOR = {
11637 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11638 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11639 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11640 }
11641 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11642 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11643 constants.IALLOCATOR_NEVAC_MODES)
11645 def CheckArguments(self):
11646 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11648 def ExpandNames(self):
11649 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11651 if self.op.remote_node is not None:
11652 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11653 assert self.op.remote_node
11655 if self.op.remote_node == self.op.node_name:
11656 raise errors.OpPrereqError("Can not use evacuated node as a new"
11657 " secondary node", errors.ECODE_INVAL)
11659 if self.op.mode != constants.NODE_EVAC_SEC:
11660 raise errors.OpPrereqError("Without the use of an iallocator only"
11661 " secondary instances can be evacuated",
11662 errors.ECODE_INVAL)
11665 self.share_locks = _ShareAll()
11666 self.needed_locks = {
11667 locking.LEVEL_INSTANCE: [],
11668 locking.LEVEL_NODEGROUP: [],
11669 locking.LEVEL_NODE: [],
11670 }
11672 # Determine nodes (via group) optimistically, needs verification once locks
11673 # have been acquired
11674 self.lock_nodes = self._DetermineNodes()
11676 def _DetermineNodes(self):
11677 """Gets the list of nodes to operate on.
11679 """
11680 if self.op.remote_node is None:
11681 # Iallocator will choose any node(s) in the same group
11682 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11683 else:
11684 group_nodes = frozenset([self.op.remote_node])
11686 # Determine nodes to be locked
11687 return set([self.op.node_name]) | group_nodes
11689 def _DetermineInstances(self):
11690 """Builds list of instances to operate on.
11692 """
11693 assert self.op.mode in constants.NODE_EVAC_MODES
11695 if self.op.mode == constants.NODE_EVAC_PRI:
11696 # Primary instances only
11697 inst_fn = _GetNodePrimaryInstances
11698 assert self.op.remote_node is None, \
11699 "Evacuating primary instances requires iallocator"
11700 elif self.op.mode == constants.NODE_EVAC_SEC:
11701 # Secondary instances only
11702 inst_fn = _GetNodeSecondaryInstances
11703 else:
11704 # All instances
11705 assert self.op.mode == constants.NODE_EVAC_ALL
11706 inst_fn = _GetNodeInstances
11707 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11709 raise errors.OpPrereqError("Due to an issue with the iallocator"
11710 " interface it is not possible to evacuate"
11711 " all instances at once; specify explicitly"
11712 " whether to evacuate primary or secondary"
11713 " instances",
11714 errors.ECODE_INVAL)
11716 return inst_fn(self.cfg, self.op.node_name)
11718 def DeclareLocks(self, level):
11719 if level == locking.LEVEL_INSTANCE:
11720 # Lock instances optimistically, needs verification once node and group
11721 # locks have been acquired
11722 self.needed_locks[locking.LEVEL_INSTANCE] = \
11723 set(i.name for i in self._DetermineInstances())
11725 elif level == locking.LEVEL_NODEGROUP:
11726 # Lock node groups for all potential target nodes optimistically, needs
11727 # verification once nodes have been acquired
11728 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11729 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11731 elif level == locking.LEVEL_NODE:
11732 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11734 def CheckPrereq(self):
11736 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11737 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11738 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11740 need_nodes = self._DetermineNodes()
11742 if not owned_nodes.issuperset(need_nodes):
11743 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11744 " locks were acquired, current nodes are"
11745 " '%s', used to be '%s'; retry the"
11746 " operation" %
11747 (self.op.node_name,
11748 utils.CommaJoin(need_nodes),
11749 utils.CommaJoin(owned_nodes)),
11750 errors.ECODE_STATE)
11752 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11753 if owned_groups != wanted_groups:
11754 raise errors.OpExecError("Node groups changed since locks were acquired,"
11755 " current groups are '%s', used to be '%s';"
11756 " retry the operation" %
11757 (utils.CommaJoin(wanted_groups),
11758 utils.CommaJoin(owned_groups)))
11760 # Determine affected instances
11761 self.instances = self._DetermineInstances()
11762 self.instance_names = [i.name for i in self.instances]
11764 if set(self.instance_names) != owned_instances:
11765 raise errors.OpExecError("Instances on node '%s' changed since locks"
11766 " were acquired, current instances are '%s',"
11767 " used to be '%s'; retry the operation" %
11768 (self.op.node_name,
11769 utils.CommaJoin(self.instance_names),
11770 utils.CommaJoin(owned_instances)))
11772 if self.instance_names:
11773 self.LogInfo("Evacuating instances from node '%s': %s",
11774 self.op.node_name,
11775 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11776 else:
11777 self.LogInfo("No instances to evacuate from node '%s'",
11778 self.op.node_name)
11780 if self.op.remote_node is not None:
11781 for i in self.instances:
11782 if i.primary_node == self.op.remote_node:
11783 raise errors.OpPrereqError("Node %s is the primary node of"
11784 " instance %s, cannot use it as"
11785 " secondary" %
11786 (self.op.remote_node, i.name),
11787 errors.ECODE_INVAL)
11789 def Exec(self, feedback_fn):
11790 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11792 if not self.instance_names:
11793 # No instances to evacuate
11794 jobs = []
11796 elif self.op.iallocator is not None:
11797 # TODO: Implement relocation to other group
11798 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
11799 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
11800 instances=list(self.instance_names))
11801 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11803 ial.Run(self.op.iallocator)
11805 if not ial.success:
11806 raise errors.OpPrereqError("Can't compute node evacuation using"
11807 " iallocator '%s': %s" %
11808 (self.op.iallocator, ial.info),
11809 errors.ECODE_NORES)
11811 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11813 elif self.op.remote_node is not None:
11814 assert self.op.mode == constants.NODE_EVAC_SEC
11815 jobs = [
11816 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11817 remote_node=self.op.remote_node,
11818 disks=[],
11819 mode=constants.REPLACE_DISK_CHG,
11820 early_release=self.op.early_release)]
11821 for instance_name in self.instance_names
11822 ]
11824 else:
11825 raise errors.ProgrammerError("No iallocator or remote node")
11827 return ResultWithJobs(jobs)
11830 def _SetOpEarlyRelease(early_release, op):
11831 """Sets C{early_release} flag on opcodes if available.
11833 """
11834 try:
11835 op.early_release = early_release
11836 except AttributeError:
11837 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11839 return op
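# Illustrative sketch (comment only, not executed), using a hypothetical
# instance name:
#   op = opcodes.OpInstanceReplaceDisks(instance_name="inst1.example.com")
#   op = _SetOpEarlyRelease(True, op)  # op.early_release is now True
# Opcodes without an early_release slot pass through unchanged.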
11842 def _NodeEvacDest(use_nodes, group, nodes):
11843 """Returns group or nodes depending on caller's choice.
11845 """
11846 if use_nodes:
11847 return utils.CommaJoin(nodes)
11848 else:
11849 return group
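# Illustrative sketch (comment only, not executed), with hypothetical values:
#   _NodeEvacDest(True, "group1", ["node2", "node3"])   # -> "node2, node3"
#   _NodeEvacDest(False, "group1", ["node2", "node3"])  # -> "group1"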
11852 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11853 """Unpacks the result of change-group and node-evacuate iallocator requests.
11855 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11856 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11858 @type lu: L{LogicalUnit}
11859 @param lu: Logical unit instance
11860 @type alloc_result: tuple/list
11861 @param alloc_result: Result from iallocator
11862 @type early_release: bool
11863 @param early_release: Whether to release locks early if possible
11864 @type use_nodes: bool
11865 @param use_nodes: Whether to display node names instead of groups
11867 """
11868 (moved, failed, jobs) = alloc_result
11870 if failed:
11871 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11872 for (name, reason) in failed)
11873 lu.LogWarning("Unable to evacuate instances %s", failreason)
11874 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11876 if moved:
11877 lu.LogInfo("Instances to be moved: %s",
11878 utils.CommaJoin("%s (to %s)" %
11879 (name, _NodeEvacDest(use_nodes, group, nodes))
11880 for (name, group, nodes) in moved))
11882 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11883 map(opcodes.OpCode.LoadOpCode, ops))
11884 for ops in jobs]
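# Illustrative sketch (comment only, not executed): a hypothetical
# alloc_result could look like
#   ([("inst1", "group1", ["node2"])], [], [[op1_dict, op2_dict]])
# i.e. one instance moved to "node2", no failures, and a single job made of
# two serialized opcodes to be loaded and submitted.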
11887 def _DiskSizeInBytesToMebibytes(lu, size):
11888 """Converts a disk size in bytes to mebibytes.
11890 Warns and rounds up if the size isn't an even multiple of 1 MiB.
11892 """
11893 (mib, remainder) = divmod(size, 1024 * 1024)
11895 if remainder != 0:
11896 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
11897 " to not overwrite existing data (%s bytes will not be"
11898 " wiped)", (1024 * 1024) - remainder)
11899 mib += 1
11901 return mib
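# Illustrative sketch (comment only, not executed): for size set to
# 1024 * 1024 * 1024 + 1 (1 GiB plus one byte), divmod yields (1024, 1), so a
# warning is logged and 1025 is returned, rounding up to the next mebibyte.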
11904 class LUInstanceGrowDisk(LogicalUnit):
11905 """Grow a disk of an instance.
11907 """
11908 HPATH = "disk-grow"
11909 HTYPE = constants.HTYPE_INSTANCE
11910 REQ_BGL = False
11912 def ExpandNames(self):
11913 self._ExpandAndLockInstance()
11914 self.needed_locks[locking.LEVEL_NODE] = []
11915 self.needed_locks[locking.LEVEL_NODE_RES] = []
11916 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11917 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11919 def DeclareLocks(self, level):
11920 if level == locking.LEVEL_NODE:
11921 self._LockInstancesNodes()
11922 elif level == locking.LEVEL_NODE_RES:
11923 # Copy node locks
11924 self.needed_locks[locking.LEVEL_NODE_RES] = \
11925 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
11927 def BuildHooksEnv(self):
11928 """Build hooks env.
11930 This runs on the master, the primary and all the secondaries.
11932 """
11933 env = {
11934 "DISK": self.op.disk,
11935 "AMOUNT": self.op.amount,
11936 "ABSOLUTE": self.op.absolute,
11937 }
11938 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11939 return env
11941 def BuildHooksNodes(self):
11942 """Build hooks nodes.
11944 """
11945 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11946 return (nl, nl)
11948 def CheckPrereq(self):
11949 """Check prerequisites.
11951 This checks that the instance is in the cluster.
11953 """
11954 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11955 assert instance is not None, \
11956 "Cannot retrieve locked instance %s" % self.op.instance_name
11957 nodenames = list(instance.all_nodes)
11958 for node in nodenames:
11959 _CheckNodeOnline(self, node)
11961 self.instance = instance
11963 if instance.disk_template not in constants.DTS_GROWABLE:
11964 raise errors.OpPrereqError("Instance's disk layout does not support"
11965 " growing", errors.ECODE_INVAL)
11967 self.disk = instance.FindDisk(self.op.disk)
11969 if self.op.absolute:
11970 self.target = self.op.amount
11971 self.delta = self.target - self.disk.size
11972 if self.delta < 0:
11973 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11974 "current disk size (%s)" %
11975 (utils.FormatUnit(self.target, "h"),
11976 utils.FormatUnit(self.disk.size, "h")),
11977 errors.ECODE_STATE)
11978 else:
11979 self.delta = self.op.amount
11980 self.target = self.disk.size + self.delta
11981 if self.delta < 0:
11982 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11983 utils.FormatUnit(self.delta, "h"),
11984 errors.ECODE_INVAL)
11986 if instance.disk_template not in (constants.DT_FILE,
11987 constants.DT_SHARED_FILE,
11988 constants.DT_RBD):
11989 # TODO: check the free disk space for file, when that feature will be
11990 # supported
11991 _CheckNodesFreeDiskPerVG(self, nodenames,
11992 self.disk.ComputeGrowth(self.delta))
11994 def Exec(self, feedback_fn):
11995 """Execute disk grow.
11997 """
11998 instance = self.instance
11999 disk = self.disk
12001 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12002 assert (self.owned_locks(locking.LEVEL_NODE) ==
12003 self.owned_locks(locking.LEVEL_NODE_RES))
12005 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12007 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12008 if not disks_ok:
12009 raise errors.OpExecError("Cannot activate block device to grow")
12011 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12012 (self.op.disk, instance.name,
12013 utils.FormatUnit(self.delta, "h"),
12014 utils.FormatUnit(self.target, "h")))
12016 # First run all grow ops in dry-run mode
12017 for node in instance.all_nodes:
12018 self.cfg.SetDiskID(disk, node)
12019 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12020 True, True)
12021 result.Raise("Dry-run grow request failed to node %s" % node)
12023 if wipe_disks:
12024 # Get disk size from primary node for wiping
12025 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12026 result.Raise("Failed to retrieve disk size from node '%s'" %
12027 instance.primary_node)
12029 (disk_size_in_bytes, ) = result.payload
12031 if disk_size_in_bytes is None:
12032 raise errors.OpExecError("Failed to retrieve disk size from primary"
12033 " node '%s'" % instance.primary_node)
12035 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12037 assert old_disk_size >= disk.size, \
12038 ("Retrieved disk size too small (got %s, should be at least %s)" %
12039 (old_disk_size, disk.size))
12040 else:
12041 old_disk_size = None
12043 # We know that (as far as we can test) operations across different
12044 # nodes will succeed, time to run it for real on the backing storage
12045 for node in instance.all_nodes:
12046 self.cfg.SetDiskID(disk, node)
12047 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12048 False, True)
12049 result.Raise("Grow request failed to node %s" % node)
12051 # And now execute it for logical storage, on the primary node
12052 node = instance.primary_node
12053 self.cfg.SetDiskID(disk, node)
12054 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12055 False, False)
12056 result.Raise("Grow request failed to node %s" % node)
12058 disk.RecordGrow(self.delta)
12059 self.cfg.Update(instance, feedback_fn)
12061 # Changes have been recorded, release node lock
12062 _ReleaseLocks(self, locking.LEVEL_NODE)
12064 # Downgrade lock while waiting for sync
12065 self.glm.downgrade(locking.LEVEL_INSTANCE)
12067 assert wipe_disks ^ (old_disk_size is None)
12069 if wipe_disks:
12070 assert instance.disks[self.op.disk] == disk
12072 # Wipe newly added disk space
12073 _WipeDisks(self, instance,
12074 disks=[(self.op.disk, disk, old_disk_size)])
12076 if self.op.wait_for_sync:
12077 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12078 if disk_abort:
12079 self.proc.LogWarning("Disk sync-ing has not returned a good"
12080 " status; please check the instance")
12081 if instance.admin_state != constants.ADMINST_UP:
12082 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12083 elif instance.admin_state != constants.ADMINST_UP:
12084 self.proc.LogWarning("Not shutting down the disk even if the instance is"
12085 " not supposed to be running because no wait for"
12086 " sync mode was requested")
12088 assert self.owned_locks(locking.LEVEL_NODE_RES)
12089 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12092 class LUInstanceQueryData(NoHooksLU):
12093 """Query runtime instance data.
12095 """
12096 REQ_BGL = False
12098 def ExpandNames(self):
12099 self.needed_locks = {}
12101 # Use locking if requested or when non-static information is wanted
12102 if not (self.op.static or self.op.use_locking):
12103 self.LogWarning("Non-static data requested, locks need to be acquired")
12104 self.op.use_locking = True
12106 if self.op.instances or not self.op.use_locking:
12107 # Expand instance names right here
12108 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12109 else:
12110 # Will use acquired locks
12111 self.wanted_names = None
12113 if self.op.use_locking:
12114 self.share_locks = _ShareAll()
12116 if self.wanted_names is None:
12117 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12118 else:
12119 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12121 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12122 self.needed_locks[locking.LEVEL_NODE] = []
12123 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12125 def DeclareLocks(self, level):
12126 if self.op.use_locking:
12127 if level == locking.LEVEL_NODEGROUP:
12128 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12130 # Lock all groups used by instances optimistically; this requires going
12131 # via the node before it's locked, requiring verification later on
12132 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12133 frozenset(group_uuid
12134 for instance_name in owned_instances
12135 for group_uuid in
12136 self.cfg.GetInstanceNodeGroups(instance_name))
12138 elif level == locking.LEVEL_NODE:
12139 self._LockInstancesNodes()
12141 def CheckPrereq(self):
12142 """Check prerequisites.
12144 This only checks the optional instance list against the existing names.
12146 """
12147 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12148 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12149 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12151 if self.wanted_names is None:
12152 assert self.op.use_locking, "Locking was not used"
12153 self.wanted_names = owned_instances
12155 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12157 if self.op.use_locking:
12158 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12159 None)
12160 else:
12161 assert not (owned_instances or owned_groups or owned_nodes)
12163 self.wanted_instances = instances.values()
12165 def _ComputeBlockdevStatus(self, node, instance, dev):
12166 """Returns the status of a block device
12168 """
12169 if self.op.static or not node:
12170 return None
12172 self.cfg.SetDiskID(dev, node)
12174 result = self.rpc.call_blockdev_find(node, dev)
12175 if result.offline:
12176 return None
12178 result.Raise("Can't compute disk status for %s" % instance.name)
12180 status = result.payload
12181 if status is None:
12182 return None
12184 return (status.dev_path, status.major, status.minor,
12185 status.sync_percent, status.estimated_time,
12186 status.is_degraded, status.ldisk_status)
12188 def _ComputeDiskStatus(self, instance, snode, dev):
12189 """Compute block device status.
12191 """
12192 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12194 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12196 def _ComputeDiskStatusInner(self, instance, snode, dev):
12197 """Compute block device status.
12199 @attention: The device has to be annotated already.
12201 """
12202 if dev.dev_type in constants.LDS_DRBD:
12203 # we change the snode then (otherwise we use the one passed in)
12204 if dev.logical_id[0] == instance.primary_node:
12205 snode = dev.logical_id[1]
12207 snode = dev.logical_id[0]
12209 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12210 instance, dev)
12211 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12213 if dev.children:
12214 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12215 instance, snode),
12216 dev.children)
12217 else:
12218 dev_children = []
12220 return {
12221 "iv_name": dev.iv_name,
12222 "dev_type": dev.dev_type,
12223 "logical_id": dev.logical_id,
12224 "physical_id": dev.physical_id,
12225 "pstatus": dev_pstatus,
12226 "sstatus": dev_sstatus,
12227 "children": dev_children,
12228 "mode": dev.mode,
12229 "size": dev.size,
12230 }
12232 def Exec(self, feedback_fn):
12233 """Gather and return data"""
12234 result = {}
12236 cluster = self.cfg.GetClusterInfo()
12238 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12239 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12241 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12242 for node in nodes.values()))
12244 group2name_fn = lambda uuid: groups[uuid].name
12246 for instance in self.wanted_instances:
12247 pnode = nodes[instance.primary_node]
12249 if self.op.static or pnode.offline:
12250 remote_state = None
12251 if pnode.offline:
12252 self.LogWarning("Primary node %s is marked offline, returning static"
12253 " information only for instance %s" %
12254 (pnode.name, instance.name))
12255 else:
12256 remote_info = self.rpc.call_instance_info(instance.primary_node,
12257 instance.name,
12258 instance.hypervisor)
12259 remote_info.Raise("Error checking node %s" % instance.primary_node)
12260 remote_info = remote_info.payload
12261 if remote_info and "state" in remote_info:
12262 remote_state = "up"
12263 else:
12264 if instance.admin_state == constants.ADMINST_UP:
12265 remote_state = "down"
12266 else:
12267 remote_state = instance.admin_state
12269 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12270 instance.disks)
12272 snodes_group_uuids = [nodes[snode_name].group
12273 for snode_name in instance.secondary_nodes]
12275 result[instance.name] = {
12276 "name": instance.name,
12277 "config_state": instance.admin_state,
12278 "run_state": remote_state,
12279 "pnode": instance.primary_node,
12280 "pnode_group_uuid": pnode.group,
12281 "pnode_group_name": group2name_fn(pnode.group),
12282 "snodes": instance.secondary_nodes,
12283 "snodes_group_uuids": snodes_group_uuids,
12284 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12286 # this happens to be the same format used for hooks
12287 "nics": _NICListToTuple(self, instance.nics),
12288 "disk_template": instance.disk_template,
12289 "disks": disks,
12290 "hypervisor": instance.hypervisor,
12291 "network_port": instance.network_port,
12292 "hv_instance": instance.hvparams,
12293 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12294 "be_instance": instance.beparams,
12295 "be_actual": cluster.FillBE(instance),
12296 "os_instance": instance.osparams,
12297 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12298 "serial_no": instance.serial_no,
12299 "mtime": instance.mtime,
12300 "ctime": instance.ctime,
12301 "uuid": instance.uuid,
12302 }
12304 return result
12307 def PrepareContainerMods(mods, private_fn):
12308 """Prepares a list of container modifications by adding a private data field.
12310 @type mods: list of tuples; (operation, index, parameters)
12311 @param mods: List of modifications
12312 @type private_fn: callable or None
12313 @param private_fn: Callable for constructing a private data field for a
12314 modification
12315 @rtype: list
12317 """
12318 if private_fn is None:
12319 fn = lambda: None
12320 else:
12321 fn = private_fn
12323 return [(op, idx, params, fn()) for (op, idx, params) in mods]
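# Illustrative sketch (comment only, not executed), with hypothetical
# parameters:
#   PrepareContainerMods([(constants.DDM_ADD, -1, {"size": 1024})], None)
#   # -> [(constants.DDM_ADD, -1, {"size": 1024}, None)]
# With private_fn=_InstNicModPrivate, each tuple would instead carry a fresh
# private data object.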
12326 #: Type description for changes as returned by L{ApplyContainerMods}'s
12327 #: callbacks
12328 _TApplyContModsCbChanges = \
12329 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12330 ht.TNonEmptyString,
12331 ht.TAny,
12332 ])))
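# Illustrative sketch (comment only, not executed): valid values are None or
# a list of two-element tuples whose first item is a non-empty string, e.g.
#   [("nic/0", "add"), ("disk/1", 1024)]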
12335 def ApplyContainerMods(kind, container, chgdesc, mods,
12336 create_fn, modify_fn, remove_fn):
12337 """Applies descriptions in C{mods} to C{container}.
12339 @type kind: string
12340 @param kind: One-word item description
12341 @type container: list
12342 @param container: Container to modify
12343 @type chgdesc: None or list
12344 @param chgdesc: List of applied changes
12345 @type mods: list
12346 @param mods: Modifications as returned by L{PrepareContainerMods}
12347 @type create_fn: callable
12348 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12349 receives absolute item index, parameters and private data object as added
12350 by L{PrepareContainerMods}, returns tuple containing new item and changes
12351 as list
12352 @type modify_fn: callable
12353 @param modify_fn: Callback for modifying an existing item
12354 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12355 and private data object as added by L{PrepareContainerMods}, returns
12356 changes as list
12357 @type remove_fn: callable
12358 @param remove_fn: Callback on removing item; receives absolute item index,
12359 item and private data object as added by L{PrepareContainerMods}
12361 """
12362 for (op, idx, params, private) in mods:
12363 if idx == -1:
12364 # Append
12365 absidx = len(container) - 1
12366 elif idx < 0:
12367 raise IndexError("Not accepting negative indices other than -1")
12368 elif idx > len(container):
12369 raise IndexError("Got %s index %s, but there are only %s" %
12370 (kind, idx, len(container)))
12371 else:
12372 absidx = idx
12374 changes = None
12376 if op == constants.DDM_ADD:
12377 # Calculate where item will be added
12378 if idx == -1:
12379 addidx = len(container)
12380 else:
12381 addidx = idx
12383 if create_fn is None:
12384 item = params
12385 else:
12386 (item, changes) = create_fn(addidx, params, private)
12388 if idx == -1:
12389 container.append(item)
12390 else:
12392 assert idx <= len(container)
12393 # list.insert does so before the specified index
12394 container.insert(idx, item)
12395 else:
12396 # Retrieve existing item
12397 try:
12398 item = container[absidx]
12399 except IndexError:
12400 raise IndexError("Invalid %s index %s" % (kind, idx))
12402 if op == constants.DDM_REMOVE:
12403 assert not params
12405 if remove_fn is not None:
12406 remove_fn(absidx, item, private)
12408 changes = [("%s/%s" % (kind, absidx), "remove")]
12410 assert container[absidx] == item
12411 del container[absidx]
12412 elif op == constants.DDM_MODIFY:
12413 if modify_fn is not None:
12414 changes = modify_fn(absidx, item, params, private)
12415 else:
12416 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12418 assert _TApplyContModsCbChanges(changes)
12420 if not (chgdesc is None or changes is None):
12421 chgdesc.extend(changes)
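# Illustrative sketch (comment only, not executed), using a plain list and no
# callbacks (container contents are hypothetical):
#   container = ["a", "b"]
#   chgdesc = []
#   mods = PrepareContainerMods([(constants.DDM_REMOVE, 0, {})], None)
#   ApplyContainerMods("item", container, chgdesc, mods, None, None, None)
#   # container == ["b"], chgdesc == [("item/0", "remove")]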
12424 def _UpdateIvNames(base_index, disks):
12425 """Updates the C{iv_name} attribute of disks.
12427 @type disks: list of L{objects.Disk}
12429 """
12430 for (idx, disk) in enumerate(disks):
12431 disk.iv_name = "disk/%s" % (base_index + idx, )
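# Illustrative sketch (comment only, not executed): with base_index=1 and two
# disks, their iv_name attributes become "disk/1" and "disk/2".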
12434 class _InstNicModPrivate:
12435 """Data structure for network interface modifications.
12437 Used by L{LUInstanceSetParams}.
12439 """
12440 def __init__(self):
12441 self.params = None
12442 self.filled = None
12445 class LUInstanceSetParams(LogicalUnit):
12446 """Modifies an instance's parameters.
12448 """
12449 HPATH = "instance-modify"
12450 HTYPE = constants.HTYPE_INSTANCE
12451 REQ_BGL = False
12453 @staticmethod
12454 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12455 assert ht.TList(mods)
12456 assert not mods or len(mods[0]) in (2, 3)
12458 if mods and len(mods[0]) == 2:
12459 result = []
12461 addremove = 0
12462 for op, params in mods:
12463 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12464 result.append((op, -1, params))
12465 addremove += 1
12467 if addremove > 1:
12468 raise errors.OpPrereqError("Only one %s add or remove operation is"
12469 " supported at a time" % kind,
12470 errors.ECODE_INVAL)
12471 else:
12472 result.append((constants.DDM_MODIFY, op, params))
12474 assert verify_fn(result)
12475 else:
12476 result = mods
12478 return result
12480 @staticmethod
12481 def _CheckMods(kind, mods, key_types, item_fn):
12482 """Ensures requested disk/NIC modifications are valid.
12484 """
12485 for (op, _, params) in mods:
12486 assert ht.TDict(params)
12488 utils.ForceDictType(params, key_types)
12490 if op == constants.DDM_REMOVE:
12491 if params:
12492 raise errors.OpPrereqError("No settings should be passed when"
12493 " removing a %s" % kind,
12494 errors.ECODE_INVAL)
12495 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12496 item_fn(op, params)
12498 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12500 @staticmethod
12501 def _VerifyDiskModification(op, params):
12502 """Verifies a disk modification.
12504 """
12505 if op == constants.DDM_ADD:
12506 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12507 if mode not in constants.DISK_ACCESS_SET:
12508 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12509 errors.ECODE_INVAL)
12511 size = params.get(constants.IDISK_SIZE, None)
12512 if size is None:
12513 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12514 constants.IDISK_SIZE, errors.ECODE_INVAL)
12516 try:
12517 size = int(size)
12518 except (TypeError, ValueError), err:
12519 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12520 errors.ECODE_INVAL)
12522 params[constants.IDISK_SIZE] = size
12524 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12525 raise errors.OpPrereqError("Disk size change not possible, use"
12526 " grow-disk", errors.ECODE_INVAL)
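# Illustrative sketch (comment only, not executed): a minimal valid DDM_ADD
# parameter dict only needs a size, e.g. {constants.IDISK_SIZE: 1024}; the
# access mode then defaults to constants.DISK_RDWR.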
12528 @staticmethod
12529 def _VerifyNicModification(op, params):
12530 """Verifies a network interface modification.
12532 """
12533 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12534 ip = params.get(constants.INIC_IP, None)
12535 if ip is None:
12536 pass
12537 elif ip.lower() == constants.VALUE_NONE:
12538 params[constants.INIC_IP] = None
12539 elif not netutils.IPAddress.IsValid(ip):
12540 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12541 errors.ECODE_INVAL)
12543 bridge = params.get("bridge", None)
12544 link = params.get(constants.INIC_LINK, None)
12545 if bridge and link:
12546 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12547 " at the same time", errors.ECODE_INVAL)
12548 elif bridge and bridge.lower() == constants.VALUE_NONE:
12549 params["bridge"] = None
12550 elif link and link.lower() == constants.VALUE_NONE:
12551 params[constants.INIC_LINK] = None
12553 if op == constants.DDM_ADD:
12554 macaddr = params.get(constants.INIC_MAC, None)
12555 if macaddr is None:
12556 params[constants.INIC_MAC] = constants.VALUE_AUTO
12558 if constants.INIC_MAC in params:
12559 macaddr = params[constants.INIC_MAC]
12560 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12561 macaddr = utils.NormalizeAndValidateMac(macaddr)
12563 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12564 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12565 " modifying an existing NIC",
12566 errors.ECODE_INVAL)
12568 def CheckArguments(self):
12569 if not (self.op.nics or self.op.disks or self.op.disk_template or
12570 self.op.hvparams or self.op.beparams or self.op.os_name or
12571 self.op.offline is not None or self.op.runtime_mem):
12572 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12574 if self.op.hvparams:
12575 _CheckGlobalHvParams(self.op.hvparams)
12577 self.op.disks = self._UpgradeDiskNicMods(
12578 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12579 self.op.nics = self._UpgradeDiskNicMods(
12580 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12582 # Check disk modifications
12583 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12584 self._VerifyDiskModification)
12586 if self.op.disks and self.op.disk_template is not None:
12587 raise errors.OpPrereqError("Disk template conversion and other disk"
12588 " changes not supported at the same time",
12589 errors.ECODE_INVAL)
12591 if (self.op.disk_template and
12592 self.op.disk_template in constants.DTS_INT_MIRROR and
12593 self.op.remote_node is None):
12594 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12595 " one requires specifying a secondary node",
12596 errors.ECODE_INVAL)
12598 # Check NIC modifications
12599 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12600 self._VerifyNicModification)
12602 def ExpandNames(self):
12603 self._ExpandAndLockInstance()
12604 # Can't even acquire node locks in shared mode as upcoming changes in
12605 # Ganeti 2.6 will start to modify the node object on disk conversion
12606 self.needed_locks[locking.LEVEL_NODE] = []
12607 self.needed_locks[locking.LEVEL_NODE_RES] = []
12608 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12610 def DeclareLocks(self, level):
12611 # TODO: Acquire group lock in shared mode (disk parameters)
12612 if level == locking.LEVEL_NODE:
12613 self._LockInstancesNodes()
12614 if self.op.disk_template and self.op.remote_node:
12615 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12616 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12617 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12619 self.needed_locks[locking.LEVEL_NODE_RES] = \
12620 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12622 def BuildHooksEnv(self):
12623 """Build hooks env.
12625 This runs on the master, primary and secondaries.
12627 """
12628 args = {}
12629 if constants.BE_MINMEM in self.be_new:
12630 args["minmem"] = self.be_new[constants.BE_MINMEM]
12631 if constants.BE_MAXMEM in self.be_new:
12632 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12633 if constants.BE_VCPUS in self.be_new:
12634 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12635 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12636 # information at all.
12638 if self._new_nics is not None:
12639 nics = []
12641 for nic in self._new_nics:
12642 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12643 mode = nicparams[constants.NIC_MODE]
12644 link = nicparams[constants.NIC_LINK]
12645 nics.append((nic.ip, nic.mac, mode, link))
12647 args["nics"] = nics
12649 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12650 if self.op.disk_template:
12651 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12652 if self.op.runtime_mem:
12653 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12655 return env
12657 def BuildHooksNodes(self):
12658 """Build hooks nodes.
12660 """
12661 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12662 return (nl, nl)
12664 def _PrepareNicModification(self, params, private, old_ip, old_params,
12665 cluster, pnode):
12666 update_params_dict = dict([(key, params[key])
12667 for key in constants.NICS_PARAMETERS
12668 if key in params])
12670 if "bridge" in params:
12671 update_params_dict[constants.NIC_LINK] = params["bridge"]
12673 new_params = _GetUpdatedParams(old_params, update_params_dict)
12674 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12676 new_filled_params = cluster.SimpleFillNIC(new_params)
12677 objects.NIC.CheckParameterSyntax(new_filled_params)
12679 new_mode = new_filled_params[constants.NIC_MODE]
12680 if new_mode == constants.NIC_MODE_BRIDGED:
12681 bridge = new_filled_params[constants.NIC_LINK]
12682 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12683 if msg:
12684 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12685 if self.op.force:
12686 self.warn.append(msg)
12687 else:
12688 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12690 elif new_mode == constants.NIC_MODE_ROUTED:
12691 ip = params.get(constants.INIC_IP, old_ip)
12692 if ip is None:
12693 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12694 " on a routed NIC", errors.ECODE_INVAL)
12696 if constants.INIC_MAC in params:
12697 mac = params[constants.INIC_MAC]
12698 if mac is None:
12699 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12700 errors.ECODE_INVAL)
12701 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12702 # otherwise generate the MAC address
12703 params[constants.INIC_MAC] = \
12704 self.cfg.GenerateMAC(self.proc.GetECId())
12705 else:
12706 # or validate/reserve the current one
12707 try:
12708 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12709 except errors.ReservationError:
12710 raise errors.OpPrereqError("MAC address '%s' already in use"
12711 " in cluster" % mac,
12712 errors.ECODE_NOTUNIQUE)
12714 private.params = new_params
12715 private.filled = new_filled_params
12717 def CheckPrereq(self):
12718 """Check prerequisites.
12720 This only checks the instance list against the existing names.
12722 """
12723 # checking the new params on the primary/secondary nodes
12725 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12726 cluster = self.cluster = self.cfg.GetClusterInfo()
12727 assert self.instance is not None, \
12728 "Cannot retrieve locked instance %s" % self.op.instance_name
12729 pnode = instance.primary_node
12730 nodelist = list(instance.all_nodes)
12731 pnode_info = self.cfg.GetNodeInfo(pnode)
12732 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12734 # Prepare disk/NIC modifications
12735 self.diskmod = PrepareContainerMods(self.op.disks, None)
12736 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12738 # OS change
12739 if self.op.os_name and not self.op.force:
12740 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12741 self.op.force_variant)
12742 instance_os = self.op.os_name
12743 else:
12744 instance_os = instance.os
12746 assert not (self.op.disk_template and self.op.disks), \
12747 "Can't modify disk template and apply disk changes at the same time"
12749 if self.op.disk_template:
12750 if instance.disk_template == self.op.disk_template:
12751 raise errors.OpPrereqError("Instance already has disk template %s" %
12752 instance.disk_template, errors.ECODE_INVAL)
12754 if (instance.disk_template,
12755 self.op.disk_template) not in self._DISK_CONVERSIONS:
12756 raise errors.OpPrereqError("Unsupported disk template conversion from"
12757 " %s to %s" % (instance.disk_template,
12758 self.op.disk_template),
12759 errors.ECODE_INVAL)
12760 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12761 msg="cannot change disk template")
12762 if self.op.disk_template in constants.DTS_INT_MIRROR:
12763 if self.op.remote_node == pnode:
12764 raise errors.OpPrereqError("Given new secondary node %s is the same"
12765 " as the primary node of the instance" %
12766 self.op.remote_node, errors.ECODE_STATE)
12767 _CheckNodeOnline(self, self.op.remote_node)
12768 _CheckNodeNotDrained(self, self.op.remote_node)
12769 # FIXME: here we assume that the old instance type is DT_PLAIN
12770 assert instance.disk_template == constants.DT_PLAIN
12771 disks = [{constants.IDISK_SIZE: d.size,
12772 constants.IDISK_VG: d.logical_id[0]}
12773 for d in instance.disks]
12774 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12775 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12777 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12778 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12779 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
12780 snode_group)
12781 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12782 ignore=self.op.ignore_ipolicy)
12783 if pnode_info.group != snode_info.group:
12784 self.LogWarning("The primary and secondary nodes are in two"
12785 " different node groups; the disk parameters"
12786 " from the first disk's node group will be"
12789 # hvparams processing
12790 if self.op.hvparams:
12791 hv_type = instance.hypervisor
12792 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12793 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12794 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12797 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12798 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12799 self.hv_proposed = self.hv_new = hv_new # the new actual values
12800 self.hv_inst = i_hvdict # the new dict (without defaults)
12801 else:
12802 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12803 instance.hvparams)
12804 self.hv_new = self.hv_inst = {}
12806 # beparams processing
12807 if self.op.beparams:
12808 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12809 use_none=True)
12810 objects.UpgradeBeParams(i_bedict)
12811 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12812 be_new = cluster.SimpleFillBE(i_bedict)
12813 self.be_proposed = self.be_new = be_new # the new actual values
12814 self.be_inst = i_bedict # the new dict (without defaults)
12815 else:
12816 self.be_new = self.be_inst = {}
12817 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12818 be_old = cluster.FillBE(instance)
12820 # CPU param validation -- checking every time a parameter is
12821 # changed to cover all cases where either CPU mask or vcpus have
12822 # changed
12823 if (constants.BE_VCPUS in self.be_proposed and
12824 constants.HV_CPU_MASK in self.hv_proposed):
12825 cpu_list = \
12826 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12827 # Verify mask is consistent with number of vCPUs. Can skip this
12828 # test if only 1 entry in the CPU mask, which means same mask
12829 # is applied to all vCPUs.
12830 if (len(cpu_list) > 1 and
12831 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12832 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12833 " CPU mask [%s]" %
12834 (self.be_proposed[constants.BE_VCPUS],
12835 self.hv_proposed[constants.HV_CPU_MASK]),
12836 errors.ECODE_INVAL)
12838 # Only perform this test if a new CPU mask is given
12839 if constants.HV_CPU_MASK in self.hv_new:
12840 # Calculate the largest CPU number requested
12841 max_requested_cpu = max(map(max, cpu_list))
12842 # Check that all of the instance's nodes have enough physical CPUs to
12843 # satisfy the requested CPU mask
12844 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12845 max_requested_cpu + 1, instance.hypervisor)
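# Example (mask format assumed from the checks above): with a multi-CPU
# mask such as "0-1:3", cpu_list has one entry per virtual CPU, e.g.
# [[0, 1], [3]]. The instance then needs BE_VCPUS == 2, and since
# max(map(max, cpu_list)) == 3, every node hosting the instance must have
# at least 3 + 1 = 4 physical CPUs for the mask to be satisfiable.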
12847 # osparams processing
12848 if self.op.osparams:
12849 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12850 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12851 self.os_inst = i_osdict # the new dict (without defaults)
12852 else:
12853 self.os_inst = {}
12855 self.warn = []
12857 #TODO(dynmem): do the appropriate check involving MINMEM
12858 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12859 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12860 mem_check_list = [pnode]
12861 if be_new[constants.BE_AUTO_BALANCE]:
12862 # either we changed auto_balance to yes or it was from before
12863 mem_check_list.extend(instance.secondary_nodes)
12864 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12865 instance.hypervisor)
12866 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12867 [instance.hypervisor])
12868 pninfo = nodeinfo[pnode]
12869 msg = pninfo.fail_msg
12871 # Assume the primary node is unreachable and go ahead
12872 self.warn.append("Can't get info from primary node %s: %s" %
12873 (pnode, msg))
12874 else:
12875 (_, _, (pnhvinfo, )) = pninfo.payload
12876 if not isinstance(pnhvinfo.get("memory_free", None), int):
12877 self.warn.append("Node data from primary node %s doesn't contain"
12878 " free memory information" % pnode)
12879 elif instance_info.fail_msg:
12880 self.warn.append("Can't get instance runtime information: %s" %
12881 instance_info.fail_msg)
12882 else:
12883 if instance_info.payload:
12884 current_mem = int(instance_info.payload["memory"])
12885 else:
12886 # Assume instance not running
12887 # (there is a slight race condition here, but it's not very
12888 # probable, and we have no other way to check)
12889 # TODO: Describe race condition
12890 current_mem = 0
12891 #TODO(dynmem): do the appropriate check involving MINMEM
12892 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12893 pnhvinfo["memory_free"])
12894 if miss_mem > 0:
12895 raise errors.OpPrereqError("This change will prevent the instance"
12896 " from starting, due to %d MB of memory"
12897 " missing on its primary node" %
12898 miss_mem, errors.ECODE_NORES)
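# Example (numbers illustrative): raising BE_MAXMEM to 4096 MB for an
# instance currently using 2048 MB on a primary node reporting 1024 MB
# free gives miss_mem = 4096 - 2048 - 1024 = 1024 > 0, so the change is
# refused unless memory is freed on the primary node first.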
12900 if be_new[constants.BE_AUTO_BALANCE]:
12901 for node, nres in nodeinfo.items():
12902 if node not in instance.secondary_nodes:
12903 continue
12904 nres.Raise("Can't get info from secondary node %s" % node,
12905 prereq=True, ecode=errors.ECODE_STATE)
12906 (_, _, (nhvinfo, )) = nres.payload
12907 if not isinstance(nhvinfo.get("memory_free", None), int):
12908 raise errors.OpPrereqError("Secondary node %s didn't return free"
12909 " memory information" % node,
12910 errors.ECODE_STATE)
12911 #TODO(dynmem): do the appropriate check involving MINMEM
12912 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12913 raise errors.OpPrereqError("This change will prevent the instance"
12914 " from failover to its secondary node"
12915 " %s, due to not enough memory" % node,
12916 errors.ECODE_STATE)
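# Note (convention as used above, and throughout this module): RPC calls
# return per-node result objects where fail_msg is a non-empty string on
# failure and payload carries the node's answer on success; Raise()
# converts a failure into an exception, raising OpPrereqError instead of
# OpExecError when called with prereq=True.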
12918 if self.op.runtime_mem:
12919 remote_info = self.rpc.call_instance_info(instance.primary_node,
12920 instance.name,
12921 instance.hypervisor)
12922 remote_info.Raise("Error checking node %s" % instance.primary_node)
12923 if not remote_info.payload: # not running already
12924 raise errors.OpPrereqError("Instance %s is not running" %
12925 instance.name, errors.ECODE_STATE)
12927 current_memory = remote_info.payload["memory"]
12928 if (not self.op.force and
12929 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12930 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12931 raise errors.OpPrereqError("Instance %s must have memory between %d"
12932 " and %d MB unless --force is"
12933 " given" %
12934 (instance.name,
12935 self.be_proposed[constants.BE_MINMEM],
12936 self.be_proposed[constants.BE_MAXMEM]),
12937 errors.ECODE_INVAL)
12939 if self.op.runtime_mem > current_memory:
12940 _CheckNodeFreeMemory(self, instance.primary_node,
12941 "ballooning memory for instance %s" %
12943 self.op.runtime_mem - current_memory,
12944 instance.hypervisor)
12946 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12947 raise errors.OpPrereqError("Disk operations not supported for"
12948 " diskless instances", errors.ECODE_INVAL)
12950 def _PrepareNicCreate(_, params, private):
12951 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12952 return (None, None)
12954 def _PrepareNicMod(_, nic, params, private):
12955 self._PrepareNicModification(params, private, nic.ip,
12956 nic.nicparams, cluster, pnode)
12957 return None
12959 # Verify NIC changes (operating on copy)
12960 nics = instance.nics[:]
12961 ApplyContainerMods("NIC", nics, None, self.nicmod,
12962 _PrepareNicCreate, _PrepareNicMod, None)
12963 if len(nics) > constants.MAX_NICS:
12964 raise errors.OpPrereqError("Instance has too many network interfaces"
12965 " (%d), cannot add more" % constants.MAX_NICS,
12966 errors.ECODE_STATE)
12968 # Verify disk changes (operating on a copy)
12969 disks = instance.disks[:]
12970 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12971 if len(disks) > constants.MAX_DISKS:
12972 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12973 " more" % constants.MAX_DISKS,
12974 errors.ECODE_STATE)
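# Note (behaviour inferred from the calls above and in Exec below):
# ApplyContainerMods() walks the prepared modification list and invokes
# the given create/modify/remove callbacks, e.g.
#
#   ApplyContainerMods("NIC", nics, None, self.nicmod,
#                      _PrepareNicCreate, _PrepareNicMod, None)
#
# Here it runs against copies, with no change-descriptor list and no
# remove callback, purely to validate the request; the real containers
# are modified later in Exec() with the same helper.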
12976 if self.op.offline is not None:
12977 if self.op.offline:
12978 msg = "can't change to offline"
12980 msg = "can't change to online"
12981 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12983 # Pre-compute NIC changes (necessary to use result in hooks)
12984 self._nic_chgdesc = []
12985 if self.nicmod:
12986 # Operate on copies as this is still in prereq
12987 nics = [nic.Copy() for nic in instance.nics]
12988 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12989 self._CreateNewNic, self._ApplyNicMods, None)
12990 self._new_nics = nics
12991 else:
12992 self._new_nics = None
12994 def _ConvertPlainToDrbd(self, feedback_fn):
12995 """Converts an instance from plain to drbd.
12998 feedback_fn("Converting template to drbd")
12999 instance = self.instance
13000 pnode = instance.primary_node
13001 snode = self.op.remote_node
13003 assert instance.disk_template == constants.DT_PLAIN
13005 # create a fake disk info for _GenerateDiskTemplate
13006 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13007 constants.IDISK_VG: d.logical_id[0]}
13008 for d in instance.disks]
13009 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13010 instance.name, pnode, [snode],
13011 disk_info, None, None, 0, feedback_fn,
13012 self.diskparams)
13013 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13014 self.diskparams)
13015 info = _GetInstanceInfoText(instance)
13016 feedback_fn("Creating additional volumes...")
13017 # first, create the missing data and meta devices
13018 for disk in anno_disks:
13019 # unfortunately this is... not too nice
13020 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13021 info, True)
13022 for child in disk.children:
13023 _CreateSingleBlockDev(self, snode, instance, child, info, True)
13024 # at this stage, all new LVs have been created, we can rename the
13025 # old ones
13026 feedback_fn("Renaming original volumes...")
13027 rename_list = [(o, n.children[0].logical_id)
13028 for (o, n) in zip(instance.disks, new_disks)]
13029 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13030 result.Raise("Failed to rename original LVs")
13032 feedback_fn("Initializing DRBD devices...")
13033 # all child devices are in place, we can now create the DRBD devices
13034 for disk in anno_disks:
13035 for node in [pnode, snode]:
13036 f_create = node == pnode
13037 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
13039 # at this point, the instance has been modified
13040 instance.disk_template = constants.DT_DRBD8
13041 instance.disks = new_disks
13042 self.cfg.Update(instance, feedback_fn)
13044 # Release node locks while waiting for sync
13045 _ReleaseLocks(self, locking.LEVEL_NODE)
13047 # disks are created, waiting for sync
13048 disk_abort = not _WaitForSync(self, instance,
13049 oneshot=not self.op.wait_for_sync)
13050 if disk_abort:
13051 raise errors.OpExecError("There are some degraded disks for"
13052 " this instance, please cleanup manually")
13054 # Node resource locks will be released by caller
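# Summary of the conversion above: new DRBD disk trees are generated from
# the sizes of the existing LVs, the missing data/meta volumes are created
# on both nodes, the original LVs are renamed into place as the DRBD data
# children, the DRBD devices are assembled on top, and only then is the
# instance switched to DT_DRBD8 and written back to the configuration.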
13056 def _ConvertDrbdToPlain(self, feedback_fn):
13057 """Converts an instance from drbd to plain.
13060 instance = self.instance
13062 assert len(instance.secondary_nodes) == 1
13063 assert instance.disk_template == constants.DT_DRBD8
13065 pnode = instance.primary_node
13066 snode = instance.secondary_nodes[0]
13067 feedback_fn("Converting template to plain")
13069 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13070 new_disks = [d.children[0] for d in instance.disks]
13072 # copy over size and mode
13073 for parent, child in zip(old_disks, new_disks):
13074 child.size = parent.size
13075 child.mode = parent.mode
13077 # this is a DRBD disk, return its port to the pool
13078 # NOTE: this must be done right before the call to cfg.Update!
13079 for disk in old_disks:
13080 tcp_port = disk.logical_id[2]
13081 self.cfg.AddTcpUdpPort(tcp_port)
13083 # update instance structure
13084 instance.disks = new_disks
13085 instance.disk_template = constants.DT_PLAIN
13086 self.cfg.Update(instance, feedback_fn)
13088 # Release locks in case removing disks takes a while
13089 _ReleaseLocks(self, locking.LEVEL_NODE)
13091 feedback_fn("Removing volumes on the secondary node...")
13092 for disk in old_disks:
13093 self.cfg.SetDiskID(disk, snode)
13094 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
13095 if msg:
13096 self.LogWarning("Could not remove block device %s on node %s,"
13097 " continuing anyway: %s", disk.iv_name, snode, msg)
13099 feedback_fn("Removing unneeded volumes on the primary node...")
13100 for idx, disk in enumerate(old_disks):
13101 meta = disk.children[1]
13102 self.cfg.SetDiskID(meta, pnode)
13103 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
13104 if msg:
13105 self.LogWarning("Could not remove metadata for disk %d on node %s,"
13106 " continuing anyway: %s", idx, pnode, msg)
13108 def _CreateNewDisk(self, idx, params, _):
13109 """Creates a new disk.
13112 instance = self.instance
13115 if instance.disk_template in constants.DTS_FILEBASED:
13116 (file_driver, file_path) = instance.disks[0].logical_id
13117 file_path = os.path.dirname(file_path)
13118 else:
13119 file_driver = file_path = None
13121 disk = \
13122 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13123 instance.primary_node, instance.secondary_nodes,
13124 [params], file_path, file_driver, idx,
13125 self.Log, self.diskparams)[0]
13127 info = _GetInstanceInfoText(instance)
13129 logging.info("Creating volume %s for instance %s",
13130 disk.iv_name, instance.name)
13131 # Note: this needs to be kept in sync with _CreateDisks
13133 for node in instance.all_nodes:
13134 f_create = (node == instance.primary_node)
13135 try:
13136 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13137 except errors.OpExecError, err:
13138 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13139 disk.iv_name, disk, node, err)
13141 return (disk, [
13142 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13146 def _ModifyDisk(idx, disk, params, _):
13147 """Modifies a disk.
13150 disk.mode = params[constants.IDISK_MODE]
13152 return [
13153 ("disk.mode/%d" % idx, disk.mode),
13156 def _RemoveDisk(self, idx, root, _):
13157 """Removes a disk.
13159 """
13160 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13161 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13162 self.cfg.SetDiskID(disk, node)
13163 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13164 if msg:
13165 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13166 " continuing anyway", idx, node, msg)
13168 # if this is a DRBD disk, return its port to the pool
13169 if root.dev_type in constants.LDS_DRBD:
13170 self.cfg.AddTcpUdpPort(root.logical_id[2])
13172 @staticmethod
13173 def _CreateNewNic(idx, params, private):
13174 """Creates data structure for a new network interface.
13177 mac = params[constants.INIC_MAC]
13178 ip = params.get(constants.INIC_IP, None)
13179 nicparams = private.params
13181 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
13182 ("nic.%d" % idx,
13183 "add:mac=%s,ip=%s,mode=%s,link=%s" %
13184 (mac, ip, private.filled[constants.NIC_MODE],
13185 private.filled[constants.NIC_LINK])),
13186 ])
13188 @staticmethod
13189 def _ApplyNicMods(idx, nic, params, private):
13190 """Modifies a network interface.
13195 for key in [constants.INIC_MAC, constants.INIC_IP]:
13196 if key in params:
13197 changes.append(("nic.%s/%d" % (key, idx), params[key]))
13198 setattr(nic, key, params[key])
13200 if private.params:
13201 nic.nicparams = private.params
13203 for (key, val) in params.items():
13204 changes.append(("nic.%s/%d" % (key, idx), val))
13206 return changes
13208 def Exec(self, feedback_fn):
13209 """Modifies an instance.
13211 All parameters take effect only at the next restart of the instance.
13213 """
13214 # Process here the warnings from CheckPrereq, as we don't have a
13215 # feedback_fn there.
13216 # TODO: Replace with self.LogWarning
13217 for warn in self.warn:
13218 feedback_fn("WARNING: %s" % warn)
13220 assert ((self.op.disk_template is None) ^
13221 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13222 "Not owning any node resource locks"
13225 instance = self.instance
13228 if self.op.runtime_mem:
13229 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13230 instance,
13231 self.op.runtime_mem)
13232 rpcres.Raise("Cannot modify instance runtime memory")
13233 result.append(("runtime_memory", self.op.runtime_mem))
13235 # Apply disk changes
13236 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13237 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13238 _UpdateIvNames(0, instance.disks)
13240 if self.op.disk_template:
13241 if __debug__:
13242 check_nodes = set(instance.all_nodes)
13243 if self.op.remote_node:
13244 check_nodes.add(self.op.remote_node)
13245 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13246 owned = self.owned_locks(level)
13247 assert not (check_nodes - owned), \
13248 ("Not owning the correct locks, owning %r, expected at least %r" %
13249 (owned, check_nodes))
13251 r_shut = _ShutdownInstanceDisks(self, instance)
13252 if not r_shut:
13253 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13254 " proceed with disk template conversion")
13255 mode = (instance.disk_template, self.op.disk_template)
13256 try:
13257 self._DISK_CONVERSIONS[mode](self, feedback_fn)
13258 except:
13259 self.cfg.ReleaseDRBDMinors(instance.name)
13260 raise
13261 result.append(("disk_template", self.op.disk_template))
13263 assert instance.disk_template == self.op.disk_template, \
13264 ("Expected disk template '%s', found '%s'" %
13265 (self.op.disk_template, instance.disk_template))
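# Note: _DISK_CONVERSIONS (defined at the end of this class) maps
# (current_template, new_template) tuples to conversion methods, so the
# call above is plain dictionary dispatch, e.g.:
#
#   mode = (constants.DT_PLAIN, constants.DT_DRBD8)
#   self._DISK_CONVERSIONS[mode](self, feedback_fn)  # _ConvertPlainToDrbd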
13267 # Release node and resource locks if there are any (they might already have
13268 # been released during disk conversion)
13269 _ReleaseLocks(self, locking.LEVEL_NODE)
13270 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13272 # Apply NIC changes
13273 if self._new_nics is not None:
13274 instance.nics = self._new_nics
13275 result.extend(self._nic_chgdesc)
13278 if self.op.hvparams:
13279 instance.hvparams = self.hv_inst
13280 for key, val in self.op.hvparams.iteritems():
13281 result.append(("hv/%s" % key, val))
13284 if self.op.beparams:
13285 instance.beparams = self.be_inst
13286 for key, val in self.op.beparams.iteritems():
13287 result.append(("be/%s" % key, val))
13290 if self.op.os_name:
13291 instance.os = self.op.os_name
13294 if self.op.osparams:
13295 instance.osparams = self.os_inst
13296 for key, val in self.op.osparams.iteritems():
13297 result.append(("os/%s" % key, val))
13299 if self.op.offline is None:
13300 # Ignore
13301 pass
13302 elif self.op.offline:
13303 # Mark instance as offline
13304 self.cfg.MarkInstanceOffline(instance.name)
13305 result.append(("admin_state", constants.ADMINST_OFFLINE))
13306 else:
13307 # Mark instance as online, but stopped
13308 self.cfg.MarkInstanceDown(instance.name)
13309 result.append(("admin_state", constants.ADMINST_DOWN))
13311 self.cfg.Update(instance, feedback_fn)
13313 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13314 self.owned_locks(locking.LEVEL_NODE)), \
13315 "All node locks should have been released by now"
13319 _DISK_CONVERSIONS = {
13320 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13321 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13322 }
13325 class LUInstanceChangeGroup(LogicalUnit):
13326 HPATH = "instance-change-group"
13327 HTYPE = constants.HTYPE_INSTANCE
13328 REQ_BGL = False
13330 def ExpandNames(self):
13331 self.share_locks = _ShareAll()
13332 self.needed_locks = {
13333 locking.LEVEL_NODEGROUP: [],
13334 locking.LEVEL_NODE: [],
13335 }
13337 self._ExpandAndLockInstance()
13339 if self.op.target_groups:
13340 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13341 self.op.target_groups)
13343 self.req_target_uuids = None
13345 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13347 def DeclareLocks(self, level):
13348 if level == locking.LEVEL_NODEGROUP:
13349 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13351 if self.req_target_uuids:
13352 lock_groups = set(self.req_target_uuids)
13354 # Lock all groups used by instance optimistically; this requires going
13355 # via the node before it's locked, requiring verification later on
13356 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13357 lock_groups.update(instance_groups)
13358 else:
13359 # No target groups, need to lock all of them
13360 lock_groups = locking.ALL_SET
13362 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13364 elif level == locking.LEVEL_NODE:
13365 if self.req_target_uuids:
13366 # Lock all nodes used by instances
13367 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13368 self._LockInstancesNodes()
13370 # Lock all nodes in all potential target groups
13371 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13372 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13373 member_nodes = [node_name
13374 for group in lock_groups
13375 for node_name in self.cfg.GetNodeGroup(group).members]
13376 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13377 else:
13378 # Lock all nodes as all groups are potential targets
13379 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13381 def CheckPrereq(self):
13382 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13383 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13384 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13386 assert (self.req_target_uuids is None or
13387 owned_groups.issuperset(self.req_target_uuids))
13388 assert owned_instances == set([self.op.instance_name])
13390 # Get instance information
13391 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13393 # Check if node groups for locked instance are still correct
13394 assert owned_nodes.issuperset(self.instance.all_nodes), \
13395 ("Instance %s's nodes changed while we kept the lock" %
13396 self.op.instance_name)
13398 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13399 owned_groups)
13401 if self.req_target_uuids:
13402 # User requested specific target groups
13403 self.target_uuids = frozenset(self.req_target_uuids)
13404 else:
13405 # All groups except those used by the instance are potential targets
13406 self.target_uuids = owned_groups - inst_groups
13408 conflicting_groups = self.target_uuids & inst_groups
13409 if conflicting_groups:
13410 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13411 " used by the instance '%s'" %
13412 (utils.CommaJoin(conflicting_groups),
13413 self.op.instance_name),
13414 errors.ECODE_INVAL)
13416 if not self.target_uuids:
13417 raise errors.OpPrereqError("There are no possible target groups",
13418 errors.ECODE_INVAL)
13420 def BuildHooksEnv(self):
13421 """Build hooks env.
13424 assert self.target_uuids
13426 env = {
13427 "TARGET_GROUPS": " ".join(self.target_uuids),
13430 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13432 return env
13434 def BuildHooksNodes(self):
13435 """Build hooks nodes.
13438 mn = self.cfg.GetMasterNode()
13439 return ([mn], [mn])
13441 def Exec(self, feedback_fn):
13442 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13444 assert instances == [self.op.instance_name], "Instance not locked"
13446 req = iallocator.IAReqGroupChange(instances=instances,
13447 target_groups=list(self.target_uuids))
13448 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
13450 ial.Run(self.op.iallocator)
13452 if not ial.success:
13453 raise errors.OpPrereqError("Can't compute solution for changing group of"
13454 " instance '%s' using iallocator '%s': %s" %
13455 (self.op.instance_name, self.op.iallocator,
13456 ial.info), errors.ECODE_NORES)
13458 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13460 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13461 " instance '%s'", len(jobs), self.op.instance_name)
13463 return ResultWithJobs(jobs)
13466 class LUBackupQuery(NoHooksLU):
13467 """Query the exports list
13472 def CheckArguments(self):
13473 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13474 ["node", "export"], self.op.use_locking)
13476 def ExpandNames(self):
13477 self.expq.ExpandNames(self)
13479 def DeclareLocks(self, level):
13480 self.expq.DeclareLocks(self, level)
13482 def Exec(self, feedback_fn):
13483 result = {}
13485 for (node, expname) in self.expq.OldStyleQuery(self):
13486 if expname is None:
13487 result[node] = False
13488 else:
13489 result.setdefault(node, []).append(expname)
13491 return result
13494 class _ExportQuery(_QueryBase):
13495 FIELDS = query.EXPORT_FIELDS
13497 #: The node name is not a unique key for this query
13498 SORT_FIELD = "node"
13500 def ExpandNames(self, lu):
13501 lu.needed_locks = {}
13503 # The following variables interact with _QueryBase._GetNames
13504 if self.names:
13505 self.wanted = _GetWantedNodes(lu, self.names)
13506 else:
13507 self.wanted = locking.ALL_SET
13509 self.do_locking = self.use_locking
13511 if self.do_locking:
13512 lu.share_locks = _ShareAll()
13513 lu.needed_locks = {
13514 locking.LEVEL_NODE: self.wanted,
13515 }
13517 def DeclareLocks(self, lu, level):
13518 pass
13520 def _GetQueryData(self, lu):
13521 """Computes the list of nodes and their attributes.
13524 # Locking is not used
13526 assert not (compat.any(lu.glm.is_owned(level)
13527 for level in locking.LEVELS
13528 if level != locking.LEVEL_CLUSTER) or
13529 self.do_locking or self.use_locking)
13531 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13533 result = []
13535 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13536 if nres.fail_msg:
13537 result.append((node, None))
13538 else:
13539 result.extend((node, expname) for expname in nres.payload)
13541 return result
13544 class LUBackupPrepare(NoHooksLU):
13545 """Prepares an instance for an export and returns useful information.
13550 def ExpandNames(self):
13551 self._ExpandAndLockInstance()
13553 def CheckPrereq(self):
13554 """Check prerequisites.
13557 instance_name = self.op.instance_name
13559 self.instance = self.cfg.GetInstanceInfo(instance_name)
13560 assert self.instance is not None, \
13561 "Cannot retrieve locked instance %s" % self.op.instance_name
13562 _CheckNodeOnline(self, self.instance.primary_node)
13564 self._cds = _GetClusterDomainSecret()
13566 def Exec(self, feedback_fn):
13567 """Prepares an instance for an export.
13570 instance = self.instance
13572 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13573 salt = utils.GenerateSecret(8)
13575 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13576 result = self.rpc.call_x509_cert_create(instance.primary_node,
13577 constants.RIE_CERT_VALIDITY)
13578 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13580 (name, cert_pem) = result.payload
13582 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13583 cert_pem)
13585 return {
13586 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13587 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13589 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13595 class LUBackupExport(LogicalUnit):
13596 """Export an instance to an image in the cluster.
13599 HPATH = "instance-export"
13600 HTYPE = constants.HTYPE_INSTANCE
13601 REQ_BGL = False
13603 def CheckArguments(self):
13604 """Check the arguments.
13607 self.x509_key_name = self.op.x509_key_name
13608 self.dest_x509_ca_pem = self.op.destination_x509_ca
13610 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13611 if not self.x509_key_name:
13612 raise errors.OpPrereqError("Missing X509 key name for encryption",
13613 errors.ECODE_INVAL)
13615 if not self.dest_x509_ca_pem:
13616 raise errors.OpPrereqError("Missing destination X509 CA",
13617 errors.ECODE_INVAL)
13619 def ExpandNames(self):
13620 self._ExpandAndLockInstance()
13622 # Lock all nodes for local exports
13623 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13624 # FIXME: lock only instance primary and destination node
13626 # Sad but true, for now we have to lock all nodes, as we don't know where
13627 # the previous export might be, and in this LU we search for it and
13628 # remove it from its current node. In the future we could fix this by:
13629 # - making a tasklet to search (share-lock all), then create the
13630 # new one, then one to remove, after
13631 # - removing the removal operation altogether
13632 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13634 def DeclareLocks(self, level):
13635 """Last minute lock declaration."""
13636 # All nodes are locked anyway, so nothing to do here.
13638 def BuildHooksEnv(self):
13639 """Build hooks env.
13641 This will run on the master, primary node and target node.
13643 """
13644 env = {
13645 "EXPORT_MODE": self.op.mode,
13646 "EXPORT_NODE": self.op.target_node,
13647 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13648 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13649 # TODO: Generic function for boolean env variables
13650 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13653 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13655 return env
13657 def BuildHooksNodes(self):
13658 """Build hooks nodes.
13661 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13663 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13664 nl.append(self.op.target_node)
13666 return (nl, nl)
13668 def CheckPrereq(self):
13669 """Check prerequisites.
13671 This checks that the instance and node names are valid.
13673 """
13674 instance_name = self.op.instance_name
13676 self.instance = self.cfg.GetInstanceInfo(instance_name)
13677 assert self.instance is not None, \
13678 "Cannot retrieve locked instance %s" % self.op.instance_name
13679 _CheckNodeOnline(self, self.instance.primary_node)
13681 if (self.op.remove_instance and
13682 self.instance.admin_state == constants.ADMINST_UP and
13683 not self.op.shutdown):
13684 raise errors.OpPrereqError("Can not remove instance without shutting it"
13685 " down before", errors.ECODE_STATE)
13687 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13688 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13689 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13690 assert self.dst_node is not None
13692 _CheckNodeOnline(self, self.dst_node.name)
13693 _CheckNodeNotDrained(self, self.dst_node.name)
13695 self._cds = None
13696 self.dest_disk_info = None
13697 self.dest_x509_ca = None
13699 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13700 self.dst_node = None
13702 if len(self.op.target_node) != len(self.instance.disks):
13703 raise errors.OpPrereqError(("Received destination information for %s"
13704 " disks, but instance %s has %s disks") %
13705 (len(self.op.target_node), instance_name,
13706 len(self.instance.disks)),
13707 errors.ECODE_INVAL)
13709 cds = _GetClusterDomainSecret()
13711 # Check X509 key name
13712 try:
13713 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13714 except (TypeError, ValueError), err:
13715 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
13716 errors.ECODE_INVAL)
13718 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13719 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13720 errors.ECODE_INVAL)
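# Note: the (key_name, hmac_digest, hmac_salt) triple verified here is the
# "x509_key_name" value produced by LUBackupPrepare.Exec above; since the
# HMAC is keyed with the cluster domain secret, a valid digest proves the
# key name was issued by a cluster holding the same secret.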
13722 # Load and verify CA
13723 try:
13724 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13725 except OpenSSL.crypto.Error, err:
13726 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13727 (err, ), errors.ECODE_INVAL)
13729 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13730 if errcode is not None:
13731 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13732 (msg, ), errors.ECODE_INVAL)
13734 self.dest_x509_ca = cert
13736 # Verify target information
13737 disk_info = []
13738 for idx, disk_data in enumerate(self.op.target_node):
13739 try:
13740 (host, port, magic) = \
13741 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13742 except errors.GenericError, err:
13743 raise errors.OpPrereqError("Target info for disk %s: %s" %
13744 (idx, err), errors.ECODE_INVAL)
13746 disk_info.append((host, port, magic))
13748 assert len(disk_info) == len(self.op.target_node)
13749 self.dest_disk_info = disk_info
13751 else:
13752 raise errors.ProgrammerError("Unhandled export mode %r" %
13753 self.op.mode)
13755 # instance disk type verification
13756 # TODO: Implement export support for file-based disks
13757 for disk in self.instance.disks:
13758 if disk.dev_type == constants.LD_FILE:
13759 raise errors.OpPrereqError("Export not supported for instances with"
13760 " file-based disks", errors.ECODE_INVAL)
13762 def _CleanupExports(self, feedback_fn):
13763 """Removes exports of current instance from all other nodes.
13765 If an instance in a cluster with nodes A..D was exported to node C, its
13766 exports will be removed from the nodes A, B and D.
13768 """
13769 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13771 nodelist = self.cfg.GetNodeList()
13772 nodelist.remove(self.dst_node.name)
13774 # on one-node clusters nodelist will be empty after the removal
13775 # if we proceed the backup would be removed because OpBackupQuery
13776 # substitutes an empty list with the full cluster node list.
13777 iname = self.instance.name
13778 if nodelist:
13779 feedback_fn("Removing old exports for instance %s" % iname)
13780 exportlist = self.rpc.call_export_list(nodelist)
13781 for node in exportlist:
13782 if exportlist[node].fail_msg:
13783 continue
13784 if iname in exportlist[node].payload:
13785 msg = self.rpc.call_export_remove(node, iname).fail_msg
13786 if msg:
13787 self.LogWarning("Could not remove older export for instance %s"
13788 " on node %s: %s", iname, node, msg)
13790 def Exec(self, feedback_fn):
13791 """Export an instance to an image in the cluster.
13794 assert self.op.mode in constants.EXPORT_MODES
13796 instance = self.instance
13797 src_node = instance.primary_node
13799 if self.op.shutdown:
13800 # shutdown the instance, but not the disks
13801 feedback_fn("Shutting down instance %s" % instance.name)
13802 result = self.rpc.call_instance_shutdown(src_node, instance,
13803 self.op.shutdown_timeout)
13804 # TODO: Maybe ignore failures if ignore_remove_failures is set
13805 result.Raise("Could not shutdown instance %s on"
13806 " node %s" % (instance.name, src_node))
13808 # set the disks ID correctly since call_instance_start needs the
13809 # correct drbd minor to create the symlinks
13810 for disk in instance.disks:
13811 self.cfg.SetDiskID(disk, src_node)
13813 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13815 if activate_disks:
13816 # Activate the instance disks if we're exporting a stopped instance
13817 feedback_fn("Activating disks for %s" % instance.name)
13818 _StartInstanceDisks(self, instance, None)
13820 try:
13821 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13822 instance)
13824 helper.CreateSnapshots()
13825 try:
13826 if (self.op.shutdown and
13827 instance.admin_state == constants.ADMINST_UP and
13828 not self.op.remove_instance):
13829 assert not activate_disks
13830 feedback_fn("Starting instance %s" % instance.name)
13831 result = self.rpc.call_instance_start(src_node,
13832 (instance, None, None), False)
13833 msg = result.fail_msg
13834 if msg:
13835 feedback_fn("Failed to start instance: %s" % msg)
13836 _ShutdownInstanceDisks(self, instance)
13837 raise errors.OpExecError("Could not start instance: %s" % msg)
13839 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13840 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13841 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13842 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13843 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13845 (key_name, _, _) = self.x509_key_name
13847 dest_ca_pem = \
13848 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13849 self.dest_x509_ca)
13851 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13852 key_name, dest_ca_pem,
13853 timeouts)
13854 finally:
13855 helper.Cleanup()
13857 # Check for backwards compatibility
13858 assert len(dresults) == len(instance.disks)
13859 assert compat.all(isinstance(i, bool) for i in dresults), \
13860 "Not all results are boolean: %r" % dresults
13864 feedback_fn("Deactivating disks for %s" % instance.name)
13865 _ShutdownInstanceDisks(self, instance)
13867 if not (compat.all(dresults) and fin_resu):
13868 failures = []
13869 if not fin_resu:
13870 failures.append("export finalization")
13871 if not compat.all(dresults):
13872 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13873 if not dsk)
13874 failures.append("disk export: disk(s) %s" % fdsk)
13876 raise errors.OpExecError("Export failed, errors in %s" %
13877 utils.CommaJoin(failures))
13879 # At this point, the export was successful, we can cleanup/finish
13881 # Remove instance if requested
13882 if self.op.remove_instance:
13883 feedback_fn("Removing instance %s" % instance.name)
13884 _RemoveInstance(self, feedback_fn, instance,
13885 self.op.ignore_remove_failures)
13887 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13888 self._CleanupExports(feedback_fn)
13890 return fin_resu, dresults
13893 class LUBackupRemove(NoHooksLU):
13894 """Remove exports related to the named instance.
13899 def ExpandNames(self):
13900 self.needed_locks = {}
13901 # We need all nodes to be locked in order for RemoveExport to work, but we
13902 # don't need to lock the instance itself, as nothing will happen to it (and
13903 # we can remove exports also for a removed instance)
13904 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13906 def Exec(self, feedback_fn):
13907 """Remove any export.
13910 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13911 # If the instance was not found we'll try with the name that was passed in.
13912 # This will only work if it was an FQDN, though.
13913 fqdn_warn = False
13914 if not instance_name:
13915 fqdn_warn = True
13916 instance_name = self.op.instance_name
13918 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13919 exportlist = self.rpc.call_export_list(locked_nodes)
13920 found = False
13921 for node in exportlist:
13922 msg = exportlist[node].fail_msg
13923 if msg:
13924 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13926 if instance_name in exportlist[node].payload:
13927 found = True
13928 result = self.rpc.call_export_remove(node, instance_name)
13929 msg = result.fail_msg
13930 if msg:
13931 logging.error("Could not remove export for instance %s"
13932 " on node %s: %s", instance_name, node, msg)
13934 if fqdn_warn and not found:
13935 feedback_fn("Export not found. If trying to remove an export belonging"
13936 " to a deleted instance please use its Fully Qualified"
13940 class LUGroupAdd(LogicalUnit):
13941 """Logical unit for creating node groups.
13944 HPATH = "group-add"
13945 HTYPE = constants.HTYPE_GROUP
13946 REQ_BGL = False
13948 def ExpandNames(self):
13949 # We need the new group's UUID here so that we can create and acquire the
13950 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13951 # that it should not check whether the UUID exists in the configuration.
13952 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13953 self.needed_locks = {}
13954 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13956 def CheckPrereq(self):
13957 """Check prerequisites.
13959 This checks that the given group name is not an existing node group
13960 already.
13962 """
13963 try:
13964 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13965 except errors.OpPrereqError:
13966 pass
13967 else:
13968 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13969 " node group (UUID: %s)" %
13970 (self.op.group_name, existing_uuid),
13971 errors.ECODE_EXISTS)
13973 if self.op.ndparams:
13974 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13976 if self.op.hv_state:
13977 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13978 else:
13979 self.new_hv_state = None
13981 if self.op.disk_state:
13982 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13983 else:
13984 self.new_disk_state = None
13986 if self.op.diskparams:
13987 for templ in constants.DISK_TEMPLATES:
13988 if templ in self.op.diskparams:
13989 utils.ForceDictType(self.op.diskparams[templ],
13990 constants.DISK_DT_TYPES)
13991 self.new_diskparams = self.op.diskparams
13992 try:
13993 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13994 except errors.OpPrereqError, err:
13995 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
13996 errors.ECODE_INVAL)
13997 else:
13998 self.new_diskparams = {}
14000 if self.op.ipolicy:
14001 cluster = self.cfg.GetClusterInfo()
14002 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14003 try:
14004 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14005 except errors.ConfigurationError, err:
14006 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14007 errors.ECODE_INVAL)
14009 def BuildHooksEnv(self):
14010 """Build hooks env.
14014 "GROUP_NAME": self.op.group_name,
14017 def BuildHooksNodes(self):
14018 """Build hooks nodes.
14021 mn = self.cfg.GetMasterNode()
14022 return ([mn], [mn])
14024 def Exec(self, feedback_fn):
14025 """Add the node group to the cluster.
14028 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14029 uuid=self.group_uuid,
14030 alloc_policy=self.op.alloc_policy,
14031 ndparams=self.op.ndparams,
14032 diskparams=self.new_diskparams,
14033 ipolicy=self.op.ipolicy,
14034 hv_state_static=self.new_hv_state,
14035 disk_state_static=self.new_disk_state)
14037 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14038 del self.remove_locks[locking.LEVEL_NODEGROUP]
14041 class LUGroupAssignNodes(NoHooksLU):
14042 """Logical unit for assigning nodes to groups.
14047 def ExpandNames(self):
14048 # These raise errors.OpPrereqError on their own:
14049 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14050 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14052 # We want to lock all the affected nodes and groups. We have readily
14053 # available the list of nodes, and the *destination* group. To gather the
14054 # list of "source" groups, we need to fetch node information later on.
14055 self.needed_locks = {
14056 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14057 locking.LEVEL_NODE: self.op.nodes,
14058 }
14060 def DeclareLocks(self, level):
14061 if level == locking.LEVEL_NODEGROUP:
14062 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14064 # Try to get all affected nodes' groups without having the group or node
14065 # lock yet. Needs verification later in the code flow.
14066 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14068 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14070 def CheckPrereq(self):
14071 """Check prerequisites.
14074 assert self.needed_locks[locking.LEVEL_NODEGROUP]
14075 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
14076 frozenset(self.op.nodes))
14078 expected_locks = (set([self.group_uuid]) |
14079 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
14080 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
14081 if actual_locks != expected_locks:
14082 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
14083 " current groups are '%s', used to be '%s'" %
14084 (utils.CommaJoin(expected_locks),
14085 utils.CommaJoin(actual_locks)))
14087 self.node_data = self.cfg.GetAllNodesInfo()
14088 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14089 instance_data = self.cfg.GetAllInstancesInfo()
14091 if self.group is None:
14092 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14093 (self.op.group_name, self.group_uuid))
14095 (new_splits, previous_splits) = \
14096 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
14097 for node in self.op.nodes],
14098 self.node_data, instance_data)
14100 if new_splits:
14101 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
14103 if not self.op.force:
14104 raise errors.OpExecError("The following instances get split by this"
14105 " change and --force was not given: %s" %
14108 self.LogWarning("This operation will split the following instances: %s",
14111 if previous_splits:
14112 self.LogWarning("In addition, these already-split instances continue"
14113 " to be split across groups: %s",
14114 utils.CommaJoin(utils.NiceSort(previous_splits)))
14116 def Exec(self, feedback_fn):
14117 """Assign nodes to a new group.
14120 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
14122 self.cfg.AssignGroupNodes(mods)
14124 @staticmethod
14125 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
14126 """Check for split instances after a node assignment.
14128 This method considers a series of node assignments as an atomic operation,
14129 and returns information about split instances after applying the set of
14130 changes.
14132 In particular, it returns information about newly split instances, and
14133 instances that were already split, and remain so after the change.
14135 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
14136 considered.
14138 @type changes: list of (node_name, new_group_uuid) pairs.
14139 @param changes: list of node assignments to consider.
14140 @param node_data: a dict with data for all nodes
14141 @param instance_data: a dict with all instances to consider
14142 @rtype: a two-tuple
14143 @return: a list of instances that were previously okay and result split as a
14144 consequence of this change, and a list of instances that were previously
14145 split and this change does not fix.
14147 """
14148 changed_nodes = dict((node, group) for node, group in changes
14149 if node_data[node].group != group)
14151 all_split_instances = set()
14152 previously_split_instances = set()
14154 def InstanceNodes(instance):
14155 return [instance.primary_node] + list(instance.secondary_nodes)
14157 for inst in instance_data.values():
14158 if inst.disk_template not in constants.DTS_INT_MIRROR:
14159 continue
14161 instance_nodes = InstanceNodes(inst)
14163 if len(set(node_data[node].group for node in instance_nodes)) > 1:
14164 previously_split_instances.add(inst.name)
14166 if len(set(changed_nodes.get(node, node_data[node].group)
14167 for node in instance_nodes)) > 1:
14168 all_split_instances.add(inst.name)
14170 return (list(all_split_instances - previously_split_instances),
14171 list(previously_split_instances & all_split_instances))
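# Example (values illustrative): for a DRBD instance on nodes ("n1", "n2"),
# both currently in group "g1", a change list moving only n2 to "g2" yields
# changed_nodes = {"n2": "g2"}; the projected groups of the instance become
# {"g1", "g2"}, so it is returned in the first list (newly split). Had its
# nodes already spanned two groups before the change, it would be returned
# in the second list instead (split before, still split after).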
14174 class _GroupQuery(_QueryBase):
14175 FIELDS = query.GROUP_FIELDS
14177 def ExpandNames(self, lu):
14178 lu.needed_locks = {}
14180 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
14181 self._cluster = lu.cfg.GetClusterInfo()
14182 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
14184 if not self.names:
14185 self.wanted = [name_to_uuid[name]
14186 for name in utils.NiceSort(name_to_uuid.keys())]
14187 else:
14188 # Accept names to be either names or UUIDs.
14189 missing = []
14190 self.wanted = []
14191 all_uuid = frozenset(self._all_groups.keys())
14193 for name in self.names:
14194 if name in all_uuid:
14195 self.wanted.append(name)
14196 elif name in name_to_uuid:
14197 self.wanted.append(name_to_uuid[name])
14198 else:
14199 missing.append(name)
14201 if missing:
14202 raise errors.OpPrereqError("Some groups do not exist: %s" %
14203 utils.CommaJoin(missing),
14204 errors.ECODE_NOENT)
14206 def DeclareLocks(self, lu, level):
14207 pass
14209 def _GetQueryData(self, lu):
14210 """Computes the list of node groups and their attributes.
14213 do_nodes = query.GQ_NODE in self.requested_data
14214 do_instances = query.GQ_INST in self.requested_data
14216 group_to_nodes = None
14217 group_to_instances = None
14219 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
14220 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
14221 # latter GetAllInstancesInfo() is not enough, for we have to go through
14222 # instance->node. Hence, we will need to process nodes even if we only need
14223 # instance information.
14224 if do_nodes or do_instances:
14225 all_nodes = lu.cfg.GetAllNodesInfo()
14226 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
14227 node_to_group = {}
14229 for node in all_nodes.values():
14230 if node.group in group_to_nodes:
14231 group_to_nodes[node.group].append(node.name)
14232 node_to_group[node.name] = node.group
14234 if do_instances:
14235 all_instances = lu.cfg.GetAllInstancesInfo()
14236 group_to_instances = dict((uuid, []) for uuid in self.wanted)
14238 for instance in all_instances.values():
14239 node = instance.primary_node
14240 if node in node_to_group:
14241 group_to_instances[node_to_group[node]].append(instance.name)
14243 if not do_nodes:
14244 # Do not pass on node information if it was not requested.
14245 group_to_nodes = None
14247 return query.GroupQueryData(self._cluster,
14248 [self._all_groups[uuid]
14249 for uuid in self.wanted],
14250 group_to_nodes, group_to_instances,
14251 query.GQ_DISKPARAMS in self.requested_data)
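# Note (shapes inferred from the construction above): when node and/or
# instance data is requested, the two maps passed to GroupQueryData look
# like {group_uuid: [node_name, ...]} and {group_uuid: [instance_name, ...]};
# either map is None when the corresponding GQ_* data was not requested.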
14254 class LUGroupQuery(NoHooksLU):
14255 """Logical unit for querying node groups.
14260 def CheckArguments(self):
14261 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14262 self.op.output_fields, False)
14264 def ExpandNames(self):
14265 self.gq.ExpandNames(self)
14267 def DeclareLocks(self, level):
14268 self.gq.DeclareLocks(self, level)
14270 def Exec(self, feedback_fn):
14271 return self.gq.OldStyleQuery(self)
14274 class LUGroupSetParams(LogicalUnit):
14275 """Modifies the parameters of a node group.
14278 HPATH = "group-modify"
14279 HTYPE = constants.HTYPE_GROUP
14280 REQ_BGL = False
14282 def CheckArguments(self):
14283 all_changes = [
14284 self.op.ndparams,
14285 self.op.diskparams,
14286 self.op.alloc_policy,
14287 self.op.hv_state,
14288 self.op.disk_state,
14289 self.op.ipolicy,
14290 ]
14292 if all_changes.count(None) == len(all_changes):
14293 raise errors.OpPrereqError("Please pass at least one modification",
14294 errors.ECODE_INVAL)
14296 def ExpandNames(self):
14297 # This raises errors.OpPrereqError on its own:
14298 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14300 self.needed_locks = {
14301 locking.LEVEL_INSTANCE: [],
14302 locking.LEVEL_NODEGROUP: [self.group_uuid],
14303 }
14305 self.share_locks[locking.LEVEL_INSTANCE] = 1
14307 def DeclareLocks(self, level):
14308 if level == locking.LEVEL_INSTANCE:
14309 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14311 # Lock instances optimistically, needs verification once group lock has
14312 # been acquired
14313 self.needed_locks[locking.LEVEL_INSTANCE] = \
14314 self.cfg.GetNodeGroupInstances(self.group_uuid)
14316 @staticmethod
14317 def _UpdateAndVerifyDiskParams(old, new):
14318 """Updates and verifies disk parameters.
14321 new_params = _GetUpdatedParams(old, new)
14322 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14323 return new_params
14325 def CheckPrereq(self):
14326 """Check prerequisites.
14329 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14331 # Check if locked instances are still correct
14332 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14334 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14335 cluster = self.cfg.GetClusterInfo()
14337 if self.group is None:
14338 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14339 (self.op.group_name, self.group_uuid))
14341 if self.op.ndparams:
14342 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14343 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14344 self.new_ndparams = new_ndparams
14346 if self.op.diskparams:
14347 diskparams = self.group.diskparams
14348 uavdp = self._UpdateAndVerifyDiskParams
14349 # For each disktemplate subdict update and verify the values
14350 new_diskparams = dict((dt,
14351 uavdp(diskparams.get(dt, {}),
14352 self.op.diskparams[dt]))
14353 for dt in constants.DISK_TEMPLATES
14354 if dt in self.op.diskparams)
14355 # As we've all subdicts of diskparams ready, lets merge the actual
14356 # dict with all updated subdicts
14357 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14359 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14360 except errors.OpPrereqError, err:
14361 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14362 errors.ECODE_INVAL)
14364 if self.op.hv_state:
14365 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14366 self.group.hv_state_static)
14368 if self.op.disk_state:
14369 self.new_disk_state = \
14370 _MergeAndVerifyDiskState(self.op.disk_state,
14371 self.group.disk_state_static)
14373 if self.op.ipolicy:
14374 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14375 self.op.ipolicy,
14376 group_policy=True)
14378 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14379 inst_filter = lambda inst: inst.name in owned_instances
14380 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14381 gmi = ganeti.masterd.instance
14382 violations = \
14383 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14384 self.group),
14385 new_ipolicy, instances)
14387 if violations:
14388 self.LogWarning("After the ipolicy change the following instances"
14389 " violate them: %s",
14390 utils.CommaJoin(violations))
14392 def BuildHooksEnv(self):
14393 """Build hooks env.
14397 "GROUP_NAME": self.op.group_name,
14398 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14401 def BuildHooksNodes(self):
14402 """Build hooks nodes.
14405 mn = self.cfg.GetMasterNode()
14406 return ([mn], [mn])
14408 def Exec(self, feedback_fn):
14409 """Modifies the node group.
14414 if self.op.ndparams:
14415 self.group.ndparams = self.new_ndparams
14416 result.append(("ndparams", str(self.group.ndparams)))
14418 if self.op.diskparams:
14419 self.group.diskparams = self.new_diskparams
14420 result.append(("diskparams", str(self.group.diskparams)))
14422 if self.op.alloc_policy:
14423 self.group.alloc_policy = self.op.alloc_policy
14425 if self.op.hv_state:
14426 self.group.hv_state_static = self.new_hv_state
14428 if self.op.disk_state:
14429 self.group.disk_state_static = self.new_disk_state
14431 if self.op.ipolicy:
14432 self.group.ipolicy = self.new_ipolicy
14434 self.cfg.Update(self.group, feedback_fn)
14436 return result
14438 class LUGroupRemove(LogicalUnit):
14439 HPATH = "group-remove"
14440 HTYPE = constants.HTYPE_GROUP
14441 REQ_BGL = False
14443 def ExpandNames(self):
14444 # This will raise errors.OpPrereqError on its own:
14445 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14446 self.needed_locks = {
14447 locking.LEVEL_NODEGROUP: [self.group_uuid],
14448 }
14450 def CheckPrereq(self):
14451 """Check prerequisites.
14453 This checks that the given group name exists as a node group, that it is
14454 empty (i.e., contains no nodes), and that it is not the last group of the
14455 cluster.
14457 """
14458 # Verify that the group is empty.
14459 group_nodes = [node.name
14460 for node in self.cfg.GetAllNodesInfo().values()
14461 if node.group == self.group_uuid]
14463 if group_nodes:
14464 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14466 (self.op.group_name,
14467 utils.CommaJoin(utils.NiceSort(group_nodes))),
14468 errors.ECODE_STATE)
14470 # Verify the cluster would not be left group-less.
14471 if len(self.cfg.GetNodeGroupList()) == 1:
14472 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
14473 " removed" % self.op.group_name,
14474 errors.ECODE_STATE)
14476 def BuildHooksEnv(self):
14477 """Build hooks env.
14481 "GROUP_NAME": self.op.group_name,
14484 def BuildHooksNodes(self):
14485 """Build hooks nodes.
14488 mn = self.cfg.GetMasterNode()
14489 return ([mn], [mn])
14491 def Exec(self, feedback_fn):
14492 """Remove the node group.
14496 self.cfg.RemoveNodeGroup(self.group_uuid)
14497 except errors.ConfigurationError:
14498 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14499 (self.op.group_name, self.group_uuid))
14501 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14504 class LUGroupRename(LogicalUnit):
14505 HPATH = "group-rename"
14506 HTYPE = constants.HTYPE_GROUP
14507 REQ_BGL = False
14509 def ExpandNames(self):
14510 # This raises errors.OpPrereqError on its own:
14511 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14513 self.needed_locks = {
14514 locking.LEVEL_NODEGROUP: [self.group_uuid],
14515 }
14517 def CheckPrereq(self):
14518 """Check prerequisites.
14520 Ensures requested new name is not yet used.
14522 """
14523 try:
14524 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14525 except errors.OpPrereqError:
14526 pass
14527 else:
14528 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14529 " node group (UUID: %s)" %
14530 (self.op.new_name, new_name_uuid),
14531 errors.ECODE_EXISTS)
14533 def BuildHooksEnv(self):
14534 """Build hooks env.
14538 "OLD_NAME": self.op.group_name,
14539 "NEW_NAME": self.op.new_name,
14542 def BuildHooksNodes(self):
14543 """Build hooks nodes.
14546 mn = self.cfg.GetMasterNode()
14548 all_nodes = self.cfg.GetAllNodesInfo()
14549 all_nodes.pop(mn, None)
14551 run_nodes = [mn]
14552 run_nodes.extend(node.name for node in all_nodes.values()
14553 if node.group == self.group_uuid)
14555 return (run_nodes, run_nodes)
14557 def Exec(self, feedback_fn):
14558 """Rename the node group.
14561 group = self.cfg.GetNodeGroup(self.group_uuid)
14563 if group is None:
14564 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14565 (self.op.group_name, self.group_uuid))
14567 group.name = self.op.new_name
14568 self.cfg.Update(group, feedback_fn)
14570 return self.op.new_name


class LUGroupEvacuate(LogicalUnit):
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) cannot be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
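
    # All locks are acquired in shared mode: this LU only computes and
    # submits the evacuation jobs; the actual instance moves happen in those
    # jobs, which take their own (exclusive) locks.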
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
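      # LOCKS_APPEND makes _LockInstancesNodes() add those instances' nodes
      # to the node lock list instead of replacing it, so they are combined
      # with the group member nodes added below.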
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups
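
    # The instance and group locks were acquired optimistically in
    # DeclareLocks and must be re-verified now that they are actually held;
    # the configuration may have changed in the meantime.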
    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              owned_groups, owned_nodes, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    req = iallocator.IAReqGroupChange(instances=instances,
                                      target_groups=self.target_uuids)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)
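
    # Run the configured iallocator script; on return, ial.success, ial.info
    # and ial.result describe the outcome.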
    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)


class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}

    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      lock_level = locking.LEVEL_NODE
      lock_name = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      lock_level = locking.LEVEL_INSTANCE
      lock_name = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
      lock_level = locking.LEVEL_NODEGROUP
      lock_name = self.group_uuid
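    # For cluster tags there is no per-object lock to take (see the FIXME
    # below about the BGL).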
    else:
      lock_level = None
      lock_name = None

    if lock_level and getattr(self.op, "use_locking", True):
      self.needed_locks[lock_level] = lock_name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)
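
    # The accepted connection remains usable even though the socket's
    # filesystem entry has just been removed along with the directory.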
    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
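        # (idx is zero-based, so idx + 1 is the running message count)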
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
                        constants.IALLOCATOR_MODE_MULTI_ALLOC):
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      req = iallocator.IAReqInstanceAlloc(name=self.op.name,
                                          memory=self.op.memory,
                                          disks=self.op.disks,
                                          disk_template=self.op.disk_template,
                                          os=self.op.os,
                                          tags=self.op.tags,
                                          nics=self.op.nics,
                                          vcpus=self.op.vcpus,
                                          spindle_use=self.op.spindle_use,
                                          hypervisor=self.op.hypervisor)
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      req = iallocator.IAReqRelocate(name=self.op.name,
                                     relocate_from=list(self.relocate_from))
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      req = iallocator.IAReqGroupChange(instances=self.op.instances,
                                        target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      req = iallocator.IAReqNodeEvac(instances=self.op.instances,
                                     evac_mode=self.op.evac_mode)
    elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
      disk_template = self.op.disk_template
      insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
                                             memory=self.op.memory,
                                             disks=self.op.disks,
                                             disk_template=disk_template,
                                             os=self.op.os,
                                             tags=self.op.tags,
                                             nics=self.op.nics,
                                             vcpus=self.op.vcpus,
                                             spindle_use=self.op.spindle_use,
                                             hypervisor=self.op.hypervisor)
               for idx in range(self.op.count)]
      req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    ial = iallocator.IAllocator(self.cfg, self.rpc, req)
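    # DIR_IN only renders the request text that would be sent to the
    # allocator; DIR_OUT actually runs the named allocator and returns its
    # response.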
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXPORT: _ExportQuery,
  }
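
# Every query resource reachable via an opcode must have exactly one
# implementation above; the assertion below checks that the two sets match.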
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)