# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import copy
import itertools
import logging
import re

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import rpc
from ganeti import runtime
from ganeti import pathutils
from ganeti.masterd import iallocator

import ganeti.masterd.instance # pylint: disable=W0611


INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))
80 """Data container for LU results with jobs.
82 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
83 by L{mcpu._ProcessResult}. The latter will then submit the jobs
84 contained in the C{jobs} attribute and include the job IDs in the opcode
88 def __init__(self, jobs, **kwargs):
89 """Initializes this class.
91 Additional return values can be specified as keyword arguments.
93 @type jobs: list of lists of L{opcode.OpCode}
94 @param jobs: A list of lists of opcode objects
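
# Illustrative sketch (not part of the original module): an LU whose Exec
# wants to spawn follow-up jobs would return a ResultWithJobs, e.g.:
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpClusterVerifyConfig()]]  # one job with one opcode
#     return ResultWithJobs(jobs, result="additional-value")
#
# mcpu._ProcessResult then submits the jobs and merges their IDs into the
# opcode result.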


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. "No nodes" should be represented
      as an empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and "could
    # be a function" warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we really have been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]
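
# Illustrative sketch (not from the original source): a minimal LU combining
# _ExpandAndLockInstance with _LockInstancesNodes would declare its locks as:
#
#   class LUInstanceExample(NoHooksLU):  # hypothetical LU, for illustration
#     def ExpandNames(self):
#       self._ExpandAndLockInstance()
#       self.needed_locks[locking.LEVEL_NODE] = []
#       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#     def DeclareLocks(self, level):
#       if level == locking.LEVEL_NODE:
#         self._LockInstancesNodes()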


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLU")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
442 """Tasklet base class.
444 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
445 they can mix legacy code with tasklets. Locking needs to be done in the LU,
446 tasklets know nothing about locks.
448 Subclasses must follow these rules:
449 - Implement CheckPrereq
453 def __init__(self, lu):
460 def CheckPrereq(self):
461 """Check prerequisites for this tasklets.
463 This method should check whether the prerequisites for the execution of
464 this tasklet are fulfilled. It can do internode communication, but it
465 should be idempotent - no cluster or system changes are allowed.
467 The method should raise errors.OpPrereqError in case something is not
468 fulfilled. Its return value is ignored.
470 This method should also update all parameters to their canonical form if it
471 hasn't been done before.
476 def Exec(self, feedback_fn):
477 """Execute the tasklet.
479 This method should implement the actual work. It should raise
480 errors.OpExecError for failures that are somewhat dealt with in code, or
484 raise NotImplementedError
488 """Base for query utility classes.
491 #: Attribute holding field definitions
497 def __init__(self, qfilter, fields, use_locking):
498 """Initializes this class.
501 self.use_locking = use_locking
503 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
504 namefield=self.SORT_FIELD)
505 self.requested_data = self.query.RequestedData()
506 self.names = self.query.RequestedNames()
508 # Sort only if no names were requested
509 self.sort_by_name = not self.names
511 self.do_locking = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)
580 """Returns a dict declaring all lock levels shared.
583 return dict.fromkeys(locking.LEVELS, 1)
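
# Illustrative usage sketch (not from the original source): an LU that only
# reads data can acquire all its locks in shared mode, as
# LUClusterVerifyConfig.ExpandNames does later in this module:
#
#   def ExpandNames(self):
#     self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
#     self.share_locks = _ShareAll()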


def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of their children!)
  @param cfg: The config object
  @return: The annotated disk copies
  @see: L{rpc.AnnotateDiskParams}

  """
  return rpc.AnnotateDiskParams(instance.disk_template, devs,
                                cfg.GetInstanceDiskParams(instance))


def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
                              cur_group_uuid):
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  """
  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
                             primary_only=False):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  @type primary_only: boolean
  @param primary_only: Whether to check node groups for only the primary node

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups"
                               " are '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
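
# Illustrative usage sketch (not from the original source):
#
#   old = {"vcpus": 2, "memory": 512}
#   upd = {"memory": constants.VALUE_DEFAULT, "vcpus": 4}
#   _GetUpdatedParams(old, upd)
#   # -> {"vcpus": 4}; "memory" is dropped so the cluster default applies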


def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  """
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
                                 errors.ECODE_INVAL)
    if key in constants.IPOLICY_ISPECS:
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_none=use_none,
                                       use_default=use_default)
    else:
      if (not value or value == [constants.VALUE_DEFAULT] or
          value == constants.VALUE_DEFAULT):
        if group_policy:
          del ipolicy[key]
        else:
          raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster" % key,
                                     errors.ECODE_INVAL)
      else:
        if key in constants.IPOLICY_PARAMETERS:
          # FIXME: we assume all such values are float
          try:
            ipolicy[key] = float(value)
          except (TypeError, ValueError), err:
            raise errors.OpPrereqError("Invalid value for attribute"
                                       " '%s': '%s', error: %s" %
                                       (key, value, err), errors.ECODE_INVAL)
        else:
          # FIXME: we assume all others are lists; this should be redone
          ipolicy[key] = list(value)
  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
  except errors.ConfigurationError, err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                               errors.ECODE_INVAL)
  return ipolicy


def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @return: A new dict with updated and verified values

  """
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret


def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit owning the locks
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
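
# Illustrative usage sketch (not from the original source): after narrowing
# work down to a subset of nodes, an LU can drop the locks it no longer needs:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[node1, node2])
#   # ...or release every node lock it still owns:
#   _ReleaseLocks(self, locking.LEVEL_NODE)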


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
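
# Illustrative result shape (not from the original source; names are made up):
#
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
#    ("node2.example.com", "xenvg/disk0"): "inst1.example.com"}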


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except Exception, err: # pylint: disable=W0703
    lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node does not support the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    if not lu.cfg.GetNodeInfo(pnode).offline:
      ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
      ins_l.Raise("Can't contact node %s for instance information" % pnode,
                  prereq=True, ecode=errors.ECODE_ENVIRON)
      if instance.name in ins_l.payload:
        raise errors.OpPrereqError("Instance %s is running, %s" %
                                   (instance.name, msg), errors.ECODE_STATE)
    else:
      lu.LogWarning("Primary node offline, ignoring check that instance"
                    " is down")


def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Checks whether a value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
      not just 'disk')
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None, or an error message if the value does not meet the criteria

  """
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    if qualifier:
      fqn = "%s/%s" % (name, qualifier)
    else:
      fqn = name
    return ("%s value %s is not in range [%s, %s]" %
            (fqn, value, min_v, max_v))
  return None
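
# Illustrative worked example (not from the original source): assuming an
# ipolicy whose min/max for constants.ISPEC_MEM_SIZE are 128 and 4096,
# _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 8192) returns an
# error string ("... not in range [128, 4096]"), while 1024 yields None.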


def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]

  return filter(None,
                (_compute_fn(name, qualifier, ipolicy, value)
                 for (name, qualifier, value) in test_settings))


def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyInstanceSpecViolation(
  ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meet the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)


def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violate the new ipolicy but
      did not violate the old one

  """
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))
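
# Illustrative sketch (not from the original source): the result is a plain
# set difference, so only newly-violating instances are reported:
#
#   violating_new = _ComputeViolatingInstances(new_ipolicy, instances)
#   violating_old = _ComputeViolatingInstances(old_ipolicy, instances)
#   newly_violating = violating_new - violating_old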


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }
  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
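
# Illustrative worked example (not from the original source): with a
# candidate_pool_size of 10 and 3 current candidates out of 4 possible,
# mc_should becomes min(4 + 1, 10) = 5, and 3 < 5 means the new node
# should promote itself.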


def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances that violate the given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
                                                                instance))
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  ialloc = getattr(lu.op, iallocator_slot, None)

  if node is not None and ialloc is not None:
    raise errors.OpPrereqError("Do not specify both iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and ialloc is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)


def _GetDefaultIAllocator(cfg, ialloc):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type ialloc: string or None
  @param ialloc: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not ialloc:
    # Use default iallocator
    ialloc = cfg.GetDefaultIAllocator()

  if not ialloc:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return ialloc


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data
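
# Illustrative result shape (not from the original source; hypervisor and
# instance names are made up):
#
#   [("cluster", "xen-pvm", {...}),
#    ("os debian-installer", "xen-pvm", {...}),
#    ("instance inst1.example.com", "kvm", {...})]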


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors: # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
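
# Illustrative usage sketch (not from the original source): verify LUs mixing
# in _VerifyErrors report problems via _ErrorIf, e.g.:
#
#   self._ErrorIf(test, constants.CV_ECLUSTERCFG, None,
#                 "configuration problem: %s", details)
#
# A true `test` records the error (and marks the operation as bad unless the
# error code was demoted to a warning via ignore_errors).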


class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend(
      [opcodes.OpClusterVerifyGroup(group_name=group,
                                    ignore_errors=self.op.ignore_errors,
                                    depends=depends_fn())]
      for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = False

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
    self.share_locks = _ShareAll()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Retrieve all information
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1954 def Exec(self, feedback_fn):
1955 """Verify integrity of cluster, performing various test on nodes.
1959 self._feedback_fn = feedback_fn
1961 feedback_fn("* Verifying cluster config")
1963 for msg in self.cfg.VerifyConfig():
1964 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1966 feedback_fn("* Verifying cluster certificate files")
1968 for cert_filename in pathutils.ALL_CERT_FILES:
1969 (errcode, msg) = _VerifyCertificate(cert_filename)
1970 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1972 feedback_fn("* Verifying hypervisor parameters")
1974 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1975 self.all_inst_info.values()))
1977 feedback_fn("* Verifying all nodes belong to an existing group")
1979 # We do this verification here because, should this bogus circumstance
1980 # occur, it would never be caught by VerifyGroup, which only acts on
1981 # nodes/instances reachable from existing node groups.
1983 dangling_nodes = set(node.name for node in self.all_node_info.values()
1984 if node.group not in self.all_group_info)
1986 dangling_instances = {}
1987 no_node_instances = []
1989 for inst in self.all_inst_info.values():
1990 if inst.primary_node in dangling_nodes:
1991 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1992 elif inst.primary_node not in self.all_node_info:
1993 no_node_instances.append(inst.name)
1998 utils.CommaJoin(dangling_instances.get(node.name,
2000 for node in dangling_nodes]
2002 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2004 "the following nodes (and their instances) belong to a non"
2005 " existing group: %s", utils.CommaJoin(pretty_dangling))
2007 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2009 "the following instances have a non-existing primary-node:"
2010 " %s", utils.CommaJoin(no_node_instances))
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call failed (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this node is unknown to the configuration (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = \
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_nodes = set(self.group_info.members)
    group_instances = \
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)

    unlocked_nodes = \
      group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
      group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes),
                                 errors.ECODE_STATE)

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances),
                                 errors.ECODE_STATE)

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        for nname in inst.all_nodes:
          if self.all_node_info[nname].group != self.group_uuid:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
      extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("Missing node locks for LV check: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes),
                                 errors.ECODE_STATE)
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, constants.CV_ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, constants.CV_ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  constants.CV_ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, constants.CV_ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, constants.CV_ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, constants.CV_ENODELVM, node,
                 "Invalid character ':' in PV '%s' of VG '%s'",
                 pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, constants.CV_ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))

  def _VerifyNodeUserScripts(self, ninfo, nresult):
    """Check the results of user scripts presence and executability on the node

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name

    test = not constants.NV_USERSCRIPTS in nresult
    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
                  "did not return user scripts information")

    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
    if not test:
      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
                    "user scripts not present or not executable: %s" %
                    utils.CommaJoin(sorted(broken_scripts)))

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, constants.CV_ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, constants.CV_ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, constants.CV_ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, constants.CV_ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    cluster = self.cfg.GetClusterInfo()
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                            self.group_info)
    err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
    _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_state == constants.ADMINST_UP:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
               not success and not bad_snode,
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
                success and bdev_status.ldisk_status == constants.LDS_FAULTY),
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
          self.all_node_info[node].group != self.group_uuid):
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline or self.all_node_info[node].group != self.group_uuid:
        # we're skipping nodes marked offline and nodes in other groups from
        # the N+1 warning, since most likely we don't have good memory
        # information from them; we already list instances living on such
        # nodes, and that's enough warning
        continue
      #TODO(dynmem): also consider ballooning out other instances
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MINMEM]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, constants.CV_ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)

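  # Worked example (editor's note): if this node is secondary for two
  # instances whose primary is node B, both with auto_balance set and minimum
  # memories of 1024 and 2048 MiB, the (B -> [inst1, inst2]) entry in sbp
  # yields needed_mem = 3072 MiB; an error is reported if the node's reported
  # free memory is below that, since a failure of B could not be absorbed.
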
  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    # Define functions determining which nodes to consider for a file
    files2nodefn = [
      (files_all, None),
      (files_mc, lambda node: (node.master_candidate or
                               node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),
      ]

    # Build mapping from filename to list of nodes which should have the file
    nodefiles = {}
    for (files, fn) in files2nodefn:
      if fn is None:
        filenodes = nodeinfo
      else:
        filenodes = filter(fn, nodeinfo)
      nodefiles.update((filename,
                        frozenset(map(operator.attrgetter("name"), filenodes)))
                       for filename in files)

    assert set(nodefiles) == (files_all | files_mc | files_vm)

    fileinfo = dict((filename, {}) for filename in nodefiles)
    ignore_nodes = set()

    for node in nodeinfo:
      if node.offline:
        ignore_nodes.add(node.name)
        continue

      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        ignore_nodes.add(node.name)
        continue

      # Build per-checksum mapping from filename to nodes having it
      for (filename, checksum) in node_files.items():
        assert filename in nodefiles
        fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes) - ignore_nodes

      expected_nodes = nodefiles[filename] - ignore_nodes

      # Nodes missing file
      missing_file = expected_nodes - with_file

      if filename in files_opt:
        # All or no nodes
        errorif(missing_file and missing_file != expected_nodes,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no"
                " nodes (not found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

        # Warn if a node has a file it shouldn't
        unexpected = with_file - expected_nodes
        errorif(unexpected,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s should not exist on node(s) %s",
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                    enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))

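  # Illustrative sketch (editor's note, not in the original source): the
  # intermediate fileinfo mapping built above for a file present in two
  # versions. Checksums are shortened for readability.
  #
  #   fileinfo["/var/lib/ganeti/config.data"] = {
  #     "0123456789ab...": set(["node1", "node2"]),
  #     "fedcba987654...": set(["node3"]),
  #     }
  #
  # Here len(checksums) > 1, so the file is reported with two variants,
  # listing which nodes hold each version.
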
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name,
                            instance.admin_state == constants.ADMINST_UP)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, constants.CV_ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, constants.CV_ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, constants.CV_ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, constants.CV_ENODELVM, node,
               "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
                  "rpc call to node failed (instancelist): %s",
                  utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, constants.CV_ENODEHV, node,
             "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, constants.CV_ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, constants.CV_ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, constants.CV_ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # _AnnotateDiskParams makes already copies of the disks
      devonly = []
      for (inst, dev) in disks:
        (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
        self.cfg.SetDiskID(anno_disk, nname)
        devonly.append(anno_disk)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk

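  # Illustrative sketch (editor's note): return value shape for one DRBD
  # instance with a single disk spanning two nodes, per the @return
  # documentation above (names invented):
  #
  #   instdisk = {
  #     "inst1": {
  #       "node1.example.com": [(True, status_primary)],
  #       "node2.example.com": [(True, status_secondary)],
  #       },
  #     }
  #
  # Diskless instances are present with an empty inner dictionary.
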
  @staticmethod
  def _SshNodeSelector(group_uuid, all_nodes):
    """Create endless iterators for all potential SSH check hosts.

    """
    nodes = [node for node in all_nodes
             if (node.group != group_uuid and
                 not node.offline)]
    keyfunc = operator.attrgetter("group")

    return map(itertools.cycle,
               [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
                                                  keyfunc)])

  @classmethod
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
    """Choose which nodes should talk to which other nodes.

    We will make nodes contact all nodes in their group, and one node from
    every other node group.

    @warning: This algorithm has a known issue if one node group is much
      smaller than others (e.g. just one node). In such a case all other
      nodes will talk to the single node.

    """
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
    sel = cls._SshNodeSelector(group_uuid, all_nodes)

    return (online_nodes,
            dict((name, sorted([i.next() for i in sel]))
                 for name in online_nodes))

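  # Worked example (editor's note): for groups g1 = [a, b] (being verified)
  # and g2 = [c, d], _SshNodeSelector builds one cyclic iterator over g2's
  # sorted members; the result then maps each online g1 node to one g2 node,
  # e.g. {"a": ["c"], "b": ["d"]}. Every foreign group is contacted while the
  # per-node SSH check load stays roughly constant.
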
  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just ran in the post phase and their failure makes
    the output be logged in the verify output and the verification to fail.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], self.my_node_names)

  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable=R0914
    feedback_fn("* Verifying group '%s'" % self.group_info.name)

    if not self.my_node_names:
      # empty node group
      feedback_fn("* Empty node group, skipping verification")
      return True

    self.bad = False

    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    i_offline = 0 # Count of offline instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    user_scripts = []
    if self.cfg.GetUseExternalMipScript():
      user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)

    node_verify_param = {
      constants.NV_FILELIST:
        utils.UniqueSequence(filename
                             for files in filemap
                             for filename in files),
      constants.NV_NODELIST:
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
                                  self.all_node_info.values()),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      constants.NV_USERSCRIPTS: user_scripts,
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)

    # Gather OOB paths
    oob_paths = []
    for node in self.all_node_info.values():
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]
      if inst_config.admin_state == constants.ADMINST_OFFLINE:
        i_offline += 1

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)

3190 feedback_fn("* Verifying configuration file consistency")
3192 # If not all nodes are being checked, we need to make sure the master node
3193 # and a non-checked vm_capable node are in the list.
3194 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3196 vf_nvinfo = all_nvinfo.copy()
3197 vf_node_info = list(self.my_node_info.values())
3198 additional_nodes = []
3199 if master_node not in self.my_node_info:
3200 additional_nodes.append(master_node)
3201 vf_node_info.append(self.all_node_info[master_node])
3202 # Add the first vm_capable node we find which is not included,
3203 # excluding the master node (which we already have)
3204 for node in absent_nodes:
3205 nodeinfo = self.all_node_info[node]
3206 if (nodeinfo.vm_capable and not nodeinfo.offline and
3207 node != master_node):
3208 additional_nodes.append(node)
3209 vf_node_info.append(self.all_node_info[node])
3211 key = constants.NV_FILELIST
3212 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3213 {key: node_verify_param[key]},
3214 self.cfg.GetClusterName()))
3216 vf_nvinfo = all_nvinfo
3217 vf_node_info = self.my_node_info.values()
3219 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3221 feedback_fn("* Verifying node status")
3225 for node_i in node_data_list:
3227 nimg = node_image[node]
3231 feedback_fn("* Skipping offline node %s" % (node,))
3235 if node == master_node:
3237 elif node_i.master_candidate:
3238 ntype = "master candidate"
3239 elif node_i.drained:
3245 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3247 msg = all_nvinfo[node].fail_msg
3248 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3251 nimg.rpc_fail = True
3254 nresult = all_nvinfo[node].payload
3256 nimg.call_ok = self._VerifyNode(node_i, nresult)
3257 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3258 self._VerifyNodeNetwork(node_i, nresult)
3259 self._VerifyNodeUserScripts(node_i, nresult)
3260 self._VerifyOob(node_i, nresult)
3263 self._VerifyNodeLVM(node_i, nresult, vg_name)
3264 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3267 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3268 self._UpdateNodeInstances(node_i, nresult, nimg)
3269 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3270 self._UpdateNodeOS(node_i, nresult, nimg)
3272 if not nimg.os_fail:
3273 if refos_img is None:
3275 self._VerifyNodeOS(node_i, nimg, refos_img)
3276 self._VerifyNodeBridges(node_i, nresult, bridges)
3278 # Check whether all running instancies are primary for the node. (This
3279 # can no longer be done from _VerifyInstance below, since some of the
3280 # wrong instances could be from other node groups.)
3281 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3283 for inst in non_primary_inst:
3284 test = inst in self.all_inst_info
3285 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3286 "instance should not run on node %s", node_i.name)
3287 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3288 "node is running unknown instance %s", inst)
3290 for node, result in extra_lv_nvinfo.items():
3291 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3292 node_image[node], vg_name)
3294 feedback_fn("* Verifying instance status")
3295 for instance in self.my_inst_names:
3297 feedback_fn("* Verifying instance %s" % instance)
3298 inst_config = self.my_inst_info[instance]
3299 self._VerifyInstance(instance, inst_config, node_image,
3301 inst_nodes_offline = []
3303 pnode = inst_config.primary_node
3304 pnode_img = node_image[pnode]
3305 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3306 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3307 " primary node failed", instance)
3309 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3311 constants.CV_EINSTANCEBADNODE, instance,
3312 "instance is marked as running and lives on offline node %s",
3313 inst_config.primary_node)
3315 # If the instance is non-redundant we cannot survive losing its primary
3316 # node, so we are not N+1 compliant. On the other hand we have no disk
3317 # templates with more than one secondary so that situation is not well
3319 # FIXME: does not support file-backed instances
3320 if not inst_config.secondary_nodes:
3321 i_non_redundant.append(instance)
3323 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3324 constants.CV_EINSTANCELAYOUT,
3325 instance, "instance has multiple secondary nodes: %s",
3326 utils.CommaJoin(inst_config.secondary_nodes),
3327 code=self.ETYPE_WARNING)
3329 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3330 pnode = inst_config.primary_node
3331 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3332 instance_groups = {}
3334 for node in instance_nodes:
3335 instance_groups.setdefault(self.all_node_info[node].group,
3339 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3340 # Sort so that we always list the primary node first.
3341 for group, nodes in sorted(instance_groups.items(),
3342 key=lambda (_, nodes): pnode in nodes,
3345 self._ErrorIf(len(instance_groups) > 1,
3346 constants.CV_EINSTANCESPLITGROUPS,
3347 instance, "instance has primary and secondary nodes in"
3348 " different groups: %s", utils.CommaJoin(pretty_list),
3349 code=self.ETYPE_WARNING)
3351 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3352 i_non_a_balanced.append(instance)
3354 for snode in inst_config.secondary_nodes:
3355 s_img = node_image[snode]
3356 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3357 snode, "instance %s, connection to secondary node failed",
3361 inst_nodes_offline.append(snode)
3363 # warn that the instance lives on offline nodes
3364 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3365 "instance has offline secondary node(s) %s",
3366 utils.CommaJoin(inst_nodes_offline))
3367 # ... or ghost/non-vm_capable nodes
3368 for node in inst_config.all_nodes:
3369 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3370 instance, "instance lives on ghost node %s", node)
3371 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3372 instance, "instance lives on non-vm_capable node %s", node)
3374 feedback_fn("* Verifying orphan volumes")
3375 reserved = utils.FieldSet(*cluster.reserved_lvs)
3377 # We will get spurious "unknown volume" warnings if any node of this group
3378 # is secondary for an instance whose primary is in another group. To avoid
3379 # them, we find these instances and add their volumes to node_vol_should.
3380 for inst in self.all_inst_info.values():
3381 for secondary in inst.secondary_nodes:
3382 if (secondary in self.my_node_info
3383 and inst.name not in self.my_inst_info):
3384 inst.MapLVsByNode(node_vol_should)
3387 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3389 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3390 feedback_fn("* Verifying N+1 Memory redundancy")
3391 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3393 feedback_fn("* Other Notes")
3395 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3396 % len(i_non_redundant))
3398 if i_non_a_balanced:
3399 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3400 % len(i_non_a_balanced))
3403 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3406 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3409 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result.

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error.
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = False

    return lu_result

class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])

class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              owned_groups, owned_nodes, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}

    nv_dict = _MapInstanceDisksToNodes(
      [inst for inst in self.instances.values()
       if inst.admin_state == constants.ADMINST_UP])

    if nv_dict:
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
          if not (lv_online or inst is None):
            res_instances.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.iteritems():
        res_missing.setdefault(inst, []).append(list(key))

    return (res_nodes, list(res_instances), res_missing)

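# Illustrative sketch (editor's note): how a caller might consume the triple
# returned above; value shapes follow the docstring, names are invented.
#
#   (bad_nodes, need_activate, missing) = lu_result
#   # bad_nodes:     {"node1": "error message from lv_list", ...}
#   # need_activate: ["inst2", ...]  (instances with offline LVs)
#   # missing:       {"inst3": [["node2", "xenvg/disk0_data"], ...], ...}
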
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE_RES: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = {
      locking.LEVEL_NODE_RES: 1,
      locking.LEVEL_INSTANCE: 0,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True, level=level)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

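  # Worked example (editor's note): for a DRBD8 disk of size 10240 MiB whose
  # data child (children[0]) records 10176 MiB, the child is grown to 10240
  # and True is returned so the caller knows to write the configuration back;
  # the metadata child is deliberately left alone.
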
3656 def Exec(self, feedback_fn):
3657 """Verify the size of cluster disks.
3660 # TODO: check child disks too
3661 # TODO: check differences in size between primary/secondary nodes
3663 for instance in self.wanted_instances:
3664 pnode = instance.primary_node
3665 if pnode not in per_node_disks:
3666 per_node_disks[pnode] = []
3667 for idx, disk in enumerate(instance.disks):
3668 per_node_disks[pnode].append((instance, idx, disk))
3670 assert not (frozenset(per_node_disks.keys()) -
3671 self.owned_locks(locking.LEVEL_NODE_RES)), \
3672 "Not owning correct locks"
3673 assert not self.owned_locks(locking.LEVEL_NODE)
3674 changed = []
3676 for node, dskl in per_node_disks.items():
3677 newl = [v[2].Copy() for v in dskl]
3678 for dsk in newl:
3679 self.cfg.SetDiskID(dsk, node)
3680 result = self.rpc.call_blockdev_getsize(node, newl)
3681 if result.fail_msg:
3682 self.LogWarning("Failure in blockdev_getsize call to node"
3683 " %s, ignoring", node)
3684 continue
3685 if len(result.payload) != len(dskl):
3686 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3687 " result.payload=%s", node, len(dskl), result.payload)
3688 self.LogWarning("Invalid result from node %s, ignoring node results",
3689 node)
3690 continue
3691 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3692 if size is None:
3693 self.LogWarning("Disk %d of instance %s did not return size"
3694 " information, ignoring", idx, instance.name)
3695 continue
3696 if not isinstance(size, (int, long)):
3697 self.LogWarning("Disk %d of instance %s did not return valid"
3698 " size information, ignoring", idx, instance.name)
3699 continue
3700 size = size >> 20 # the RPC returns bytes, the configuration stores MiB
3701 if size != disk.size:
3702 self.LogInfo("Disk %d of instance %s has mismatched size,"
3703 " correcting: recorded %d, actual %d", idx,
3704 instance.name, disk.size, size)
3705 disk.size = size
3706 self.cfg.Update(instance, feedback_fn)
3707 changed.append((instance.name, idx, size))
3708 if self._EnsureChildSizes(disk):
3709 self.cfg.Update(instance, feedback_fn)
3710 changed.append((instance.name, idx, disk.size))
3711 return changed
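# Illustrative example (not from the original source): the value returned by
# this Exec is a list of (instance_name, disk_index, new_size_in_mib) tuples,
# e.g.
#
#   [("instance1.example.com", 0, 10240), ("instance2.example.com", 1, 512)]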
3714 class LUClusterRename(LogicalUnit):
3715 """Rename the cluster.
3718 HPATH = "cluster-rename"
3719 HTYPE = constants.HTYPE_CLUSTER
3721 def BuildHooksEnv(self):
3722 """Build hooks env.
3724 """
3725 return {
3726 "OP_TARGET": self.cfg.GetClusterName(),
3727 "NEW_NAME": self.op.name,
3728 }
3730 def BuildHooksNodes(self):
3731 """Build hooks nodes.
3734 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3736 def CheckPrereq(self):
3737 """Verify that the passed name is a valid one.
3740 hostname = netutils.GetHostname(name=self.op.name,
3741 family=self.cfg.GetPrimaryIPFamily())
3743 new_name = hostname.name
3744 self.ip = new_ip = hostname.ip
3745 old_name = self.cfg.GetClusterName()
3746 old_ip = self.cfg.GetMasterIP()
3747 if new_name == old_name and new_ip == old_ip:
3748 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3749 " cluster has changed",
3750 errors.ECODE_INVAL)
3751 if new_ip != old_ip:
3752 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3753 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3754 " reachable on the network" %
3755 new_ip, errors.ECODE_NOTUNIQUE)
3757 self.op.name = new_name
3759 def Exec(self, feedback_fn):
3760 """Rename the cluster.
3763 clustername = self.op.name
3766 # shutdown the master IP
3767 master_params = self.cfg.GetMasterNetworkParameters()
3768 ems = self.cfg.GetUseExternalMipScript()
3769 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3770 master_params, ems)
3771 result.Raise("Could not disable the master role")
3773 try:
3774 cluster = self.cfg.GetClusterInfo()
3775 cluster.cluster_name = clustername
3776 cluster.master_ip = new_ip
3777 self.cfg.Update(cluster, feedback_fn)
3779 # update the known hosts file
3780 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
3781 node_list = self.cfg.GetOnlineNodeList()
3782 try:
3783 node_list.remove(master_params.name)
3784 except ValueError:
3785 pass
3786 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
3787 finally:
3788 master_params.ip = new_ip
3789 result = self.rpc.call_node_activate_master_ip(master_params.name,
3790 master_params, ems)
3791 msg = result.fail_msg
3792 if msg:
3793 self.LogWarning("Could not re-enable the master role on"
3794 " the master, please restart manually: %s", msg)
3796 return clustername
3799 def _ValidateNetmask(cfg, netmask):
3800 """Checks if a netmask is valid.
3802 @type cfg: L{config.ConfigWriter}
3803 @param cfg: The cluster configuration
3805 @param netmask: the netmask to be verified
3806 @raise errors.OpPrereqError: if the validation fails
3809 ip_family = cfg.GetPrimaryIPFamily()
3810 try:
3811 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3812 except errors.ProgrammerError:
3813 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3814 ip_family, errors.ECODE_INVAL)
3815 if not ipcls.ValidateNetmask(netmask):
3816 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3817 (netmask), errors.ECODE_INVAL)
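# Illustrative example (not from the original source): the netmask here is a
# CIDR prefix length, so on an IPv4 cluster roughly
#
#   _ValidateNetmask(cfg, 24)   # accepted
#   _ValidateNetmask(cfg, 99)   # raises errors.OpPrereqError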
3820 class LUClusterSetParams(LogicalUnit):
3821 """Change the parameters of the cluster.
3824 HPATH = "cluster-modify"
3825 HTYPE = constants.HTYPE_CLUSTER
3828 def CheckArguments(self):
3829 """Check parameters
3831 """
3832 if self.op.uid_pool:
3833 uidpool.CheckUidPool(self.op.uid_pool)
3835 if self.op.add_uids:
3836 uidpool.CheckUidPool(self.op.add_uids)
3838 if self.op.remove_uids:
3839 uidpool.CheckUidPool(self.op.remove_uids)
3841 if self.op.master_netmask is not None:
3842 _ValidateNetmask(self.cfg, self.op.master_netmask)
3844 if self.op.diskparams:
3845 for dt_params in self.op.diskparams.values():
3846 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3847 try:
3848 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3849 except errors.OpPrereqError, err:
3850 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
3851 errors.ECODE_INVAL)
3853 def ExpandNames(self):
3854 # FIXME: in the future maybe other cluster params won't require checking on
3855 # all nodes to be modified.
3856 self.needed_locks = {
3857 locking.LEVEL_NODE: locking.ALL_SET,
3858 locking.LEVEL_INSTANCE: locking.ALL_SET,
3859 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3860 }
3861 self.share_locks = {
3862 locking.LEVEL_NODE: 1,
3863 locking.LEVEL_INSTANCE: 1,
3864 locking.LEVEL_NODEGROUP: 1,
3865 }
3867 def BuildHooksEnv(self):
3868 """Build hooks env.
3870 """
3871 return {
3872 "OP_TARGET": self.cfg.GetClusterName(),
3873 "NEW_VG_NAME": self.op.vg_name,
3874 }
3876 def BuildHooksNodes(self):
3877 """Build hooks nodes.
3880 mn = self.cfg.GetMasterNode()
3883 def CheckPrereq(self):
3884 """Check prerequisites.
3886 This checks whether the given params don't conflict and
3887 if the given volume group is valid.
3889 """
3890 if self.op.vg_name is not None and not self.op.vg_name:
3891 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3892 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3893 " instances exist", errors.ECODE_INVAL)
3895 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3896 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3897 raise errors.OpPrereqError("Cannot disable drbd helper while"
3898 " drbd-based instances exist",
3899 errors.ECODE_INVAL)
3901 node_list = self.owned_locks(locking.LEVEL_NODE)
3903 # if vg_name not None, checks given volume group on all nodes
3904 if self.op.vg_name:
3905 vglist = self.rpc.call_vg_list(node_list)
3906 for node in node_list:
3907 msg = vglist[node].fail_msg
3908 if msg:
3909 # ignoring down node
3910 self.LogWarning("Error while gathering data on node %s"
3911 " (ignoring node): %s", node, msg)
3912 continue
3913 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3914 self.op.vg_name,
3915 constants.MIN_VG_SIZE)
3916 if vgstatus:
3917 raise errors.OpPrereqError("Error on node '%s': %s" %
3918 (node, vgstatus), errors.ECODE_ENVIRON)
3920 if self.op.drbd_helper:
3921 # checks given drbd helper on all nodes
3922 helpers = self.rpc.call_drbd_helper(node_list)
3923 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3924 if ninfo.offline:
3925 self.LogInfo("Not checking drbd helper on offline node %s", node)
3926 continue
3927 msg = helpers[node].fail_msg
3928 if msg:
3929 raise errors.OpPrereqError("Error checking drbd helper on node"
3930 " '%s': %s" % (node, msg),
3931 errors.ECODE_ENVIRON)
3932 node_helper = helpers[node].payload
3933 if node_helper != self.op.drbd_helper:
3934 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3935 (node, node_helper), errors.ECODE_ENVIRON)
3937 self.cluster = cluster = self.cfg.GetClusterInfo()
3938 # validate params changes
3939 if self.op.beparams:
3940 objects.UpgradeBeParams(self.op.beparams)
3941 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3942 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3944 if self.op.ndparams:
3945 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3946 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3948 # TODO: we need a more general way to handle resetting
3949 # cluster-level parameters to default values
3950 if self.new_ndparams["oob_program"] == "":
3951 self.new_ndparams["oob_program"] = \
3952 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3954 if self.op.hv_state:
3955 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3956 self.cluster.hv_state_static)
3957 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3958 for hv, values in new_hv_state.items())
3960 if self.op.disk_state:
3961 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3962 self.cluster.disk_state_static)
3963 self.new_disk_state = \
3964 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3965 for name, values in svalues.items()))
3966 for storage, svalues in new_disk_state.items())
3968 if self.op.ipolicy:
3969 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3970 group_policy=False)
3972 all_instances = self.cfg.GetAllInstancesInfo().values()
3973 violations = set()
3974 for group in self.cfg.GetAllNodeGroupsInfo().values():
3975 instances = frozenset([inst for inst in all_instances
3976 if compat.any(node in group.members
3977 for node in inst.all_nodes)])
3978 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3979 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
3980 new = _ComputeNewInstanceViolations(ipol,
3981 new_ipolicy, instances)
3982 if new:
3983 violations.update(new)
3985 if violations:
3986 self.LogWarning("After the ipolicy change the following instances"
3987 " violate them: %s",
3988 utils.CommaJoin(utils.NiceSort(violations)))
3990 if self.op.nicparams:
3991 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3992 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3993 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3994 nic_errors = []
3996 # check all instances for consistency
3997 for instance in self.cfg.GetAllInstancesInfo().values():
3998 for nic_idx, nic in enumerate(instance.nics):
3999 params_copy = copy.deepcopy(nic.nicparams)
4000 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4002 # check parameter syntax
4003 try:
4004 objects.NIC.CheckParameterSyntax(params_filled)
4005 except errors.ConfigurationError, err:
4006 nic_errors.append("Instance %s, nic/%d: %s" %
4007 (instance.name, nic_idx, err))
4008 continue
4009 # if we're moving instances to routed, check that they have an ip
4010 target_mode = params_filled[constants.NIC_MODE]
4011 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4012 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4013 " address" % (instance.name, nic_idx))
4015 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4016 "\n".join(nic_errors), errors.ECODE_INVAL)
4018 # hypervisor list/parameters
4019 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4020 if self.op.hvparams:
4021 for hv_name, hv_dict in self.op.hvparams.items():
4022 if hv_name not in self.new_hvparams:
4023 self.new_hvparams[hv_name] = hv_dict
4024 else:
4025 self.new_hvparams[hv_name].update(hv_dict)
4027 # disk template parameters
4028 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4029 if self.op.diskparams:
4030 for dt_name, dt_params in self.op.diskparams.items():
4031 if dt_name not in self.new_diskparams:
4032 self.new_diskparams[dt_name] = dt_params
4033 else:
4034 self.new_diskparams[dt_name].update(dt_params)
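# Illustrative sketch (not from the original source) of the merge semantics
# above: a whole per-template dict is taken over verbatim for disk templates
# not seen before, while known ones are updated key by key, e.g.
#
#   new_diskparams = {"drbd": {"resync-rate": 1024}}   # hypothetical values
#   op.diskparams  = {"drbd": {"resync-rate": 4096}, "plain": {}}
#   # afterwards:     {"drbd": {"resync-rate": 4096}, "plain": {}}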
4036 # os hypervisor parameters
4037 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4038 if self.op.os_hvp:
4039 for os_name, hvs in self.op.os_hvp.items():
4040 if os_name not in self.new_os_hvp:
4041 self.new_os_hvp[os_name] = hvs
4042 else:
4043 for hv_name, hv_dict in hvs.items():
4044 if hv_name not in self.new_os_hvp[os_name]:
4045 self.new_os_hvp[os_name][hv_name] = hv_dict
4046 else:
4047 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4050 self.new_osp = objects.FillDict(cluster.osparams, {})
4051 if self.op.osparams:
4052 for os_name, osp in self.op.osparams.items():
4053 if os_name not in self.new_osp:
4054 self.new_osp[os_name] = {}
4056 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4057 use_none=True)
4059 if not self.new_osp[os_name]:
4060 # we removed all parameters
4061 del self.new_osp[os_name]
4062 else:
4063 # check the parameter validity (remote check)
4064 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4065 os_name, self.new_osp[os_name])
4067 # changes to the hypervisor list
4068 if self.op.enabled_hypervisors is not None:
4069 self.hv_list = self.op.enabled_hypervisors
4070 for hv in self.hv_list:
4071 # if the hypervisor doesn't already exist in the cluster
4072 # hvparams, we initialize it to empty, and then (in both
4073 # cases) we make sure to fill the defaults, as we might not
4074 # have a complete defaults list if the hypervisor wasn't
4075 # enabled before
4076 if hv not in new_hvp:
4077 new_hvp[hv] = {}
4078 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4079 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4080 else:
4081 self.hv_list = cluster.enabled_hypervisors
4083 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4084 # either the enabled list has changed, or the parameters have, validate
4085 for hv_name, hv_params in self.new_hvparams.items():
4086 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4087 (self.op.enabled_hypervisors and
4088 hv_name in self.op.enabled_hypervisors)):
4089 # either this is a new hypervisor, or its parameters have changed
4090 hv_class = hypervisor.GetHypervisor(hv_name)
4091 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4092 hv_class.CheckParameterSyntax(hv_params)
4093 _CheckHVParams(self, node_list, hv_name, hv_params)
4095 if self.op.os_hvp:
4096 # no need to check any newly-enabled hypervisors, since the
4097 # defaults have already been checked in the above code-block
4098 for os_name, os_hvp in self.new_os_hvp.items():
4099 for hv_name, hv_params in os_hvp.items():
4100 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4101 # we need to fill in the new os_hvp on top of the actual hv_p
4102 cluster_defaults = self.new_hvparams.get(hv_name, {})
4103 new_osp = objects.FillDict(cluster_defaults, hv_params)
4104 hv_class = hypervisor.GetHypervisor(hv_name)
4105 hv_class.CheckParameterSyntax(new_osp)
4106 _CheckHVParams(self, node_list, hv_name, new_osp)
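# Illustrative sketch (not in the original source): OS-specific hypervisor
# parameters are validated on top of the cluster-wide values, e.g. for
#
#   new_hvparams = {"kvm": {"kernel_path": "/boot/vmlinuz"}}  # assumed values
#   os_hvp = {"debian": {"kvm": {"acpi": False}}}
#
# the dict syntax-checked for "debian"/"kvm" would be roughly
# {"kernel_path": "/boot/vmlinuz", "acpi": False}.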
4108 if self.op.default_iallocator:
4109 alloc_script = utils.FindFile(self.op.default_iallocator,
4110 constants.IALLOCATOR_SEARCH_PATH,
4111 os.path.isfile)
4112 if alloc_script is None:
4113 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4114 " specified" % self.op.default_iallocator,
4115 errors.ECODE_INVAL)
4117 def Exec(self, feedback_fn):
4118 """Change the parameters of the cluster.
4121 if self.op.vg_name is not None:
4122 new_volume = self.op.vg_name
4123 if not new_volume:
4124 new_volume = None
4125 if new_volume != self.cfg.GetVGName():
4126 self.cfg.SetVGName(new_volume)
4127 else:
4128 feedback_fn("Cluster LVM configuration already in desired"
4129 " state, not changing")
4130 if self.op.drbd_helper is not None:
4131 new_helper = self.op.drbd_helper
4132 if not new_helper:
4133 new_helper = None
4134 if new_helper != self.cfg.GetDRBDHelper():
4135 self.cfg.SetDRBDHelper(new_helper)
4136 else:
4137 feedback_fn("Cluster DRBD helper already in desired state,"
4138 " not changing")
4139 if self.op.hvparams:
4140 self.cluster.hvparams = self.new_hvparams
4141 if self.op.os_hvp:
4142 self.cluster.os_hvp = self.new_os_hvp
4143 if self.op.enabled_hypervisors is not None:
4144 self.cluster.hvparams = self.new_hvparams
4145 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4146 if self.op.beparams:
4147 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4148 if self.op.nicparams:
4149 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4150 if self.op.ipolicy:
4151 self.cluster.ipolicy = self.new_ipolicy
4152 if self.op.osparams:
4153 self.cluster.osparams = self.new_osp
4154 if self.op.ndparams:
4155 self.cluster.ndparams = self.new_ndparams
4156 if self.op.diskparams:
4157 self.cluster.diskparams = self.new_diskparams
4158 if self.op.hv_state:
4159 self.cluster.hv_state_static = self.new_hv_state
4160 if self.op.disk_state:
4161 self.cluster.disk_state_static = self.new_disk_state
4163 if self.op.candidate_pool_size is not None:
4164 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4165 # we need to update the pool size here, otherwise the save will fail
4166 _AdjustCandidatePool(self, [])
4168 if self.op.maintain_node_health is not None:
4169 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4170 feedback_fn("Note: CONFD was disabled at build time, node health"
4171 " maintenance is not useful (still enabling it)")
4172 self.cluster.maintain_node_health = self.op.maintain_node_health
4174 if self.op.prealloc_wipe_disks is not None:
4175 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4177 if self.op.add_uids is not None:
4178 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4180 if self.op.remove_uids is not None:
4181 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4183 if self.op.uid_pool is not None:
4184 self.cluster.uid_pool = self.op.uid_pool
4186 if self.op.default_iallocator is not None:
4187 self.cluster.default_iallocator = self.op.default_iallocator
4189 if self.op.reserved_lvs is not None:
4190 self.cluster.reserved_lvs = self.op.reserved_lvs
4192 if self.op.use_external_mip_script is not None:
4193 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4195 def helper_os(aname, mods, desc):
4196 desc += " OS list"
4197 lst = getattr(self.cluster, aname)
4198 for key, val in mods:
4199 if key == constants.DDM_ADD:
4200 if val in lst:
4201 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4202 else:
4203 lst.append(val)
4204 elif key == constants.DDM_REMOVE:
4205 if val in lst:
4206 lst.remove(val)
4207 else:
4208 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4209 else:
4210 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4212 if self.op.hidden_os:
4213 helper_os("hidden_os", self.op.hidden_os, "hidden")
4215 if self.op.blacklisted_os:
4216 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
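# Illustrative usage (not from the original source): the "mods" argument is a
# list of (DDM_ADD|DDM_REMOVE, os_name) pairs, so e.g.
#
#   helper_os("hidden_os", [(constants.DDM_ADD, "debian-image")], "hidden")
#
# would append the hypothetical OS name "debian-image" to cluster.hidden_os
# unless it is already present.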
4218 if self.op.master_netdev:
4219 master_params = self.cfg.GetMasterNetworkParameters()
4220 ems = self.cfg.GetUseExternalMipScript()
4221 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4222 self.cluster.master_netdev)
4223 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4224 master_params, ems)
4225 result.Raise("Could not disable the master ip")
4226 feedback_fn("Changing master_netdev from %s to %s" %
4227 (master_params.netdev, self.op.master_netdev))
4228 self.cluster.master_netdev = self.op.master_netdev
4230 if self.op.master_netmask:
4231 master_params = self.cfg.GetMasterNetworkParameters()
4232 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4233 result = self.rpc.call_node_change_master_netmask(master_params.name,
4234 master_params.netmask,
4235 self.op.master_netmask,
4236 master_params.ip,
4237 master_params.netdev)
4238 if result.fail_msg:
4239 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4240 feedback_fn(msg)
4242 self.cluster.master_netmask = self.op.master_netmask
4244 self.cfg.Update(self.cluster, feedback_fn)
4246 if self.op.master_netdev:
4247 master_params = self.cfg.GetMasterNetworkParameters()
4248 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4249 self.op.master_netdev)
4250 ems = self.cfg.GetUseExternalMipScript()
4251 result = self.rpc.call_node_activate_master_ip(master_params.name,
4252 master_params, ems)
4253 if result.fail_msg:
4254 self.LogWarning("Could not re-enable the master ip on"
4255 " the master, please restart manually: %s",
4256 result.fail_msg)
4259 def _UploadHelper(lu, nodes, fname):
4260 """Helper for uploading a file and showing warnings.
4263 if os.path.exists(fname):
4264 result = lu.rpc.call_upload_file(nodes, fname)
4265 for to_node, to_result in result.items():
4266 msg = to_result.fail_msg
4267 if msg:
4268 msg = ("Copy of file %s to node %s failed: %s" %
4269 (fname, to_node, msg))
4270 lu.proc.LogWarning(msg)
4273 def _ComputeAncillaryFiles(cluster, redist):
4274 """Compute files external to Ganeti which need to be consistent.
4276 @type redist: boolean
4277 @param redist: Whether to include files which need to be redistributed
4279 """
4280 # Compute files for all nodes
4281 files_all = set([
4282 pathutils.SSH_KNOWN_HOSTS_FILE,
4283 pathutils.CONFD_HMAC_KEY,
4284 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4285 pathutils.SPICE_CERT_FILE,
4286 pathutils.SPICE_CACERT_FILE,
4287 pathutils.RAPI_USERS_FILE,
4288 ])
4290 if not redist:
4291 files_all.update(pathutils.ALL_CERT_FILES)
4292 files_all.update(ssconf.SimpleStore().GetFileList())
4293 else:
4294 # we need to ship at least the RAPI certificate
4295 files_all.add(pathutils.RAPI_CERT_FILE)
4297 if cluster.modify_etc_hosts:
4298 files_all.add(constants.ETC_HOSTS)
4300 if cluster.use_external_mip_script:
4301 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4303 # Files which are optional, these must:
4304 # - be present in one other category as well
4305 # - either exist or not exist on all nodes of that category (mc, vm all)
4306 files_opt = set([
4307 pathutils.RAPI_USERS_FILE,
4308 ])
4310 # Files which should only be on master candidates
4311 files_mc = set()
4313 if not redist:
4314 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4316 # Files which should only be on VM-capable nodes
4317 files_vm = set(
4318 filename
4319 for hv_name in cluster.enabled_hypervisors
4320 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4322 files_opt |= set(
4323 filename
4324 for hv_name in cluster.enabled_hypervisors
4325 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4327 # Filenames in each category must be unique
4328 all_files_set = files_all | files_mc | files_vm
4329 assert (len(all_files_set) ==
4330 sum(map(len, [files_all, files_mc, files_vm]))), \
4331 "Found file listed in more than one file list"
4333 # Optional files must be present in one other category
4334 assert all_files_set.issuperset(files_opt), \
4335 "Optional file not in a different required list"
4337 return (files_all, files_opt, files_mc, files_vm)
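# Illustrative example (not part of the original source): the returned tuple
# separates distribution scopes, e.g. something like
#
#   files_all = {"/var/lib/ganeti/known_hosts", ...}     # every node
#   files_mc  = {"/var/lib/ganeti/config.data"}          # master candidates
#
# so callers can upload each group only to the appropriate set of nodes.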
4340 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4341 """Distribute additional files which are part of the cluster configuration.
4343 ConfigWriter takes care of distributing the config and ssconf files, but
4344 there are more files which should be distributed to all nodes. This function
4345 makes sure those are copied.
4347 @param lu: calling logical unit
4348 @param additional_nodes: list of nodes not in the config to distribute to
4349 @type additional_vm: boolean
4350 @param additional_vm: whether the additional nodes are vm-capable or not
4352 """
4353 # Gather target nodes
4354 cluster = lu.cfg.GetClusterInfo()
4355 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4357 online_nodes = lu.cfg.GetOnlineNodeList()
4358 online_set = frozenset(online_nodes)
4359 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4361 if additional_nodes is not None:
4362 online_nodes.extend(additional_nodes)
4363 if additional_vm:
4364 vm_nodes.extend(additional_nodes)
4366 # Never distribute to master node
4367 for nodelist in [online_nodes, vm_nodes]:
4368 if master_info.name in nodelist:
4369 nodelist.remove(master_info.name)
4372 (files_all, _, files_mc, files_vm) = \
4373 _ComputeAncillaryFiles(cluster, True)
4375 # Never re-distribute configuration file from here
4376 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4377 pathutils.CLUSTER_CONF_FILE in files_vm)
4378 assert not files_mc, "Master candidates not handled in this function"
4380 filemap = [
4381 (online_nodes, files_all),
4382 (vm_nodes, files_vm),
4383 ]
4386 for (node_list, files) in filemap:
4387 for fname in files:
4388 _UploadHelper(lu, node_list, fname)
4391 class LUClusterRedistConf(NoHooksLU):
4392 """Force the redistribution of cluster configuration.
4394 This is a very simple LU.
4399 def ExpandNames(self):
4400 self.needed_locks = {
4401 locking.LEVEL_NODE: locking.ALL_SET,
4402 }
4403 self.share_locks[locking.LEVEL_NODE] = 1
4405 def Exec(self, feedback_fn):
4406 """Redistribute the configuration.
4409 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4410 _RedistributeAncillaryFiles(self)
4413 class LUClusterActivateMasterIp(NoHooksLU):
4414 """Activate the master IP on the master node.
4417 def Exec(self, feedback_fn):
4418 """Activate the master IP.
4421 master_params = self.cfg.GetMasterNetworkParameters()
4422 ems = self.cfg.GetUseExternalMipScript()
4423 result = self.rpc.call_node_activate_master_ip(master_params.name,
4424 master_params, ems)
4425 result.Raise("Could not activate the master IP")
4428 class LUClusterDeactivateMasterIp(NoHooksLU):
4429 """Deactivate the master IP on the master node.
4432 def Exec(self, feedback_fn):
4433 """Deactivate the master IP.
4436 master_params = self.cfg.GetMasterNetworkParameters()
4437 ems = self.cfg.GetUseExternalMipScript()
4438 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4439 master_params, ems)
4440 result.Raise("Could not deactivate the master IP")
4443 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4444 """Sleep and poll for an instance's disk to sync.
4447 if not instance.disks or disks is not None and not disks:
4450 disks = _ExpandCheckDisks(instance, disks)
4453 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4455 node = instance.primary_node
4457 for dev in disks:
4458 lu.cfg.SetDiskID(dev, node)
4460 # TODO: Convert to utils.Retry
4462 retries = 0
4463 degr_retries = 10 # in seconds, as we sleep 1 second each time
4464 while True:
4465 max_time = 0
4466 done = True
4467 cumul_degraded = False
4468 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4469 msg = rstats.fail_msg
4470 if msg:
4471 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4472 retries += 1
4473 if retries >= 10:
4474 raise errors.RemoteError("Can't contact node %s for mirror data,"
4475 " aborting." % node)
4476 time.sleep(6)
4477 continue
4478 rstats = rstats.payload
4480 for i, mstat in enumerate(rstats):
4481 if mstat is None:
4482 lu.LogWarning("Can't compute data for node %s/%s",
4483 node, disks[i].iv_name)
4484 continue
4486 cumul_degraded = (cumul_degraded or
4487 (mstat.is_degraded and mstat.sync_percent is None))
4488 if mstat.sync_percent is not None:
4489 done = False
4490 if mstat.estimated_time is not None:
4491 rem_time = ("%s remaining (estimated)" %
4492 utils.FormatSeconds(mstat.estimated_time))
4493 max_time = mstat.estimated_time
4494 else:
4495 rem_time = "no time estimate"
4496 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4497 (disks[i].iv_name, mstat.sync_percent, rem_time))
4499 # if we're done but degraded, let's do a few small retries, to
4500 # make sure we see a stable and not transient situation; therefore
4501 # we force restart of the loop
4502 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4503 logging.info("Degraded disks found, %d retries left", degr_retries)
4504 degr_retries -= 1
4505 time.sleep(1)
4506 continue
4508 if done or oneshot:
4509 break
4511 time.sleep(min(60, max_time))
4513 if done:
4514 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4515 return not cumul_degraded
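# Illustrative usage (not from the original source): callers typically treat
# a False result as an aborted/degraded sync, e.g.
#
#   disk_abort = not _WaitForSync(lu, instance)
#   if disk_abort:
#     lu.LogWarning("Disk syncing did not complete cleanly")  # sketch only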
4518 def _BlockdevFind(lu, node, dev, instance):
4519 """Wrapper around call_blockdev_find to annotate diskparams.
4521 @param lu: A reference to the lu object
4522 @param node: The node to call out
4523 @param dev: The device to find
4524 @param instance: The instance object the device belongs to
4525 @returns The result of the rpc call
4527 """
4528 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4529 return lu.rpc.call_blockdev_find(node, disk)
4532 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4533 """Wrapper around L{_CheckDiskConsistencyInner}.
4536 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4537 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4541 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4542 ldisk=False):
4543 """Check that mirrors are not degraded.
4545 @attention: The device has to be annotated already.
4547 The ldisk parameter, if True, will change the test from the
4548 is_degraded attribute (which represents overall non-ok status for
4549 the device(s)) to the ldisk (representing the local storage status).
4551 """
4552 lu.cfg.SetDiskID(dev, node)
4554 result = True
4556 if on_primary or dev.AssembleOnSecondary():
4557 rstats = lu.rpc.call_blockdev_find(node, dev)
4558 msg = rstats.fail_msg
4559 if msg:
4560 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4561 result = False
4562 elif not rstats.payload:
4563 lu.LogWarning("Can't find disk on node %s", node)
4564 result = False
4565 else:
4566 if ldisk:
4567 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4568 else:
4569 result = result and not rstats.payload.is_degraded
4571 if dev.children:
4572 for child in dev.children:
4573 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4574 on_primary)
4576 return result
4579 class LUOobCommand(NoHooksLU):
4580 """Logical unit for OOB handling.
4584 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4586 def ExpandNames(self):
4587 """Gather locks we need.
4590 if self.op.node_names:
4591 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4592 lock_names = self.op.node_names
4593 else:
4594 lock_names = locking.ALL_SET
4596 self.needed_locks = {
4597 locking.LEVEL_NODE: lock_names,
4598 }
4600 def CheckPrereq(self):
4601 """Check prerequisites.
4604 - the node exists in the configuration
4607 Any errors are signaled by raising errors.OpPrereqError.
4611 self.master_node = self.cfg.GetMasterNode()
4613 assert self.op.power_delay >= 0.0
4615 if self.op.node_names:
4616 if (self.op.command in self._SKIP_MASTER and
4617 self.master_node in self.op.node_names):
4618 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4619 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4621 if master_oob_handler:
4622 additional_text = ("run '%s %s %s' if you want to operate on the"
4623 " master regardless") % (master_oob_handler,
4627 additional_text = "it does not support out-of-band operations"
4629 raise errors.OpPrereqError(("Operating on the master node %s is not"
4630 " allowed for %s; %s") %
4631 (self.master_node, self.op.command,
4632 additional_text), errors.ECODE_INVAL)
4633 else:
4634 self.op.node_names = self.cfg.GetNodeList()
4635 if self.op.command in self._SKIP_MASTER:
4636 self.op.node_names.remove(self.master_node)
4638 if self.op.command in self._SKIP_MASTER:
4639 assert self.master_node not in self.op.node_names
4641 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4642 if node is None:
4643 raise errors.OpPrereqError("Node %s not found" % node_name,
4644 errors.ECODE_NOENT)
4646 self.nodes.append(node)
4648 if (not self.op.ignore_status and
4649 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4650 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4651 " not marked offline") % node_name,
4654 def Exec(self, feedback_fn):
4655 """Execute OOB and return result if we expect any.
4658 master_node = self.master_node
4661 for idx, node in enumerate(utils.NiceSort(self.nodes,
4662 key=lambda node: node.name)):
4663 node_entry = [(constants.RS_NORMAL, node.name)]
4664 ret.append(node_entry)
4666 oob_program = _SupportsOob(self.cfg, node)
4668 if not oob_program:
4669 node_entry.append((constants.RS_UNAVAIL, None))
4670 continue
4672 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4673 self.op.command, oob_program, node.name)
4674 result = self.rpc.call_run_oob(master_node, oob_program,
4675 self.op.command, node.name,
4676 self.op.timeout)
4678 if result.fail_msg:
4679 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4680 node.name, result.fail_msg)
4681 node_entry.append((constants.RS_NODATA, None))
4682 else:
4683 try:
4684 self._CheckPayload(result)
4685 except errors.OpExecError, err:
4686 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4687 node.name, err)
4688 node_entry.append((constants.RS_NODATA, None))
4689 else:
4690 if self.op.command == constants.OOB_HEALTH:
4691 # For health we should log important events
4692 for item, status in result.payload:
4693 if status in [constants.OOB_STATUS_WARNING,
4694 constants.OOB_STATUS_CRITICAL]:
4695 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4696 item, node.name, status)
4698 if self.op.command == constants.OOB_POWER_ON:
4699 node.powered = True
4700 elif self.op.command == constants.OOB_POWER_OFF:
4701 node.powered = False
4702 elif self.op.command == constants.OOB_POWER_STATUS:
4703 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4704 if powered != node.powered:
4705 logging.warning(("Recorded power state (%s) of node '%s' does not"
4706 " match actual power state (%s)"), node.powered,
4709 # For configuration changing commands we should update the node
4710 if self.op.command in (constants.OOB_POWER_ON,
4711 constants.OOB_POWER_OFF):
4712 self.cfg.Update(node, feedback_fn)
4714 node_entry.append((constants.RS_NORMAL, result.payload))
4716 if (self.op.command == constants.OOB_POWER_ON and
4717 idx < len(self.nodes) - 1):
4718 time.sleep(self.op.power_delay)
4720 return ret
4722 def _CheckPayload(self, result):
4723 """Checks if the payload is valid.
4725 @param result: RPC result
4726 @raises errors.OpExecError: If payload is not valid
4728 """
4729 errs = []
4730 if self.op.command == constants.OOB_HEALTH:
4731 if not isinstance(result.payload, list):
4732 errs.append("command 'health' is expected to return a list but got %s" %
4733 type(result.payload))
4734 else:
4735 for item, status in result.payload:
4736 if status not in constants.OOB_STATUSES:
4737 errs.append("health item '%s' has invalid status '%s'" %
4738 (item, status))
4740 if self.op.command == constants.OOB_POWER_STATUS:
4741 if not isinstance(result.payload, dict):
4742 errs.append("power-status is expected to return a dict but got %s" %
4743 type(result.payload))
4745 if self.op.command in [
4746 constants.OOB_POWER_ON,
4747 constants.OOB_POWER_OFF,
4748 constants.OOB_POWER_CYCLE,
4749 ]:
4750 if result.payload is not None:
4751 errs.append("%s is expected to not return payload but got '%s'" %
4752 (self.op.command, result.payload))
4754 if errs:
4755 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4756 utils.CommaJoin(errs))
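# Illustrative example (not from the original source): for the power-status
# command the per-node payload validated above is a dict like
#
#   {constants.OOB_POWER_STATUS_POWERED: True}
#
# while the health command returns a list of (item, status) pairs.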
4759 class _OsQuery(_QueryBase):
4760 FIELDS = query.OS_FIELDS
4762 def ExpandNames(self, lu):
4763 # Lock all nodes in shared mode
4764 # Temporary removal of locks, should be reverted later
4765 # TODO: reintroduce locks when they are lighter-weight
4766 lu.needed_locks = {}
4767 #self.share_locks[locking.LEVEL_NODE] = 1
4768 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4770 # The following variables interact with _QueryBase._GetNames
4771 if self.names:
4772 self.wanted = self.names
4773 else:
4774 self.wanted = locking.ALL_SET
4776 self.do_locking = self.use_locking
4778 def DeclareLocks(self, lu, level):
4779 pass
4781 @staticmethod
4782 def _DiagnoseByOS(rlist):
4783 """Remaps a per-node return list into a per-os per-node dictionary
4785 @param rlist: a map with node names as keys and OS objects as values
4787 @rtype: dict
4788 @return: a dictionary with osnames as keys and as value another
4789 map, with nodes as keys and tuples of (path, status, diagnose,
4790 variants, parameters, api_versions) as values, eg::
4792 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4793 (/srv/..., False, "invalid api")],
4794 "node2": [(/srv/..., True, "", [], [])]}
4795 }
4796 """
4798 all_os = {}
4799 # we build here the list of nodes that didn't fail the RPC (at RPC
4800 # level), so that nodes with a non-responding node daemon don't
4801 # make all OSes invalid
4802 good_nodes = [node_name for node_name in rlist
4803 if not rlist[node_name].fail_msg]
4804 for node_name, nr in rlist.items():
4805 if nr.fail_msg or not nr.payload:
4806 continue
4807 for (name, path, status, diagnose, variants,
4808 params, api_versions) in nr.payload:
4809 if name not in all_os:
4810 # build a list of nodes for this os containing empty lists
4811 # for each node in node_list
4812 all_os[name] = {}
4813 for nname in good_nodes:
4814 all_os[name][nname] = []
4815 # convert params from [name, help] to (name, help)
4816 params = [tuple(v) for v in params]
4817 all_os[name][node_name].append((path, status, diagnose,
4818 variants, params, api_versions))
4820 return all_os
4821 def _GetQueryData(self, lu):
4822 """Computes the list of nodes and their attributes.
4825 # Locking is not used
4826 assert not (compat.any(lu.glm.is_owned(level)
4827 for level in locking.LEVELS
4828 if level != locking.LEVEL_CLUSTER) or
4829 self.do_locking or self.use_locking)
4831 valid_nodes = [node.name
4832 for node in lu.cfg.GetAllNodesInfo().values()
4833 if not node.offline and node.vm_capable]
4834 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4835 cluster = lu.cfg.GetClusterInfo()
4837 data = {}
4839 for (os_name, os_data) in pol.items():
4840 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4841 hidden=(os_name in cluster.hidden_os),
4842 blacklisted=(os_name in cluster.blacklisted_os))
4844 variants = set()
4845 parameters = set()
4846 api_versions = set()
4848 for idx, osl in enumerate(os_data.values()):
4849 info.valid = bool(info.valid and osl and osl[0][1])
4850 if not info.valid:
4851 break
4853 (node_variants, node_params, node_api) = osl[0][3:6]
4854 if idx == 0:
4855 # First entry
4856 variants.update(node_variants)
4857 parameters.update(node_params)
4858 api_versions.update(node_api)
4859 else:
4860 # Filter out inconsistent values
4861 variants.intersection_update(node_variants)
4862 parameters.intersection_update(node_params)
4863 api_versions.intersection_update(node_api)
4865 info.variants = list(variants)
4866 info.parameters = list(parameters)
4867 info.api_versions = list(api_versions)
4869 data[os_name] = info
4871 # Prepare data in requested order
4872 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4873 if name in data]
4876 class LUOsDiagnose(NoHooksLU):
4877 """Logical unit for OS diagnose/query.
4883 def _BuildFilter(fields, names):
4884 """Builds a filter for querying OSes.
4887 name_filter = qlang.MakeSimpleFilter("name", names)
4889 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4890 # respective field is not requested
4891 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4892 for fname in ["hidden", "blacklisted"]
4893 if fname not in fields]
4894 if "valid" not in fields:
4895 status_filter.append([qlang.OP_TRUE, "valid"])
4897 if status_filter:
4898 status_filter.insert(0, qlang.OP_AND)
4899 else:
4900 status_filter = None
4902 if name_filter and status_filter:
4903 return [qlang.OP_AND, name_filter, status_filter]
4904 elif name_filter:
4905 return name_filter
4906 else:
4907 return status_filter
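# Illustrative example (not part of the original source): calling
# _BuildFilter(["name"], ["lenny-image"]) would produce roughly
#
#   [qlang.OP_AND,
#    [qlang.OP_OR, [qlang.OP_EQUAL, "name", "lenny-image"]],
#    [qlang.OP_AND, [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#     [qlang.OP_TRUE, "valid"]]]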
4909 def CheckArguments(self):
4910 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4911 self.op.output_fields, False)
4913 def ExpandNames(self):
4914 self.oq.ExpandNames(self)
4916 def Exec(self, feedback_fn):
4917 return self.oq.OldStyleQuery(self)
4920 class LUNodeRemove(LogicalUnit):
4921 """Logical unit for removing a node.
4924 HPATH = "node-remove"
4925 HTYPE = constants.HTYPE_NODE
4927 def BuildHooksEnv(self):
4928 """Build hooks env.
4930 """
4931 return {
4932 "OP_TARGET": self.op.node_name,
4933 "NODE_NAME": self.op.node_name,
4934 }
4936 def BuildHooksNodes(self):
4937 """Build hooks nodes.
4939 This doesn't run on the target node in the pre phase as a failed
4940 node would then be impossible to remove.
4942 """
4943 all_nodes = self.cfg.GetNodeList()
4944 try:
4945 all_nodes.remove(self.op.node_name)
4946 except ValueError:
4947 pass
4948 return (all_nodes, all_nodes)
4950 def CheckPrereq(self):
4951 """Check prerequisites.
4954 - the node exists in the configuration
4955 - it does not have primary or secondary instances
4956 - it's not the master
4958 Any errors are signaled by raising errors.OpPrereqError.
4961 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4962 node = self.cfg.GetNodeInfo(self.op.node_name)
4963 assert node is not None
4965 masternode = self.cfg.GetMasterNode()
4966 if node.name == masternode:
4967 raise errors.OpPrereqError("Node is the master node, failover to another"
4968 " node is required", errors.ECODE_INVAL)
4970 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4971 if node.name in instance.all_nodes:
4972 raise errors.OpPrereqError("Instance %s is still running on the node,"
4973 " please remove first" % instance_name,
4975 self.op.node_name = node.name
4978 def Exec(self, feedback_fn):
4979 """Removes the node from the cluster.
4983 logging.info("Stopping the node daemon and removing configs from node %s",
4986 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4988 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4989 "Not owning BGL"
4991 # Promote nodes to master candidate as needed
4992 _AdjustCandidatePool(self, exceptions=[node.name])
4993 self.context.RemoveNode(node.name)
4995 # Run post hooks on the node before it's removed
4996 _RunPostHook(self, node.name)
4998 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4999 msg = result.fail_msg
5000 if msg:
5001 self.LogWarning("Errors encountered on the remote node while leaving"
5002 " the cluster: %s", msg)
5004 # Remove node from our /etc/hosts
5005 if self.cfg.GetClusterInfo().modify_etc_hosts:
5006 master_node = self.cfg.GetMasterNode()
5007 result = self.rpc.call_etc_hosts_modify(master_node,
5008 constants.ETC_HOSTS_REMOVE,
5009 node.name, None)
5010 result.Raise("Can't update hosts file with new host data")
5011 _RedistributeAncillaryFiles(self)
5014 class _NodeQuery(_QueryBase):
5015 FIELDS = query.NODE_FIELDS
5017 def ExpandNames(self, lu):
5018 lu.needed_locks = {}
5019 lu.share_locks = _ShareAll()
5021 if self.names:
5022 self.wanted = _GetWantedNodes(lu, self.names)
5023 else:
5024 self.wanted = locking.ALL_SET
5026 self.do_locking = (self.use_locking and
5027 query.NQ_LIVE in self.requested_data)
5029 if self.do_locking:
5030 # If any non-static field is requested we need to lock the nodes
5031 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5033 def DeclareLocks(self, lu, level):
5034 pass
5036 def _GetQueryData(self, lu):
5037 """Computes the list of nodes and their attributes.
5040 all_info = lu.cfg.GetAllNodesInfo()
5042 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5044 # Gather data as requested
5045 if query.NQ_LIVE in self.requested_data:
5046 # filter out non-vm_capable nodes
5047 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5049 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5050 [lu.cfg.GetHypervisorType()])
5051 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5052 for (name, nresult) in node_data.items()
5053 if not nresult.fail_msg and nresult.payload)
5054 else:
5055 live_data = None
5057 if query.NQ_INST in self.requested_data:
5058 node_to_primary = dict([(name, set()) for name in nodenames])
5059 node_to_secondary = dict([(name, set()) for name in nodenames])
5061 inst_data = lu.cfg.GetAllInstancesInfo()
5063 for inst in inst_data.values():
5064 if inst.primary_node in node_to_primary:
5065 node_to_primary[inst.primary_node].add(inst.name)
5066 for secnode in inst.secondary_nodes:
5067 if secnode in node_to_secondary:
5068 node_to_secondary[secnode].add(inst.name)
5069 else:
5070 node_to_primary = None
5071 node_to_secondary = None
5073 if query.NQ_OOB in self.requested_data:
5074 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5075 for name, node in all_info.iteritems())
5076 else:
5077 oob_support = None
5079 if query.NQ_GROUP in self.requested_data:
5080 groups = lu.cfg.GetAllNodeGroupsInfo()
5081 else:
5082 groups = {}
5084 return query.NodeQueryData([all_info[name] for name in nodenames],
5085 live_data, lu.cfg.GetMasterNode(),
5086 node_to_primary, node_to_secondary, groups,
5087 oob_support, lu.cfg.GetClusterInfo())
5090 class LUNodeQuery(NoHooksLU):
5091 """Logical unit for querying nodes.
5094 # pylint: disable=W0142
5097 def CheckArguments(self):
5098 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5099 self.op.output_fields, self.op.use_locking)
5101 def ExpandNames(self):
5102 self.nq.ExpandNames(self)
5104 def DeclareLocks(self, level):
5105 self.nq.DeclareLocks(self, level)
5107 def Exec(self, feedback_fn):
5108 return self.nq.OldStyleQuery(self)
5111 class LUNodeQueryvols(NoHooksLU):
5112 """Logical unit for getting volumes on node(s).
5116 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5117 _FIELDS_STATIC = utils.FieldSet("node")
5119 def CheckArguments(self):
5120 _CheckOutputFields(static=self._FIELDS_STATIC,
5121 dynamic=self._FIELDS_DYNAMIC,
5122 selected=self.op.output_fields)
5124 def ExpandNames(self):
5125 self.share_locks = _ShareAll()
5126 self.needed_locks = {}
5128 if not self.op.nodes:
5129 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5130 else:
5131 self.needed_locks[locking.LEVEL_NODE] = \
5132 _GetWantedNodes(self, self.op.nodes)
5134 def Exec(self, feedback_fn):
5135 """Computes the list of nodes and their attributes.
5138 nodenames = self.owned_locks(locking.LEVEL_NODE)
5139 volumes = self.rpc.call_node_volumes(nodenames)
5141 ilist = self.cfg.GetAllInstancesInfo()
5142 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5144 output = []
5145 for node in nodenames:
5146 nresult = volumes[node]
5147 if nresult.offline:
5148 continue
5149 msg = nresult.fail_msg
5150 if msg:
5151 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5152 continue
5154 node_vols = sorted(nresult.payload,
5155 key=operator.itemgetter("dev"))
5157 for vol in node_vols:
5158 node_output = []
5159 for field in self.op.output_fields:
5160 if field == "node":
5161 val = node
5162 elif field == "phys":
5163 val = vol["dev"]
5164 elif field == "vg":
5165 val = vol["vg"]
5166 elif field == "name":
5167 val = vol["name"]
5168 elif field == "size":
5169 val = int(float(vol["size"]))
5170 elif field == "instance":
5171 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5172 else:
5173 raise errors.ParameterError(field)
5174 node_output.append(str(val))
5176 output.append(node_output)
5178 return output
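# Illustrative example (not from the original source): with output_fields
# ["node", "name", "size"], each row of the returned list could look like
#
#   ["node1.example.com", "instance1.example.com-disk0", "10240"]
#
# (all values are stringified before being appended above).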
5181 class LUNodeQueryStorage(NoHooksLU):
5182 """Logical unit for getting information on storage units on node(s).
5185 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5188 def CheckArguments(self):
5189 _CheckOutputFields(static=self._FIELDS_STATIC,
5190 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5191 selected=self.op.output_fields)
5193 def ExpandNames(self):
5194 self.share_locks = _ShareAll()
5195 self.needed_locks = {}
5197 if self.op.nodes:
5198 self.needed_locks[locking.LEVEL_NODE] = \
5199 _GetWantedNodes(self, self.op.nodes)
5200 else:
5201 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5203 def Exec(self, feedback_fn):
5204 """Computes the list of nodes and their attributes.
5207 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5209 # Always get name to sort by
5210 if constants.SF_NAME in self.op.output_fields:
5211 fields = self.op.output_fields[:]
5212 else:
5213 fields = [constants.SF_NAME] + self.op.output_fields
5215 # Never ask for node or type as it's only known to the LU
5216 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5217 while extra in fields:
5218 fields.remove(extra)
5220 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5221 name_idx = field_idx[constants.SF_NAME]
5223 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5224 data = self.rpc.call_storage_list(self.nodes,
5225 self.op.storage_type, st_args,
5226 self.op.name, fields)
5228 result = []
5230 for node in utils.NiceSort(self.nodes):
5231 nresult = data[node]
5232 if nresult.offline:
5233 continue
5235 msg = nresult.fail_msg
5236 if msg:
5237 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5238 continue
5240 rows = dict([(row[name_idx], row) for row in nresult.payload])
5242 for name in utils.NiceSort(rows.keys()):
5243 row = rows[name]
5245 out = []
5247 for field in self.op.output_fields:
5248 if field == constants.SF_NODE:
5249 val = node
5250 elif field == constants.SF_TYPE:
5251 val = self.op.storage_type
5252 elif field in field_idx:
5253 val = row[field_idx[field]]
5254 else:
5255 raise errors.ParameterError(field)
5257 out.append(val)
5259 result.append(out)
5261 return result
5264 class _InstanceQuery(_QueryBase):
5265 FIELDS = query.INSTANCE_FIELDS
5267 def ExpandNames(self, lu):
5268 lu.needed_locks = {}
5269 lu.share_locks = _ShareAll()
5271 if self.names:
5272 self.wanted = _GetWantedInstances(lu, self.names)
5273 else:
5274 self.wanted = locking.ALL_SET
5276 self.do_locking = (self.use_locking and
5277 query.IQ_LIVE in self.requested_data)
5278 if self.do_locking:
5279 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5280 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5281 lu.needed_locks[locking.LEVEL_NODE] = []
5282 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5284 self.do_grouplocks = (self.do_locking and
5285 query.IQ_NODES in self.requested_data)
5287 def DeclareLocks(self, lu, level):
5289 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5290 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5292 # Lock all groups used by instances optimistically; this requires going
5293 # via the node before it's locked, requiring verification later on
5294 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5295 set(group_uuid
5296 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5297 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5298 elif level == locking.LEVEL_NODE:
5299 lu._LockInstancesNodes() # pylint: disable=W0212
5301 @staticmethod
5302 def _CheckGroupLocks(lu):
5303 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5304 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5306 # Check if node groups for locked instances are still correct
5307 for instance_name in owned_instances:
5308 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5310 def _GetQueryData(self, lu):
5311 """Computes the list of instances and their attributes.
5314 if self.do_grouplocks:
5315 self._CheckGroupLocks(lu)
5317 cluster = lu.cfg.GetClusterInfo()
5318 all_info = lu.cfg.GetAllInstancesInfo()
5320 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5322 instance_list = [all_info[name] for name in instance_names]
5323 nodes = frozenset(itertools.chain(*(inst.all_nodes
5324 for inst in instance_list)))
5325 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5326 bad_nodes = []
5327 offline_nodes = []
5328 wrongnode_inst = set()
5330 # Gather data as requested
5331 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5332 live_data = {}
5333 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5334 for name in nodes:
5335 result = node_data[name]
5336 if result.offline:
5337 # offline nodes will be in both lists
5338 assert result.fail_msg
5339 offline_nodes.append(name)
5340 if result.fail_msg:
5341 bad_nodes.append(name)
5342 elif result.payload:
5343 for inst in result.payload:
5344 if inst in all_info:
5345 if all_info[inst].primary_node == name:
5346 live_data.update(result.payload)
5347 else:
5348 wrongnode_inst.add(inst)
5349 else:
5350 # orphan instance; we don't list it here as we don't
5351 # handle this case yet in the output of instance listing
5352 logging.warning("Orphan instance '%s' found on node %s",
5353 inst, name)
5354 # else no instance is alive
5355 else:
5356 live_data = {}
5358 if query.IQ_DISKUSAGE in self.requested_data:
5359 gmi = ganeti.masterd.instance
5360 disk_usage = dict((inst.name,
5361 gmi.ComputeDiskSize(inst.disk_template,
5362 [{constants.IDISK_SIZE: disk.size}
5363 for disk in inst.disks]))
5364 for inst in instance_list)
5365 else:
5366 disk_usage = None
5368 if query.IQ_CONSOLE in self.requested_data:
5369 consinfo = {}
5370 for inst in instance_list:
5371 if inst.name in live_data:
5372 # Instance is running
5373 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5374 else:
5375 consinfo[inst.name] = None
5376 assert set(consinfo.keys()) == set(instance_names)
5377 else:
5378 consinfo = None
5380 if query.IQ_NODES in self.requested_data:
5381 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5382 instance_list)))
5383 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5384 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5385 for uuid in set(map(operator.attrgetter("group"),
5386 nodes.values())))
5387 else:
5388 nodes = None
5389 groups = None
5391 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5392 disk_usage, offline_nodes, bad_nodes,
5393 live_data, wrongnode_inst, consinfo,
5394 nodes, groups)
5397 class LUQuery(NoHooksLU):
5398 """Query for resources/items of a certain kind.
5401 # pylint: disable=W0142
5404 def CheckArguments(self):
5405 qcls = _GetQueryImplementation(self.op.what)
5407 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5409 def ExpandNames(self):
5410 self.impl.ExpandNames(self)
5412 def DeclareLocks(self, level):
5413 self.impl.DeclareLocks(self, level)
5415 def Exec(self, feedback_fn):
5416 return self.impl.NewStyleQuery(self)
5419 class LUQueryFields(NoHooksLU):
5420 """Query for resources/items of a certain kind.
5423 # pylint: disable=W0142
5426 def CheckArguments(self):
5427 self.qcls = _GetQueryImplementation(self.op.what)
5429 def ExpandNames(self):
5430 self.needed_locks = {}
5432 def Exec(self, feedback_fn):
5433 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5436 class LUNodeModifyStorage(NoHooksLU):
5437 """Logical unit for modifying a storage volume on a node.
5442 def CheckArguments(self):
5443 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5445 storage_type = self.op.storage_type
5447 try:
5448 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5449 except KeyError:
5450 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5451 " modified" % storage_type,
5452 errors.ECODE_INVAL)
5454 diff = set(self.op.changes.keys()) - modifiable
5455 if diff:
5456 raise errors.OpPrereqError("The following fields can not be modified for"
5457 " storage units of type '%s': %r" %
5458 (storage_type, list(diff)),
5459 errors.ECODE_INVAL)
5461 def ExpandNames(self):
5462 self.needed_locks = {
5463 locking.LEVEL_NODE: self.op.node_name,
5464 }
5466 def Exec(self, feedback_fn):
5467 """Computes the list of nodes and their attributes.
5470 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5471 result = self.rpc.call_storage_modify(self.op.node_name,
5472 self.op.storage_type, st_args,
5473 self.op.name, self.op.changes)
5474 result.Raise("Failed to modify storage unit '%s' on %s" %
5475 (self.op.name, self.op.node_name))
5478 class LUNodeAdd(LogicalUnit):
5479 """Logical unit for adding node to the cluster.
5483 HTYPE = constants.HTYPE_NODE
5484 _NFLAGS = ["master_capable", "vm_capable"]
5486 def CheckArguments(self):
5487 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5488 # validate/normalize the node name
5489 self.hostname = netutils.GetHostname(name=self.op.node_name,
5490 family=self.primary_ip_family)
5491 self.op.node_name = self.hostname.name
5493 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5494 raise errors.OpPrereqError("Cannot readd the master node",
5497 if self.op.readd and self.op.group:
5498 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5499 " being readded", errors.ECODE_INVAL)
5501 def BuildHooksEnv(self):
5502 """Build hooks env.
5504 This will run on all nodes before, and on all nodes + the new node after.
5506 """
5507 return {
5508 "OP_TARGET": self.op.node_name,
5509 "NODE_NAME": self.op.node_name,
5510 "NODE_PIP": self.op.primary_ip,
5511 "NODE_SIP": self.op.secondary_ip,
5512 "MASTER_CAPABLE": str(self.op.master_capable),
5513 "VM_CAPABLE": str(self.op.vm_capable),
5516 def BuildHooksNodes(self):
5517 """Build hooks nodes.
5520 # Exclude added node
5521 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5522 post_nodes = pre_nodes + [self.op.node_name, ]
5524 return (pre_nodes, post_nodes)
5526 def CheckPrereq(self):
5527 """Check prerequisites.
5530 - the new node is not already in the config
5532 - its parameters (single/dual homed) matches the cluster
5534 Any errors are signaled by raising errors.OpPrereqError.
5538 hostname = self.hostname
5539 node = hostname.name
5540 primary_ip = self.op.primary_ip = hostname.ip
5541 if self.op.secondary_ip is None:
5542 if self.primary_ip_family == netutils.IP6Address.family:
5543 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5544 " IPv4 address must be given as secondary",
5545 errors.ECODE_INVAL)
5546 self.op.secondary_ip = primary_ip
5548 secondary_ip = self.op.secondary_ip
5549 if not netutils.IP4Address.IsValid(secondary_ip):
5550 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5551 " address" % secondary_ip, errors.ECODE_INVAL)
5553 node_list = cfg.GetNodeList()
5554 if not self.op.readd and node in node_list:
5555 raise errors.OpPrereqError("Node %s is already in the configuration" %
5556 node, errors.ECODE_EXISTS)
5557 elif self.op.readd and node not in node_list:
5558 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5559 errors.ECODE_NOENT)
5561 self.changed_primary_ip = False
5563 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5564 if self.op.readd and node == existing_node_name:
5565 if existing_node.secondary_ip != secondary_ip:
5566 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5567 " address configuration as before",
5569 if existing_node.primary_ip != primary_ip:
5570 self.changed_primary_ip = True
5574 if (existing_node.primary_ip == primary_ip or
5575 existing_node.secondary_ip == primary_ip or
5576 existing_node.primary_ip == secondary_ip or
5577 existing_node.secondary_ip == secondary_ip):
5578 raise errors.OpPrereqError("New node ip address(es) conflict with"
5579 " existing node %s" % existing_node.name,
5580 errors.ECODE_NOTUNIQUE)
5582 # After this 'if' block, None is no longer a valid value for the
5583 # _capable op attributes
5584 if self.op.readd:
5585 old_node = self.cfg.GetNodeInfo(node)
5586 assert old_node is not None, "Can't retrieve locked node %s" % node
5587 for attr in self._NFLAGS:
5588 if getattr(self.op, attr) is None:
5589 setattr(self.op, attr, getattr(old_node, attr))
5590 else:
5591 for attr in self._NFLAGS:
5592 if getattr(self.op, attr) is None:
5593 setattr(self.op, attr, True)
5595 if self.op.readd and not self.op.vm_capable:
5596 pri, sec = cfg.GetNodeInstances(node)
5597 if pri or sec:
5598 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5599 " flag set to false, but it already holds"
5600 " instances" % node,
5601 errors.ECODE_STATE)
5603 # check that the type of the node (single versus dual homed) is the
5604 # same as for the master
5605 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5606 master_singlehomed = myself.secondary_ip == myself.primary_ip
5607 newbie_singlehomed = secondary_ip == primary_ip
5608 if master_singlehomed != newbie_singlehomed:
5609 if master_singlehomed:
5610 raise errors.OpPrereqError("The master has no secondary ip but the"
5611 " new node has one",
5612 errors.ECODE_INVAL)
5613 else:
5614 raise errors.OpPrereqError("The master has a secondary ip but the"
5615 " new node doesn't have one",
5616 errors.ECODE_INVAL)
5618 # checks reachability
5619 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5620 raise errors.OpPrereqError("Node not reachable by ping",
5621 errors.ECODE_ENVIRON)
5623 if not newbie_singlehomed:
5624 # check reachability from my secondary ip to newbie's secondary ip
5625 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5626 source=myself.secondary_ip):
5627 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5628 " based ping to node daemon port",
5629 errors.ECODE_ENVIRON)
5631 if self.op.readd:
5632 exceptions = [node]
5633 else:
5634 exceptions = []
5636 if self.op.master_capable:
5637 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5638 else:
5639 self.master_candidate = False
5641 if self.op.readd:
5642 self.new_node = old_node
5643 else:
5644 node_group = cfg.LookupNodeGroup(self.op.group)
5645 self.new_node = objects.Node(name=node,
5646 primary_ip=primary_ip,
5647 secondary_ip=secondary_ip,
5648 master_candidate=self.master_candidate,
5649 offline=False, drained=False,
5650 group=node_group, ndparams={})
5652 if self.op.ndparams:
5653 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5655 if self.op.hv_state:
5656 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5658 if self.op.disk_state:
5659 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5661 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5662 # it a property on the base class.
5663 result = rpc.DnsOnlyRunner().call_version([node])[node]
5664 result.Raise("Can't get version information from node %s" % node)
5665 if constants.PROTOCOL_VERSION == result.payload:
5666 logging.info("Communication to node %s fine, sw version %s match",
5667 node, result.payload)
5668 else:
5669 raise errors.OpPrereqError("Version mismatch master version %s,"
5670 " node version %s" %
5671 (constants.PROTOCOL_VERSION, result.payload),
5672 errors.ECODE_ENVIRON)
5674 def Exec(self, feedback_fn):
5675 """Adds the new node to the cluster.
5678 new_node = self.new_node
5679 node = new_node.name
5681 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5682 "Not owning BGL"
5684 # We are adding a new node, so we assume it is powered
5685 new_node.powered = True
5687 # for re-adds, reset the offline/drained/master-candidate flags;
5688 # we need to reset here, otherwise offline would prevent RPC calls
5689 # later in the procedure; this also means that if the re-add
5690 # fails, we are left with a non-offlined, broken node
5691 if self.op.readd:
5692 new_node.drained = new_node.offline = False # pylint: disable=W0201
5693 self.LogInfo("Readding a node, the offline/drained flags were reset")
5694 # if we demote the node, we do cleanup later in the procedure
5695 new_node.master_candidate = self.master_candidate
5696 if self.changed_primary_ip:
5697 new_node.primary_ip = self.op.primary_ip
5699 # copy the master/vm_capable flags
5700 for attr in self._NFLAGS:
5701 setattr(new_node, attr, getattr(self.op, attr))
5703 # notify the user about any possible mc promotion
5704 if new_node.master_candidate:
5705 self.LogInfo("Node will be a master candidate")
5707 if self.op.ndparams:
5708 new_node.ndparams = self.op.ndparams
5710 new_node.ndparams = {}
5712 if self.op.hv_state:
5713 new_node.hv_state_static = self.new_hv_state
5715 if self.op.disk_state:
5716 new_node.disk_state_static = self.new_disk_state
5718 # Add node to our /etc/hosts, and add key to known_hosts
5719 if self.cfg.GetClusterInfo().modify_etc_hosts:
5720 master_node = self.cfg.GetMasterNode()
5721 result = self.rpc.call_etc_hosts_modify(master_node,
5722 constants.ETC_HOSTS_ADD,
5723 self.hostname.name,
5724 self.hostname.ip)
5725 result.Raise("Can't update hosts file with new host data")
5727 if new_node.secondary_ip != new_node.primary_ip:
5728 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5729 False)
5731 node_verify_list = [self.cfg.GetMasterNode()]
5732 node_verify_param = {
5733 constants.NV_NODELIST: ([node], {}),
5734 # TODO: do a node-net-test as well?
5735 }
5737 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5738 self.cfg.GetClusterName())
5739 for verifier in node_verify_list:
5740 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5741 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5742 if nl_payload:
5743 for failed in nl_payload:
5744 feedback_fn("ssh/hostname verification failed"
5745 " (checking from %s): %s" %
5746 (verifier, nl_payload[failed]))
5747 raise errors.OpExecError("ssh/hostname verification failed")
5749 if self.op.readd:
5750 _RedistributeAncillaryFiles(self)
5751 self.context.ReaddNode(new_node)
5752 # make sure we redistribute the config
5753 self.cfg.Update(new_node, feedback_fn)
5754 # and make sure the new node will not have old files around
5755 if not new_node.master_candidate:
5756 result = self.rpc.call_node_demote_from_mc(new_node.name)
5757 msg = result.fail_msg
5758 if msg:
5759 self.LogWarning("Node failed to demote itself from master"
5760 " candidate status: %s" % msg)
5762 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5763 additional_vm=self.op.vm_capable)
5764 self.context.AddNode(new_node, self.proc.GetECId())
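# A minimal illustrative sketch (hypothetical helper, not part of Ganeti):
# the homing check in CheckPrereq above treats a node as single-homed when
# its secondary IP equals its primary IP, and requires the new node to
# match the master's homing type.
def _ExampleHomingCompatible(master_primary, master_secondary,
                             new_primary, new_secondary):
  """Hypothetical pure version of the single/dual-homed check."""
  master_singlehomed = master_secondary == master_primary
  newbie_singlehomed = new_secondary == new_primary
  # both sides must agree: either both single-homed or both dual-homed
  return master_singlehomed == newbie_singlehomed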
5767 class LUNodeSetParams(LogicalUnit):
5768 """Modifies the parameters of a node.
5770 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5771 to the node role (as _ROLE_*)
5772 @cvar _R2F: a dictionary from node role to tuples of flags
5773 @cvar _FLAGS: a list of attribute names corresponding to the flags
5775 """
5776 HPATH = "node-modify"
5777 HTYPE = constants.HTYPE_NODE
5779 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5780 _F2R = {
5781 (True, False, False): _ROLE_CANDIDATE,
5782 (False, True, False): _ROLE_DRAINED,
5783 (False, False, True): _ROLE_OFFLINE,
5784 (False, False, False): _ROLE_REGULAR,
5785 }
5786 _R2F = dict((v, k) for k, v in _F2R.items())
5787 _FLAGS = ["master_candidate", "drained", "offline"]
5789 def CheckArguments(self):
5790 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5791 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5792 self.op.master_capable, self.op.vm_capable,
5793 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5794 self.op.disk_state]
5795 if all_mods.count(None) == len(all_mods):
5796 raise errors.OpPrereqError("Please pass at least one modification",
5797 errors.ECODE_INVAL)
5798 if all_mods.count(True) > 1:
5799 raise errors.OpPrereqError("Can't set the node into more than one"
5800 " state at the same time",
5803 # Boolean value that tells us whether we might be demoting from MC
5804 self.might_demote = (self.op.master_candidate is False or
5805 self.op.offline is True or
5806 self.op.drained is True or
5807 self.op.master_capable is False)
5809 if self.op.secondary_ip:
5810 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5811 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5812 " address" % self.op.secondary_ip,
5815 self.lock_all = self.op.auto_promote and self.might_demote
5816 self.lock_instances = self.op.secondary_ip is not None
5818 def _InstanceFilter(self, instance):
5819 """Filter for getting affected instances.
5822 return (instance.disk_template in constants.DTS_INT_MIRROR and
5823 self.op.node_name in instance.all_nodes)
5825 def ExpandNames(self):
5826 if self.lock_all:
5827 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5828 else:
5829 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5831 # Since modifying a node can have severe effects on currently running
5832 # operations the resource lock is at least acquired in shared mode
5833 self.needed_locks[locking.LEVEL_NODE_RES] = \
5834 self.needed_locks[locking.LEVEL_NODE]
5836 # Get node resource and instance locks in shared mode; they are not used
5837 # for anything but read-only access
5838 self.share_locks[locking.LEVEL_NODE_RES] = 1
5839 self.share_locks[locking.LEVEL_INSTANCE] = 1
5841 if self.lock_instances:
5842 self.needed_locks[locking.LEVEL_INSTANCE] = \
5843 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5845 def BuildHooksEnv(self):
5846 """Build hooks env.
5848 This runs on the master node.
5850 """
5851 return {
5852 "OP_TARGET": self.op.node_name,
5853 "MASTER_CANDIDATE": str(self.op.master_candidate),
5854 "OFFLINE": str(self.op.offline),
5855 "DRAINED": str(self.op.drained),
5856 "MASTER_CAPABLE": str(self.op.master_capable),
5857 "VM_CAPABLE": str(self.op.vm_capable),
5860 def BuildHooksNodes(self):
5861 """Build hooks nodes.
5864 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5867 def CheckPrereq(self):
5868 """Check prerequisites.
5870 This only checks the instance list against the existing names.
5872 """
5873 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5875 if self.lock_instances:
5876 affected_instances = \
5877 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5879 # Verify instance locks
5880 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5881 wanted_instances = frozenset(affected_instances.keys())
5882 if wanted_instances - owned_instances:
5883 raise errors.OpPrereqError("Instances affected by changing node %s's"
5884 " secondary IP address have changed since"
5885 " locks were acquired, wanted '%s', have"
5886 " '%s'; retry the operation" %
5888 utils.CommaJoin(wanted_instances),
5889 utils.CommaJoin(owned_instances)),
5892 affected_instances = None
5894 if (self.op.master_candidate is not None or
5895 self.op.drained is not None or
5896 self.op.offline is not None):
5897 # we can't change the master's node flags
5898 if self.op.node_name == self.cfg.GetMasterNode():
5899 raise errors.OpPrereqError("The master role can be changed"
5900 " only via master-failover",
5903 if self.op.master_candidate and not node.master_capable:
5904 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5905 " it a master candidate" % node.name,
5908 if self.op.vm_capable is False:
5909 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5910 if ipri or isec:
5911 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5912 " the vm_capable flag" % node.name,
5913 errors.ECODE_STATE)
5915 if node.master_candidate and self.might_demote and not self.lock_all:
5916 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5917 # check if after removing the current node, we're missing master
5918 # candidates
5919 (mc_remaining, mc_should, _) = \
5920 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5921 if mc_remaining < mc_should:
5922 raise errors.OpPrereqError("Not enough master candidates, please"
5923 " pass auto promote option to allow"
5924 " promotion (--auto-promote or RAPI"
5925 " auto_promote=True)", errors.ECODE_STATE)
5927 self.old_flags = old_flags = (node.master_candidate,
5928 node.drained, node.offline)
5929 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5930 self.old_role = old_role = self._F2R[old_flags]
5932 # Check for ineffective changes
5933 for attr in self._FLAGS:
5934 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
5935 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5936 setattr(self.op, attr, None)
5938 # Past this point, any flag change to False means a transition
5939 # away from the respective state, as only real changes are kept
5941 # TODO: We might query the real power state if it supports OOB
5942 if _SupportsOob(self.cfg, node):
5943 if self.op.offline is False and not (node.powered or
5944 self.op.powered is True):
5945 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5946 " offline status can be reset") %
5947 self.op.node_name, errors.ECODE_STATE)
5948 elif self.op.powered is not None:
5949 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5950 " as it does not support out-of-band"
5951 " handling") % self.op.node_name,
5954 # If we're being deofflined/drained, we'll MC ourself if needed
5955 if (self.op.drained is False or self.op.offline is False or
5956 (self.op.master_capable and not node.master_capable)):
5957 if _DecideSelfPromotion(self):
5958 self.op.master_candidate = True
5959 self.LogInfo("Auto-promoting node to master candidate")
5961 # If we're no longer master capable, we'll demote ourselves from MC
5962 if self.op.master_capable is False and node.master_candidate:
5963 self.LogInfo("Demoting from master candidate")
5964 self.op.master_candidate = False
5966 # Compute new role
5967 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5968 if self.op.master_candidate:
5969 new_role = self._ROLE_CANDIDATE
5970 elif self.op.drained:
5971 new_role = self._ROLE_DRAINED
5972 elif self.op.offline:
5973 new_role = self._ROLE_OFFLINE
5974 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5975 # False is still in new flags, which means we're un-setting (the
5976 # current) flag
5977 new_role = self._ROLE_REGULAR
5978 else: # no new flags, nothing, keep old role
5979 new_role = old_role
5981 self.new_role = new_role
5983 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5984 # Trying to transition out of offline status
5985 result = self.rpc.call_version([node.name])[node.name]
5986 if result.fail_msg:
5987 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5988 " to report its version: %s" %
5989 (node.name, result.fail_msg),
5990 errors.ECODE_STATE)
5991 else:
5992 self.LogWarning("Transitioning node from offline to online state"
5993 " without using re-add. Please make sure the node"
5994 " is healthy!")
5996 # When changing the secondary ip, verify if this is a single-homed to
5997 # multi-homed transition or vice versa, and apply the relevant
5998 # restrictions.
5999 if self.op.secondary_ip:
6000 # Ok even without locking, because this can't be changed by any LU
6001 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6002 master_singlehomed = master.secondary_ip == master.primary_ip
6003 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6004 if self.op.force and node.name == master.name:
6005 self.LogWarning("Transitioning from single-homed to multi-homed"
6006 " cluster. All nodes will require a secondary ip.")
6008 raise errors.OpPrereqError("Changing the secondary ip on a"
6009 " single-homed cluster requires the"
6010 " --force option to be passed, and the"
6011 " target node to be the master",
6013 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6014 if self.op.force and node.name == master.name:
6015 self.LogWarning("Transitioning from multi-homed to single-homed"
6016 " cluster. Secondary IPs will have to be removed.")
6018 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6019 " same as the primary IP on a multi-homed"
6020 " cluster, unless the --force option is"
6021 " passed, and the target node is the"
6022 " master", errors.ECODE_INVAL)
6024 assert not (frozenset(affected_instances) -
6025 self.owned_locks(locking.LEVEL_INSTANCE))
6027 if node.offline:
6028 if affected_instances:
6029 msg = ("Cannot change secondary IP address: offline node has"
6030 " instances (%s) configured to use it" %
6031 utils.CommaJoin(affected_instances.keys()))
6032 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6033 else:
6034 # On online nodes, check that no instances are running, and that
6035 # the node has the new ip and we can reach it.
6036 for instance in affected_instances.values():
6037 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6038 msg="cannot change secondary ip")
6040 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6041 if master.name != node.name:
6042 # check reachability from master secondary ip to new secondary ip
6043 if not netutils.TcpPing(self.op.secondary_ip,
6044 constants.DEFAULT_NODED_PORT,
6045 source=master.secondary_ip):
6046 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6047 " based ping to node daemon port",
6048 errors.ECODE_ENVIRON)
6050 if self.op.ndparams:
6051 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6052 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6053 self.new_ndparams = new_ndparams
6055 if self.op.hv_state:
6056 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6057 self.node.hv_state_static)
6059 if self.op.disk_state:
6060 self.new_disk_state = \
6061 _MergeAndVerifyDiskState(self.op.disk_state,
6062 self.node.disk_state_static)
6064 def Exec(self, feedback_fn):
6065 """Modifies a node.
6067 """
6068 node = self.node
6069 old_role = self.old_role
6070 new_role = self.new_role
6072 result = []
6074 if self.op.ndparams:
6075 node.ndparams = self.new_ndparams
6077 if self.op.powered is not None:
6078 node.powered = self.op.powered
6080 if self.op.hv_state:
6081 node.hv_state_static = self.new_hv_state
6083 if self.op.disk_state:
6084 node.disk_state_static = self.new_disk_state
6086 for attr in ["master_capable", "vm_capable"]:
6087 val = getattr(self.op, attr)
6088 if val is not None:
6089 setattr(node, attr, val)
6090 result.append((attr, str(val)))
6092 if new_role != old_role:
6093 # Tell the node to demote itself, if no longer MC and not offline
6094 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6095 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6096 if msg:
6097 self.LogWarning("Node failed to demote itself: %s", msg)
6099 new_flags = self._R2F[new_role]
6100 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6101 if of != nf:
6102 result.append((desc, str(nf)))
6103 (node.master_candidate, node.drained, node.offline) = new_flags
6105 # we locked all nodes, we adjust the CP before updating this node
6106 if self.lock_all:
6107 _AdjustCandidatePool(self, [node.name])
6109 if self.op.secondary_ip:
6110 node.secondary_ip = self.op.secondary_ip
6111 result.append(("secondary_ip", self.op.secondary_ip))
6113 # this will trigger configuration file update, if needed
6114 self.cfg.Update(node, feedback_fn)
6116 # this will trigger job queue propagation or cleanup if the mc
6117 # flag changed
6118 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6119 self.context.ReaddNode(node)
6121 return result
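# A minimal illustrative sketch (hypothetical, mirroring but not reusing
# the class attributes above): _F2R and _R2F are exact inverses, so any
# (master_candidate, drained, offline) tuple maps to a role and back
# unchanged.
def _ExampleRoleRoundTrip():
  """Hypothetical demo of the flag/role mapping idea in LUNodeSetParams."""
  (candidate, drained, offline, regular) = range(4)
  f2r = {
    (True, False, False): candidate,
    (False, True, False): drained,
    (False, False, True): offline,
    (False, False, False): regular,
    }
  r2f = dict((v, k) for k, v in f2r.items())
  # every flag tuple survives the round trip through its role
  return all(r2f[f2r[flags]] == flags for flags in f2r)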
6124 class LUNodePowercycle(NoHooksLU):
6125 """Powercycles a node.
6130 def CheckArguments(self):
6131 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6132 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6133 raise errors.OpPrereqError("The node is the master and the force"
6134 " parameter was not set",
6137 def ExpandNames(self):
6138 """Locking for PowercycleNode.
6140 This is a last-resort option and shouldn't block on other
6141 jobs. Therefore, we grab no locks.
6143 """
6144 self.needed_locks = {}
6146 def Exec(self, feedback_fn):
6147 """Reboots a node.
6149 """
6150 result = self.rpc.call_node_powercycle(self.op.node_name,
6151 self.cfg.GetHypervisorType())
6152 result.Raise("Failed to schedule the reboot")
6153 return result.payload
6156 class LUClusterQuery(NoHooksLU):
6157 """Query cluster configuration.
6162 def ExpandNames(self):
6163 self.needed_locks = {}
6165 def Exec(self, feedback_fn):
6166 """Return cluster config.
6169 cluster = self.cfg.GetClusterInfo()
6170 os_hvp = {}
6172 # Filter just for enabled hypervisors
6173 for os_name, hv_dict in cluster.os_hvp.items():
6174 os_hvp[os_name] = {}
6175 for hv_name, hv_params in hv_dict.items():
6176 if hv_name in cluster.enabled_hypervisors:
6177 os_hvp[os_name][hv_name] = hv_params
6179 # Convert ip_family to ip_version
6180 primary_ip_version = constants.IP4_VERSION
6181 if cluster.primary_ip_family == netutils.IP6Address.family:
6182 primary_ip_version = constants.IP6_VERSION
6184 result = {
6185 "software_version": constants.RELEASE_VERSION,
6186 "protocol_version": constants.PROTOCOL_VERSION,
6187 "config_version": constants.CONFIG_VERSION,
6188 "os_api_version": max(constants.OS_API_VERSIONS),
6189 "export_version": constants.EXPORT_VERSION,
6190 "architecture": runtime.GetArchInfo(),
6191 "name": cluster.cluster_name,
6192 "master": cluster.master_node,
6193 "default_hypervisor": cluster.primary_hypervisor,
6194 "enabled_hypervisors": cluster.enabled_hypervisors,
6195 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6196 for hypervisor_name in cluster.enabled_hypervisors]),
6197 "os_hvp": os_hvp,
6198 "beparams": cluster.beparams,
6199 "osparams": cluster.osparams,
6200 "ipolicy": cluster.ipolicy,
6201 "nicparams": cluster.nicparams,
6202 "ndparams": cluster.ndparams,
6203 "diskparams": cluster.diskparams,
6204 "candidate_pool_size": cluster.candidate_pool_size,
6205 "master_netdev": cluster.master_netdev,
6206 "master_netmask": cluster.master_netmask,
6207 "use_external_mip_script": cluster.use_external_mip_script,
6208 "volume_group_name": cluster.volume_group_name,
6209 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6210 "file_storage_dir": cluster.file_storage_dir,
6211 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6212 "maintain_node_health": cluster.maintain_node_health,
6213 "ctime": cluster.ctime,
6214 "mtime": cluster.mtime,
6215 "uuid": cluster.uuid,
6216 "tags": list(cluster.GetTags()),
6217 "uid_pool": cluster.uid_pool,
6218 "default_iallocator": cluster.default_iallocator,
6219 "reserved_lvs": cluster.reserved_lvs,
6220 "primary_ip_version": primary_ip_version,
6221 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6222 "hidden_os": cluster.hidden_os,
6223 "blacklisted_os": cluster.blacklisted_os,
6229 class LUClusterConfigQuery(NoHooksLU):
6230 """Return configuration values.
6235 def CheckArguments(self):
6236 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6238 def ExpandNames(self):
6239 self.cq.ExpandNames(self)
6241 def DeclareLocks(self, level):
6242 self.cq.DeclareLocks(self, level)
6244 def Exec(self, feedback_fn):
6245 result = self.cq.OldStyleQuery(self)
6247 assert len(result) == 1
6249 return result[0]
6252 class _ClusterQuery(_QueryBase):
6253 FIELDS = query.CLUSTER_FIELDS
6255 #: Do not sort (there is only one item)
6256 SORT_FIELD = None
6258 def ExpandNames(self, lu):
6259 lu.needed_locks = {}
6261 # The following variables interact with _QueryBase._GetNames
6262 self.wanted = locking.ALL_SET
6263 self.do_locking = self.use_locking
6265 if self.do_locking:
6266 raise errors.OpPrereqError("Can not use locking for cluster queries",
6267 errors.ECODE_INVAL)
6269 def DeclareLocks(self, lu, level):
6270 pass
6272 def _GetQueryData(self, lu):
6273 """Computes the list of nodes and their attributes.
6276 # Locking is not used
6277 assert not (compat.any(lu.glm.is_owned(level)
6278 for level in locking.LEVELS
6279 if level != locking.LEVEL_CLUSTER) or
6280 self.do_locking or self.use_locking)
6282 if query.CQ_CONFIG in self.requested_data:
6283 cluster = lu.cfg.GetClusterInfo()
6284 else:
6285 cluster = NotImplemented
6287 if query.CQ_QUEUE_DRAINED in self.requested_data:
6288 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6289 else:
6290 drain_flag = NotImplemented
6292 if query.CQ_WATCHER_PAUSE in self.requested_data:
6293 watcher_pause = utils.ReadWatcherPauseFile(pathutils.WATCHER_PAUSEFILE)
6294 else:
6295 watcher_pause = NotImplemented
6297 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6300 class LUInstanceActivateDisks(NoHooksLU):
6301 """Bring up an instance's disks.
6306 def ExpandNames(self):
6307 self._ExpandAndLockInstance()
6308 self.needed_locks[locking.LEVEL_NODE] = []
6309 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6311 def DeclareLocks(self, level):
6312 if level == locking.LEVEL_NODE:
6313 self._LockInstancesNodes()
6315 def CheckPrereq(self):
6316 """Check prerequisites.
6318 This checks that the instance is in the cluster.
6320 """
6321 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6322 assert self.instance is not None, \
6323 "Cannot retrieve locked instance %s" % self.op.instance_name
6324 _CheckNodeOnline(self, self.instance.primary_node)
6326 def Exec(self, feedback_fn):
6327 """Activate the disks.
6330 disks_ok, disks_info = \
6331 _AssembleInstanceDisks(self, self.instance,
6332 ignore_size=self.op.ignore_size)
6333 if not disks_ok:
6334 raise errors.OpExecError("Cannot activate block devices")
6336 if self.op.wait_for_sync:
6337 if not _WaitForSync(self, self.instance):
6338 raise errors.OpExecError("Some disks of the instance are degraded!")
6343 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6345 """Prepare the block devices for an instance.
6347 This sets up the block devices on all nodes.
6349 @type lu: L{LogicalUnit}
6350 @param lu: the logical unit on whose behalf we execute
6351 @type instance: L{objects.Instance}
6352 @param instance: the instance for whose disks we assemble
6353 @type disks: list of L{objects.Disk} or None
6354 @param disks: which disks to assemble (or all, if None)
6355 @type ignore_secondaries: boolean
6356 @param ignore_secondaries: if true, errors on secondary nodes
6357 won't result in an error return from the function
6358 @type ignore_size: boolean
6359 @param ignore_size: if true, the current known size of the disk
6360 will not be used during the disk activation, useful for cases
6361 when the size is wrong
6362 @return: False if the operation failed, otherwise a list of
6363 (host, instance_visible_name, node_visible_name)
6364 with the mapping from node devices to instance devices
6366 """
6367 device_info = []
6368 disks_ok = True
6369 iname = instance.name
6370 disks = _ExpandCheckDisks(instance, disks)
6372 # With the two passes mechanism we try to reduce the window of
6373 # opportunity for the race condition of switching DRBD to primary
6374 # before handshaking occured, but we do not eliminate it
6376 # The proper fix would be to wait (with some limits) until the
6377 # connection has been made and drbd transitions from WFConnection
6378 # into any other network-connected state (Connected, SyncTarget,
6379 # SyncSource, etc.)
6381 # 1st pass, assemble on all nodes in secondary mode
6382 for idx, inst_disk in enumerate(disks):
6383 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6384 if ignore_size:
6385 node_disk = node_disk.Copy()
6386 node_disk.UnsetSize()
6387 lu.cfg.SetDiskID(node_disk, node)
6388 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6389 False, idx)
6390 msg = result.fail_msg
6391 if msg:
6392 is_offline_secondary = (node in instance.secondary_nodes and
6393 result.offline)
6394 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6395 " (is_primary=False, pass=1): %s",
6396 inst_disk.iv_name, node, msg)
6397 if not (ignore_secondaries or is_offline_secondary):
6398 disks_ok = False
6400 # FIXME: race condition on drbd migration to primary
6402 # 2nd pass, do only the primary node
6403 for idx, inst_disk in enumerate(disks):
6404 dev_path = None
6406 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6407 if node != instance.primary_node:
6408 continue
6409 if ignore_size:
6410 node_disk = node_disk.Copy()
6411 node_disk.UnsetSize()
6412 lu.cfg.SetDiskID(node_disk, node)
6413 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6414 True, idx)
6415 msg = result.fail_msg
6416 if msg:
6417 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6418 " (is_primary=True, pass=2): %s",
6419 inst_disk.iv_name, node, msg)
6420 disks_ok = False
6421 else:
6422 dev_path = result.payload
6424 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6426 # leave the disks configured for the primary node
6427 # this is a workaround that would be fixed better by
6428 # improving the logical/physical id handling
6429 for disk in disks:
6430 lu.cfg.SetDiskID(disk, instance.primary_node)
6432 return disks_ok, device_info
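# A minimal illustrative sketch (hypothetical stand-in, no RPCs): the
# two-pass structure above first touches every node in secondary mode and
# only then the primary, collecting (node, disk-name, path) tuples for the
# primary pass; the disk names and path format below are invented.
def _ExampleTwoPassOrder(disk_names, primary, nodes):
  """Hypothetical model of the pass ordering in _AssembleInstanceDisks."""
  device_info = []
  for name in disk_names:       # 1st pass: all nodes, secondary mode
    for node in nodes:
      pass                      # the real code calls blockdev_assemble here
  for name in disk_names:       # 2nd pass: primary node only
    if primary in nodes:
      device_info.append((primary, name, "/dev/example/%s" % name))
  return device_info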
6435 def _StartInstanceDisks(lu, instance, force):
6436 """Start the disks of an instance.
6439 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6440 ignore_secondaries=force)
6442 _ShutdownInstanceDisks(lu, instance)
6443 if force is not None and not force:
6444 lu.proc.LogWarning("", hint="If the message above refers to a"
6445 " secondary node,"
6446 " you can retry the operation using '--force'.")
6447 raise errors.OpExecError("Disk consistency error")
6450 class LUInstanceDeactivateDisks(NoHooksLU):
6451 """Shutdown an instance's disks.
6456 def ExpandNames(self):
6457 self._ExpandAndLockInstance()
6458 self.needed_locks[locking.LEVEL_NODE] = []
6459 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6461 def DeclareLocks(self, level):
6462 if level == locking.LEVEL_NODE:
6463 self._LockInstancesNodes()
6465 def CheckPrereq(self):
6466 """Check prerequisites.
6468 This checks that the instance is in the cluster.
6470 """
6471 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6472 assert self.instance is not None, \
6473 "Cannot retrieve locked instance %s" % self.op.instance_name
6475 def Exec(self, feedback_fn):
6476 """Deactivate the disks
6479 instance = self.instance
6481 _ShutdownInstanceDisks(self, instance)
6483 _SafeShutdownInstanceDisks(self, instance)
6486 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6487 """Shutdown block devices of an instance.
6489 This function checks if an instance is running, before calling
6490 _ShutdownInstanceDisks.
6492 """
6493 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6494 _ShutdownInstanceDisks(lu, instance, disks=disks)
6497 def _ExpandCheckDisks(instance, disks):
6498 """Return the instance disks selected by the disks list
6500 @type disks: list of L{objects.Disk} or None
6501 @param disks: selected disks
6502 @rtype: list of L{objects.Disk}
6503 @return: selected instance disks to act on
6505 """
6506 if disks is None:
6507 return instance.disks
6508 else:
6509 if not set(disks).issubset(instance.disks):
6510 raise errors.ProgrammerError("Can only act on disks belonging to the"
6511 " target instance")
6512 return disks
6515 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6516 """Shutdown block devices of an instance.
6518 This does the shutdown on all nodes of the instance.
6520 Errors on the primary node are ignored only if C{ignore_primary} is true.
6523 """
6524 all_result = True
6525 disks = _ExpandCheckDisks(instance, disks)
6527 for disk in disks:
6528 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6529 lu.cfg.SetDiskID(top_disk, node)
6530 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6531 msg = result.fail_msg
6532 if msg:
6533 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6534 disk.iv_name, node, msg)
6535 if ((node == instance.primary_node and not ignore_primary) or
6536 (node != instance.primary_node and not result.offline)):
6537 all_result = False
6539 return all_result
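# A minimal illustrative sketch (hypothetical predicate): the aggregation
# above counts a failed shutdown against the result only when it happens on
# the primary node (and ignore_primary is unset) or on an online secondary.
def _ExampleShutdownFailureCounts(node, primary, ignore_primary, offline):
  """Hypothetical: does a failure on C{node} force a False result?"""
  if node == primary:
    return not ignore_primary
  return not offline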
6541 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6542 """Checks if a node has enough free memory.
6544 This function checks if a given node has the needed amount of free
6545 memory. In case the node has less memory or we cannot get the
6546 information from the node, this function raises an OpPrereqError
6547 exception.
6549 @type lu: C{LogicalUnit}
6550 @param lu: a logical unit from which we get configuration data
6551 @type node: C{str}
6552 @param node: the node to check
6553 @type reason: C{str}
6554 @param reason: string to use in the error message
6555 @type requested: C{int}
6556 @param requested: the amount of memory in MiB to check for
6557 @type hypervisor_name: C{str}
6558 @param hypervisor_name: the hypervisor to ask for memory stats
6559 @rtype: C{int}
6560 @return: node current free memory
6561 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6562 we cannot check the node
6564 """
6565 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6566 nodeinfo[node].Raise("Can't get data from node %s" % node,
6567 prereq=True, ecode=errors.ECODE_ENVIRON)
6568 (_, _, (hv_info, )) = nodeinfo[node].payload
6570 free_mem = hv_info.get("memory_free", None)
6571 if not isinstance(free_mem, int):
6572 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6573 " was '%s'" % (node, free_mem),
6574 errors.ECODE_ENVIRON)
6575 if requested > free_mem:
6576 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6577 " needed %s MiB, available %s MiB" %
6578 (node, reason, requested, free_mem),
6579 errors.ECODE_NORES)
6581 return free_mem
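# A minimal illustrative sketch (hypothetical, RPC replaced by a plain
# dict): the core of the memory check is validating that "memory_free" is
# an integer number of MiB before comparing it against the request.
def _ExampleHasEnoughMemory(hv_info, requested):
  """Hypothetical pure version of the _CheckNodeFreeMemory comparison."""
  free_mem = hv_info.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise ValueError("Can't compute free memory, result was '%s'" % free_mem)
  return requested <= free_mem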
6583 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6584 """Checks if nodes have enough free disk space in the all VGs.
6586 This function checks if all given nodes have the needed amount of
6587 free disk. In case any node has less disk or we cannot get the
6588 information from the node, this function raises an OpPrereqError
6589 exception.
6591 @type lu: C{LogicalUnit}
6592 @param lu: a logical unit from which we get configuration data
6593 @type nodenames: C{list}
6594 @param nodenames: the list of node names to check
6595 @type req_sizes: C{dict}
6596 @param req_sizes: the hash of vg and corresponding amount of disk in
6597 MiB to check for
6598 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6599 or we cannot check the node
6601 """
6602 for vg, req_size in req_sizes.items():
6603 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
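# A minimal illustrative sketch (hypothetical): req_sizes maps volume group
# names to space in MiB, e.g. {"xenvg": 10240}; the loop above applies the
# per-VG check to each entry, so any single VG falling short fails the
# whole prerequisite.
def _ExamplePerVGCheck(free_by_vg, req_sizes):
  """Hypothetical pure version of the per-VG requirement loop."""
  for vg, req_size in req_sizes.items():
    if free_by_vg.get(vg, 0) < req_size:
      return False
  return True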
6606 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6607 """Checks if nodes have enough free disk space in the specified VG.
6609 This function checks if all given nodes have the needed amount of
6610 free disk. In case any node has less disk or we cannot get the
6611 information from the node, this function raises an OpPrereqError
6612 exception.
6614 @type lu: C{LogicalUnit}
6615 @param lu: a logical unit from which we get configuration data
6616 @type nodenames: C{list}
6617 @param nodenames: the list of node names to check
6618 @type vg: C{str}
6619 @param vg: the volume group to check
6620 @type requested: C{int}
6621 @param requested: the amount of disk in MiB to check for
6622 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6623 or we cannot check the node
6625 """
6626 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6627 for node in nodenames:
6628 info = nodeinfo[node]
6629 info.Raise("Cannot get current information from node %s" % node,
6630 prereq=True, ecode=errors.ECODE_ENVIRON)
6631 (_, (vg_info, ), _) = info.payload
6632 vg_free = vg_info.get("vg_free", None)
6633 if not isinstance(vg_free, int):
6634 raise errors.OpPrereqError("Can't compute free disk space on node"
6635 " %s for vg %s, result was '%s'" %
6636 (node, vg, vg_free), errors.ECODE_ENVIRON)
6637 if requested > vg_free:
6638 raise errors.OpPrereqError("Not enough disk space on target node %s"
6639 " vg %s: required %d MiB, available %d MiB" %
6640 (node, vg, requested, vg_free),
6641 errors.ECODE_NORES)
6644 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6645 """Checks if nodes have enough physical CPUs
6647 This function checks if all given nodes have the needed number of
6648 physical CPUs. In case any node has less CPUs or we cannot get the
6649 information from the node, this function raises an OpPrereqError
6650 exception.
6652 @type lu: C{LogicalUnit}
6653 @param lu: a logical unit from which we get configuration data
6654 @type nodenames: C{list}
6655 @param nodenames: the list of node names to check
6656 @type requested: C{int}
6657 @param requested: the minimum acceptable number of physical CPUs
6658 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6659 or we cannot check the node
6661 """
6662 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6663 for node in nodenames:
6664 info = nodeinfo[node]
6665 info.Raise("Cannot get current information from node %s" % node,
6666 prereq=True, ecode=errors.ECODE_ENVIRON)
6667 (_, _, (hv_info, )) = info.payload
6668 num_cpus = hv_info.get("cpu_total", None)
6669 if not isinstance(num_cpus, int):
6670 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6671 " on node %s, result was '%s'" %
6672 (node, num_cpus), errors.ECODE_ENVIRON)
6673 if requested > num_cpus:
6674 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6675 "required" % (node, num_cpus, requested),
6679 class LUInstanceStartup(LogicalUnit):
6680 """Starts an instance.
6683 HPATH = "instance-start"
6684 HTYPE = constants.HTYPE_INSTANCE
6685 REQ_BGL = False
6687 def CheckArguments(self):
6688 # extra beparams
6689 if self.op.beparams:
6690 # fill the beparams dict
6691 objects.UpgradeBeParams(self.op.beparams)
6692 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6694 def ExpandNames(self):
6695 self._ExpandAndLockInstance()
6696 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6698 def DeclareLocks(self, level):
6699 if level == locking.LEVEL_NODE_RES:
6700 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6702 def BuildHooksEnv(self):
6703 """Build hooks env.
6705 This runs on master, primary and secondary nodes of the instance.
6707 """
6708 env = {
6709 "FORCE": self.op.force,
6710 }
6712 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6714 return env
6716 def BuildHooksNodes(self):
6717 """Build hooks nodes.
6720 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6723 def CheckPrereq(self):
6724 """Check prerequisites.
6726 This checks that the instance is in the cluster.
6728 """
6729 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6730 assert self.instance is not None, \
6731 "Cannot retrieve locked instance %s" % self.op.instance_name
6733 # extra hvparams
6734 if self.op.hvparams:
6735 # check hypervisor parameter syntax (locally)
6736 cluster = self.cfg.GetClusterInfo()
6737 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6738 filled_hvp = cluster.FillHV(instance)
6739 filled_hvp.update(self.op.hvparams)
6740 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6741 hv_type.CheckParameterSyntax(filled_hvp)
6742 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6744 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6746 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6748 if self.primary_offline and self.op.ignore_offline_nodes:
6749 self.proc.LogWarning("Ignoring offline primary node")
6751 if self.op.hvparams or self.op.beparams:
6752 self.proc.LogWarning("Overridden parameters are ignored")
6753 else:
6754 _CheckNodeOnline(self, instance.primary_node)
6756 bep = self.cfg.GetClusterInfo().FillBE(instance)
6757 bep.update(self.op.beparams)
6759 # check bridges existence
6760 _CheckInstanceBridgesExist(self, instance)
6762 remote_info = self.rpc.call_instance_info(instance.primary_node,
6763 instance.name,
6764 instance.hypervisor)
6765 remote_info.Raise("Error checking node %s" % instance.primary_node,
6766 prereq=True, ecode=errors.ECODE_ENVIRON)
6767 if not remote_info.payload: # not running already
6768 _CheckNodeFreeMemory(self, instance.primary_node,
6769 "starting instance %s" % instance.name,
6770 bep[constants.BE_MINMEM], instance.hypervisor)
6772 def Exec(self, feedback_fn):
6773 """Start the instance.
6776 instance = self.instance
6777 force = self.op.force
6779 if not self.op.no_remember:
6780 self.cfg.MarkInstanceUp(instance.name)
6782 if self.primary_offline:
6783 assert self.op.ignore_offline_nodes
6784 self.proc.LogInfo("Primary node offline, marked instance as started")
6785 else:
6786 node_current = instance.primary_node
6788 _StartInstanceDisks(self, instance, force)
6790 result = \
6791 self.rpc.call_instance_start(node_current,
6792 (instance, self.op.hvparams,
6793 self.op.beparams),
6794 self.op.startup_paused)
6795 msg = result.fail_msg
6796 if msg:
6797 _ShutdownInstanceDisks(self, instance)
6798 raise errors.OpExecError("Could not start instance: %s" % msg)
6801 class LUInstanceReboot(LogicalUnit):
6802 """Reboot an instance.
6805 HPATH = "instance-reboot"
6806 HTYPE = constants.HTYPE_INSTANCE
6807 REQ_BGL = False
6809 def ExpandNames(self):
6810 self._ExpandAndLockInstance()
6812 def BuildHooksEnv(self):
6813 """Build hooks env.
6815 This runs on master, primary and secondary nodes of the instance.
6817 """
6818 env = {
6819 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6820 "REBOOT_TYPE": self.op.reboot_type,
6821 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6824 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6828 def BuildHooksNodes(self):
6829 """Build hooks nodes.
6832 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6835 def CheckPrereq(self):
6836 """Check prerequisites.
6838 This checks that the instance is in the cluster.
6840 """
6841 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6842 assert self.instance is not None, \
6843 "Cannot retrieve locked instance %s" % self.op.instance_name
6844 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6845 _CheckNodeOnline(self, instance.primary_node)
6847 # check bridges existence
6848 _CheckInstanceBridgesExist(self, instance)
6850 def Exec(self, feedback_fn):
6851 """Reboot the instance.
6854 instance = self.instance
6855 ignore_secondaries = self.op.ignore_secondaries
6856 reboot_type = self.op.reboot_type
6858 remote_info = self.rpc.call_instance_info(instance.primary_node,
6859 instance.name,
6860 instance.hypervisor)
6861 remote_info.Raise("Error checking node %s" % instance.primary_node)
6862 instance_running = bool(remote_info.payload)
6864 node_current = instance.primary_node
6866 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6867 constants.INSTANCE_REBOOT_HARD]:
6868 for disk in instance.disks:
6869 self.cfg.SetDiskID(disk, node_current)
6870 result = self.rpc.call_instance_reboot(node_current, instance,
6871 reboot_type,
6872 self.op.shutdown_timeout)
6873 result.Raise("Could not reboot instance")
6874 else:
6875 if instance_running:
6876 result = self.rpc.call_instance_shutdown(node_current, instance,
6877 self.op.shutdown_timeout)
6878 result.Raise("Could not shutdown instance for full reboot")
6879 _ShutdownInstanceDisks(self, instance)
6880 else:
6881 self.LogInfo("Instance %s was already stopped, starting now",
6882 instance.name)
6883 _StartInstanceDisks(self, instance, ignore_secondaries)
6884 result = self.rpc.call_instance_start(node_current,
6885 (instance, None, None), False)
6886 msg = result.fail_msg
6887 if msg:
6888 _ShutdownInstanceDisks(self, instance)
6889 raise errors.OpExecError("Could not start instance for"
6890 " full reboot: %s" % msg)
6892 self.cfg.MarkInstanceUp(instance.name)
6895 class LUInstanceShutdown(LogicalUnit):
6896 """Shutdown an instance.
6899 HPATH = "instance-stop"
6900 HTYPE = constants.HTYPE_INSTANCE
6901 REQ_BGL = False
6903 def ExpandNames(self):
6904 self._ExpandAndLockInstance()
6906 def BuildHooksEnv(self):
6907 """Build hooks env.
6909 This runs on master, primary and secondary nodes of the instance.
6911 """
6912 env = _BuildInstanceHookEnvByObject(self, self.instance)
6913 env["TIMEOUT"] = self.op.timeout
6916 def BuildHooksNodes(self):
6917 """Build hooks nodes.
6920 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6923 def CheckPrereq(self):
6924 """Check prerequisites.
6926 This checks that the instance is in the cluster.
6928 """
6929 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6930 assert self.instance is not None, \
6931 "Cannot retrieve locked instance %s" % self.op.instance_name
6933 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6935 self.primary_offline = \
6936 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6938 if self.primary_offline and self.op.ignore_offline_nodes:
6939 self.proc.LogWarning("Ignoring offline primary node")
6940 else:
6941 _CheckNodeOnline(self, self.instance.primary_node)
6943 def Exec(self, feedback_fn):
6944 """Shutdown the instance.
6947 instance = self.instance
6948 node_current = instance.primary_node
6949 timeout = self.op.timeout
6951 if not self.op.no_remember:
6952 self.cfg.MarkInstanceDown(instance.name)
6954 if self.primary_offline:
6955 assert self.op.ignore_offline_nodes
6956 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6957 else:
6958 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6959 msg = result.fail_msg
6960 if msg:
6961 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6963 _ShutdownInstanceDisks(self, instance)
6966 class LUInstanceReinstall(LogicalUnit):
6967 """Reinstall an instance.
6970 HPATH = "instance-reinstall"
6971 HTYPE = constants.HTYPE_INSTANCE
6972 REQ_BGL = False
6974 def ExpandNames(self):
6975 self._ExpandAndLockInstance()
6977 def BuildHooksEnv(self):
6978 """Build hooks env.
6980 This runs on master, primary and secondary nodes of the instance.
6982 """
6983 return _BuildInstanceHookEnvByObject(self, self.instance)
6985 def BuildHooksNodes(self):
6986 """Build hooks nodes.
6989 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6992 def CheckPrereq(self):
6993 """Check prerequisites.
6995 This checks that the instance is in the cluster and is not running.
6997 """
6998 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6999 assert instance is not None, \
7000 "Cannot retrieve locked instance %s" % self.op.instance_name
7001 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7002 " offline, cannot reinstall")
7004 if instance.disk_template == constants.DT_DISKLESS:
7005 raise errors.OpPrereqError("Instance '%s' has no disks" %
7006 self.op.instance_name,
7007 errors.ECODE_INVAL)
7008 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7010 if self.op.os_type is not None:
7011 # OS verification
7012 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7013 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7014 instance_os = self.op.os_type
7015 else:
7016 instance_os = instance.os
7018 nodelist = list(instance.all_nodes)
7020 if self.op.osparams:
7021 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7022 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7023 self.os_inst = i_osdict # the new dict (without defaults)
7024 else:
7025 self.os_inst = {}
7027 self.instance = instance
7029 def Exec(self, feedback_fn):
7030 """Reinstall the instance.
7033 inst = self.instance
7035 if self.op.os_type is not None:
7036 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7037 inst.os = self.op.os_type
7038 # Write to configuration
7039 self.cfg.Update(inst, feedback_fn)
7041 _StartInstanceDisks(self, inst, None)
7042 try:
7043 feedback_fn("Running the instance OS create scripts...")
7044 # FIXME: pass debug option from opcode to backend
7045 result = self.rpc.call_instance_os_add(inst.primary_node,
7046 (inst, self.os_inst), True,
7047 self.op.debug_level)
7048 result.Raise("Could not install OS for instance %s on node %s" %
7049 (inst.name, inst.primary_node))
7050 finally:
7051 _ShutdownInstanceDisks(self, inst)
7054 class LUInstanceRecreateDisks(LogicalUnit):
7055 """Recreate an instance's missing disks.
7058 HPATH = "instance-recreate-disks"
7059 HTYPE = constants.HTYPE_INSTANCE
7060 REQ_BGL = False
7062 _MODIFYABLE = frozenset([
7063 constants.IDISK_SIZE,
7064 constants.IDISK_MODE,
7065 ])
7067 # New or changed disk parameters may have different semantics
7068 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7069 constants.IDISK_ADOPT,
7071 # TODO: Implement support changing VG while recreating
7072 constants.IDISK_VG,
7073 constants.IDISK_METAVG,
7074 ]))
7076 def _RunAllocator(self):
7077 """Run the allocator based on input opcode.
7080 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7083 # The allocator should actually run in "relocate" mode, but current
7084 # allocators don't support relocating all the nodes of an instance at
7085 # the same time. As a workaround we use "allocate" mode, but this is
7086 # suboptimal for two reasons:
7087 # - The instance name passed to the allocator is present in the list of
7088 # existing instances, so there could be a conflict within the
7089 # internal structures of the allocator. This doesn't happen with the
7090 # current allocators, but it's a liability.
7091 # - The allocator counts the resources used by the instance twice: once
7092 # because the instance exists already, and once because it tries to
7093 # allocate a new instance.
7094 # The allocator could choose some of the nodes on which the instance is
7095 # running, but that's not a problem. If the instance nodes are broken,
7096 # they should be already be marked as drained or offline, and hence
7097 # skipped by the allocator. If instance disks have been lost for other
7098 # reasons, then recreating the disks on the same nodes should be fine.
7099 disk_template = self.instance.disk_template
7100 spindle_use = be_full[constants.BE_SPINDLE_USE]
7101 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7102 disk_template=disk_template,
7103 tags=list(self.instance.GetTags()),
7104 os=self.instance.os,
7105 nics=[{}],
7106 vcpus=be_full[constants.BE_VCPUS],
7107 memory=be_full[constants.BE_MAXMEM],
7108 spindle_use=spindle_use,
7109 disks=[{constants.IDISK_SIZE: d.size,
7110 constants.IDISK_MODE: d.mode}
7111 for d in self.instance.disks],
7112 hypervisor=self.instance.hypervisor)
7113 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7115 ial.Run(self.op.iallocator)
7117 assert req.RequiredNodes() == len(self.instance.all_nodes)
7119 if not ial.success:
7120 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7121 " %s" % (self.op.iallocator, ial.info),
7124 self.op.nodes = ial.result
7125 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7126 self.op.instance_name, self.op.iallocator,
7127 utils.CommaJoin(ial.result))
7129 def CheckArguments(self):
7130 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7131 # Normalize and convert deprecated list of disk indices
7132 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7134 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7135 if duplicates:
7136 raise errors.OpPrereqError("Some disks have been specified more than"
7137 " once: %s" % utils.CommaJoin(duplicates),
7140 if self.op.iallocator and self.op.nodes:
7141 raise errors.OpPrereqError("Give either the iallocator or the new"
7142 " nodes, not both", errors.ECODE_INVAL)
7144 for (idx, params) in self.op.disks:
7145 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7146 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7147 if unsupported:
7148 raise errors.OpPrereqError("Parameters for disk %s try to change"
7149 " unmodifiable parameter(s): %s" %
7150 (idx, utils.CommaJoin(unsupported)),
7151 errors.ECODE_INVAL)
7153 def ExpandNames(self):
7154 self._ExpandAndLockInstance()
7155 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7156 if self.op.nodes:
7157 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7158 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7159 else:
7160 self.needed_locks[locking.LEVEL_NODE] = []
7161 if self.op.iallocator:
7162 # iallocator will select a new node in the same group
7163 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7164 self.needed_locks[locking.LEVEL_NODE_RES] = []
7166 def DeclareLocks(self, level):
7167 if level == locking.LEVEL_NODEGROUP:
7168 assert self.op.iallocator is not None
7169 assert not self.op.nodes
7170 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7171 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7172 # Lock the primary group used by the instance optimistically; this
7173 # requires going via the node before it's locked, requiring
7174 # verification later on
7175 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7176 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7178 elif level == locking.LEVEL_NODE:
7179 # If an allocator is used, then we lock all the nodes in the current
7180 # instance group, as we don't know yet which ones will be selected;
7181 # if we replace the nodes without using an allocator, locks are
7182 # already declared in ExpandNames; otherwise, we need to lock all the
7183 # instance nodes for disk re-creation
7184 if self.op.iallocator:
7185 assert not self.op.nodes
7186 assert not self.needed_locks[locking.LEVEL_NODE]
7187 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7189 # Lock member nodes of the group of the primary node
7190 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7191 self.needed_locks[locking.LEVEL_NODE].extend(
7192 self.cfg.GetNodeGroup(group_uuid).members)
7193 elif not self.op.nodes:
7194 self._LockInstancesNodes(primary_only=False)
7195 elif level == locking.LEVEL_NODE_RES:
7196 # Copy node locks
7197 self.needed_locks[locking.LEVEL_NODE_RES] = \
7198 self.needed_locks[locking.LEVEL_NODE][:]
7200 def BuildHooksEnv(self):
7201 """Build hooks env.
7203 This runs on master, primary and secondary nodes of the instance.
7205 """
7206 return _BuildInstanceHookEnvByObject(self, self.instance)
7208 def BuildHooksNodes(self):
7209 """Build hooks nodes.
7212 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7215 def CheckPrereq(self):
7216 """Check prerequisites.
7218 This checks that the instance is in the cluster and is not running.
7220 """
7221 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7222 assert instance is not None, \
7223 "Cannot retrieve locked instance %s" % self.op.instance_name
7225 if len(self.op.nodes) != len(instance.all_nodes):
7226 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7227 " %d replacement nodes were specified" %
7228 (instance.name, len(instance.all_nodes),
7229 len(self.op.nodes)),
7230 errors.ECODE_INVAL)
7231 assert instance.disk_template != constants.DT_DRBD8 or \
7232 len(self.op.nodes) == 2
7233 assert instance.disk_template != constants.DT_PLAIN or \
7234 len(self.op.nodes) == 1
7235 primary_node = self.op.nodes[0]
7236 else:
7237 primary_node = instance.primary_node
7238 if not self.op.iallocator:
7239 _CheckNodeOnline(self, primary_node)
7241 if instance.disk_template == constants.DT_DISKLESS:
7242 raise errors.OpPrereqError("Instance '%s' has no disks" %
7243 self.op.instance_name, errors.ECODE_INVAL)
7245 # Verify if node group locks are still correct
7246 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7247 if owned_groups:
7248 # Node group locks are acquired only for the primary node (and only
7249 # when the allocator is used)
7250 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7251 primary_only=True)
7253 # if we replace nodes *and* the old primary is offline, we don't
7254 # check the instance state
7255 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7256 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7257 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7258 msg="cannot recreate disks")
7261 self.disks = dict(self.op.disks)
7263 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7265 maxidx = max(self.disks.keys())
7266 if maxidx >= len(instance.disks):
7267 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7268 errors.ECODE_PARAMS)
7270 if ((self.op.nodes or self.op.iallocator) and
7271 sorted(self.disks.keys()) != range(len(instance.disks))):
7272 raise errors.OpPrereqError("Can't recreate disks partially and"
7273 " change the nodes at the same time",
7276 self.instance = instance
7278 if self.op.iallocator:
7279 self._RunAllocator()
7281 # Release unneeded node and node resource locks
7282 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7283 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7285 def Exec(self, feedback_fn):
7286 """Recreate the disks.
7289 instance = self.instance
7291 assert (self.owned_locks(locking.LEVEL_NODE) ==
7292 self.owned_locks(locking.LEVEL_NODE_RES))
7294 to_skip = []
7295 mods = [] # keeps track of needed changes
7297 for idx, disk in enumerate(instance.disks):
7298 try:
7299 changes = self.disks[idx]
7300 except KeyError:
7301 # Disk should not be recreated
7302 to_skip.append(idx)
7303 continue
7305 # update secondaries for disks, if needed
7306 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7307 # need to update the nodes and minors
7308 assert len(self.op.nodes) == 2
7309 assert len(disk.logical_id) == 6 # otherwise disk internals
7310 # have changed
7311 (_, _, old_port, _, _, old_secret) = disk.logical_id
7312 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7313 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7314 new_minors[0], new_minors[1], old_secret)
7315 assert len(disk.logical_id) == len(new_id)
7316 else:
7317 new_id = None
7319 mods.append((idx, new_id, changes))
7321 # now that we have passed all asserts above, we can apply the mods
7322 # in a single run (to avoid partial changes)
7323 for idx, new_id, changes in mods:
7324 disk = instance.disks[idx]
7325 if new_id is not None:
7326 assert disk.dev_type == constants.LD_DRBD8
7327 disk.logical_id = new_id
7328 if changes:
7329 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7330 mode=changes.get(constants.IDISK_MODE, None))
7332 # change primary node, if needed
7333 if self.op.nodes:
7334 instance.primary_node = self.op.nodes[0]
7335 self.LogWarning("Changing the instance's nodes, you will have to"
7336 " remove any disks left on the older nodes manually")
7339 self.cfg.Update(instance, feedback_fn)
7341 _CreateDisks(self, instance, to_skip=to_skip)
7344 class LUInstanceRename(LogicalUnit):
7345 """Rename an instance.
7348 HPATH = "instance-rename"
7349 HTYPE = constants.HTYPE_INSTANCE
7351 def CheckArguments(self):
7352 """Check arguments.
7354 """
7355 if self.op.ip_check and not self.op.name_check:
7356 # TODO: make the ip check more flexible and not depend on the name check
7357 raise errors.OpPrereqError("IP address check requires a name check",
7358 errors.ECODE_INVAL)
7360 def BuildHooksEnv(self):
7361 """Build hooks env.
7363 This runs on master, primary and secondary nodes of the instance.
7365 """
7366 env = _BuildInstanceHookEnvByObject(self, self.instance)
7367 env["INSTANCE_NEW_NAME"] = self.op.new_name
7370 def BuildHooksNodes(self):
7371 """Build hooks nodes.
7374 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7377 def CheckPrereq(self):
7378 """Check prerequisites.
7380 This checks that the instance is in the cluster and is not running.
7382 """
7383 self.op.instance_name = _ExpandInstanceName(self.cfg,
7384 self.op.instance_name)
7385 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7386 assert instance is not None
7387 _CheckNodeOnline(self, instance.primary_node)
7388 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7389 msg="cannot rename")
7390 self.instance = instance
7392 new_name = self.op.new_name
7393 if self.op.name_check:
7394 hostname = netutils.GetHostname(name=new_name)
7395 if hostname.name != new_name:
7396 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7397 hostname.name)
7398 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7399 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7400 " same as given hostname '%s'") %
7401 (hostname.name, self.op.new_name),
7402 errors.ECODE_INVAL)
7403 new_name = self.op.new_name = hostname.name
7404 if (self.op.ip_check and
7405 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7406 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7407 (hostname.ip, new_name),
7408 errors.ECODE_NOTUNIQUE)
7410 instance_list = self.cfg.GetInstanceList()
7411 if new_name in instance_list and new_name != instance.name:
7412 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7413 new_name, errors.ECODE_EXISTS)
7415 def Exec(self, feedback_fn):
7416 """Rename the instance.
7419 inst = self.instance
7420 old_name = inst.name
7422 rename_file_storage = False
7423 if (inst.disk_template in constants.DTS_FILEBASED and
7424 self.op.new_name != inst.name):
7425 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7426 rename_file_storage = True
7428 self.cfg.RenameInstance(inst.name, self.op.new_name)
7429 # Change the instance lock. This is definitely safe while we hold the BGL.
7430 # Otherwise the new lock would have to be added in acquired mode.
7432 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7433 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7435 # re-read the instance from the configuration after rename
7436 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7438 if rename_file_storage:
7439 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7440 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7441 old_file_storage_dir,
7442 new_file_storage_dir)
7443 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7444 " (but the instance has been renamed in Ganeti)" %
7445 (inst.primary_node, old_file_storage_dir,
7446 new_file_storage_dir))
7448 _StartInstanceDisks(self, inst, None)
7449 try:
7450 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7451 old_name, self.op.debug_level)
7452 msg = result.fail_msg
7453 if msg:
7454 msg = ("Could not run OS rename script for instance %s on node %s"
7455 " (but the instance has been renamed in Ganeti): %s" %
7456 (inst.name, inst.primary_node, msg))
7457 self.proc.LogWarning(msg)
7458 finally:
7459 _ShutdownInstanceDisks(self, inst)
7461 return inst.name
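# Illustrative note (paths hypothetical): for file-based disk templates the
# storage directory embeds the instance name, so a rename effectively moves
# e.g. /srv/ganeti/file-storage/old.example.com to
# /srv/ganeti/file-storage/new.example.com. Both directories are derived
# above with os.path.dirname(inst.disks[0].logical_id[1]), once before and
# once after the configuration rename, and the actual move is performed
# node-side by call_file_storage_dir_rename.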
7464 class LUInstanceRemove(LogicalUnit):
7465 """Remove an instance.
7467 """
7468 HPATH = "instance-remove"
7469 HTYPE = constants.HTYPE_INSTANCE
7472 def ExpandNames(self):
7473 self._ExpandAndLockInstance()
7474 self.needed_locks[locking.LEVEL_NODE] = []
7475 self.needed_locks[locking.LEVEL_NODE_RES] = []
7476 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7478 def DeclareLocks(self, level):
7479 if level == locking.LEVEL_NODE:
7480 self._LockInstancesNodes()
7481 elif level == locking.LEVEL_NODE_RES:
7483 self.needed_locks[locking.LEVEL_NODE_RES] = \
7484 self.needed_locks[locking.LEVEL_NODE][:]
7486 def BuildHooksEnv(self):
7487 """Build hooks env.
7489 This runs on master, primary and secondary nodes of the instance.
7491 """
7492 env = _BuildInstanceHookEnvByObject(self, self.instance)
7493 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7494 return env
7496 def BuildHooksNodes(self):
7497 """Build hooks nodes.
7500 nl = [self.cfg.GetMasterNode()]
7501 nl_post = list(self.instance.all_nodes) + nl
7502 return (nl, nl_post)
7504 def CheckPrereq(self):
7505 """Check prerequisites.
7507 This checks that the instance is in the cluster.
7509 """
7510 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7511 assert self.instance is not None, \
7512 "Cannot retrieve locked instance %s" % self.op.instance_name
7514 def Exec(self, feedback_fn):
7515 """Remove the instance.
7517 """
7518 instance = self.instance
7519 logging.info("Shutting down instance %s on node %s",
7520 instance.name, instance.primary_node)
7522 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7523 self.op.shutdown_timeout)
7524 msg = result.fail_msg
7525 if msg:
7526 if self.op.ignore_failures:
7527 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7528 else:
7529 raise errors.OpExecError("Could not shutdown instance %s on"
7530 " node %s: %s" %
7531 (instance.name, instance.primary_node, msg))
7533 assert (self.owned_locks(locking.LEVEL_NODE) ==
7534 self.owned_locks(locking.LEVEL_NODE_RES))
7535 assert not (set(instance.all_nodes) -
7536 self.owned_locks(locking.LEVEL_NODE)), \
7537 "Not owning correct locks"
7539 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7542 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7543 """Utility function to remove an instance.
7545 """
7546 logging.info("Removing block devices for instance %s", instance.name)
7548 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7549 if not ignore_failures:
7550 raise errors.OpExecError("Can't remove instance's disks")
7551 feedback_fn("Warning: can't remove instance's disks")
7553 logging.info("Removing instance %s out of cluster config", instance.name)
7555 lu.cfg.RemoveInstance(instance.name)
7557 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7558 "Instance lock removal conflict"
7560 # Remove lock for the instance
7561 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7564 class LUInstanceQuery(NoHooksLU):
7565 """Logical unit for querying instances.
7567 """
7568 # pylint: disable=W0142
7571 def CheckArguments(self):
7572 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7573 self.op.output_fields, self.op.use_locking)
7575 def ExpandNames(self):
7576 self.iq.ExpandNames(self)
7578 def DeclareLocks(self, level):
7579 self.iq.DeclareLocks(self, level)
7581 def Exec(self, feedback_fn):
7582 return self.iq.OldStyleQuery(self)
7585 class LUInstanceFailover(LogicalUnit):
7586 """Failover an instance.
7588 """
7589 HPATH = "instance-failover"
7590 HTYPE = constants.HTYPE_INSTANCE
7593 def CheckArguments(self):
7594 """Check the arguments.
7596 """
7597 self.iallocator = getattr(self.op, "iallocator", None)
7598 self.target_node = getattr(self.op, "target_node", None)
7600 def ExpandNames(self):
7601 self._ExpandAndLockInstance()
7603 if self.op.target_node is not None:
7604 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7606 self.needed_locks[locking.LEVEL_NODE] = []
7607 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7609 self.needed_locks[locking.LEVEL_NODE_RES] = []
7610 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7612 ignore_consistency = self.op.ignore_consistency
7613 shutdown_timeout = self.op.shutdown_timeout
7614 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7615 cleanup=False,
7616 failover=True,
7617 ignore_consistency=ignore_consistency,
7618 shutdown_timeout=shutdown_timeout,
7619 ignore_ipolicy=self.op.ignore_ipolicy)
7620 self.tasklets = [self._migrater]
7622 def DeclareLocks(self, level):
7623 if level == locking.LEVEL_NODE:
7624 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7625 if instance.disk_template in constants.DTS_EXT_MIRROR:
7626 if self.op.target_node is None:
7627 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7628 else:
7629 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7630 self.op.target_node]
7631 del self.recalculate_locks[locking.LEVEL_NODE]
7632 else:
7633 self._LockInstancesNodes()
7634 elif level == locking.LEVEL_NODE_RES:
7636 self.needed_locks[locking.LEVEL_NODE_RES] = \
7637 self.needed_locks[locking.LEVEL_NODE][:]
7639 def BuildHooksEnv(self):
7640 """Build hooks env.
7642 This runs on master, primary and secondary nodes of the instance.
7644 """
7645 instance = self._migrater.instance
7646 source_node = instance.primary_node
7647 target_node = self.op.target_node
7648 env = {
7649 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7650 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7651 "OLD_PRIMARY": source_node,
7652 "NEW_PRIMARY": target_node,
7653 }
7655 if instance.disk_template in constants.DTS_INT_MIRROR:
7656 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7657 env["NEW_SECONDARY"] = source_node
7658 else:
7659 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7661 env.update(_BuildInstanceHookEnvByObject(self, instance))
7663 return env
7665 def BuildHooksNodes(self):
7666 """Build hooks nodes.
7668 """
7669 instance = self._migrater.instance
7670 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7671 return (nl, nl + [instance.primary_node])
7674 class LUInstanceMigrate(LogicalUnit):
7675 """Migrate an instance.
7677 This is migration without shutting down, as opposed to failover,
7678 which is done with a shutdown.
7680 """
7681 HPATH = "instance-migrate"
7682 HTYPE = constants.HTYPE_INSTANCE
7685 def ExpandNames(self):
7686 self._ExpandAndLockInstance()
7688 if self.op.target_node is not None:
7689 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7691 self.needed_locks[locking.LEVEL_NODE] = []
7692 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7694 self.needed_locks[locking.LEVEL_NODE_RES] = []
7695 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7697 self._migrater = \
7698 TLMigrateInstance(self, self.op.instance_name,
7699 cleanup=self.op.cleanup,
7700 failover=False,
7701 fallback=self.op.allow_failover,
7702 allow_runtime_changes=self.op.allow_runtime_changes,
7703 ignore_ipolicy=self.op.ignore_ipolicy)
7704 self.tasklets = [self._migrater]
7706 def DeclareLocks(self, level):
7707 if level == locking.LEVEL_NODE:
7708 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7709 if instance.disk_template in constants.DTS_EXT_MIRROR:
7710 if self.op.target_node is None:
7711 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7712 else:
7713 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7714 self.op.target_node]
7715 del self.recalculate_locks[locking.LEVEL_NODE]
7716 else:
7717 self._LockInstancesNodes()
7718 elif level == locking.LEVEL_NODE_RES:
7720 self.needed_locks[locking.LEVEL_NODE_RES] = \
7721 self.needed_locks[locking.LEVEL_NODE][:]
7723 def BuildHooksEnv(self):
7724 """Build hooks env.
7726 This runs on master, primary and secondary nodes of the instance.
7728 """
7729 instance = self._migrater.instance
7730 source_node = instance.primary_node
7731 target_node = self.op.target_node
7732 env = _BuildInstanceHookEnvByObject(self, instance)
7733 env.update({
7734 "MIGRATE_LIVE": self._migrater.live,
7735 "MIGRATE_CLEANUP": self.op.cleanup,
7736 "OLD_PRIMARY": source_node,
7737 "NEW_PRIMARY": target_node,
7738 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7739 })
7741 if instance.disk_template in constants.DTS_INT_MIRROR:
7742 env["OLD_SECONDARY"] = target_node
7743 env["NEW_SECONDARY"] = source_node
7744 else:
7745 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7747 return env
7749 def BuildHooksNodes(self):
7750 """Build hooks nodes.
7752 """
7753 instance = self._migrater.instance
7754 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7755 return (nl, nl + [instance.primary_node])
7758 class LUInstanceMove(LogicalUnit):
7759 """Move an instance by data-copying.
7761 """
7762 HPATH = "instance-move"
7763 HTYPE = constants.HTYPE_INSTANCE
7766 def ExpandNames(self):
7767 self._ExpandAndLockInstance()
7768 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7769 self.op.target_node = target_node
7770 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7771 self.needed_locks[locking.LEVEL_NODE_RES] = []
7772 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7774 def DeclareLocks(self, level):
7775 if level == locking.LEVEL_NODE:
7776 self._LockInstancesNodes(primary_only=True)
7777 elif level == locking.LEVEL_NODE_RES:
7779 self.needed_locks[locking.LEVEL_NODE_RES] = \
7780 self.needed_locks[locking.LEVEL_NODE][:]
7782 def BuildHooksEnv(self):
7783 """Build hooks env.
7785 This runs on master, primary and secondary nodes of the instance.
7787 """
7788 env = {
7789 "TARGET_NODE": self.op.target_node,
7790 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7791 }
7792 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7793 return env
7795 def BuildHooksNodes(self):
7796 """Build hooks nodes.
7798 """
7799 nl = [
7800 self.cfg.GetMasterNode(),
7801 self.instance.primary_node,
7802 self.op.target_node,
7803 ]
7804 return (nl, nl)
7806 def CheckPrereq(self):
7807 """Check prerequisites.
7809 This checks that the instance is in the cluster.
7811 """
7812 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7813 assert self.instance is not None, \
7814 "Cannot retrieve locked instance %s" % self.op.instance_name
7816 node = self.cfg.GetNodeInfo(self.op.target_node)
7817 assert node is not None, \
7818 "Cannot retrieve locked node %s" % self.op.target_node
7820 self.target_node = target_node = node.name
7822 if target_node == instance.primary_node:
7823 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7824 (instance.name, target_node),
7825 errors.ECODE_STATE)
7827 bep = self.cfg.GetClusterInfo().FillBE(instance)
7829 for idx, dsk in enumerate(instance.disks):
7830 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7831 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7832 " cannot copy" % idx, errors.ECODE_STATE)
7834 _CheckNodeOnline(self, target_node)
7835 _CheckNodeNotDrained(self, target_node)
7836 _CheckNodeVmCapable(self, target_node)
7837 cluster = self.cfg.GetClusterInfo()
7838 group_info = self.cfg.GetNodeGroup(node.group)
7839 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
7840 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7841 ignore=self.op.ignore_ipolicy)
7843 if instance.admin_state == constants.ADMINST_UP:
7844 # check memory requirements on the secondary node
7845 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7846 instance.name, bep[constants.BE_MAXMEM],
7847 instance.hypervisor)
7848 else:
7849 self.LogInfo("Not checking memory on the secondary node as"
7850 " instance will not be started")
7852 # check bridge existence
7853 _CheckInstanceBridgesExist(self, instance, node=target_node)
7855 def Exec(self, feedback_fn):
7856 """Move an instance.
7858 The move is done by shutting it down on its present node, copying
7859 the data over (slow) and starting it on the new node.
7861 """
7862 instance = self.instance
7864 source_node = instance.primary_node
7865 target_node = self.target_node
7867 self.LogInfo("Shutting down instance %s on source node %s",
7868 instance.name, source_node)
7870 assert (self.owned_locks(locking.LEVEL_NODE) ==
7871 self.owned_locks(locking.LEVEL_NODE_RES))
7873 result = self.rpc.call_instance_shutdown(source_node, instance,
7874 self.op.shutdown_timeout)
7875 msg = result.fail_msg
7876 if msg:
7877 if self.op.ignore_consistency:
7878 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7879 " Proceeding anyway. Please make sure node"
7880 " %s is down. Error details: %s",
7881 instance.name, source_node, source_node, msg)
7882 else:
7883 raise errors.OpExecError("Could not shutdown instance %s on"
7884 " node %s: %s" %
7885 (instance.name, source_node, msg))
7887 # create the target disks
7888 try:
7889 _CreateDisks(self, instance, target_node=target_node)
7890 except errors.OpExecError:
7891 self.LogWarning("Device creation failed, reverting...")
7892 try:
7893 _RemoveDisks(self, instance, target_node=target_node)
7894 finally:
7895 self.cfg.ReleaseDRBDMinors(instance.name)
7896 raise
7898 cluster_name = self.cfg.GetClusterInfo().cluster_name
7900 errs = []
7901 # activate, get path, copy the data over
7902 for idx, disk in enumerate(instance.disks):
7903 self.LogInfo("Copying data for disk %d", idx)
7904 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7905 instance.name, True, idx)
7906 if result.fail_msg:
7907 self.LogWarning("Can't assemble newly created disk %d: %s",
7908 idx, result.fail_msg)
7909 errs.append(result.fail_msg)
7910 break
7911 dev_path = result.payload
7912 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7913 target_node, dev_path,
7914 cluster_name)
7915 if result.fail_msg:
7916 self.LogWarning("Can't copy data over for disk %d: %s",
7917 idx, result.fail_msg)
7918 errs.append(result.fail_msg)
7919 break
7921 if errs:
7922 self.LogWarning("Some disks failed to copy, aborting")
7923 try:
7924 _RemoveDisks(self, instance, target_node=target_node)
7925 finally:
7926 self.cfg.ReleaseDRBDMinors(instance.name)
7927 raise errors.OpExecError("Errors during disk copy: %s" %
7928 (",".join(errs),))
7930 instance.primary_node = target_node
7931 self.cfg.Update(instance, feedback_fn)
7933 self.LogInfo("Removing the disks on the original node")
7934 _RemoveDisks(self, instance, target_node=source_node)
7936 # Only start the instance if it's marked as up
7937 if instance.admin_state == constants.ADMINST_UP:
7938 self.LogInfo("Starting instance %s on node %s",
7939 instance.name, target_node)
7941 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7942 ignore_secondaries=True)
7943 if not disks_ok:
7944 _ShutdownInstanceDisks(self, instance)
7945 raise errors.OpExecError("Can't activate the instance's disks")
7947 result = self.rpc.call_instance_start(target_node,
7948 (instance, None, None), False)
7949 msg = result.fail_msg
7950 if msg:
7951 _ShutdownInstanceDisks(self, instance)
7952 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7953 (instance.name, target_node, msg))
7956 class LUNodeMigrate(LogicalUnit):
7957 """Migrate all instances from a node.
7959 """
7960 HPATH = "node-migrate"
7961 HTYPE = constants.HTYPE_NODE
7964 def CheckArguments(self):
7965 pass
7967 def ExpandNames(self):
7968 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7970 self.share_locks = _ShareAll()
7971 self.needed_locks = {
7972 locking.LEVEL_NODE: [self.op.node_name],
7973 }
7975 def BuildHooksEnv(self):
7976 """Build hooks env.
7978 This runs on the master, the primary and all the secondaries.
7980 """
7981 return {
7982 "NODE_NAME": self.op.node_name,
7983 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7984 }
7986 def BuildHooksNodes(self):
7987 """Build hooks nodes.
7989 """
7990 nl = [self.cfg.GetMasterNode()]
7991 return (nl, nl)
7993 def CheckPrereq(self):
7994 pass
7996 def Exec(self, feedback_fn):
7997 # Prepare jobs for migration instances
7998 allow_runtime_changes = self.op.allow_runtime_changes
7999 jobs = [
8000 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8001 mode=self.op.mode,
8002 live=self.op.live,
8003 iallocator=self.op.iallocator,
8004 target_node=self.op.target_node,
8005 allow_runtime_changes=allow_runtime_changes,
8006 ignore_ipolicy=self.op.ignore_ipolicy)]
8007 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
8008 ]
8010 # TODO: Run iallocator in this opcode and pass correct placement options to
8011 # OpInstanceMigrate. Since other jobs can modify the cluster between
8012 # running the iallocator and the actual migration, a good consistency model
8013 # will have to be found.
8015 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8016 frozenset([self.op.node_name]))
8018 return ResultWithJobs(jobs)
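# Note on the shape of the result above: "jobs" is a list of single-opcode
# job definitions, e.g. [[OpInstanceMigrate(...)], [OpInstanceMigrate(...)]],
# one job per primary instance of the evacuated node, so that each migration
# is scheduled (and can fail) independently of the others.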
8021 class TLMigrateInstance(Tasklet):
8022 """Tasklet class for instance migration.
8025 @ivar live: whether the migration will be done live or non-live;
8026 this variable is initialized only after CheckPrereq has run
8027 @type cleanup: boolean
8028 @ivar cleanup: Whether we are cleaning up after a failed migration
8029 @type iallocator: string
8030 @ivar iallocator: The iallocator used to determine target_node
8031 @type target_node: string
8032 @ivar target_node: If given, the target_node to reallocate the instance to
8033 @type failover: boolean
8034 @ivar failover: Whether operation results in failover or migration
8035 @type fallback: boolean
8036 @ivar fallback: Whether fallback to failover is allowed if migration not
8037 possible
8038 @type ignore_consistency: boolean
8039 @ivar ignore_consistency: Whether we should ignore consistency between source
8040 and target node
8041 @type shutdown_timeout: int
8042 @ivar shutdown_timeout: In case of failover, the timeout of the shutdown
8043 @type ignore_ipolicy: bool
8044 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8046 """
8049 _MIGRATION_POLL_INTERVAL = 1 # seconds
8050 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8052 def __init__(self, lu, instance_name, cleanup=False,
8053 failover=False, fallback=False,
8054 ignore_consistency=False,
8055 allow_runtime_changes=True,
8056 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
8057 ignore_ipolicy=False):
8058 """Initializes this class.
8060 """
8061 Tasklet.__init__(self, lu)
8064 self.instance_name = instance_name
8065 self.cleanup = cleanup
8066 self.live = False # will be overridden later
8067 self.failover = failover
8068 self.fallback = fallback
8069 self.ignore_consistency = ignore_consistency
8070 self.shutdown_timeout = shutdown_timeout
8071 self.ignore_ipolicy = ignore_ipolicy
8072 self.allow_runtime_changes = allow_runtime_changes
8074 def CheckPrereq(self):
8075 """Check prerequisites.
8077 This checks that the instance is in the cluster.
8079 """
8080 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8081 instance = self.cfg.GetInstanceInfo(instance_name)
8082 assert instance is not None
8083 self.instance = instance
8084 cluster = self.cfg.GetClusterInfo()
8086 if (not self.cleanup and
8087 not instance.admin_state == constants.ADMINST_UP and
8088 not self.failover and self.fallback):
8089 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8090 " switching to failover")
8091 self.failover = True
8093 if instance.disk_template not in constants.DTS_MIRRORED:
8094 if self.failover:
8095 text = "failovers"
8096 else:
8097 text = "migrations"
8098 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8099 " %s" % (instance.disk_template, text),
8100 errors.ECODE_STATE)
8102 if instance.disk_template in constants.DTS_EXT_MIRROR:
8103 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8105 if self.lu.op.iallocator:
8106 self._RunAllocator()
8107 else:
8108 # We set self.target_node as it is required by
8109 # BuildHooksEnv
8110 self.target_node = self.lu.op.target_node
8112 # Check that the target node is correct in terms of instance policy
8113 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8114 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8115 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8116 group_info)
8117 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8118 ignore=self.ignore_ipolicy)
8120 # self.target_node is already populated, either directly or by the
8122 target_node = self.target_node
8123 if self.target_node == instance.primary_node:
8124 raise errors.OpPrereqError("Cannot migrate instance %s"
8125 " to its primary (%s)" %
8126 (instance.name, instance.primary_node),
8127 errors.ECODE_INVAL)
8129 if len(self.lu.tasklets) == 1:
8130 # It is safe to release locks only when we're the only tasklet
8131 # in the LU
8132 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8133 keep=[instance.primary_node, self.target_node])
8135 else:
8136 secondary_nodes = instance.secondary_nodes
8137 if not secondary_nodes:
8138 raise errors.ConfigurationError("No secondary node but using"
8139 " %s disk template" %
8140 instance.disk_template)
8141 target_node = secondary_nodes[0]
8142 if self.lu.op.iallocator or (self.lu.op.target_node and
8143 self.lu.op.target_node != target_node):
8144 if self.failover:
8145 text = "failed over"
8146 else:
8147 text = "migrated"
8148 raise errors.OpPrereqError("Instances with disk template %s cannot"
8149 " be %s to arbitrary nodes"
8150 " (neither an iallocator nor a target"
8151 " node can be passed)" %
8152 (instance.disk_template, text),
8153 errors.ECODE_INVAL)
8154 nodeinfo = self.cfg.GetNodeInfo(target_node)
8155 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8156 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8157 group_info)
8158 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8159 ignore=self.ignore_ipolicy)
8161 i_be = cluster.FillBE(instance)
8163 # check memory requirements on the secondary node
8164 if (not self.cleanup and
8165 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8166 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8167 "migrating instance %s" %
8168 instance.name,
8169 i_be[constants.BE_MINMEM],
8170 instance.hypervisor)
8171 else:
8172 self.lu.LogInfo("Not checking memory on the secondary node as"
8173 " instance will not be started")
8175 # check if failover must be forced instead of migration
8176 if (not self.cleanup and not self.failover and
8177 i_be[constants.BE_ALWAYS_FAILOVER]):
8178 self.lu.LogInfo("Instance configured to always failover; fallback"
8179 " to failover")
8180 self.failover = True
8182 # check bridge existence
8183 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8185 if not self.cleanup:
8186 _CheckNodeNotDrained(self.lu, target_node)
8187 if not self.failover:
8188 result = self.rpc.call_instance_migratable(instance.primary_node,
8189 instance)
8190 if result.fail_msg and self.fallback:
8191 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8192 " failover")
8193 self.failover = True
8194 else:
8195 result.Raise("Can't migrate, please use failover",
8196 prereq=True, ecode=errors.ECODE_STATE)
8198 assert not (self.failover and self.cleanup)
8200 if not self.failover:
8201 if self.lu.op.live is not None and self.lu.op.mode is not None:
8202 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8203 " parameters are accepted",
8204 errors.ECODE_INVAL)
8205 if self.lu.op.live is not None:
8206 if self.lu.op.live:
8207 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8208 else:
8209 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8210 # reset the 'live' parameter to None so that repeated
8211 # invocations of CheckPrereq do not raise an exception
8212 self.lu.op.live = None
8213 elif self.lu.op.mode is None:
8214 # read the default value from the hypervisor
8215 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8216 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8218 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8219 else:
8220 # Failover is never live
8221 self.live = False
8223 if not (self.failover or self.cleanup):
8224 remote_info = self.rpc.call_instance_info(instance.primary_node,
8225 instance.name,
8226 instance.hypervisor)
8227 remote_info.Raise("Error checking instance on node %s" %
8228 instance.primary_node)
8229 instance_running = bool(remote_info.payload)
8230 if instance_running:
8231 self.current_mem = int(remote_info.payload["memory"])
8233 def _RunAllocator(self):
8234 """Run the allocator based on input opcode.
8237 # FIXME: add a self.ignore_ipolicy option
8238 req = iallocator.IAReqRelocate(name=self.instance_name,
8239 relocate_from=[self.instance.primary_node])
8240 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8242 ial.Run(self.lu.op.iallocator)
8244 if not ial.success:
8245 raise errors.OpPrereqError("Can't compute nodes using"
8246 " iallocator '%s': %s" %
8247 (self.lu.op.iallocator, ial.info),
8248 errors.ECODE_NORES)
8249 self.target_node = ial.result[0]
8250 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8251 self.instance_name, self.lu.op.iallocator,
8252 utils.CommaJoin(ial.result))
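# On success, ial.result holds the candidate node list computed for the
# relocation request; only its first entry is used as the migration target
# above, while the full list is merely reported to the user.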
8254 def _WaitUntilSync(self):
8255 """Poll with custom rpc for disk sync.
8257 This uses our own step-based rpc call.
8259 """
8260 self.feedback_fn("* wait until resync is done")
8261 all_done = False
8262 while not all_done:
8263 all_done = True
8264 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8265 self.nodes_ip,
8266 (self.instance.disks,
8267 self.instance))
8268 min_percent = 100
8269 for node, nres in result.items():
8270 nres.Raise("Cannot resync disks on node %s" % node)
8271 node_done, node_percent = nres.payload
8272 all_done = all_done and node_done
8273 if node_percent is not None:
8274 min_percent = min(min_percent, node_percent)
8275 if not all_done:
8276 if min_percent < 100:
8277 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8278 time.sleep(2)
8280 def _EnsureSecondary(self, node):
8281 """Demote a node to secondary.
8283 """
8284 self.feedback_fn("* switching node %s to secondary mode" % node)
8286 for dev in self.instance.disks:
8287 self.cfg.SetDiskID(dev, node)
8289 result = self.rpc.call_blockdev_close(node, self.instance.name,
8290 self.instance.disks)
8291 result.Raise("Cannot change disk to secondary on node %s" % node)
8293 def _GoStandalone(self):
8294 """Disconnect from the network.
8296 """
8297 self.feedback_fn("* changing into standalone mode")
8298 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8299 self.instance.disks)
8300 for node, nres in result.items():
8301 nres.Raise("Cannot disconnect disks node %s" % node)
8303 def _GoReconnect(self, multimaster):
8304 """Reconnect to the network.
8306 """
8307 if multimaster:
8308 msg = "dual-master"
8309 else:
8310 msg = "single-master"
8311 self.feedback_fn("* changing disks into %s mode" % msg)
8312 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8313 (self.instance.disks, self.instance),
8314 self.instance.name, multimaster)
8315 for node, nres in result.items():
8316 nres.Raise("Cannot change disks config on node %s" % node)
8318 def _ExecCleanup(self):
8319 """Try to cleanup after a failed migration.
8321 The cleanup is done by:
8322 - check that the instance is running only on one node
8323 (and update the config if needed)
8324 - change disks on its secondary node to secondary
8325 - wait until disks are fully synchronized
8326 - disconnect from the network
8327 - change disks into single-master mode
8328 - wait again until disks are fully synchronized
8330 """
8331 instance = self.instance
8332 target_node = self.target_node
8333 source_node = self.source_node
8335 # check running on only one node
8336 self.feedback_fn("* checking where the instance actually runs"
8337 " (if this hangs, the hypervisor might be in"
8339 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8340 for node, result in ins_l.items():
8341 result.Raise("Can't contact node %s" % node)
8343 runningon_source = instance.name in ins_l[source_node].payload
8344 runningon_target = instance.name in ins_l[target_node].payload
8346 if runningon_source and runningon_target:
8347 raise errors.OpExecError("Instance seems to be running on two nodes,"
8348 " or the hypervisor is confused; you will have"
8349 " to ensure manually that it runs only on one"
8350 " and restart this operation")
8352 if not (runningon_source or runningon_target):
8353 raise errors.OpExecError("Instance does not seem to be running at all;"
8354 " in this case it's safer to repair by"
8355 " running 'gnt-instance stop' to ensure disk"
8356 " shutdown, and then restarting it")
8358 if runningon_target:
8359 # the migration has actually succeeded, we need to update the config
8360 self.feedback_fn("* instance running on secondary node (%s),"
8361 " updating config" % target_node)
8362 instance.primary_node = target_node
8363 self.cfg.Update(instance, self.feedback_fn)
8364 demoted_node = source_node
8365 else:
8366 self.feedback_fn("* instance confirmed to be running on its"
8367 " primary node (%s)" % source_node)
8368 demoted_node = target_node
8370 if instance.disk_template in constants.DTS_INT_MIRROR:
8371 self._EnsureSecondary(demoted_node)
8372 try:
8373 self._WaitUntilSync()
8374 except errors.OpExecError:
8375 # we ignore here errors, since if the device is standalone, it
8376 # won't be able to sync
8377 pass
8378 self._GoStandalone()
8379 self._GoReconnect(False)
8380 self._WaitUntilSync()
8382 self.feedback_fn("* done")
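# The cleanup above drives the same small DRBD state machine used elsewhere
# in this tasklet: _EnsureSecondary closes the device on the demoted node,
# _GoStandalone disconnects both sides from the network, _GoReconnect(False)
# re-attaches them in single-master mode, and _WaitUntilSync polls until all
# nodes report a 100% resynchronized disk.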
8384 def _RevertDiskStatus(self):
8385 """Try to revert the disk status after a failed migration.
8387 """
8388 target_node = self.target_node
8389 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8390 return
8392 try:
8393 self._EnsureSecondary(target_node)
8394 self._GoStandalone()
8395 self._GoReconnect(False)
8396 self._WaitUntilSync()
8397 except errors.OpExecError, err:
8398 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8399 " please try to recover the instance manually;"
8400 " error '%s'" % str(err))
8402 def _AbortMigration(self):
8403 """Call the hypervisor code to abort a started migration.
8405 """
8406 instance = self.instance
8407 target_node = self.target_node
8408 source_node = self.source_node
8409 migration_info = self.migration_info
8411 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8412 instance,
8413 migration_info,
8414 False)
8415 abort_msg = abort_result.fail_msg
8416 if abort_msg:
8417 logging.error("Aborting migration failed on target node %s: %s",
8418 target_node, abort_msg)
8419 # Don't raise an exception here, as we still have to try to revert the
8420 # disk status, even if this step failed.
8422 abort_result = self.rpc.call_instance_finalize_migration_src(
8423 source_node, instance, False, self.live)
8424 abort_msg = abort_result.fail_msg
8425 if abort_msg:
8426 logging.error("Aborting migration failed on source node %s: %s",
8427 source_node, abort_msg)
8429 def _ExecMigration(self):
8430 """Migrate an instance.
8432 The migrate is done by:
8433 - change the disks into dual-master mode
8434 - wait until disks are fully synchronized again
8435 - migrate the instance
8436 - change disks on the new secondary node (the old primary) to secondary
8437 - wait until disks are fully synchronized
8438 - change disks into single-master mode
8440 """
8441 instance = self.instance
8442 target_node = self.target_node
8443 source_node = self.source_node
8445 # Check for hypervisor version mismatch and warn the user.
8446 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8447 None, [self.instance.hypervisor])
8448 for ninfo in nodeinfo.values():
8449 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8450 ninfo.node)
8451 (_, _, (src_info, )) = nodeinfo[source_node].payload
8452 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8454 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8455 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8456 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8457 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8458 if src_version != dst_version:
8459 self.feedback_fn("* warning: hypervisor version mismatch between"
8460 " source (%s) and target (%s) node" %
8461 (src_version, dst_version))
8463 self.feedback_fn("* checking disk consistency between source and target")
8464 for (idx, dev) in enumerate(instance.disks):
8465 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8466 raise errors.OpExecError("Disk %s is degraded or not fully"
8467 " synchronized on target node,"
8468 " aborting migration" % idx)
8470 if self.current_mem > self.tgt_free_mem:
8471 if not self.allow_runtime_changes:
8472 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8473 " free memory to fit instance %s on target"
8474 " node %s (have %dMB, need %dMB)" %
8475 (instance.name, target_node,
8476 self.tgt_free_mem, self.current_mem))
8477 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8478 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8479 instance,
8480 self.tgt_free_mem)
8481 rpcres.Raise("Cannot modify instance runtime memory")
8483 # First get the migration information from the remote node
8484 result = self.rpc.call_migration_info(source_node, instance)
8485 msg = result.fail_msg
8486 if msg:
8487 log_err = ("Failed fetching source migration information from %s: %s" %
8488 (source_node, msg))
8489 logging.error(log_err)
8490 raise errors.OpExecError(log_err)
8492 self.migration_info = migration_info = result.payload
8494 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8495 # Then switch the disks to master/master mode
8496 self._EnsureSecondary(target_node)
8497 self._GoStandalone()
8498 self._GoReconnect(True)
8499 self._WaitUntilSync()
8501 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8502 result = self.rpc.call_accept_instance(target_node,
8503 instance,
8504 migration_info,
8505 self.nodes_ip[target_node])
8507 msg = result.fail_msg
8508 if msg:
8509 logging.error("Instance pre-migration failed, trying to revert"
8510 " disk status: %s", msg)
8511 self.feedback_fn("Pre-migration failed, aborting")
8512 self._AbortMigration()
8513 self._RevertDiskStatus()
8514 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8515 (instance.name, msg))
8517 self.feedback_fn("* migrating instance to %s" % target_node)
8518 result = self.rpc.call_instance_migrate(source_node, instance,
8519 self.nodes_ip[target_node],
8520 self.live)
8521 msg = result.fail_msg
8522 if msg:
8523 logging.error("Instance migration failed, trying to revert"
8524 " disk status: %s", msg)
8525 self.feedback_fn("Migration failed, aborting")
8526 self._AbortMigration()
8527 self._RevertDiskStatus()
8528 raise errors.OpExecError("Could not migrate instance %s: %s" %
8529 (instance.name, msg))
8531 self.feedback_fn("* starting memory transfer")
8532 last_feedback = time.time()
8533 while True:
8534 result = self.rpc.call_instance_get_migration_status(source_node,
8535 instance)
8536 msg = result.fail_msg
8537 ms = result.payload # MigrationStatus instance
8538 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8539 logging.error("Instance migration failed, trying to revert"
8540 " disk status: %s", msg)
8541 self.feedback_fn("Migration failed, aborting")
8542 self._AbortMigration()
8543 self._RevertDiskStatus()
8544 raise errors.OpExecError("Could not migrate instance %s: %s" %
8545 (instance.name, msg))
8547 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8548 self.feedback_fn("* memory transfer complete")
8549 break
8551 if (utils.TimeoutExpired(last_feedback,
8552 self._MIGRATION_FEEDBACK_INTERVAL) and
8553 ms.transferred_ram is not None):
8554 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8555 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8556 last_feedback = time.time()
8558 time.sleep(self._MIGRATION_POLL_INTERVAL)
8560 result = self.rpc.call_instance_finalize_migration_src(source_node,
8561 instance,
8562 True,
8563 self.live)
8564 msg = result.fail_msg
8565 if msg:
8566 logging.error("Instance migration succeeded, but finalization failed"
8567 " on the source node: %s", msg)
8568 raise errors.OpExecError("Could not finalize instance migration: %s" %
8569 msg)
8571 instance.primary_node = target_node
8573 # distribute new instance config to the other nodes
8574 self.cfg.Update(instance, self.feedback_fn)
8576 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8577 instance,
8578 migration_info,
8579 True)
8580 msg = result.fail_msg
8581 if msg:
8582 logging.error("Instance migration succeeded, but finalization failed"
8583 " on the target node: %s", msg)
8584 raise errors.OpExecError("Could not finalize instance migration: %s" %
8585 msg)
8587 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8588 self._EnsureSecondary(source_node)
8589 self._WaitUntilSync()
8590 self._GoStandalone()
8591 self._GoReconnect(False)
8592 self._WaitUntilSync()
8594 # If the instance's disk template is `rbd' and there was a successful
8595 # migration, unmap the device from the source node.
8596 if self.instance.disk_template == constants.DT_RBD:
8597 disks = _ExpandCheckDisks(instance, instance.disks)
8598 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8599 for disk in disks:
8600 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8601 msg = result.fail_msg
8602 if msg:
8603 logging.error("Migration was successful, but couldn't unmap the"
8604 " block device %s on source node %s: %s",
8605 disk.iv_name, source_node, msg)
8606 logging.error("You need to unmap the device %s manually on %s",
8607 disk.iv_name, source_node)
8609 self.feedback_fn("* done")
8611 def _ExecFailover(self):
8612 """Failover an instance.
8614 The failover is done by shutting it down on its present node and
8615 starting it on the secondary.
8617 """
8618 instance = self.instance
8619 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8621 source_node = instance.primary_node
8622 target_node = self.target_node
8624 if instance.admin_state == constants.ADMINST_UP:
8625 self.feedback_fn("* checking disk consistency between source and target")
8626 for (idx, dev) in enumerate(instance.disks):
8627 # for drbd, these are drbd over lvm
8628 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8629 False):
8630 if primary_node.offline:
8631 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8632 " target node %s" %
8633 (primary_node.name, idx, target_node))
8634 elif not self.ignore_consistency:
8635 raise errors.OpExecError("Disk %s is degraded on target node,"
8636 " aborting failover" % idx)
8637 else:
8638 self.feedback_fn("* not checking disk consistency as instance is not"
8639 " running")
8641 self.feedback_fn("* shutting down instance on source node")
8642 logging.info("Shutting down instance %s on node %s",
8643 instance.name, source_node)
8645 result = self.rpc.call_instance_shutdown(source_node, instance,
8646 self.shutdown_timeout)
8647 msg = result.fail_msg
8648 if msg:
8649 if self.ignore_consistency or primary_node.offline:
8650 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8651 " proceeding anyway; please make sure node"
8652 " %s is down; error details: %s",
8653 instance.name, source_node, source_node, msg)
8654 else:
8655 raise errors.OpExecError("Could not shutdown instance %s on"
8656 " node %s: %s" %
8657 (instance.name, source_node, msg))
8659 self.feedback_fn("* deactivating the instance's disks on source node")
8660 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8661 raise errors.OpExecError("Can't shut down the instance's disks")
8663 instance.primary_node = target_node
8664 # distribute new instance config to the other nodes
8665 self.cfg.Update(instance, self.feedback_fn)
8667 # Only start the instance if it's marked as up
8668 if instance.admin_state == constants.ADMINST_UP:
8669 self.feedback_fn("* activating the instance's disks on target node %s" %
8670 target_node)
8671 logging.info("Starting instance %s on node %s",
8672 instance.name, target_node)
8674 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8675 ignore_secondaries=True)
8676 if not disks_ok:
8677 _ShutdownInstanceDisks(self.lu, instance)
8678 raise errors.OpExecError("Can't activate the instance's disks")
8680 self.feedback_fn("* starting the instance on the target node %s" %
8681 target_node)
8682 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8683 False)
8684 msg = result.fail_msg
8685 if msg:
8686 _ShutdownInstanceDisks(self.lu, instance)
8687 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8688 (instance.name, target_node, msg))
8690 def Exec(self, feedback_fn):
8691 """Perform the migration.
8693 """
8694 self.feedback_fn = feedback_fn
8695 self.source_node = self.instance.primary_node
8697 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8698 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8699 self.target_node = self.instance.secondary_nodes[0]
8700 # Otherwise self.target_node has been populated either
8701 # directly, or through an iallocator.
8703 self.all_nodes = [self.source_node, self.target_node]
8704 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8705 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8707 if self.failover:
8708 feedback_fn("Failover instance %s" % self.instance.name)
8709 self._ExecFailover()
8710 else:
8711 feedback_fn("Migrating instance %s" % self.instance.name)
8713 if self.cleanup:
8714 return self._ExecCleanup()
8715 else:
8716 return self._ExecMigration()
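# A hedged, standalone sketch (names hypothetical, not used by this module)
# of the poll-and-throttle pattern _ExecMigration implements with
# _MIGRATION_POLL_INTERVAL and _MIGRATION_FEEDBACK_INTERVAL: poll the status
# frequently, but emit progress feedback at most once per feedback interval.
#
#   def _sketch_poll_with_throttled_feedback(check_fn, feedback_fn,
#                                            poll_interval=1,
#                                            feedback_interval=10):
#     last_feedback = time.time()
#     while True:
#       (done, progress) = check_fn()  # e.g. wraps a get_migration_status call
#       if done:
#         feedback_fn("* transfer complete")
#         break
#       if time.time() - last_feedback >= feedback_interval:
#         feedback_fn("* progress: %.2f %%" % progress)
#         last_feedback = time.time()
#       time.sleep(poll_interval)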
8719 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8720 force_open):
8721 """Wrapper around L{_CreateBlockDevInner}.
8723 This method annotates the root device first.
8725 """
8726 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8727 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8728 force_open)
8731 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8732 info, force_open):
8733 """Create a tree of block devices on a given node.
8735 If this device type has to be created on secondaries, create it and
8736 all its children.
8738 If not, just recurse to children keeping the same 'force' value.
8740 @attention: The device has to be annotated already.
8742 @param lu: the lu on whose behalf we execute
8743 @param node: the node on which to create the device
8744 @type instance: L{objects.Instance}
8745 @param instance: the instance which owns the device
8746 @type device: L{objects.Disk}
8747 @param device: the device to create
8748 @type force_create: boolean
8749 @param force_create: whether to force creation of this device; this
8750 will be changed to True whenever we find a device which has
8751 CreateOnSecondary() attribute
8752 @param info: the extra 'metadata' we should attach to the device
8753 (this will be represented as a LVM tag)
8754 @type force_open: boolean
8755 @param force_open: this parameter will be passed to the
8756 L{backend.BlockdevCreate} function where it specifies
8757 whether we run on primary or not, and it affects both
8758 the child assembly and the device's own Open() execution
8760 """
8761 if device.CreateOnSecondary():
8762 force_create = True
8764 if device.children:
8765 for child in device.children:
8766 _CreateBlockDevInner(lu, node, instance, child, force_create,
8767 info, force_open)
8769 if not force_create:
8770 return
8772 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8775 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8776 """Create a single block device on a given node.
8778 This will not recurse over children of the device, so they must be
8779 created in advance.
8781 @param lu: the lu on whose behalf we execute
8782 @param node: the node on which to create the device
8783 @type instance: L{objects.Instance}
8784 @param instance: the instance which owns the device
8785 @type device: L{objects.Disk}
8786 @param device: the device to create
8787 @param info: the extra 'metadata' we should attach to the device
8788 (this will be represented as a LVM tag)
8789 @type force_open: boolean
8790 @param force_open: this parameter will be passed to the
8791 L{backend.BlockdevCreate} function where it specifies
8792 whether we run on primary or not, and it affects both
8793 the child assembly and the device's own Open() execution
8795 """
8796 lu.cfg.SetDiskID(device, node)
8797 result = lu.rpc.call_blockdev_create(node, device, device.size,
8798 instance.name, force_open, info)
8799 result.Raise("Can't create block device %s on"
8800 " node %s for instance %s" % (device, node, instance.name))
8801 if device.physical_id is None:
8802 device.physical_id = result.payload
8805 def _GenerateUniqueNames(lu, exts):
8806 """Generate a suitable LV name.
8808 This will generate a logical volume name for the given instance.
8810 """
8811 results = []
8812 for val in exts:
8813 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8814 results.append("%s%s" % (new_id, val))
8815 return results
8818 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8819 iv_name, p_minor, s_minor):
8820 """Generate a drbd8 device complete with its children.
8822 """
8823 assert len(vgnames) == len(names) == 2
8824 port = lu.cfg.AllocatePort()
8825 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8827 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8828 logical_id=(vgnames[0], names[0]),
8829 params={})
8830 dev_meta = objects.Disk(dev_type=constants.LD_LV,
8831 size=constants.DRBD_META_SIZE,
8832 logical_id=(vgnames[1], names[1]),
8833 params={})
8834 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8835 logical_id=(primary, secondary, port,
8836 p_minor, s_minor,
8837 shared_secret),
8838 children=[dev_data, dev_meta],
8839 iv_name=iv_name, params={})
8840 return drbd_dev
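# The returned device is a two-level tree; schematically:
#
#   LD_DRBD8 (size, logical_id=(primary, secondary, port,
#                               p_minor, s_minor, shared_secret))
#     |- LD_LV "<prefix>_data" of the requested size
#     `- LD_LV "<prefix>_meta" of constants.DRBD_META_SIZE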
8843 _DISK_TEMPLATE_NAME_PREFIX = {
8844 constants.DT_PLAIN: "",
8845 constants.DT_RBD: ".rbd",
8846 }
8849 _DISK_TEMPLATE_DEVICE_TYPE = {
8850 constants.DT_PLAIN: constants.LD_LV,
8851 constants.DT_FILE: constants.LD_FILE,
8852 constants.DT_SHARED_FILE: constants.LD_FILE,
8853 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8854 constants.DT_RBD: constants.LD_RBD,
8855 }
8858 def _GenerateDiskTemplate(
8859 lu, template_name, instance_name, primary_node, secondary_nodes,
8860 disk_info, file_storage_dir, file_driver, base_index,
8861 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8862 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8863 """Generate the entire disk layout for a given template type.
8865 """
8866 # TODO: compute space requirements
8868 vgname = lu.cfg.GetVGName()
8869 disk_count = len(disk_info)
8870 disks = []
8872 if template_name == constants.DT_DISKLESS:
8873 pass
8874 elif template_name == constants.DT_DRBD8:
8875 if len(secondary_nodes) != 1:
8876 raise errors.ProgrammerError("Wrong template configuration")
8877 remote_node = secondary_nodes[0]
8878 minors = lu.cfg.AllocateDRBDMinor(
8879 [primary_node, remote_node] * len(disk_info), instance_name)
8881 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8882 full_disk_params)
8883 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8885 names = []
8886 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8887 for i in range(disk_count)]):
8888 names.append(lv_prefix + "_data")
8889 names.append(lv_prefix + "_meta")
8890 for idx, disk in enumerate(disk_info):
8891 disk_index = idx + base_index
8892 data_vg = disk.get(constants.IDISK_VG, vgname)
8893 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8894 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8895 disk[constants.IDISK_SIZE],
8896 [data_vg, meta_vg],
8897 names[idx * 2:idx * 2 + 2],
8898 "disk/%d" % disk_index,
8899 minors[idx * 2], minors[idx * 2 + 1])
8900 disk_dev.mode = disk[constants.IDISK_MODE]
8901 disks.append(disk_dev)
8902 else:
8903 if secondary_nodes:
8904 raise errors.ProgrammerError("Wrong template configuration")
8906 if template_name == constants.DT_FILE:
8907 _req_file_storage()
8908 elif template_name == constants.DT_SHARED_FILE:
8909 _req_shr_file_storage()
8911 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8912 if name_prefix is None:
8913 names = None
8914 else:
8915 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8916 (name_prefix, base_index + i)
8917 for i in range(disk_count)])
8919 if template_name == constants.DT_PLAIN:
8920 def logical_id_fn(idx, _, disk):
8921 vg = disk.get(constants.IDISK_VG, vgname)
8922 return (vg, names[idx])
8923 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8924 logical_id_fn = \
8925 lambda _, disk_index, disk: (file_driver,
8926 "%s/disk%d" % (file_storage_dir,
8927 disk_index))
8928 elif template_name == constants.DT_BLOCK:
8929 logical_id_fn = \
8930 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8931 disk[constants.IDISK_ADOPT])
8932 elif template_name == constants.DT_RBD:
8933 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8935 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8937 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8939 for idx, disk in enumerate(disk_info):
8940 disk_index = idx + base_index
8941 size = disk[constants.IDISK_SIZE]
8942 feedback_fn("* disk %s, size %s" %
8943 (disk_index, utils.FormatUnit(size, "h")))
8944 disks.append(objects.Disk(dev_type=dev_type, size=size,
8945 logical_id=logical_id_fn(idx, disk_index, disk),
8946 iv_name="disk/%d" % disk_index,
8947 mode=disk[constants.IDISK_MODE],
8948 params={}))
8950 return disks
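# Example of the resulting naming (UUIDs shortened, illustrative only): for
# two plain LVM disks _GenerateUniqueNames yields something like
# ["<uuid0>.disk0", "<uuid1>.disk1"], used directly as LV names, while the
# DRBD branch above requests one prefix per disk and derives
# "<uuid0>.disk0_data" / "<uuid0>.disk0_meta" LV pairs from it.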
8953 def _GetInstanceInfoText(instance):
8954 """Compute the text that should be added to the disk's metadata.
8956 """
8957 return "originstname+%s" % instance.name
8960 def _CalcEta(time_taken, written, total_size):
8961 """Calculates the ETA based on size written and total size.
8963 @param time_taken: The time taken so far
8964 @param written: amount written so far
8965 @param total_size: The total size of data to be written
8966 @return: The remaining time in seconds
8968 """
8969 avg_time = time_taken / float(written)
8970 return (total_size - written) * avg_time
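# Worked example: after writing 512 MiB of a 2048 MiB device in 30 seconds,
# the average time per unit is 30 / 512.0 seconds, so
# _CalcEta(30, 512, 2048) == (2048 - 512) * (30 / 512.0) == 90.0 seconds.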
8973 def _WipeDisks(lu, instance):
8974 """Wipes instance disks.
8976 @type lu: L{LogicalUnit}
8977 @param lu: the logical unit on whose behalf we execute
8978 @type instance: L{objects.Instance}
8979 @param instance: the instance whose disks we should wipe
8980 @return: the success of the wipe
8982 """
8983 node = instance.primary_node
8985 for device in instance.disks:
8986 lu.cfg.SetDiskID(device, node)
8988 logging.info("Pause sync of instance %s disks", instance.name)
8989 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8990 (instance.disks, instance),
8991 True)
8992 result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)
8994 for idx, success in enumerate(result.payload):
8995 if not success:
8996 logging.warn("pause-sync of instance %s for disk %d failed",
8997 instance.name, idx)
8999 try:
9000 for idx, device in enumerate(instance.disks):
9001 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9002 # MAX_WIPE_CHUNK at max
9003 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
9004 constants.MIN_WIPE_CHUNK_PERCENT)
9005 # we _must_ make this an int, otherwise rounding errors will
9006 # occur
9007 wipe_chunk_size = int(wipe_chunk_size)
9009 lu.LogInfo("* Wiping disk %d", idx)
9010 logging.info("Wiping disk %d for instance %s, node %s using"
9011 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9016 start_time = time.time()
9018 while offset < size:
9019 wipe_size = min(wipe_chunk_size, size - offset)
9020 logging.debug("Wiping disk %d, offset %s, chunk %s",
9021 idx, offset, wipe_size)
9022 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9023 wipe_size)
9024 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9025 (idx, offset, wipe_size))
9026 now = time.time()
9027 offset += wipe_size
9028 if now - last_output >= 60:
9029 eta = _CalcEta(now - start_time, offset, size)
9030 lu.LogInfo(" - done: %.1f%% ETA: %s" %
9031 (offset / float(size) * 100, utils.FormatSeconds(eta)))
9032 last_output = now
9033 finally:
9034 logging.info("Resume sync of instance %s disks", instance.name)
9036 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9037 (instance.disks, instance),
9038 False)
9040 if result.fail_msg:
9041 lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
9042 " please have a look at the status and troubleshoot"
9043 " the issue: %s", node, result.fail_msg)
9045 for idx, success in enumerate(result.payload):
9046 if not success:
9047 lu.LogWarning("Resume sync of disk %d failed, please have a"
9048 " look at the status and troubleshoot the issue", idx)
9049 logging.warn("resume-sync of instance %s for disk %d failed",
9050 instance.name, idx)
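# Worked example for the chunk size computed above (assuming the usual
# values of constants.MAX_WIPE_CHUNK == 1024 MiB and
# constants.MIN_WIPE_CHUNK_PERCENT == 10): a 2048 MiB disk gives
# int(min(1024, 2048 / 100.0 * 10)) == 204 MiB per wipe call, while any
# disk of 10240 MiB or more is capped at 1024 MiB chunks.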
9053 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9054 """Create all disks for an instance.
9056 This abstracts away some work from AddInstance.
9058 @type lu: L{LogicalUnit}
9059 @param lu: the logical unit on whose behalf we execute
9060 @type instance: L{objects.Instance}
9061 @param instance: the instance whose disks we should create
9062 @type to_skip: list
9063 @param to_skip: list of indices to skip
9064 @type target_node: string
9065 @param target_node: if passed, overrides the target node for creation
9067 @return: the success of the creation
9069 """
9070 info = _GetInstanceInfoText(instance)
9071 if target_node is None:
9072 pnode = instance.primary_node
9073 all_nodes = instance.all_nodes
9074 else:
9075 pnode = target_node
9076 all_nodes = [pnode]
9078 if instance.disk_template in constants.DTS_FILEBASED:
9079 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9080 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9082 result.Raise("Failed to create directory '%s' on"
9083 " node %s" % (file_storage_dir, pnode))
9085 # Note: this needs to be kept in sync with adding of disks in
9086 # LUInstanceSetParams
9087 for idx, device in enumerate(instance.disks):
9088 if to_skip and idx in to_skip:
9089 continue
9090 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9092 for node in all_nodes:
9093 f_create = node == pnode
9094 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9097 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9098 """Remove all disks for an instance.
9100 This abstracts away some work from `AddInstance()` and
9101 `RemoveInstance()`. Note that in case some of the devices couldn't
9102 be removed, the removal will continue with the other ones (compare
9103 with `_CreateDisks()`).
9105 @type lu: L{LogicalUnit}
9106 @param lu: the logical unit on whose behalf we execute
9107 @type instance: L{objects.Instance}
9108 @param instance: the instance whose disks we should remove
9109 @type target_node: string
9110 @param target_node: used to override the node on which to remove the disks
9111 @rtype: boolean
9112 @return: the success of the removal
9114 """
9115 logging.info("Removing block devices for instance %s", instance.name)
9117 all_result = True
9118 ports_to_release = set()
9119 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9120 for (idx, device) in enumerate(anno_disks):
9121 if target_node:
9122 edata = [(target_node, device)]
9123 else:
9124 edata = device.ComputeNodeTree(instance.primary_node)
9125 for node, disk in edata:
9126 lu.cfg.SetDiskID(disk, node)
9127 result = lu.rpc.call_blockdev_remove(node, disk)
9128 if result.fail_msg:
9129 lu.LogWarning("Could not remove disk %s on node %s,"
9130 " continuing anyway: %s", idx, node, result.fail_msg)
9131 if not (result.offline and node != instance.primary_node):
9132 all_result = False
9134 # if this is a DRBD disk, return its port to the pool
9135 if device.dev_type in constants.LDS_DRBD:
9136 ports_to_release.add(device.logical_id[2])
9138 if all_result or ignore_failures:
9139 for port in ports_to_release:
9140 lu.cfg.AddTcpUdpPort(port)
9142 if instance.disk_template == constants.DT_FILE:
9143 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9144 if target_node:
9145 tgt = target_node
9146 else:
9147 tgt = instance.primary_node
9148 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9149 if result.fail_msg:
9150 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9151 file_storage_dir, instance.primary_node, result.fail_msg)
9152 all_result = False
9154 return all_result
9157 def _ComputeDiskSizePerVG(disk_template, disks):
9158 """Compute disk size requirements in the volume group.
9160 """
9161 def _compute(disks, payload):
9162 """Universal algorithm.
9164 """
9165 vgs = {}
9166 for disk in disks:
9167 vgs[disk[constants.IDISK_VG]] = \
9168 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9169 return vgs
9172 # Required free disk space as a function of disk and swap space
9173 req_size_dict = {
9174 constants.DT_DISKLESS: {},
9175 constants.DT_PLAIN: _compute(disks, 0),
9176 # 128 MB are added for drbd metadata for each disk
9177 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9178 constants.DT_FILE: {},
9179 constants.DT_SHARED_FILE: {},
9180 }
9182 if disk_template not in req_size_dict:
9183 raise errors.ProgrammerError("Disk template '%s' size requirement"
9184 " is unknown" % disk_template)
9186 return req_size_dict[disk_template]
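# A hedged, standalone sketch of the accumulation done by _compute above,
# using plain string keys instead of the IDISK_* constants (illustrative
# only). With the DRBD payload of constants.DRBD_META_SIZE (128 MiB of
# metadata per disk, as noted above):
#
#   def _sketch_compute(disks, payload):
#     vgs = {}
#     for disk in disks:
#       vgs[disk["vg"]] = vgs.get(disk["vg"], 0) + disk["size"] + payload
#     return vgs
#
#   _sketch_compute([{"vg": "xenvg", "size": 1024}], 128)
#   # -> {"xenvg": 1152}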
9189 def _FilterVmNodes(lu, nodenames):
9190 """Filters out non-vm_capable nodes from a list.
9192 @type lu: L{LogicalUnit}
9193 @param lu: the logical unit for which we check
9194 @type nodenames: list
9195 @param nodenames: the list of nodes on which we should check
9197 @return: the list of vm-capable nodes
9199 """
9200 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9201 return [name for name in nodenames if name not in vm_nodes]
9204 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9205 """Hypervisor parameter validation.
9207 This function abstracts the hypervisor parameter validation to be
9208 used in both instance create and instance modify.
9210 @type lu: L{LogicalUnit}
9211 @param lu: the logical unit for which we check
9212 @type nodenames: list
9213 @param nodenames: the list of nodes on which we should check
9214 @type hvname: string
9215 @param hvname: the name of the hypervisor we should use
9216 @type hvparams: dict
9217 @param hvparams: the parameters which we need to check
9218 @raise errors.OpPrereqError: if the parameters are not valid
9220 """
9221 nodenames = _FilterVmNodes(lu, nodenames)
9223 cluster = lu.cfg.GetClusterInfo()
9224 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9226 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9227 for node in nodenames:
9228 info = hvinfo[node]
9229 if info.offline:
9230 continue
9231 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9234 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9235 """OS parameters validation.
9237 @type lu: L{LogicalUnit}
9238 @param lu: the logical unit for which we check
9239 @type required: boolean
9240 @param required: whether the validation should fail if the OS is not
9241 found
9242 @type nodenames: list
9243 @param nodenames: the list of nodes on which we should check
9244 @type osname: string
9245 @param osname: the name of the OS we should use
9246 @type osparams: dict
9247 @param osparams: the parameters which we need to check
9248 @raise errors.OpPrereqError: if the parameters are not valid
9250 """
9251 nodenames = _FilterVmNodes(lu, nodenames)
9252 result = lu.rpc.call_os_validate(nodenames, required, osname,
9253 [constants.OS_VALIDATE_PARAMETERS],
9254 osparams)
9255 for node, nres in result.items():
9256 # we don't check for offline cases since this should be run only
9257 # against the master node and/or an instance's nodes
9258 nres.Raise("OS Parameters validation failed on node %s" % node)
9259 if not nres.payload:
9260 lu.LogInfo("OS %s not found on node %s, validation skipped",
9261 osname, node)
9264 def _CreateInstanceAllocRequest(op, disks, nics, beparams):
9265 """Wrapper around IAReqInstanceAlloc.
9267 @param op: The instance opcode
9268 @param disks: The computed disks
9269 @param nics: The computed nics
9270 @param beparams: The fully filled beparams
9272 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9275 spindle_use = beparams[constants.BE_SPINDLE_USE]
9276 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9277 disk_template=op.disk_template,
9280 vcpus=beparams[constants.BE_VCPUS],
9281 memory=beparams[constants.BE_MAXMEM],
9282 spindle_use=spindle_use,
9284 nics=[n.ToDict() for n in nics],
9285 hypervisor=op.hypervisor)
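# Usage sketch: the returned request is handed to the IAllocator framework,
# as done in LUInstanceCreate._RunAllocator below:
#   req = _CreateInstanceAllocRequest(self.op, self.disks, self.nics,
#                                     self.be_full)
#   ial = iallocator.IAllocator(self.cfg, self.rpc, req)
#   ial.Run(self.op.iallocator)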
9288 def _ComputeNics(op, cluster, default_ip, cfg, proc):
9289 """Computes the nics.
9291 @param op: The instance opcode
9292 @param cluster: Cluster configuration object
9293 @param default_ip: The default ip to assign
9294 @param cfg: An instance of the configuration object
9295 @param proc: The executor instance
9297 @returns: The list of built-up NIC objects
9301 for idx, nic in enumerate(op.nics):
9302 nic_mode_req = nic.get(constants.INIC_MODE, None)
9303 nic_mode = nic_mode_req
9304 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9305 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9307 # in routed mode, for the first nic, the default ip is 'auto'
9308 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9309 default_ip_mode = constants.VALUE_AUTO
9311 default_ip_mode = constants.VALUE_NONE
9313 # ip validity checks
9314 ip = nic.get(constants.INIC_IP, default_ip_mode)
9315 if ip is None or ip.lower() == constants.VALUE_NONE:
9317 elif ip.lower() == constants.VALUE_AUTO:
9318 if not op.name_check:
9319 raise errors.OpPrereqError("IP address set to auto but name checks"
9320 " have been skipped",
9324 if not netutils.IPAddress.IsValid(ip):
9325 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9329 # TODO: check the ip address for uniqueness
9330 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9331 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9334 # MAC address verification
9335 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9336 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9337 mac = utils.NormalizeAndValidateMac(mac)
9340 # TODO: We need to factor this out
9341 cfg.ReserveMAC(mac, proc.GetECId())
9342 except errors.ReservationError:
9343 raise errors.OpPrereqError("MAC address %s already in use"
9344 " in cluster" % mac,
9345 errors.ECODE_NOTUNIQUE)
9347 # Build nic parameters
9348 link = nic.get(constants.INIC_LINK, None)
9349 if link == constants.VALUE_AUTO:
9350 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9353 nicparams[constants.NIC_MODE] = nic_mode
9355 nicparams[constants.NIC_LINK] = link
9357 check_params = cluster.SimpleFillNIC(nicparams)
9358 objects.NIC.CheckParameterSyntax(check_params)
9359 nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
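# Illustrative sketch: an opcode NIC dict such as
#   {constants.INIC_MODE: constants.NIC_MODE_BRIDGED}
# ends up as an objects.NIC with the cluster default link, no IP, and a MAC
# of constants.VALUE_AUTO, which is replaced by a generated address later in
# LUInstanceCreate.CheckPrereq.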
9364 def _ComputeDisks(op, default_vg):
9365 """Computes the instance disks.
9367 @param op: The instance opcode
9368 @param default_vg: The default_vg to assume
9370 @return: The computed disks
9374 for disk in op.disks:
9375 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9376 if mode not in constants.DISK_ACCESS_SET:
9377 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9378 mode, errors.ECODE_INVAL)
9379 size = disk.get(constants.IDISK_SIZE, None)
9381 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9384 except (TypeError, ValueError):
9385 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9388 data_vg = disk.get(constants.IDISK_VG, default_vg)
9390 constants.IDISK_SIZE: size,
9391 constants.IDISK_MODE: mode,
9392 constants.IDISK_VG: data_vg,
9394 if constants.IDISK_METAVG in disk:
9395 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9396 if constants.IDISK_ADOPT in disk:
9397 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9398 disks.append(new_disk)
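# Illustrative sketch for _ComputeDisks: assuming default_vg is "xenvg", an
# opcode disk specification of {constants.IDISK_SIZE: 10240} is normalized to
#   {constants.IDISK_SIZE: 10240,
#    constants.IDISK_MODE: constants.DISK_RDWR,
#    constants.IDISK_VG: "xenvg"}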
9403 def _ComputeFullBeParams(op, cluster):
9404 """Computes the full beparams.
9406 @param op: The instance opcode
9407 @param cluster: The cluster config object
9409 @return: The fully filled beparams
9412 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9413 for param, value in op.beparams.iteritems():
9414 if value == constants.VALUE_AUTO:
9415 op.beparams[param] = default_beparams[param]
9416 objects.UpgradeBeParams(op.beparams)
9417 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
9418 return cluster.SimpleFillBE(op.beparams)
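# Sketch of the flow above: a value of constants.VALUE_AUTO in op.beparams is
# first replaced by the cluster default, UpgradeBeParams then converts any
# legacy "memory" setting into minmem/maxmem, and SimpleFillBE merges in the
# remaining cluster defaults.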
9421 class LUInstanceCreate(LogicalUnit):
9422 """Create an instance.
9425 HPATH = "instance-add"
9426 HTYPE = constants.HTYPE_INSTANCE
9429 def CheckArguments(self):
9433 # do not require name_check to ease forward/backward compatibility
9435 if self.op.no_install and self.op.start:
9436 self.LogInfo("No-installation mode selected, disabling startup")
9437 self.op.start = False
9438 # validate/normalize the instance name
9439 self.op.instance_name = \
9440 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9442 if self.op.ip_check and not self.op.name_check:
9443 # TODO: make the ip check more flexible and not depend on the name check
9444 raise errors.OpPrereqError("Cannot do IP address check without a name"
9445 " check", errors.ECODE_INVAL)
9447 # check nics' parameter names
9448 for nic in self.op.nics:
9449 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9451 # check disks. parameter names and consistent adopt/no-adopt strategy
9452 has_adopt = has_no_adopt = False
9453 for disk in self.op.disks:
9454 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9455 if constants.IDISK_ADOPT in disk:
9459 if has_adopt and has_no_adopt:
9460 raise errors.OpPrereqError("Either all disks are adopted or none is",
9463 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9464 raise errors.OpPrereqError("Disk adoption is not supported for the"
9465 " '%s' disk template" %
9466 self.op.disk_template,
9468 if self.op.iallocator is not None:
9469 raise errors.OpPrereqError("Disk adoption not allowed with an"
9470 " iallocator script", errors.ECODE_INVAL)
9471 if self.op.mode == constants.INSTANCE_IMPORT:
9472 raise errors.OpPrereqError("Disk adoption not allowed for"
9473 " instance import", errors.ECODE_INVAL)
9475 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9476 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9477 " but no 'adopt' parameter given" %
9478 self.op.disk_template,
9481 self.adopt_disks = has_adopt
9483 # instance name verification
9484 if self.op.name_check:
9485 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9486 self.op.instance_name = self.hostname1.name
9487 # used in CheckPrereq for ip ping check
9488 self.check_ip = self.hostname1.ip
9490 self.check_ip = None
9492 # file storage checks
9493 if (self.op.file_driver and
9494 not self.op.file_driver in constants.FILE_DRIVER):
9495 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9496 self.op.file_driver, errors.ECODE_INVAL)
9498 if self.op.disk_template == constants.DT_FILE:
9499 opcodes.RequireFileStorage()
9500 elif self.op.disk_template == constants.DT_SHARED_FILE:
9501 opcodes.RequireSharedFileStorage()
9503 ### Node/iallocator related checks
9504 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9506 if self.op.pnode is not None:
9507 if self.op.disk_template in constants.DTS_INT_MIRROR:
9508 if self.op.snode is None:
9509 raise errors.OpPrereqError("The networked disk templates need"
9510 " a mirror node", errors.ECODE_INVAL)
9512 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9514 self.op.snode = None
9516 self._cds = _GetClusterDomainSecret()
9518 if self.op.mode == constants.INSTANCE_IMPORT:
9519 # On import force_variant must be True, because if we forced it at
9520 # initial install, our only chance when importing it back is that it
9522 self.op.force_variant = True
9524 if self.op.no_install:
9525 self.LogInfo("No-installation mode has no effect during import")
9527 elif self.op.mode == constants.INSTANCE_CREATE:
9528 if self.op.os_type is None:
9529 raise errors.OpPrereqError("No guest OS specified",
9531 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9532 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9533 " installation" % self.op.os_type,
9535 if self.op.disk_template is None:
9536 raise errors.OpPrereqError("No disk template specified",
9539 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9540 # Check handshake to ensure both clusters have the same domain secret
9541 src_handshake = self.op.source_handshake
9542 if not src_handshake:
9543 raise errors.OpPrereqError("Missing source handshake",
9546 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9549 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9552 # Load and check source CA
9553 self.source_x509_ca_pem = self.op.source_x509_ca
9554 if not self.source_x509_ca_pem:
9555 raise errors.OpPrereqError("Missing source X509 CA",
9559 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9561 except OpenSSL.crypto.Error, err:
9562 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9563 (err, ), errors.ECODE_INVAL)
9565 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9566 if errcode is not None:
9567 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9570 self.source_x509_ca = cert
9572 src_instance_name = self.op.source_instance_name
9573 if not src_instance_name:
9574 raise errors.OpPrereqError("Missing source instance name",
9577 self.source_instance_name = \
9578 netutils.GetHostname(name=src_instance_name).name
9581 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9582 self.op.mode, errors.ECODE_INVAL)
9584 def ExpandNames(self):
9585 """ExpandNames for CreateInstance.
9587 Figure out the right locks for instance creation.
9590 self.needed_locks = {}
9592 instance_name = self.op.instance_name
9593 # this is just a preventive check, but someone might still add this
9594 # instance in the meantime, and creation will fail at lock-add time
9595 if instance_name in self.cfg.GetInstanceList():
9596 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9597 instance_name, errors.ECODE_EXISTS)
9599 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9601 if self.op.iallocator:
9602 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9603 # specifying a group on instance creation and then selecting nodes from
9605 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9606 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9608 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9609 nodelist = [self.op.pnode]
9610 if self.op.snode is not None:
9611 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9612 nodelist.append(self.op.snode)
9613 self.needed_locks[locking.LEVEL_NODE] = nodelist
9614 # Lock resources of instance's primary and secondary nodes (copy to
9615 # prevent accidental modification)
9616 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9618 # in case of import lock the source node too
9619 if self.op.mode == constants.INSTANCE_IMPORT:
9620 src_node = self.op.src_node
9621 src_path = self.op.src_path
9623 if src_path is None:
9624 self.op.src_path = src_path = self.op.instance_name
9626 if src_node is None:
9627 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9628 self.op.src_node = None
9629 if os.path.isabs(src_path):
9630 raise errors.OpPrereqError("Importing an instance from a path"
9631 " requires a source node option",
9634 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9635 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9636 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9637 if not os.path.isabs(src_path):
9638 self.op.src_path = src_path = \
9639 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
9641 def _RunAllocator(self):
9642 """Run the allocator based on input opcode.
9645 req = _CreateInstanceAllocRequest(self.op, self.disks,
9646 self.nics, self.be_full)
9647 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9649 ial.Run(self.op.iallocator)
9652 raise errors.OpPrereqError("Can't compute nodes using"
9653 " iallocator '%s': %s" %
9654 (self.op.iallocator, ial.info),
9656 self.op.pnode = ial.result[0]
9657 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9658 self.op.instance_name, self.op.iallocator,
9659 utils.CommaJoin(ial.result))
9661 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9663 if req.RequiredNodes() == 2:
9664 self.op.snode = ial.result[1]
9666 def BuildHooksEnv(self):
9669 This runs on the master, primary and secondary nodes of the instance.
9673 "ADD_MODE": self.op.mode,
9675 if self.op.mode == constants.INSTANCE_IMPORT:
9676 env["SRC_NODE"] = self.op.src_node
9677 env["SRC_PATH"] = self.op.src_path
9678 env["SRC_IMAGES"] = self.src_images
9680 env.update(_BuildInstanceHookEnv(
9681 name=self.op.instance_name,
9682 primary_node=self.op.pnode,
9683 secondary_nodes=self.secondaries,
9684 status=self.op.start,
9685 os_type=self.op.os_type,
9686 minmem=self.be_full[constants.BE_MINMEM],
9687 maxmem=self.be_full[constants.BE_MAXMEM],
9688 vcpus=self.be_full[constants.BE_VCPUS],
9689 nics=_NICListToTuple(self, self.nics),
9690 disk_template=self.op.disk_template,
9691 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9692 for d in self.disks],
9695 hypervisor_name=self.op.hypervisor,
9701 def BuildHooksNodes(self):
9702 """Build hooks nodes.
9705 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9708 def _ReadExportInfo(self):
9709 """Reads the export information from disk.
9711 It will override the opcode source node and path with the actual
9712 information, if these two were not specified before.
9714 @return: the export information
9717 assert self.op.mode == constants.INSTANCE_IMPORT
9719 src_node = self.op.src_node
9720 src_path = self.op.src_path
9722 if src_node is None:
9723 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9724 exp_list = self.rpc.call_export_list(locked_nodes)
9726 for node in exp_list:
9727 if exp_list[node].fail_msg:
9729 if src_path in exp_list[node].payload:
9731 self.op.src_node = src_node = node
9732 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
9736 raise errors.OpPrereqError("No export found for relative path %s" %
9737 src_path, errors.ECODE_INVAL)
9739 _CheckNodeOnline(self, src_node)
9740 result = self.rpc.call_export_info(src_node, src_path)
9741 result.Raise("No export or invalid export found in dir %s" % src_path)
9743 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9744 if not export_info.has_section(constants.INISECT_EXP):
9745 raise errors.ProgrammerError("Corrupted export config",
9746 errors.ECODE_ENVIRON)
9748 ei_version = export_info.get(constants.INISECT_EXP, "version")
9749 if (int(ei_version) != constants.EXPORT_VERSION):
9750 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9751 (ei_version, constants.EXPORT_VERSION),
9752 errors.ECODE_ENVIRON)
9755 def _ReadExportParams(self, einfo):
9756 """Use export parameters as defaults.
9758 In case the opcode doesn't specify (i.e. override) some instance
9759 parameters, try to use them from the export information, if available.
9763 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9765 if self.op.disk_template is None:
9766 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9767 self.op.disk_template = einfo.get(constants.INISECT_INS,
9769 if self.op.disk_template not in constants.DISK_TEMPLATES:
9770 raise errors.OpPrereqError("Disk template specified in configuration"
9771 " file is not one of the allowed values:"
9773 " ".join(constants.DISK_TEMPLATES),
9776 raise errors.OpPrereqError("No disk template specified and the export"
9777 " is missing the disk_template information",
9780 if not self.op.disks:
9782 # TODO: import the disk iv_name too
9783 for idx in range(constants.MAX_DISKS):
9784 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9785 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9786 disks.append({constants.IDISK_SIZE: disk_sz})
9787 self.op.disks = disks
9788 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9789 raise errors.OpPrereqError("No disk info specified and the export"
9790 " is missing the disk information",
9793 if not self.op.nics:
9795 for idx in range(constants.MAX_NICS):
9796 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9798 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9799 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9806 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9807 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9809 if (self.op.hypervisor is None and
9810 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9811 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9813 if einfo.has_section(constants.INISECT_HYP):
9814 # use the export parameters but do not override the ones
9815 # specified by the user
9816 for name, value in einfo.items(constants.INISECT_HYP):
9817 if name not in self.op.hvparams:
9818 self.op.hvparams[name] = value
9820 if einfo.has_section(constants.INISECT_BEP):
9821 # use the parameters, without overriding
9822 for name, value in einfo.items(constants.INISECT_BEP):
9823 if name not in self.op.beparams:
9824 self.op.beparams[name] = value
9825 # Compatibility for the old "memory" be param
9826 if name == constants.BE_MEMORY:
9827 if constants.BE_MAXMEM not in self.op.beparams:
9828 self.op.beparams[constants.BE_MAXMEM] = value
9829 if constants.BE_MINMEM not in self.op.beparams:
9830 self.op.beparams[constants.BE_MINMEM] = value
9832 # try to read the parameters old style, from the main section
9833 for name in constants.BES_PARAMETERS:
9834 if (name not in self.op.beparams and
9835 einfo.has_option(constants.INISECT_INS, name)):
9836 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9838 if einfo.has_section(constants.INISECT_OSP):
9839 # use the parameters, without overriding
9840 for name, value in einfo.items(constants.INISECT_OSP):
9841 if name not in self.op.osparams:
9842 self.op.osparams[name] = value
9844 def _RevertToDefaults(self, cluster):
9845 """Revert the instance parameters to the default values.
9849 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9850 for name in self.op.hvparams.keys():
9851 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9852 del self.op.hvparams[name]
9854 be_defs = cluster.SimpleFillBE({})
9855 for name in self.op.beparams.keys():
9856 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9857 del self.op.beparams[name]
9859 nic_defs = cluster.SimpleFillNIC({})
9860 for nic in self.op.nics:
9861 for name in constants.NICS_PARAMETERS:
9862 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9865 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9866 for name in self.op.osparams.keys():
9867 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9868 del self.op.osparams[name]
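# Illustrative sketch: with identify_defaults set and, say, an hvparams entry
# whose value equals the cluster/OS default, the entry is removed above so
# the instance keeps following future changes to the default instead of
# pinning today's value.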
9870 def _CalculateFileStorageDir(self):
9871 """Calculate final instance file storage dir.
9874 # file storage dir calculation/check
9875 self.instance_file_storage_dir = None
9876 if self.op.disk_template in constants.DTS_FILEBASED:
9877 # build the full file storage dir path
9880 if self.op.disk_template == constants.DT_SHARED_FILE:
9881 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9883 get_fsd_fn = self.cfg.GetFileStorageDir
9885 cfg_storagedir = get_fsd_fn()
9886 if not cfg_storagedir:
9887 raise errors.OpPrereqError("Cluster file storage dir not defined",
9889 joinargs.append(cfg_storagedir)
9891 if self.op.file_storage_dir is not None:
9892 joinargs.append(self.op.file_storage_dir)
9894 joinargs.append(self.op.instance_name)
9896 # pylint: disable=W0142
9897 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9899 def CheckPrereq(self): # pylint: disable=R0914
9900 """Check prerequisites.
9903 self._CalculateFileStorageDir()
9905 if self.op.mode == constants.INSTANCE_IMPORT:
9906 export_info = self._ReadExportInfo()
9907 self._ReadExportParams(export_info)
9908 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9910 self._old_instance_name = None
9912 if (not self.cfg.GetVGName() and
9913 self.op.disk_template not in constants.DTS_NOT_LVM):
9914 raise errors.OpPrereqError("Cluster does not support lvm-based"
9915 " instances", errors.ECODE_STATE)
9917 if (self.op.hypervisor is None or
9918 self.op.hypervisor == constants.VALUE_AUTO):
9919 self.op.hypervisor = self.cfg.GetHypervisorType()
9921 cluster = self.cfg.GetClusterInfo()
9922 enabled_hvs = cluster.enabled_hypervisors
9923 if self.op.hypervisor not in enabled_hvs:
9924 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9926 (self.op.hypervisor, ",".join(enabled_hvs)),
9929 # Check tag validity
9930 for tag in self.op.tags:
9931 objects.TaggableObject.ValidateTag(tag)
9933 # check hypervisor parameter syntax (locally)
9934 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9935 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9937 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9938 hv_type.CheckParameterSyntax(filled_hvp)
9939 self.hv_full = filled_hvp
9940 # check that we don't specify global parameters on an instance
9941 _CheckGlobalHvParams(self.op.hvparams)
9943 # fill and remember the beparams dict
9944 self.be_full = _ComputeFullBeParams(self.op, cluster)
9946 # build os parameters
9947 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9949 # now that hvp/bep are in final format, let's reset to defaults,
9951 if self.op.identify_defaults:
9952 self._RevertToDefaults(cluster)
9955 self.nics = _ComputeNics(self.op, cluster, self.hostname1.ip, self.cfg,
9958 # disk checks/pre-build
9959 default_vg = self.cfg.GetVGName()
9960 self.disks = _ComputeDisks(self.op, default_vg)
9962 if self.op.mode == constants.INSTANCE_IMPORT:
9964 for idx in range(len(self.disks)):
9965 option = "disk%d_dump" % idx
9966 if export_info.has_option(constants.INISECT_INS, option):
9967 # FIXME: are the old os-es, disk sizes, etc. useful?
9968 export_name = export_info.get(constants.INISECT_INS, option)
9969 image = utils.PathJoin(self.op.src_path, export_name)
9970 disk_images.append(image)
9972 disk_images.append(False)
9974 self.src_images = disk_images
9976 if self.op.instance_name == self._old_instance_name:
9977 for idx, nic in enumerate(self.nics):
9978 if nic.mac == constants.VALUE_AUTO:
9979 nic_mac_ini = "nic%d_mac" % idx
9980 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9982 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9984 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9985 if self.op.ip_check:
9986 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9987 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9988 (self.check_ip, self.op.instance_name),
9989 errors.ECODE_NOTUNIQUE)
9991 #### mac address generation
9992 # By generating the mac address here, both the allocator and the hooks get
9993 # the real final mac address rather than the 'auto' or 'generate' value.
9994 # There is a race condition between the generation and the instance object
9995 # creation, which means that we know the mac is valid now, but we're not
9996 # sure it will be when we actually add the instance. If things go bad
9997 # adding the instance will abort because of a duplicate mac, and the
9998 # creation job will fail.
9999 for nic in self.nics:
10000 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10001 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
10005 if self.op.iallocator is not None:
10006 self._RunAllocator()
10008 # Release all unneeded node locks
10009 _ReleaseLocks(self, locking.LEVEL_NODE,
10010 keep=filter(None, [self.op.pnode, self.op.snode,
10011 self.op.src_node]))
10012 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
10013 keep=filter(None, [self.op.pnode, self.op.snode,
10014 self.op.src_node]))
10016 #### node related checks
10018 # check primary node
10019 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10020 assert self.pnode is not None, \
10021 "Cannot retrieve locked node %s" % self.op.pnode
10023 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10024 pnode.name, errors.ECODE_STATE)
10026 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10027 pnode.name, errors.ECODE_STATE)
10028 if not pnode.vm_capable:
10029 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10030 " '%s'" % pnode.name, errors.ECODE_STATE)
10032 self.secondaries = []
10034 # mirror node verification
10035 if self.op.disk_template in constants.DTS_INT_MIRROR:
10036 if self.op.snode == pnode.name:
10037 raise errors.OpPrereqError("The secondary node cannot be the"
10038 " primary node", errors.ECODE_INVAL)
10039 _CheckNodeOnline(self, self.op.snode)
10040 _CheckNodeNotDrained(self, self.op.snode)
10041 _CheckNodeVmCapable(self, self.op.snode)
10042 self.secondaries.append(self.op.snode)
10044 snode = self.cfg.GetNodeInfo(self.op.snode)
10045 if pnode.group != snode.group:
10046 self.LogWarning("The primary and secondary nodes are in two"
10047 " different node groups; the disk parameters"
10048 " from the first disk's node group will be"
10051 nodenames = [pnode.name] + self.secondaries
10053 # Verify instance specs
10054 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10056 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10057 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10058 constants.ISPEC_DISK_COUNT: len(self.disks),
10059 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10060 constants.ISPEC_NIC_COUNT: len(self.nics),
10061 constants.ISPEC_SPINDLE_USE: spindle_use,
10064 group_info = self.cfg.GetNodeGroup(pnode.group)
10065 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10066 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10067 if not self.op.ignore_ipolicy and res:
10068 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10069 (pnode.group, group_info.name, utils.CommaJoin(res)))
10070 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10072 if not self.adopt_disks:
10073 if self.op.disk_template == constants.DT_RBD:
10074 # _CheckRADOSFreeSpace() is just a placeholder.
10075 # Any function that checks prerequisites can be placed here.
10076 # Check if there is enough space on the RADOS cluster.
10077 _CheckRADOSFreeSpace()
10079 # Check lv size requirements, if not adopting
10080 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10081 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10083 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10084 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10085 disk[constants.IDISK_ADOPT])
10086 for disk in self.disks])
10087 if len(all_lvs) != len(self.disks):
10088 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10089 errors.ECODE_INVAL)
10090 for lv_name in all_lvs:
10092 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
10093 # to ReserveLV uses the same syntax
10094 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10095 except errors.ReservationError:
10096 raise errors.OpPrereqError("LV named %s used by another instance" %
10097 lv_name, errors.ECODE_NOTUNIQUE)
10099 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10100 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10102 node_lvs = self.rpc.call_lv_list([pnode.name],
10103 vg_names.payload.keys())[pnode.name]
10104 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10105 node_lvs = node_lvs.payload
10107 delta = all_lvs.difference(node_lvs.keys())
10109 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10110 utils.CommaJoin(delta),
10111 errors.ECODE_INVAL)
10112 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10114 raise errors.OpPrereqError("Online logical volumes found, cannot"
10115 " adopt: %s" % utils.CommaJoin(online_lvs),
10116 errors.ECODE_STATE)
10117 # update the size of disk based on what is found
10118 for dsk in self.disks:
10119 dsk[constants.IDISK_SIZE] = \
10120 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10121 dsk[constants.IDISK_ADOPT])][0]))
10123 elif self.op.disk_template == constants.DT_BLOCK:
10124 # Normalize and de-duplicate device paths
10125 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10126 for disk in self.disks])
10127 if len(all_disks) != len(self.disks):
10128 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10129 errors.ECODE_INVAL)
10130 baddisks = [d for d in all_disks
10131 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10133 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10134 " cannot be adopted" %
10135 (", ".join(baddisks),
10136 constants.ADOPTABLE_BLOCKDEV_ROOT),
10137 errors.ECODE_INVAL)
10139 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10140 list(all_disks))[pnode.name]
10141 node_disks.Raise("Cannot get block device information from node %s" %
10143 node_disks = node_disks.payload
10144 delta = all_disks.difference(node_disks.keys())
10146 raise errors.OpPrereqError("Missing block device(s): %s" %
10147 utils.CommaJoin(delta),
10148 errors.ECODE_INVAL)
10149 for dsk in self.disks:
10150 dsk[constants.IDISK_SIZE] = \
10151 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10153 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10155 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10156 # check OS parameters (remotely)
10157 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10159 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10161 # memory check on primary node
10162 #TODO(dynmem): use MINMEM for checking
10164 _CheckNodeFreeMemory(self, self.pnode.name,
10165 "creating instance %s" % self.op.instance_name,
10166 self.be_full[constants.BE_MAXMEM],
10167 self.op.hypervisor)
10169 self.dry_run_result = list(nodenames)
10171 def Exec(self, feedback_fn):
10172 """Create and add the instance to the cluster.
10175 instance = self.op.instance_name
10176 pnode_name = self.pnode.name
10178 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10179 self.owned_locks(locking.LEVEL_NODE)), \
10180 "Node locks differ from node resource locks"
10182 ht_kind = self.op.hypervisor
10183 if ht_kind in constants.HTS_REQ_PORT:
10184 network_port = self.cfg.AllocatePort()
10186 network_port = None
10188 # This is ugly but we have a chicken-and-egg problem here:
10189 # We can only take the group disk parameters, as the instance
10190 # has no disks yet (we are generating them right here).
10191 node = self.cfg.GetNodeInfo(pnode_name)
10192 nodegroup = self.cfg.GetNodeGroup(node.group)
10193 disks = _GenerateDiskTemplate(self,
10194 self.op.disk_template,
10195 instance, pnode_name,
10198 self.instance_file_storage_dir,
10199 self.op.file_driver,
10202 self.cfg.GetGroupDiskParams(nodegroup))
10204 iobj = objects.Instance(name=instance, os=self.op.os_type,
10205 primary_node=pnode_name,
10206 nics=self.nics, disks=disks,
10207 disk_template=self.op.disk_template,
10208 admin_state=constants.ADMINST_DOWN,
10209 network_port=network_port,
10210 beparams=self.op.beparams,
10211 hvparams=self.op.hvparams,
10212 hypervisor=self.op.hypervisor,
10213 osparams=self.op.osparams,
10217 for tag in self.op.tags:
10220 if self.adopt_disks:
10221 if self.op.disk_template == constants.DT_PLAIN:
10222 # rename LVs to the newly-generated names; we need to construct
10223 # 'fake' LV disks with the old data, plus the new unique_id
10224 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10226 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10227 rename_to.append(t_dsk.logical_id)
10228 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10229 self.cfg.SetDiskID(t_dsk, pnode_name)
10230 result = self.rpc.call_blockdev_rename(pnode_name,
10231 zip(tmp_disks, rename_to))
10232 result.Raise("Failed to rename adopted LVs")
10234 feedback_fn("* creating instance disks...")
10236 _CreateDisks(self, iobj)
10237 except errors.OpExecError:
10238 self.LogWarning("Device creation failed, reverting...")
10240 _RemoveDisks(self, iobj)
10242 self.cfg.ReleaseDRBDMinors(instance)
10245 feedback_fn("adding instance %s to cluster config" % instance)
10247 self.cfg.AddInstance(iobj, self.proc.GetECId())
10249 # Declare that we don't want to remove the instance lock anymore, as we've
10250 # added the instance to the config
10251 del self.remove_locks[locking.LEVEL_INSTANCE]
10253 if self.op.mode == constants.INSTANCE_IMPORT:
10254 # Release unused nodes
10255 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10257 # Release all nodes
10258 _ReleaseLocks(self, locking.LEVEL_NODE)
10261 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10262 feedback_fn("* wiping instance disks...")
10264 _WipeDisks(self, iobj)
10265 except errors.OpExecError, err:
10266 logging.exception("Wiping disks failed")
10267 self.LogWarning("Wiping instance disks failed (%s)", err)
10271 # Something is already wrong with the disks, don't do anything else
10273 elif self.op.wait_for_sync:
10274 disk_abort = not _WaitForSync(self, iobj)
10275 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10276 # make sure the disks are not degraded (still sync-ing is ok)
10277 feedback_fn("* checking mirrors status")
10278 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10283 _RemoveDisks(self, iobj)
10284 self.cfg.RemoveInstance(iobj.name)
10285 # Make sure the instance lock gets removed
10286 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10287 raise errors.OpExecError("There are some degraded disks for"
10290 # Release all node resource locks
10291 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10293 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10294 # we need to set the disks ID to the primary node, since the
10295 # preceding code might or might not have done it, depending on
10296 # disk template and other options
10297 for disk in iobj.disks:
10298 self.cfg.SetDiskID(disk, pnode_name)
10299 if self.op.mode == constants.INSTANCE_CREATE:
10300 if not self.op.no_install:
10301 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10302 not self.op.wait_for_sync)
10304 feedback_fn("* pausing disk sync to install instance OS")
10305 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10308 for idx, success in enumerate(result.payload):
10310 logging.warn("pause-sync of instance %s for disk %d failed",
10313 feedback_fn("* running the instance OS create scripts...")
10314 # FIXME: pass debug option from opcode to backend
10316 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10317 self.op.debug_level)
10319 feedback_fn("* resuming disk sync")
10320 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10323 for idx, success in enumerate(result.payload):
10325 logging.warn("resume-sync of instance %s for disk %d failed",
10328 os_add_result.Raise("Could not add os for instance %s"
10329 " on node %s" % (instance, pnode_name))
10332 if self.op.mode == constants.INSTANCE_IMPORT:
10333 feedback_fn("* running the instance OS import scripts...")
10337 for idx, image in enumerate(self.src_images):
10341 # FIXME: pass debug option from opcode to backend
10342 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10343 constants.IEIO_FILE, (image, ),
10344 constants.IEIO_SCRIPT,
10345 (iobj.disks[idx], idx),
10347 transfers.append(dt)
10350 masterd.instance.TransferInstanceData(self, feedback_fn,
10351 self.op.src_node, pnode_name,
10352 self.pnode.secondary_ip,
10354 if not compat.all(import_result):
10355 self.LogWarning("Some disks for instance %s on node %s were not"
10356 " imported successfully" % (instance, pnode_name))
10358 rename_from = self._old_instance_name
10360 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10361 feedback_fn("* preparing remote import...")
10362 # The source cluster will stop the instance before attempting to make
10363 # a connection. In some cases stopping an instance can take a long
10364 # time, hence the shutdown timeout is added to the connection timeout.
10366 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10367 self.op.source_shutdown_timeout)
10368 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10370 assert iobj.primary_node == self.pnode.name
10372 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10373 self.source_x509_ca,
10374 self._cds, timeouts)
10375 if not compat.all(disk_results):
10376 # TODO: Should the instance still be started, even if some disks
10377 # failed to import (valid for local imports, too)?
10378 self.LogWarning("Some disks for instance %s on node %s were not"
10379 " imported successfully" % (instance, pnode_name))
10381 rename_from = self.source_instance_name
10384 # also checked in the prereq part
10385 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10388 # Run rename script on newly imported instance
10389 assert iobj.name == instance
10390 feedback_fn("Running rename script for %s" % instance)
10391 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10393 self.op.debug_level)
10394 if result.fail_msg:
10395 self.LogWarning("Failed to run rename script for %s on node"
10396 " %s: %s" % (instance, pnode_name, result.fail_msg))
10398 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10401 iobj.admin_state = constants.ADMINST_UP
10402 self.cfg.Update(iobj, feedback_fn)
10403 logging.info("Starting instance %s on node %s", instance, pnode_name)
10404 feedback_fn("* starting instance...")
10405 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10407 result.Raise("Could not start instance")
10409 return list(iobj.all_nodes)
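# Client-side sketch with hypothetical values; the opcode fields mirror the
# self.op attributes used by this LU:
#   op = opcodes.OpInstanceCreate(instance_name="inst1.example.com",
#                                 mode=constants.INSTANCE_CREATE,
#                                 disk_template=constants.DT_DRBD8,
#                                 disks=[{constants.IDISK_SIZE: 10240}],
#                                 nics=[{}],
#                                 os_type="debootstrap",
#                                 pnode="node1.example.com",
#                                 snode="node2.example.com")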
10412 def _CheckRADOSFreeSpace():
10413 """Compute disk size requirements inside the RADOS cluster.
10416 # For the RADOS cluster we assume there is always enough space.
10420 class LUInstanceConsole(NoHooksLU):
10421 """Connect to an instance's console.
10423 This is somewhat special in that it returns the command line that
10424 you need to run on the master node in order to connect to the console.
10430 def ExpandNames(self):
10431 self.share_locks = _ShareAll()
10432 self._ExpandAndLockInstance()
10434 def CheckPrereq(self):
10435 """Check prerequisites.
10437 This checks that the instance is in the cluster.
10440 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10441 assert self.instance is not None, \
10442 "Cannot retrieve locked instance %s" % self.op.instance_name
10443 _CheckNodeOnline(self, self.instance.primary_node)
10445 def Exec(self, feedback_fn):
10446 """Connect to the console of an instance
10449 instance = self.instance
10450 node = instance.primary_node
10452 node_insts = self.rpc.call_instance_list([node],
10453 [instance.hypervisor])[node]
10454 node_insts.Raise("Can't get node information from %s" % node)
10456 if instance.name not in node_insts.payload:
10457 if instance.admin_state == constants.ADMINST_UP:
10458 state = constants.INSTST_ERRORDOWN
10459 elif instance.admin_state == constants.ADMINST_DOWN:
10460 state = constants.INSTST_ADMINDOWN
10462 state = constants.INSTST_ADMINOFFLINE
10463 raise errors.OpExecError("Instance %s is not running (state %s)" %
10464 (instance.name, state))
10466 logging.debug("Connecting to console of %s on %s", instance.name, node)
10468 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10471 def _GetInstanceConsole(cluster, instance):
10472 """Returns console information for an instance.
10474 @type cluster: L{objects.Cluster}
10475 @type instance: L{objects.Instance}
10479 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10480 # beparams and hvparams are passed separately, to avoid editing the
10481 # instance and then saving the defaults in the instance itself.
10482 hvparams = cluster.FillHV(instance)
10483 beparams = cluster.FillBE(instance)
10484 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10486 assert console.instance == instance.name
10487 assert console.Validate()
10489 return console.ToDict()
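# The dictionary returned here is what LUInstanceConsole.Exec hands back to
# the caller; clients typically rebuild the console object from it and run
# the described command on the master node.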
10492 class LUInstanceReplaceDisks(LogicalUnit):
10493 """Replace the disks of an instance.
10496 HPATH = "mirrors-replace"
10497 HTYPE = constants.HTYPE_INSTANCE
10500 def CheckArguments(self):
10501 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10502 self.op.iallocator)
10504 def ExpandNames(self):
10505 self._ExpandAndLockInstance()
10507 assert locking.LEVEL_NODE not in self.needed_locks
10508 assert locking.LEVEL_NODE_RES not in self.needed_locks
10509 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10511 assert self.op.iallocator is None or self.op.remote_node is None, \
10512 "Conflicting options"
10514 if self.op.remote_node is not None:
10515 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10517 # Warning: do not remove the locking of the new secondary here
10518 # unless DRBD8.AddChildren is changed to work in parallel;
10519 # currently it doesn't since parallel invocations of
10520 # FindUnusedMinor will conflict
10521 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10522 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10524 self.needed_locks[locking.LEVEL_NODE] = []
10525 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10527 if self.op.iallocator is not None:
10528 # iallocator will select a new node in the same group
10529 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10531 self.needed_locks[locking.LEVEL_NODE_RES] = []
10533 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10534 self.op.iallocator, self.op.remote_node,
10535 self.op.disks, False, self.op.early_release,
10536 self.op.ignore_ipolicy)
10538 self.tasklets = [self.replacer]
10540 def DeclareLocks(self, level):
10541 if level == locking.LEVEL_NODEGROUP:
10542 assert self.op.remote_node is None
10543 assert self.op.iallocator is not None
10544 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10546 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10547 # Lock all groups used by instance optimistically; this requires going
10548 # via the node before it's locked, requiring verification later on
10549 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10550 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10552 elif level == locking.LEVEL_NODE:
10553 if self.op.iallocator is not None:
10554 assert self.op.remote_node is None
10555 assert not self.needed_locks[locking.LEVEL_NODE]
10557 # Lock member nodes of all locked groups
10558 self.needed_locks[locking.LEVEL_NODE] = \
10560 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10561 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10563 self._LockInstancesNodes()
10564 elif level == locking.LEVEL_NODE_RES:
10566 self.needed_locks[locking.LEVEL_NODE_RES] = \
10567 self.needed_locks[locking.LEVEL_NODE]
10569 def BuildHooksEnv(self):
10570 """Build hooks env.
10572 This runs on the master, the primary and all the secondaries.
10575 instance = self.replacer.instance
10577 "MODE": self.op.mode,
10578 "NEW_SECONDARY": self.op.remote_node,
10579 "OLD_SECONDARY": instance.secondary_nodes[0],
10581 env.update(_BuildInstanceHookEnvByObject(self, instance))
10584 def BuildHooksNodes(self):
10585 """Build hooks nodes.
10588 instance = self.replacer.instance
10590 self.cfg.GetMasterNode(),
10591 instance.primary_node,
10593 if self.op.remote_node is not None:
10594 nl.append(self.op.remote_node)
10597 def CheckPrereq(self):
10598 """Check prerequisites.
10601 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10602 self.op.iallocator is None)
10604 # Verify if node group locks are still correct
10605 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10607 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10609 return LogicalUnit.CheckPrereq(self)
10612 class TLReplaceDisks(Tasklet):
10613 """Replaces disks for an instance.
10615 Note: Locking is not within the scope of this class.
10618 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10619 disks, delay_iallocator, early_release, ignore_ipolicy):
10620 """Initializes this class.
10623 Tasklet.__init__(self, lu)
10626 self.instance_name = instance_name
10628 self.iallocator_name = iallocator_name
10629 self.remote_node = remote_node
10631 self.delay_iallocator = delay_iallocator
10632 self.early_release = early_release
10633 self.ignore_ipolicy = ignore_ipolicy
10636 self.instance = None
10637 self.new_node = None
10638 self.target_node = None
10639 self.other_node = None
10640 self.remote_node_info = None
10641 self.node_secondary_ip = None
10644 def CheckArguments(mode, remote_node, ialloc):
10645 """Helper function for users of this class.
10648 # check for valid parameter combination
10649 if mode == constants.REPLACE_DISK_CHG:
10650 if remote_node is None and ialloc is None:
10651 raise errors.OpPrereqError("When changing the secondary either an"
10652 " iallocator script must be used or the"
10653 " new node given", errors.ECODE_INVAL)
10655 if remote_node is not None and ialloc is not None:
10656 raise errors.OpPrereqError("Give either the iallocator or the new"
10657 " secondary, not both", errors.ECODE_INVAL)
10659 elif remote_node is not None or ialloc is not None:
10660 # Not replacing the secondary
10661 raise errors.OpPrereqError("The iallocator and new node options can"
10662 " only be used when changing the"
10663 " secondary node", errors.ECODE_INVAL)
10666 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10667 """Compute a new secondary node using an IAllocator.
10670 req = iallocator.IAReqRelocate(name=instance_name,
10671 relocate_from=list(relocate_from))
10672 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
10674 ial.Run(iallocator_name)
10676 if not ial.success:
10677 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10678 " %s" % (iallocator_name, ial.info),
10679 errors.ECODE_NORES)
10681 remote_node_name = ial.result[0]
10683 lu.LogInfo("Selected new secondary for instance '%s': %s",
10684 instance_name, remote_node_name)
10686 return remote_node_name
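# Sketch of the request built above: IAReqRelocate asks the iallocator for a
# single replacement secondary while the primary node stays fixed; compare
# the IAReqInstanceAlloc request built by _CreateInstanceAllocRequest.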
10688 def _FindFaultyDisks(self, node_name):
10689 """Wrapper for L{_FindFaultyInstanceDisks}.
10692 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10695 def _CheckDisksActivated(self, instance):
10696 """Checks if the instance disks are activated.
10698 @param instance: The instance whose disks to check
10699 @return: True if they are activated, False otherwise
10702 nodes = instance.all_nodes
10704 for idx, dev in enumerate(instance.disks):
10706 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10707 self.cfg.SetDiskID(dev, node)
10709 result = _BlockdevFind(self, node, dev, instance)
10713 elif result.fail_msg or not result.payload:
10718 def CheckPrereq(self):
10719 """Check prerequisites.
10721 This checks that the instance is in the cluster.
10724 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10725 assert instance is not None, \
10726 "Cannot retrieve locked instance %s" % self.instance_name
10728 if instance.disk_template != constants.DT_DRBD8:
10729 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10730 " instances", errors.ECODE_INVAL)
10732 if len(instance.secondary_nodes) != 1:
10733 raise errors.OpPrereqError("The instance has a strange layout,"
10734 " expected one secondary but found %d" %
10735 len(instance.secondary_nodes),
10736 errors.ECODE_FAULT)
10738 if not self.delay_iallocator:
10739 self._CheckPrereq2()
10741 def _CheckPrereq2(self):
10742 """Check prerequisites, second part.
10744 This function should always be part of CheckPrereq. It was separated and is
10745 now called from Exec because during node evacuation iallocator was only
10746 called with an unmodified cluster model, not taking planned changes into account.
10750 instance = self.instance
10751 secondary_node = instance.secondary_nodes[0]
10753 if self.iallocator_name is None:
10754 remote_node = self.remote_node
10756 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10757 instance.name, instance.secondary_nodes)
10759 if remote_node is None:
10760 self.remote_node_info = None
10762 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10763 "Remote node '%s' is not locked" % remote_node
10765 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10766 assert self.remote_node_info is not None, \
10767 "Cannot retrieve locked node %s" % remote_node
10769 if remote_node == self.instance.primary_node:
10770 raise errors.OpPrereqError("The specified node is the primary node of"
10771 " the instance", errors.ECODE_INVAL)
10773 if remote_node == secondary_node:
10774 raise errors.OpPrereqError("The specified node is already the"
10775 " secondary node of the instance",
10776 errors.ECODE_INVAL)
10778 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10779 constants.REPLACE_DISK_CHG):
10780 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10781 errors.ECODE_INVAL)
10783 if self.mode == constants.REPLACE_DISK_AUTO:
10784 if not self._CheckDisksActivated(instance):
10785 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10786 " first" % self.instance_name,
10787 errors.ECODE_STATE)
10788 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10789 faulty_secondary = self._FindFaultyDisks(secondary_node)
10791 if faulty_primary and faulty_secondary:
10792 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10793 " one node and can not be repaired"
10794 " automatically" % self.instance_name,
10795 errors.ECODE_STATE)
10798 self.disks = faulty_primary
10799 self.target_node = instance.primary_node
10800 self.other_node = secondary_node
10801 check_nodes = [self.target_node, self.other_node]
10802 elif faulty_secondary:
10803 self.disks = faulty_secondary
10804 self.target_node = secondary_node
10805 self.other_node = instance.primary_node
10806 check_nodes = [self.target_node, self.other_node]
10812 # Non-automatic modes
10813 if self.mode == constants.REPLACE_DISK_PRI:
10814 self.target_node = instance.primary_node
10815 self.other_node = secondary_node
10816 check_nodes = [self.target_node, self.other_node]
10818 elif self.mode == constants.REPLACE_DISK_SEC:
10819 self.target_node = secondary_node
10820 self.other_node = instance.primary_node
10821 check_nodes = [self.target_node, self.other_node]
10823 elif self.mode == constants.REPLACE_DISK_CHG:
10824 self.new_node = remote_node
10825 self.other_node = instance.primary_node
10826 self.target_node = secondary_node
10827 check_nodes = [self.new_node, self.other_node]
10829 _CheckNodeNotDrained(self.lu, remote_node)
10830 _CheckNodeVmCapable(self.lu, remote_node)
10832 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10833 assert old_node_info is not None
10834 if old_node_info.offline and not self.early_release:
10835 # doesn't make sense to delay the release
10836 self.early_release = True
10837 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10838 " early-release mode", secondary_node)
10841 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10844 # If not specified all disks should be replaced
10846 self.disks = range(len(self.instance.disks))
10848 # TODO: This is ugly, but right now we can't distinguish between
10849 # internally submitted opcodes and external ones. We should fix that.
10850 if self.remote_node_info:
10851 # We change the node, lets verify it still meets instance policy
10852 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10853 cluster = self.cfg.GetClusterInfo()
10854 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
10856 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10857 ignore=self.ignore_ipolicy)
10859 for node in check_nodes:
10860 _CheckNodeOnline(self.lu, node)
10862 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10865 if node_name is not None)
10867 # Release unneeded node and node resource locks
10868 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10869 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10871 # Release any owned node group
10872 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10873 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10875 # Check whether disks are valid
10876 for disk_idx in self.disks:
10877 instance.FindDisk(disk_idx)
10879 # Get secondary node IP addresses
10880 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10881 in self.cfg.GetMultiNodeInfo(touched_nodes))
10883 def Exec(self, feedback_fn):
10884 """Execute disk replacement.
10886 This dispatches the disk replacement to the appropriate handler.
10889 if self.delay_iallocator:
10890 self._CheckPrereq2()
10893 # Verify owned locks before starting operation
10894 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10895 assert set(owned_nodes) == set(self.node_secondary_ip), \
10896 ("Incorrect node locks, owning %s, expected %s" %
10897 (owned_nodes, self.node_secondary_ip.keys()))
10898 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10899 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10901 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10902 assert list(owned_instances) == [self.instance_name], \
10903 "Instance '%s' not locked" % self.instance_name
10905 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10906 "Should not own any node group lock at this point"
10909 feedback_fn("No disks need replacement")
10912 feedback_fn("Replacing disk(s) %s for %s" %
10913 (utils.CommaJoin(self.disks), self.instance.name))
10915 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10917 # Activate the instance disks if we're replacing them on a down instance
10919 _StartInstanceDisks(self.lu, self.instance, True)
10922 # Should we replace the secondary node?
10923 if self.new_node is not None:
10924 fn = self._ExecDrbd8Secondary
10926 fn = self._ExecDrbd8DiskOnly
10928 result = fn(feedback_fn)
10930 # Deactivate the instance disks if we're replacing them on a
10933 _SafeShutdownInstanceDisks(self.lu, self.instance)
10935 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10938 # Verify owned locks
10939 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10940 nodes = frozenset(self.node_secondary_ip)
10941 assert ((self.early_release and not owned_nodes) or
10942 (not self.early_release and not (set(owned_nodes) - nodes))), \
10943 ("Not owning the correct locks, early_release=%s, owned=%r,"
10944 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10948 def _CheckVolumeGroup(self, nodes):
10949 self.lu.LogInfo("Checking volume groups")
10951 vgname = self.cfg.GetVGName()
10953 # Make sure volume group exists on all involved nodes
10954 results = self.rpc.call_vg_list(nodes)
10956 raise errors.OpExecError("Can't list volume groups on the nodes")
10959 res = results[node]
10960 res.Raise("Error checking node %s" % node)
10961 if vgname not in res.payload:
10962 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10965 def _CheckDisksExistence(self, nodes):
10966 # Check disk existence
10967 for idx, dev in enumerate(self.instance.disks):
10968 if idx not in self.disks:
10972 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10973 self.cfg.SetDiskID(dev, node)
10975 result = _BlockdevFind(self, node, dev, self.instance)
10977 msg = result.fail_msg
10978 if msg or not result.payload:
10980 msg = "disk not found"
10981 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10984 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10985 for idx, dev in enumerate(self.instance.disks):
10986 if idx not in self.disks:
10989 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10992 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10993 on_primary, ldisk=ldisk):
10994 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10995 " replace disks for instance %s" %
10996 (node_name, self.instance.name))
10998 def _CreateNewStorage(self, node_name):
10999 """Create new storage on the primary or secondary node.
11001 This is only used for same-node replaces, not for changing the
11002 secondary node, hence we don't want to modify the existing disk.
11004 """
11005 iv_names = {}
11007 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11008 for idx, dev in enumerate(disks):
11009 if idx not in self.disks:
11010 continue
11012 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
11014 self.cfg.SetDiskID(dev, node_name)
11016 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11017 names = _GenerateUniqueNames(self.lu, lv_names)
11019 (data_disk, meta_disk) = dev.children
11020 vg_data = data_disk.logical_id[0]
11021 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11022 logical_id=(vg_data, names[0]),
11023 params=data_disk.params)
11024 vg_meta = meta_disk.logical_id[0]
11025 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11026 size=constants.DRBD_META_SIZE,
11027 logical_id=(vg_meta, names[1]),
11028 params=meta_disk.params)
11030 new_lvs = [lv_data, lv_meta]
11031 old_lvs = [child.Copy() for child in dev.children]
11032 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11034 # we pass force_create=True to force the LVM creation
11035 for new_lv in new_lvs:
11036 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11037 _GetInstanceInfoText(self.instance), False)
11039 return iv_names
11041 def _CheckDevices(self, node_name, iv_names):
11042 for name, (dev, _, _) in iv_names.iteritems():
11043 self.cfg.SetDiskID(dev, node_name)
11045 result = _BlockdevFind(self, node_name, dev, self.instance)
11047 msg = result.fail_msg
11048 if msg or not result.payload:
11049 if not msg:
11050 msg = "disk not found"
11051 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11052 (name, msg))
11054 if result.payload.is_degraded:
11055 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11057 def _RemoveOldStorage(self, node_name, iv_names):
11058 for name, (_, old_lvs, _) in iv_names.iteritems():
11059 self.lu.LogInfo("Remove logical volumes for %s" % name)
11061 for lv in old_lvs:
11062 self.cfg.SetDiskID(lv, node_name)
11064 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11065 if msg:
11066 self.lu.LogWarning("Can't remove old LV: %s" % msg,
11067 hint="remove unused LVs manually")
11069 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11070 """Replace a disk on the primary or secondary for DRBD 8.
11072 The algorithm for replace is quite complicated:
11074 1. for each disk to be replaced:
11076 1. create new LVs on the target node with unique names
11077 1. detach old LVs from the drbd device
11078 1. rename old LVs to name_replaced.<time_t>
11079 1. rename new LVs to old LVs
11080 1. attach the new LVs (with the old names now) to the drbd device
11082 1. wait for sync across all devices
11084 1. for each modified disk:
11086 1. remove old LVs (which have the name name_replaced.<time_t>)
11088 Failures are not very well handled.
11090 """
11091 steps_total = 6
11093 # Step: check device activation
11094 self.lu.LogStep(1, steps_total, "Check device existence")
11095 self._CheckDisksExistence([self.other_node, self.target_node])
11096 self._CheckVolumeGroup([self.target_node, self.other_node])
11098 # Step: check other node consistency
11099 self.lu.LogStep(2, steps_total, "Check peer consistency")
11100 self._CheckDisksConsistency(self.other_node,
11101 self.other_node == self.instance.primary_node,
11102 False)
11104 # Step: create new storage
11105 self.lu.LogStep(3, steps_total, "Allocate new storage")
11106 iv_names = self._CreateNewStorage(self.target_node)
11108 # Step: for each lv, detach+rename*2+attach
11109 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11110 for dev, old_lvs, new_lvs in iv_names.itervalues():
11111 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11113 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11114 old_lvs)
11115 result.Raise("Can't detach drbd from local storage on node"
11116 " %s for device %s" % (self.target_node, dev.iv_name))
11118 #cfg.Update(instance)
11120 # ok, we created the new LVs, so now we know we have the needed
11121 # storage; as such, we proceed on the target node to rename
11122 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11123 # using the assumption that logical_id == physical_id (which in
11124 # turn is the unique_id on that node)
11126 # FIXME(iustin): use a better name for the replaced LVs
11127 temp_suffix = int(time.time())
11128 ren_fn = lambda d, suff: (d.physical_id[0],
11129 d.physical_id[1] + "_replaced-%s" % suff)
11131 # Build the rename list based on what LVs exist on the node
11132 rename_old_to_new = []
11133 for to_ren in old_lvs:
11134 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11135 if not result.fail_msg and result.payload:
11136 # device exists
11137 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11139 self.lu.LogInfo("Renaming the old LVs on the target node")
11140 result = self.rpc.call_blockdev_rename(self.target_node,
11141 rename_old_to_new)
11142 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11144 # Now we rename the new LVs to the old LVs
11145 self.lu.LogInfo("Renaming the new LVs on the target node")
11146 rename_new_to_old = [(new, old.physical_id)
11147 for old, new in zip(old_lvs, new_lvs)]
11148 result = self.rpc.call_blockdev_rename(self.target_node,
11149 rename_new_to_old)
11150 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11152 # Intermediate steps of in memory modifications
11153 for old, new in zip(old_lvs, new_lvs):
11154 new.logical_id = old.logical_id
11155 self.cfg.SetDiskID(new, self.target_node)
11157 # We need to modify old_lvs so that removal later removes the
11158 # right LVs, not the newly added ones; note that old_lvs is a
11159 # copy here
11160 for disk in old_lvs:
11161 disk.logical_id = ren_fn(disk, temp_suffix)
11162 self.cfg.SetDiskID(disk, self.target_node)
11164 # Now that the new lvs have the old name, we can add them to the device
11165 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11166 result = self.rpc.call_blockdev_addchildren(self.target_node,
11167 (dev, self.instance), new_lvs)
11168 msg = result.fail_msg
11169 if msg:
11170 for new_lv in new_lvs:
11171 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11172 new_lv).fail_msg
11173 if msg2:
11174 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11175 hint=("cleanup manually the unused logical"
11176 " volumes"))
11177 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11179 cstep = itertools.count(5)
11181 if self.early_release:
11182 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11183 self._RemoveOldStorage(self.target_node, iv_names)
11184 # TODO: Check if releasing locks early still makes sense
11185 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11186 else:
11187 # Release all resource locks except those used by the instance
11188 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11189 keep=self.node_secondary_ip.keys())
11191 # Release all node locks while waiting for sync
11192 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11194 # TODO: Can the instance lock be downgraded here? Take the optional disk
11195 # shutdown in the caller into consideration.
11197 # Wait for sync
11198 # This can fail as the old devices are degraded and _WaitForSync
11199 # does a combined result over all disks, so we don't check its return value
11200 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11201 _WaitForSync(self.lu, self.instance)
11203 # Check all devices manually
11204 self._CheckDevices(self.instance.primary_node, iv_names)
11206 # Step: remove old storage
11207 if not self.early_release:
11208 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11209 self._RemoveOldStorage(self.target_node, iv_names)
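# Illustrative sketch (not part of the original code): the detach/rename/
# attach swap described in the docstring of _ExecDrbd8DiskOnly, reduced to
# LV name handling only. The helper and its arguments are hypothetical;
# the real code operates on objects.Disk pairs and issues the renames
# over RPC.
def _example_lv_swap_names(old_name, new_name, temp_suffix):
  # 1. move the old LV out of the way, keeping it around for rollback
  parked_name = "%s_replaced-%s" % (old_name, temp_suffix)
  # 2. give the new LV the name the drbd device already references
  return [(old_name, parked_name), (new_name, old_name)]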
11211 def _ExecDrbd8Secondary(self, feedback_fn):
11212 """Replace the secondary node for DRBD 8.
11214 The algorithm for replace is quite complicated:
11215 - for all disks of the instance:
11216 - create new LVs on the new node with same names
11217 - shutdown the drbd device on the old secondary
11218 - disconnect the drbd network on the primary
11219 - create the drbd device on the new secondary
11220 - network attach the drbd on the primary, using an artifice:
11221 the drbd code for Attach() will connect to the network if it
11222 finds a device which is connected to the good local disks but
11223 not network enabled
11224 - wait for sync across all devices
11225 - remove all disks from the old secondary
11227 Failures are not very well handled.
11229 """
11230 steps_total = 6
11232 pnode = self.instance.primary_node
11234 # Step: check device activation
11235 self.lu.LogStep(1, steps_total, "Check device existence")
11236 self._CheckDisksExistence([self.instance.primary_node])
11237 self._CheckVolumeGroup([self.instance.primary_node])
11239 # Step: check other node consistency
11240 self.lu.LogStep(2, steps_total, "Check peer consistency")
11241 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11243 # Step: create new storage
11244 self.lu.LogStep(3, steps_total, "Allocate new storage")
11245 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11246 for idx, dev in enumerate(disks):
11247 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11248 (self.new_node, idx))
11249 # we pass force_create=True to force LVM creation
11250 for new_lv in dev.children:
11251 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11252 True, _GetInstanceInfoText(self.instance), False)
11254 # Step 4: drbd minors and drbd setups changes
11255 # after this, we must manually remove the drbd minors on both the
11256 # error and the success paths
11257 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11258 minors = self.cfg.AllocateDRBDMinor([self.new_node
11259 for dev in self.instance.disks],
11260 self.instance.name)
11261 logging.debug("Allocated minors %r", minors)
11263 iv_names = {}
11264 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11265 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11266 (self.new_node, idx))
11267 # create new devices on new_node; note that we create two IDs:
11268 # one without port, so the drbd will be activated without
11269 # networking information on the new node at this stage, and one
11270 # with network, for the latter activation in step 4
11271 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11272 if self.instance.primary_node == o_node1:
11273 p_minor = o_minor1
11274 else:
11275 assert self.instance.primary_node == o_node2, "Three-node instance?"
11276 p_minor = o_minor2
11278 new_alone_id = (self.instance.primary_node, self.new_node, None,
11279 p_minor, new_minor, o_secret)
11280 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11281 p_minor, new_minor, o_secret)
11283 iv_names[idx] = (dev, dev.children, new_net_id)
11284 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11285 new_net_id)
11286 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11287 logical_id=new_alone_id,
11288 children=dev.children,
11289 size=dev.size,
11290 params={})
11291 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11292 self.cfg)
11293 try:
11294 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11295 anno_new_drbd,
11296 _GetInstanceInfoText(self.instance), False)
11297 except errors.GenericError:
11298 self.cfg.ReleaseDRBDMinors(self.instance.name)
11299 raise
11301 # We have new devices, shutdown the drbd on the old secondary
11302 for idx, dev in enumerate(self.instance.disks):
11303 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11304 self.cfg.SetDiskID(dev, self.target_node)
11305 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11306 (dev, self.instance)).fail_msg
11307 if msg:
11308 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11309 " node: %s" % (idx, msg),
11310 hint=("Please cleanup this device manually as"
11311 " soon as possible"))
11313 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11314 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11315 self.instance.disks)[pnode]
11317 msg = result.fail_msg
11318 if msg:
11319 # detaches didn't succeed (unlikely)
11320 self.cfg.ReleaseDRBDMinors(self.instance.name)
11321 raise errors.OpExecError("Can't detach the disks from the network on"
11322 " old node: %s" % (msg,))
11324 # if we managed to detach at least one, we update all the disks of
11325 # the instance to point to the new secondary
11326 self.lu.LogInfo("Updating instance configuration")
11327 for dev, _, new_logical_id in iv_names.itervalues():
11328 dev.logical_id = new_logical_id
11329 self.cfg.SetDiskID(dev, self.instance.primary_node)
11331 self.cfg.Update(self.instance, feedback_fn)
11333 # Release all node locks (the configuration has been updated)
11334 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11336 # and now perform the drbd attach
11337 self.lu.LogInfo("Attaching primary drbds to new secondary"
11338 " (standalone => connected)")
11339 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11340 self.new_node],
11341 self.node_secondary_ip,
11342 (self.instance.disks, self.instance),
11343 self.instance.name,
11344 False)
11345 for to_node, to_result in result.items():
11346 msg = to_result.fail_msg
11347 if msg:
11348 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11349 to_node, msg,
11350 hint=("please do a gnt-instance info to see the"
11351 " status of disks"))
11353 cstep = itertools.count(5)
11355 if self.early_release:
11356 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11357 self._RemoveOldStorage(self.target_node, iv_names)
11358 # TODO: Check if releasing locks early still makes sense
11359 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11360 else:
11361 # Release all resource locks except those used by the instance
11362 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11363 keep=self.node_secondary_ip.keys())
11365 # TODO: Can the instance lock be downgraded here? Take the optional disk
11366 # shutdown in the caller into consideration.
11368 # Wait for sync
11369 # This can fail as the old devices are degraded and _WaitForSync
11370 # does a combined result over all disks, so we don't check its return value
11371 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11372 _WaitForSync(self.lu, self.instance)
11374 # Check all devices manually
11375 self._CheckDevices(self.instance.primary_node, iv_names)
11377 # Step: remove old storage
11378 if not self.early_release:
11379 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11380 self._RemoveOldStorage(self.target_node, iv_names)
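# Illustrative sketch (not part of the original code): how a DRBD
# logical_id is rewritten when the secondary changes, mirroring the tuple
# layout used above: (node_a, node_b, port, minor_a, minor_b, secret).
# The helper name is hypothetical.
def _example_new_drbd_ids(primary, new_node, port, p_minor, new_minor,
                          secret):
  # port=None first, so the device can be activated on the new node
  # without networking information (standalone)
  new_alone_id = (primary, new_node, None, p_minor, new_minor, secret)
  # the full ID, used once the primary re-attaches over the network
  new_net_id = (primary, new_node, port, p_minor, new_minor, secret)
  return (new_alone_id, new_net_id)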
11383 class LURepairNodeStorage(NoHooksLU):
11384 """Repairs the volume group on a node.
11389 def CheckArguments(self):
11390 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11392 storage_type = self.op.storage_type
11394 if (constants.SO_FIX_CONSISTENCY not in
11395 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11396 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11397 " repaired" % storage_type,
11398 errors.ECODE_INVAL)
11400 def ExpandNames(self):
11401 self.needed_locks = {
11402 locking.LEVEL_NODE: [self.op.node_name],
11403 }
11405 def _CheckFaultyDisks(self, instance, node_name):
11406 """Ensure faulty disks abort the opcode or at least warn."""
11407 try:
11408 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11409 node_name, True):
11410 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11411 " node '%s'" % (instance.name, node_name),
11412 errors.ECODE_STATE)
11413 except errors.OpPrereqError, err:
11414 if self.op.ignore_consistency:
11415 self.proc.LogWarning(str(err.args[0]))
11416 else:
11417 raise
11419 def CheckPrereq(self):
11420 """Check prerequisites.
11423 # Check whether any instance on this node has faulty disks
11424 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11425 if inst.admin_state != constants.ADMINST_UP:
11426 continue
11427 check_nodes = set(inst.all_nodes)
11428 check_nodes.discard(self.op.node_name)
11429 for inst_node_name in check_nodes:
11430 self._CheckFaultyDisks(inst, inst_node_name)
11432 def Exec(self, feedback_fn):
11433 feedback_fn("Repairing storage unit '%s' on %s ..." %
11434 (self.op.name, self.op.node_name))
11436 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11437 result = self.rpc.call_storage_execute(self.op.node_name,
11438 self.op.storage_type, st_args,
11439 self.op.name,
11440 constants.SO_FIX_CONSISTENCY)
11441 result.Raise("Failed to repair storage unit '%s' on %s" %
11442 (self.op.name, self.op.node_name))
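# Illustrative sketch (not part of the original code): a hypothetical
# client-side opcode for this LU; all field values are examples only.
# op = opcodes.OpRepairNodeStorage(node_name="node1.example.com",
#                                  storage_type=constants.ST_LVM_VG,
#                                  name="xenvg",
#                                  ignore_consistency=False)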
11445 class LUNodeEvacuate(NoHooksLU):
11446 """Evacuates instances off a list of nodes.
11451 _MODE2IALLOCATOR = {
11452 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11453 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11454 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11455 }
11456 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11457 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11458 constants.IALLOCATOR_NEVAC_MODES)
11460 def CheckArguments(self):
11461 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11463 def ExpandNames(self):
11464 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11466 if self.op.remote_node is not None:
11467 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11468 assert self.op.remote_node
11470 if self.op.remote_node == self.op.node_name:
11471 raise errors.OpPrereqError("Can not use evacuated node as a new"
11472 " secondary node", errors.ECODE_INVAL)
11474 if self.op.mode != constants.NODE_EVAC_SEC:
11475 raise errors.OpPrereqError("Without the use of an iallocator only"
11476 " secondary instances can be evacuated",
11477 errors.ECODE_INVAL)
11480 self.share_locks = _ShareAll()
11481 self.needed_locks = {
11482 locking.LEVEL_INSTANCE: [],
11483 locking.LEVEL_NODEGROUP: [],
11484 locking.LEVEL_NODE: [],
11485 }
11487 # Determine nodes (via group) optimistically, needs verification once locks
11488 # have been acquired
11489 self.lock_nodes = self._DetermineNodes()
11491 def _DetermineNodes(self):
11492 """Gets the list of nodes to operate on.
11495 if self.op.remote_node is None:
11496 # Iallocator will choose any node(s) in the same group
11497 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11498 else:
11499 group_nodes = frozenset([self.op.remote_node])
11501 # Determine nodes to be locked
11502 return set([self.op.node_name]) | group_nodes
11504 def _DetermineInstances(self):
11505 """Builds list of instances to operate on.
11508 assert self.op.mode in constants.NODE_EVAC_MODES
11510 if self.op.mode == constants.NODE_EVAC_PRI:
11511 # Primary instances only
11512 inst_fn = _GetNodePrimaryInstances
11513 assert self.op.remote_node is None, \
11514 "Evacuating primary instances requires iallocator"
11515 elif self.op.mode == constants.NODE_EVAC_SEC:
11516 # Secondary instances only
11517 inst_fn = _GetNodeSecondaryInstances
11518 else:
11519 # All instances
11520 assert self.op.mode == constants.NODE_EVAC_ALL
11521 inst_fn = _GetNodeInstances
11522 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11523 # per instance
11524 raise errors.OpPrereqError("Due to an issue with the iallocator"
11525 " interface it is not possible to evacuate"
11526 " all instances at once; specify explicitly"
11527 " whether to evacuate primary or secondary"
11529 errors.ECODE_INVAL)
11531 return inst_fn(self.cfg, self.op.node_name)
11533 def DeclareLocks(self, level):
11534 if level == locking.LEVEL_INSTANCE:
11535 # Lock instances optimistically, needs verification once node and group
11536 # locks have been acquired
11537 self.needed_locks[locking.LEVEL_INSTANCE] = \
11538 set(i.name for i in self._DetermineInstances())
11540 elif level == locking.LEVEL_NODEGROUP:
11541 # Lock node groups for all potential target nodes optimistically, needs
11542 # verification once nodes have been acquired
11543 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11544 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11546 elif level == locking.LEVEL_NODE:
11547 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11549 def CheckPrereq(self):
11551 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11552 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11553 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11555 need_nodes = self._DetermineNodes()
11557 if not owned_nodes.issuperset(need_nodes):
11558 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11559 " locks were acquired, current nodes are"
11560 " are '%s', used to be '%s'; retry the"
11562 (self.op.node_name,
11563 utils.CommaJoin(need_nodes),
11564 utils.CommaJoin(owned_nodes)),
11565 errors.ECODE_STATE)
11567 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11568 if owned_groups != wanted_groups:
11569 raise errors.OpExecError("Node groups changed since locks were acquired,"
11570 " current groups are '%s', used to be '%s';"
11571 " retry the operation" %
11572 (utils.CommaJoin(wanted_groups),
11573 utils.CommaJoin(owned_groups)))
11575 # Determine affected instances
11576 self.instances = self._DetermineInstances()
11577 self.instance_names = [i.name for i in self.instances]
11579 if set(self.instance_names) != owned_instances:
11580 raise errors.OpExecError("Instances on node '%s' changed since locks"
11581 " were acquired, current instances are '%s',"
11582 " used to be '%s'; retry the operation" %
11583 (self.op.node_name,
11584 utils.CommaJoin(self.instance_names),
11585 utils.CommaJoin(owned_instances)))
11587 if self.instance_names:
11588 self.LogInfo("Evacuating instances from node '%s': %s",
11589 self.op.node_name,
11590 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11591 else:
11592 self.LogInfo("No instances to evacuate from node '%s'",
11593 self.op.node_name)
11595 if self.op.remote_node is not None:
11596 for i in self.instances:
11597 if i.primary_node == self.op.remote_node:
11598 raise errors.OpPrereqError("Node %s is the primary node of"
11599 " instance %s, cannot use it as"
11600 " secondary" %
11601 (self.op.remote_node, i.name),
11602 errors.ECODE_INVAL)
11604 def Exec(self, feedback_fn):
11605 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11607 if not self.instance_names:
11608 # No instances to evacuate
11609 jobs = []
11611 elif self.op.iallocator is not None:
11612 # TODO: Implement relocation to other group
11613 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
11614 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
11615 instances=list(self.instance_names))
11616 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11618 ial.Run(self.op.iallocator)
11620 if not ial.success:
11621 raise errors.OpPrereqError("Can't compute node evacuation using"
11622 " iallocator '%s': %s" %
11623 (self.op.iallocator, ial.info),
11624 errors.ECODE_NORES)
11626 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11628 elif self.op.remote_node is not None:
11629 assert self.op.mode == constants.NODE_EVAC_SEC
11630 jobs = [
11631 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11632 remote_node=self.op.remote_node,
11633 disks=[],
11634 mode=constants.REPLACE_DISK_CHG,
11635 early_release=self.op.early_release)]
11636 for instance_name in self.instance_names
11637 ]
11639 else:
11640 raise errors.ProgrammerError("No iallocator or remote node")
11642 return ResultWithJobs(jobs)
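# Illustrative sketch (not part of the original code): the shape of the
# "jobs" value built above. Each inner list is submitted by mcpu as one
# job; instance names are hypothetical and "..." stands for the remaining
# opcode parameters.
# jobs = [
#   [opcodes.OpInstanceReplaceDisks(instance_name="inst1.example.com", ...)],
#   [opcodes.OpInstanceReplaceDisks(instance_name="inst2.example.com", ...)],
#   ]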
11645 def _SetOpEarlyRelease(early_release, op):
11646 """Sets C{early_release} flag on opcodes if available.
11650 op.early_release = early_release
11651 except AttributeError:
11652 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11654 return op
11657 def _NodeEvacDest(use_nodes, group, nodes):
11658 """Returns group or nodes depending on caller's choice.
11662 return utils.CommaJoin(nodes)
11663 else:
11664 return group
11667 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11668 """Unpacks the result of change-group and node-evacuate iallocator requests.
11670 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11671 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11673 @type lu: L{LogicalUnit}
11674 @param lu: Logical unit instance
11675 @type alloc_result: tuple/list
11676 @param alloc_result: Result from iallocator
11677 @type early_release: bool
11678 @param early_release: Whether to release locks early if possible
11679 @type use_nodes: bool
11680 @param use_nodes: Whether to display node names instead of groups
11682 """
11683 (moved, failed, jobs) = alloc_result
11685 if failed:
11686 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11687 for (name, reason) in failed)
11688 lu.LogWarning("Unable to evacuate instances %s", failreason)
11689 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11691 if moved:
11692 lu.LogInfo("Instances to be moved: %s",
11693 utils.CommaJoin("%s (to %s)" %
11694 (name, _NodeEvacDest(use_nodes, group, nodes))
11695 for (name, group, nodes) in moved))
11697 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11698 map(opcodes.OpCode.LoadOpCode, ops))
11699 for ops in jobs]
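# Illustrative sketch (not part of the original code): a hypothetical
# iallocator result as unpacked above. Opcodes arrive as serialized dicts
# and are revived via opcodes.OpCode.LoadOpCode; all values are examples.
def _example_node_evac_result():
  moved = [("inst1.example.com", "group1", ["node3.example.com"])]
  failed = [("inst2.example.com", "not enough memory")]
  jobs = [[{"OP_ID": "OP_INSTANCE_MIGRATE",
            "instance_name": "inst1.example.com"}]]
  return (moved, failed, jobs)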
11702 class LUInstanceGrowDisk(LogicalUnit):
11703 """Grow a disk of an instance.
11706 HPATH = "disk-grow"
11707 HTYPE = constants.HTYPE_INSTANCE
11708 REQ_BGL = False
11710 def ExpandNames(self):
11711 self._ExpandAndLockInstance()
11712 self.needed_locks[locking.LEVEL_NODE] = []
11713 self.needed_locks[locking.LEVEL_NODE_RES] = []
11714 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11715 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11717 def DeclareLocks(self, level):
11718 if level == locking.LEVEL_NODE:
11719 self._LockInstancesNodes()
11720 elif level == locking.LEVEL_NODE_RES:
11721 # Copy node locks
11722 self.needed_locks[locking.LEVEL_NODE_RES] = \
11723 self.needed_locks[locking.LEVEL_NODE][:]
11725 def BuildHooksEnv(self):
11726 """Build hooks env.
11728 This runs on the master, the primary and all the secondaries.
11732 "DISK": self.op.disk,
11733 "AMOUNT": self.op.amount,
11734 "ABSOLUTE": self.op.absolute,
11736 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11738 return env
11739 def BuildHooksNodes(self):
11740 """Build hooks nodes.
11742 """
11743 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11744 return (nl, nl)
11746 def CheckPrereq(self):
11747 """Check prerequisites.
11749 This checks that the instance is in the cluster.
11751 """
11752 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11753 assert instance is not None, \
11754 "Cannot retrieve locked instance %s" % self.op.instance_name
11755 nodenames = list(instance.all_nodes)
11756 for node in nodenames:
11757 _CheckNodeOnline(self, node)
11759 self.instance = instance
11761 if instance.disk_template not in constants.DTS_GROWABLE:
11762 raise errors.OpPrereqError("Instance's disk layout does not support"
11763 " growing", errors.ECODE_INVAL)
11765 self.disk = instance.FindDisk(self.op.disk)
11767 if self.op.absolute:
11768 self.target = self.op.amount
11769 self.delta = self.target - self.disk.size
11770 if self.delta < 0:
11771 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11772 "current disk size (%s)" %
11773 (utils.FormatUnit(self.target, "h"),
11774 utils.FormatUnit(self.disk.size, "h")),
11775 errors.ECODE_STATE)
11776 else:
11777 self.delta = self.op.amount
11778 self.target = self.disk.size + self.delta
11779 if self.delta < 0:
11780 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11781 utils.FormatUnit(self.delta, "h"),
11782 errors.ECODE_INVAL)
11784 if instance.disk_template not in (constants.DT_FILE,
11785 constants.DT_SHARED_FILE,
11786 constants.DT_RBD):
11787 # TODO: check the free disk space for file, when that feature will be
11788 # supported
11789 _CheckNodesFreeDiskPerVG(self, nodenames,
11790 self.disk.ComputeGrowth(self.delta))
11792 def Exec(self, feedback_fn):
11793 """Execute disk grow.
11796 instance = self.instance
11797 disk = self.disk
11799 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11800 assert (self.owned_locks(locking.LEVEL_NODE) ==
11801 self.owned_locks(locking.LEVEL_NODE_RES))
11803 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11804 if not disks_ok:
11805 raise errors.OpExecError("Cannot activate block device to grow")
11807 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11808 (self.op.disk, instance.name,
11809 utils.FormatUnit(self.delta, "h"),
11810 utils.FormatUnit(self.target, "h")))
11812 # First run all grow ops in dry-run mode
11813 for node in instance.all_nodes:
11814 self.cfg.SetDiskID(disk, node)
11815 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11816 True, True)
11817 result.Raise("Grow request failed to node %s" % node)
11819 # We know that (as far as we can test) operations across different
11820 # nodes will succeed, time to run it for real on the backing storage
11821 for node in instance.all_nodes:
11822 self.cfg.SetDiskID(disk, node)
11823 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11824 False, True)
11825 result.Raise("Grow request failed to node %s" % node)
11827 # And now execute it for logical storage, on the primary node
11828 node = instance.primary_node
11829 self.cfg.SetDiskID(disk, node)
11830 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11831 False, False)
11832 result.Raise("Grow request failed to node %s" % node)
11834 disk.RecordGrow(self.delta)
11835 self.cfg.Update(instance, feedback_fn)
11837 # Changes have been recorded, release node lock
11838 _ReleaseLocks(self, locking.LEVEL_NODE)
11840 # Downgrade lock while waiting for sync
11841 self.glm.downgrade(locking.LEVEL_INSTANCE)
11843 if self.op.wait_for_sync:
11844 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11845 if disk_abort:
11846 self.proc.LogWarning("Disk sync-ing has not returned a good"
11847 " status; please check the instance")
11848 if instance.admin_state != constants.ADMINST_UP:
11849 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11850 elif instance.admin_state != constants.ADMINST_UP:
11851 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11852 " not supposed to be running because no wait for"
11853 " sync mode was requested")
11855 assert self.owned_locks(locking.LEVEL_NODE_RES)
11856 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
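# Illustrative sketch (not part of the original code): the grow ordering
# used in Exec above, written over a hypothetical
# grow_fn(node, dryrun, backingstore) callback.
def _example_grow_order(all_nodes, primary_node, grow_fn):
  for node in all_nodes:
    grow_fn(node, True, True)    # dry-run everywhere first
  for node in all_nodes:
    grow_fn(node, False, True)   # grow the backing storage for real
  grow_fn(primary_node, False, False)  # finally the logical device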
11859 class LUInstanceQueryData(NoHooksLU):
11860 """Query runtime instance data.
11865 def ExpandNames(self):
11866 self.needed_locks = {}
11868 # Use locking if requested or when non-static information is wanted
11869 if not (self.op.static or self.op.use_locking):
11870 self.LogWarning("Non-static data requested, locks need to be acquired")
11871 self.op.use_locking = True
11873 if self.op.instances or not self.op.use_locking:
11874 # Expand instance names right here
11875 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11876 else:
11877 # Will use acquired locks
11878 self.wanted_names = None
11880 if self.op.use_locking:
11881 self.share_locks = _ShareAll()
11883 if self.wanted_names is None:
11884 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11885 else:
11886 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11888 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11889 self.needed_locks[locking.LEVEL_NODE] = []
11890 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11892 def DeclareLocks(self, level):
11893 if self.op.use_locking:
11894 if level == locking.LEVEL_NODEGROUP:
11895 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11897 # Lock all groups used by instances optimistically; this requires going
11898 # via the node before it's locked, requiring verification later on
11899 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11900 frozenset(group_uuid
11901 for instance_name in owned_instances
11902 for group_uuid in
11903 self.cfg.GetInstanceNodeGroups(instance_name))
11905 elif level == locking.LEVEL_NODE:
11906 self._LockInstancesNodes()
11908 def CheckPrereq(self):
11909 """Check prerequisites.
11911 This only checks the optional instance list against the existing names.
11913 """
11914 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11915 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11916 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11918 if self.wanted_names is None:
11919 assert self.op.use_locking, "Locking was not used"
11920 self.wanted_names = owned_instances
11922 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11924 if self.op.use_locking:
11925 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11926 None)
11927 else:
11928 assert not (owned_instances or owned_groups or owned_nodes)
11930 self.wanted_instances = instances.values()
11932 def _ComputeBlockdevStatus(self, node, instance, dev):
11933 """Returns the status of a block device
11935 """
11936 if self.op.static or not node:
11937 return None
11939 self.cfg.SetDiskID(dev, node)
11941 result = self.rpc.call_blockdev_find(node, dev)
11942 if result.offline:
11943 return None
11945 result.Raise("Can't compute disk status for %s" % instance.name)
11947 status = result.payload
11948 if status is None:
11949 return None
11951 return (status.dev_path, status.major, status.minor,
11952 status.sync_percent, status.estimated_time,
11953 status.is_degraded, status.ldisk_status)
11955 def _ComputeDiskStatus(self, instance, snode, dev):
11956 """Compute block device status.
11959 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11961 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11963 def _ComputeDiskStatusInner(self, instance, snode, dev):
11964 """Compute block device status.
11966 @attention: The device has to be annotated already.
11968 """
11969 if dev.dev_type in constants.LDS_DRBD:
11970 # we change the snode then (otherwise we use the one passed in)
11971 if dev.logical_id[0] == instance.primary_node:
11972 snode = dev.logical_id[1]
11973 else:
11974 snode = dev.logical_id[0]
11976 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11977 instance, dev)
11978 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11980 if dev.children:
11981 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11982 instance, snode),
11983 dev.children)
11984 else:
11985 dev_children = []
11987 return {
11988 "iv_name": dev.iv_name,
11989 "dev_type": dev.dev_type,
11990 "logical_id": dev.logical_id,
11991 "physical_id": dev.physical_id,
11992 "pstatus": dev_pstatus,
11993 "sstatus": dev_sstatus,
11994 "children": dev_children,
11999 def Exec(self, feedback_fn):
12000 """Gather and return data"""
12003 cluster = self.cfg.GetClusterInfo()
12005 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12006 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12008 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12009 for node in nodes.values()))
12011 group2name_fn = lambda uuid: groups[uuid].name
12013 for instance in self.wanted_instances:
12014 pnode = nodes[instance.primary_node]
12016 if self.op.static or pnode.offline:
12017 remote_state = None
12018 if pnode.offline:
12019 self.LogWarning("Primary node %s is marked offline, returning static"
12020 " information only for instance %s" %
12021 (pnode.name, instance.name))
12022 else:
12023 remote_info = self.rpc.call_instance_info(instance.primary_node,
12024 instance.name,
12025 instance.hypervisor)
12026 remote_info.Raise("Error checking node %s" % instance.primary_node)
12027 remote_info = remote_info.payload
12028 if remote_info and "state" in remote_info:
12029 remote_state = "up"
12030 else:
12031 if instance.admin_state == constants.ADMINST_UP:
12032 remote_state = "down"
12033 else:
12034 remote_state = instance.admin_state
12036 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12037 instance.disks)
12039 snodes_group_uuids = [nodes[snode_name].group
12040 for snode_name in instance.secondary_nodes]
12042 result[instance.name] = {
12043 "name": instance.name,
12044 "config_state": instance.admin_state,
12045 "run_state": remote_state,
12046 "pnode": instance.primary_node,
12047 "pnode_group_uuid": pnode.group,
12048 "pnode_group_name": group2name_fn(pnode.group),
12049 "snodes": instance.secondary_nodes,
12050 "snodes_group_uuids": snodes_group_uuids,
12051 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12053 # this happens to be the same format used for hooks
12054 "nics": _NICListToTuple(self, instance.nics),
12055 "disk_template": instance.disk_template,
12057 "hypervisor": instance.hypervisor,
12058 "network_port": instance.network_port,
12059 "hv_instance": instance.hvparams,
12060 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12061 "be_instance": instance.beparams,
12062 "be_actual": cluster.FillBE(instance),
12063 "os_instance": instance.osparams,
12064 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12065 "serial_no": instance.serial_no,
12066 "mtime": instance.mtime,
12067 "ctime": instance.ctime,
12068 "uuid": instance.uuid,
12074 def PrepareContainerMods(mods, private_fn):
12075 """Prepares a list of container modifications by adding a private data field.
12077 @type mods: list of tuples; (operation, index, parameters)
12078 @param mods: List of modifications
12079 @type private_fn: callable or None
12080 @param private_fn: Callable for constructing a private data field for a
12081 modification
12084 """
12085 if private_fn is None:
12086 fn = lambda: None
12087 else:
12088 fn = private_fn
12090 return [(op, idx, params, fn()) for (op, idx, params) in mods]
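# Illustrative sketch (not part of the original code): hypothetical usage.
# Every 3-tuple gains a private data object (or None) as fourth element:
#   PrepareContainerMods([(constants.DDM_ADD, -1, {"size": 1024})], None)
#   -> [(constants.DDM_ADD, -1, {"size": 1024}, None)]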
12093 #: Type description for changes as returned by L{ApplyContainerMods}'s
12094 #: callbacks
12095 _TApplyContModsCbChanges = \
12096 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12097 ht.TNonEmptyString,
12098 ht.TAny,
12099 ])))
12102 def ApplyContainerMods(kind, container, chgdesc, mods,
12103 create_fn, modify_fn, remove_fn):
12104 """Applies descriptions in C{mods} to C{container}.
12107 @param kind: One-word item description
12108 @type container: list
12109 @param container: Container to modify
12110 @type chgdesc: None or list
12111 @param chgdesc: List of applied changes
12112 @type mods: list
12113 @param mods: Modifications as returned by L{PrepareContainerMods}
12114 @type create_fn: callable
12115 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12116 receives absolute item index, parameters and private data object as added
12117 by L{PrepareContainerMods}, returns tuple containing new item and changes
12118 applied
12119 @type modify_fn: callable
12120 @param modify_fn: Callback for modifying an existing item
12121 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12122 and private data object as added by L{PrepareContainerMods}, returns
12123 changes as list
12124 @type remove_fn: callable
12125 @param remove_fn: Callback on removing item; receives absolute item index,
12126 item and private data object as added by L{PrepareContainerMods}
12128 """
12129 for (op, idx, params, private) in mods:
12130 if idx == -1:
12131 # Append
12132 absidx = len(container) - 1
12133 elif idx < 0:
12134 raise IndexError("Not accepting negative indices other than -1")
12135 elif idx > len(container):
12136 raise IndexError("Got %s index %s, but there are only %s" %
12137 (kind, idx, len(container)))
12138 else:
12139 absidx = idx
12141 changes = None
12143 if op == constants.DDM_ADD:
12144 # Calculate where item will be added
12145 if idx == -1:
12146 addidx = len(container)
12147 else:
12148 addidx = idx
12150 if create_fn is None:
12151 item = params
12152 else:
12153 (item, changes) = create_fn(addidx, params, private)
12155 if idx == -1:
12156 container.append(item)
12157 else:
12159 assert idx <= len(container)
12160 # list.insert does so before the specified index
12161 container.insert(idx, item)
12162 else:
12163 # Retrieve existing item
12164 try:
12165 item = container[absidx]
12166 except IndexError:
12167 raise IndexError("Invalid %s index %s" % (kind, idx))
12169 if op == constants.DDM_REMOVE:
12170 assert not params
12172 if remove_fn is not None:
12173 remove_fn(absidx, item, private)
12175 changes = [("%s/%s" % (kind, absidx), "remove")]
12177 assert container[absidx] == item
12178 del container[absidx]
12179 elif op == constants.DDM_MODIFY:
12180 if modify_fn is not None:
12181 changes = modify_fn(absidx, item, params, private)
12182 else:
12183 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12185 assert _TApplyContModsCbChanges(changes)
12187 if not (chgdesc is None or changes is None):
12188 chgdesc.extend(changes)
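# Illustrative sketch (not part of the original code): hypothetical usage
# of the two container helpers on a plain list, with no callbacks.
def _example_apply_container_mods():
  container = ["a", "b"]
  chgdesc = []
  mods = PrepareContainerMods([(constants.DDM_ADD, -1, "c"),
                               (constants.DDM_REMOVE, 0, None)], None)
  # with create_fn=None the parameters themselves become the new item
  ApplyContainerMods("demo", container, chgdesc, mods, None, None, None)
  # container is now ["b", "c"]; chgdesc holds [("demo/0", "remove")]
  return (container, chgdesc)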
12191 def _UpdateIvNames(base_index, disks):
12192 """Updates the C{iv_name} attribute of disks.
12194 @type disks: list of L{objects.Disk}
12196 """
12197 for (idx, disk) in enumerate(disks):
12198 disk.iv_name = "disk/%s" % (base_index + idx, )
12201 class _InstNicModPrivate:
12202 """Data structure for network interface modifications.
12204 Used by L{LUInstanceSetParams}.
12206 """
12207 def __init__(self):
12208 self.params = None
12209 self.filled = None
12212 class LUInstanceSetParams(LogicalUnit):
12213 """Modifies an instances's parameters.
12216 HPATH = "instance-modify"
12217 HTYPE = constants.HTYPE_INSTANCE
12218 REQ_BGL = False
12220 @staticmethod
12221 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12222 assert ht.TList(mods)
12223 assert not mods or len(mods[0]) in (2, 3)
12225 if mods and len(mods[0]) == 2:
12226 result = []
12228 addremove = 0
12229 for op, params in mods:
12230 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12231 result.append((op, -1, params))
12232 addremove += 1
12234 if addremove > 1:
12235 raise errors.OpPrereqError("Only one %s add or remove operation is"
12236 " supported at a time" % kind,
12237 errors.ECODE_INVAL)
12238 else:
12239 result.append((constants.DDM_MODIFY, op, params))
12241 assert verify_fn(result)
12242 else:
12243 result = mods
12245 return result
12247 @staticmethod
12248 def _CheckMods(kind, mods, key_types, item_fn):
12249 """Ensures requested disk/NIC modifications are valid.
12252 for (op, _, params) in mods:
12253 assert ht.TDict(params)
12255 utils.ForceDictType(params, key_types)
12257 if op == constants.DDM_REMOVE:
12258 if params:
12259 raise errors.OpPrereqError("No settings should be passed when"
12260 " removing a %s" % kind,
12261 errors.ECODE_INVAL)
12262 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12263 item_fn(op, params)
12265 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12267 @staticmethod
12268 def _VerifyDiskModification(op, params):
12269 """Verifies a disk modification.
12272 if op == constants.DDM_ADD:
12273 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12274 if mode not in constants.DISK_ACCESS_SET:
12275 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12276 errors.ECODE_INVAL)
12278 size = params.get(constants.IDISK_SIZE, None)
12279 if size is None:
12280 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12281 constants.IDISK_SIZE, errors.ECODE_INVAL)
12283 try:
12284 size = int(size)
12285 except (TypeError, ValueError), err:
12286 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12287 errors.ECODE_INVAL)
12289 params[constants.IDISK_SIZE] = size
12291 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12292 raise errors.OpPrereqError("Disk size change not possible, use"
12293 " grow-disk", errors.ECODE_INVAL)
12295 @staticmethod
12296 def _VerifyNicModification(op, params):
12297 """Verifies a network interface modification.
12300 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12301 ip = params.get(constants.INIC_IP, None)
12302 if ip is None:
12303 pass
12304 elif ip.lower() == constants.VALUE_NONE:
12305 params[constants.INIC_IP] = None
12306 elif not netutils.IPAddress.IsValid(ip):
12307 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12308 errors.ECODE_INVAL)
12310 bridge = params.get("bridge", None)
12311 link = params.get(constants.INIC_LINK, None)
12312 if bridge and link:
12313 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12314 " at the same time", errors.ECODE_INVAL)
12315 elif bridge and bridge.lower() == constants.VALUE_NONE:
12316 params["bridge"] = None
12317 elif link and link.lower() == constants.VALUE_NONE:
12318 params[constants.INIC_LINK] = None
12320 if op == constants.DDM_ADD:
12321 macaddr = params.get(constants.INIC_MAC, None)
12322 if macaddr is None:
12323 params[constants.INIC_MAC] = constants.VALUE_AUTO
12325 if constants.INIC_MAC in params:
12326 macaddr = params[constants.INIC_MAC]
12327 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12328 macaddr = utils.NormalizeAndValidateMac(macaddr)
12330 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12331 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12332 " modifying an existing NIC",
12333 errors.ECODE_INVAL)
12335 def CheckArguments(self):
12336 if not (self.op.nics or self.op.disks or self.op.disk_template or
12337 self.op.hvparams or self.op.beparams or self.op.os_name or
12338 self.op.offline is not None or self.op.runtime_mem):
12339 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12341 if self.op.hvparams:
12342 _CheckGlobalHvParams(self.op.hvparams)
12344 self.op.disks = self._UpgradeDiskNicMods(
12345 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12346 self.op.nics = self._UpgradeDiskNicMods(
12347 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12349 # Check disk modifications
12350 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12351 self._VerifyDiskModification)
12353 if self.op.disks and self.op.disk_template is not None:
12354 raise errors.OpPrereqError("Disk template conversion and other disk"
12355 " changes not supported at the same time",
12356 errors.ECODE_INVAL)
12358 if (self.op.disk_template and
12359 self.op.disk_template in constants.DTS_INT_MIRROR and
12360 self.op.remote_node is None):
12361 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12362 " one requires specifying a secondary node",
12363 errors.ECODE_INVAL)
12365 # Check NIC modifications
12366 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12367 self._VerifyNicModification)
12369 def ExpandNames(self):
12370 self._ExpandAndLockInstance()
12371 # Can't even acquire node locks in shared mode as upcoming changes in
12372 # Ganeti 2.6 will start to modify the node object on disk conversion
12373 self.needed_locks[locking.LEVEL_NODE] = []
12374 self.needed_locks[locking.LEVEL_NODE_RES] = []
12375 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12377 def DeclareLocks(self, level):
12378 # TODO: Acquire group lock in shared mode (disk parameters)
12379 if level == locking.LEVEL_NODE:
12380 self._LockInstancesNodes()
12381 if self.op.disk_template and self.op.remote_node:
12382 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12383 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12384 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12385 # Copy node locks
12386 self.needed_locks[locking.LEVEL_NODE_RES] = \
12387 self.needed_locks[locking.LEVEL_NODE][:]
12389 def BuildHooksEnv(self):
12390 """Build hooks env.
12392 This runs on the master, primary and secondaries.
12394 """
12395 args = {}
12396 if constants.BE_MINMEM in self.be_new:
12397 args["minmem"] = self.be_new[constants.BE_MINMEM]
12398 if constants.BE_MAXMEM in self.be_new:
12399 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12400 if constants.BE_VCPUS in self.be_new:
12401 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12402 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12403 # information at all.
12405 if self._new_nics is not None:
12406 nics = []
12408 for nic in self._new_nics:
12409 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12410 mode = nicparams[constants.NIC_MODE]
12411 link = nicparams[constants.NIC_LINK]
12412 nics.append((nic.ip, nic.mac, mode, link))
12414 args["nics"] = nics
12416 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12417 if self.op.disk_template:
12418 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12419 if self.op.runtime_mem:
12420 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12424 def BuildHooksNodes(self):
12425 """Build hooks nodes.
12427 """
12428 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12429 return (nl, nl)
12431 def _PrepareNicModification(self, params, private, old_ip, old_params,
12432 cluster, pnode):
12433 update_params_dict = dict([(key, params[key])
12434 for key in constants.NICS_PARAMETERS
12437 if "bridge" in params:
12438 update_params_dict[constants.NIC_LINK] = params["bridge"]
12440 new_params = _GetUpdatedParams(old_params, update_params_dict)
12441 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12443 new_filled_params = cluster.SimpleFillNIC(new_params)
12444 objects.NIC.CheckParameterSyntax(new_filled_params)
12446 new_mode = new_filled_params[constants.NIC_MODE]
12447 if new_mode == constants.NIC_MODE_BRIDGED:
12448 bridge = new_filled_params[constants.NIC_LINK]
12449 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12451 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12453 self.warn.append(msg)
12455 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12457 elif new_mode == constants.NIC_MODE_ROUTED:
12458 ip = params.get(constants.INIC_IP, old_ip)
12459 if ip is None:
12460 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12461 " on a routed NIC", errors.ECODE_INVAL)
12463 if constants.INIC_MAC in params:
12464 mac = params[constants.INIC_MAC]
12465 if mac is None:
12466 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12467 errors.ECODE_INVAL)
12468 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12469 # otherwise generate the MAC address
12470 params[constants.INIC_MAC] = \
12471 self.cfg.GenerateMAC(self.proc.GetECId())
12472 else:
12473 # or validate/reserve the current one
12474 try:
12475 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12476 except errors.ReservationError:
12477 raise errors.OpPrereqError("MAC address '%s' already in use"
12478 " in cluster" % mac,
12479 errors.ECODE_NOTUNIQUE)
12481 private.params = new_params
12482 private.filled = new_filled_params
12484 def CheckPrereq(self):
12485 """Check prerequisites.
12487 This only checks the instance list against the existing names.
12489 """
12490 # checking the new params on the primary/secondary nodes
12492 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12493 cluster = self.cluster = self.cfg.GetClusterInfo()
12494 assert self.instance is not None, \
12495 "Cannot retrieve locked instance %s" % self.op.instance_name
12496 pnode = instance.primary_node
12497 nodelist = list(instance.all_nodes)
12498 pnode_info = self.cfg.GetNodeInfo(pnode)
12499 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12501 # Prepare disk/NIC modifications
12502 self.diskmod = PrepareContainerMods(self.op.disks, None)
12503 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12505 # OS change
12506 if self.op.os_name and not self.op.force:
12507 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12508 self.op.force_variant)
12509 instance_os = self.op.os_name
12510 else:
12511 instance_os = instance.os
12513 assert not (self.op.disk_template and self.op.disks), \
12514 "Can't modify disk template and apply disk changes at the same time"
12516 if self.op.disk_template:
12517 if instance.disk_template == self.op.disk_template:
12518 raise errors.OpPrereqError("Instance already has disk template %s" %
12519 instance.disk_template, errors.ECODE_INVAL)
12521 if (instance.disk_template,
12522 self.op.disk_template) not in self._DISK_CONVERSIONS:
12523 raise errors.OpPrereqError("Unsupported disk template conversion from"
12524 " %s to %s" % (instance.disk_template,
12525 self.op.disk_template),
12526 errors.ECODE_INVAL)
12527 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12528 msg="cannot change disk template")
12529 if self.op.disk_template in constants.DTS_INT_MIRROR:
12530 if self.op.remote_node == pnode:
12531 raise errors.OpPrereqError("Given new secondary node %s is the same"
12532 " as the primary node of the instance" %
12533 self.op.remote_node, errors.ECODE_STATE)
12534 _CheckNodeOnline(self, self.op.remote_node)
12535 _CheckNodeNotDrained(self, self.op.remote_node)
12536 # FIXME: here we assume that the old instance type is DT_PLAIN
12537 assert instance.disk_template == constants.DT_PLAIN
12538 disks = [{constants.IDISK_SIZE: d.size,
12539 constants.IDISK_VG: d.logical_id[0]}
12540 for d in instance.disks]
12541 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12542 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12544 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12545 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12546 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
12547 snode_group)
12548 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12549 ignore=self.op.ignore_ipolicy)
12550 if pnode_info.group != snode_info.group:
12551 self.LogWarning("The primary and secondary nodes are in two"
12552 " different node groups; the disk parameters"
12553 " from the first disk's node group will be"
12556 # hvparams processing
12557 if self.op.hvparams:
12558 hv_type = instance.hypervisor
12559 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12560 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12561 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12563 # local check
12564 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12565 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12566 self.hv_proposed = self.hv_new = hv_new # the new actual values
12567 self.hv_inst = i_hvdict # the new dict (without defaults)
12568 else:
12569 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12570 instance.hvparams)
12571 self.hv_new = self.hv_inst = {}
12573 # beparams processing
12574 if self.op.beparams:
12575 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12577 objects.UpgradeBeParams(i_bedict)
12578 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12579 be_new = cluster.SimpleFillBE(i_bedict)
12580 self.be_proposed = self.be_new = be_new # the new actual values
12581 self.be_inst = i_bedict # the new dict (without defaults)
12582 else:
12583 self.be_new = self.be_inst = {}
12584 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12585 be_old = cluster.FillBE(instance)
12587 # CPU param validation -- checking every time a parameter is
12588 # changed to cover all cases where either CPU mask or vcpus have
12590 if (constants.BE_VCPUS in self.be_proposed and
12591 constants.HV_CPU_MASK in self.hv_proposed):
12592 cpu_list = \
12593 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12594 # Verify mask is consistent with number of vCPUs. Can skip this
12595 # test if only 1 entry in the CPU mask, which means same mask
12596 # is applied to all vCPUs.
12597 if (len(cpu_list) > 1 and
12598 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12599 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12600 " CPU mask [%s]" %
12601 (self.be_proposed[constants.BE_VCPUS],
12602 self.hv_proposed[constants.HV_CPU_MASK]),
12603 errors.ECODE_INVAL)
12605 # Only perform this test if a new CPU mask is given
12606 if constants.HV_CPU_MASK in self.hv_new:
12607 # Calculate the largest CPU number requested
12608 max_requested_cpu = max(map(max, cpu_list))
12609 # Check that all of the instance's nodes have enough physical CPUs to
12610 # satisfy the requested CPU mask
12611 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12612 max_requested_cpu + 1, instance.hypervisor)
12614 # osparams processing
12615 if self.op.osparams:
12616 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12617 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12618 self.os_inst = i_osdict # the new dict (without defaults)
12619 else:
12620 self.os_inst = {}
12622 self.warn = []
12624 #TODO(dynmem): do the appropriate check involving MINMEM
12625 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12626 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12627 mem_check_list = [pnode]
12628 if be_new[constants.BE_AUTO_BALANCE]:
12629 # either we changed auto_balance to yes or it was from before
12630 mem_check_list.extend(instance.secondary_nodes)
12631 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12632 instance.hypervisor)
12633 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12634 [instance.hypervisor])
12635 pninfo = nodeinfo[pnode]
12636 msg = pninfo.fail_msg
12637 if msg:
12638 # Assume the primary node is unreachable and go ahead
12639 self.warn.append("Can't get info from primary node %s: %s" %
12640 (pnode, msg))
12641 else:
12642 (_, _, (pnhvinfo, )) = pninfo.payload
12643 if not isinstance(pnhvinfo.get("memory_free", None), int):
12644 self.warn.append("Node data from primary node %s doesn't contain"
12645 " free memory information" % pnode)
12646 elif instance_info.fail_msg:
12647 self.warn.append("Can't get instance runtime information: %s" %
12648 instance_info.fail_msg)
12649 else:
12650 if instance_info.payload:
12651 current_mem = int(instance_info.payload["memory"])
12652 else:
12653 # Assume instance not running
12654 # (there is a slight race condition here, but it's not very
12655 # probable, and we have no other way to check)
12656 # TODO: Describe race condition
12657 current_mem = 0
12658 #TODO(dynmem): do the appropriate check involving MINMEM
12659 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12660 pnhvinfo["memory_free"])
12661 if miss_mem > 0:
12662 raise errors.OpPrereqError("This change will prevent the instance"
12663 " from starting, due to %d MB of memory"
12664 " missing on its primary node" %
12665 miss_mem, errors.ECODE_NORES)
12667 if be_new[constants.BE_AUTO_BALANCE]:
12668 for node, nres in nodeinfo.items():
12669 if node not in instance.secondary_nodes:
12670 continue
12671 nres.Raise("Can't get info from secondary node %s" % node,
12672 prereq=True, ecode=errors.ECODE_STATE)
12673 (_, _, (nhvinfo, )) = nres.payload
12674 if not isinstance(nhvinfo.get("memory_free", None), int):
12675 raise errors.OpPrereqError("Secondary node %s didn't return free"
12676 " memory information" % node,
12677 errors.ECODE_STATE)
12678 #TODO(dynmem): do the appropriate check involving MINMEM
12679 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12680 raise errors.OpPrereqError("This change will prevent the instance"
12681 " from failover to its secondary node"
12682 " %s, due to not enough memory" % node,
12683 errors.ECODE_STATE)
12685 if self.op.runtime_mem:
12686 remote_info = self.rpc.call_instance_info(instance.primary_node,
12687 instance.name,
12688 instance.hypervisor)
12689 remote_info.Raise("Error checking node %s" % instance.primary_node)
12690 if not remote_info.payload: # not running already
12691 raise errors.OpPrereqError("Instance %s is not running" %
12692 instance.name, errors.ECODE_STATE)
12694 current_memory = remote_info.payload["memory"]
12695 if (not self.op.force and
12696 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12697 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12698 raise errors.OpPrereqError("Instance %s must have memory between %d"
12699 " and %d MB of memory unless --force is"
12702 self.be_proposed[constants.BE_MINMEM],
12703 self.be_proposed[constants.BE_MAXMEM]),
12704 errors.ECODE_INVAL)
12706 if self.op.runtime_mem > current_memory:
12707 _CheckNodeFreeMemory(self, instance.primary_node,
12708 "ballooning memory for instance %s" %
12710 self.op.memory - current_memory,
12711 instance.hypervisor)
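# Illustrative sketch (a hypothetical helper, not part of the LU above): the
# runtime-memory checks boil down to arithmetic on MB values. This mirrors
# the bounds check and the free-memory check without the RPC plumbing.
def _ExampleBallooningCheck(runtime_mem, minmem, maxmem, current_mem,
                            node_free_mem, force=False):
  """Returns the MB to claim from the node, raising ValueError when invalid.

  """
  if not force and not (minmem <= runtime_mem <= maxmem):
    raise ValueError("runtime memory %d outside [%d, %d] MB" %
                     (runtime_mem, minmem, maxmem))
  extra = runtime_mem - current_mem
  if extra > node_free_mem:
    raise ValueError("node is %d MB short" % (extra - node_free_mem))
  return max(extra, 0)

# Example: _ExampleBallooningCheck(2048, 512, 4096, 1024, 8192) returns 1024.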
12713 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12714 raise errors.OpPrereqError("Disk operations not supported for"
12715 " diskless instances", errors.ECODE_INVAL)
12717 def _PrepareNicCreate(_, params, private):
12718 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12719 return (None, None)
12721 def _PrepareNicMod(_, nic, params, private):
12722 self._PrepareNicModification(params, private, nic.ip,
12723 nic.nicparams, cluster, pnode)
12726 # Verify NIC changes (operating on copy)
12727 nics = instance.nics[:]
12728 ApplyContainerMods("NIC", nics, None, self.nicmod,
12729 _PrepareNicCreate, _PrepareNicMod, None)
12730 if len(nics) > constants.MAX_NICS:
12731 raise errors.OpPrereqError("Instance has too many network interfaces"
12732 " (%d), cannot add more" % constants.MAX_NICS,
12733 errors.ECODE_STATE)
12735 # Verify disk changes (operating on a copy)
12736 disks = instance.disks[:]
12737 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12738 if len(disks) > constants.MAX_DISKS:
12739 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12740 " more" % constants.MAX_DISKS,
12741 errors.ECODE_STATE)
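# A minimal sketch of the "verify on a copy" pattern used for the NIC and
# disk checks above: simulate the additions on a throw-away copy and
# bounds-check the result, leaving the real configuration untouched until
# Exec runs. The helper below is illustrative only, not part of Ganeti's API.
def _ExampleVerifyContainerSize(container, additions, limit):
  """Applies additions to a copy and enforces a maximum container size.

  """
  candidate = container[:]  # never mutate the original during CheckPrereq
  candidate.extend(additions)
  if len(candidate) > limit:
    raise ValueError("too many items (%d), limit is %d" %
                     (len(candidate), limit))
  return candidate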
12743 if self.op.offline is not None:
12744 if self.op.offline:
12745 msg = "can't change to offline"
12747 msg = "can't change to online"
12748 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12750 # Pre-compute NIC changes (necessary to use result in hooks)
12751 self._nic_chgdesc = []
12752 if self.nicmod:
12753 # Operate on copies as this is still in prereq
12754 nics = [nic.Copy() for nic in instance.nics]
12755 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12756 self._CreateNewNic, self._ApplyNicMods, None)
12757 self._new_nics = nics
12758 else:
12759 self._new_nics = None
12761 def _ConvertPlainToDrbd(self, feedback_fn):
12762 """Converts an instance from plain to drbd.
12765 feedback_fn("Converting template to drbd")
12766 instance = self.instance
12767 pnode = instance.primary_node
12768 snode = self.op.remote_node
12770 assert instance.disk_template == constants.DT_PLAIN
12772 # create a fake disk info for _GenerateDiskTemplate
12773 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12774 constants.IDISK_VG: d.logical_id[0]}
12775 for d in instance.disks]
12776 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12777 instance.name, pnode, [snode],
12778 disk_info, None, None, 0, feedback_fn,
12779 self.diskparams)
12780 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12781 self.diskparams)
12782 info = _GetInstanceInfoText(instance)
12783 feedback_fn("Creating additional volumes...")
12784 # first, create the missing data and meta devices
12785 for disk in anno_disks:
12786 # unfortunately this is... not too nice
12787 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12788 info, True)
12789 for child in disk.children:
12790 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12791 # at this stage, all new LVs have been created, we can rename the
12792 # old ones
12793 feedback_fn("Renaming original volumes...")
12794 rename_list = [(o, n.children[0].logical_id)
12795 for (o, n) in zip(instance.disks, new_disks)]
12796 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12797 result.Raise("Failed to rename original LVs")
12799 feedback_fn("Initializing DRBD devices...")
12800 # all child devices are in place, we can now create the DRBD devices
12801 for disk in anno_disks:
12802 for node in [pnode, snode]:
12803 f_create = node == pnode
12804 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12806 # at this point, the instance has been modified
12807 instance.disk_template = constants.DT_DRBD8
12808 instance.disks = new_disks
12809 self.cfg.Update(instance, feedback_fn)
12811 # Release node locks while waiting for sync
12812 _ReleaseLocks(self, locking.LEVEL_NODE)
12814 # disks are created, waiting for sync
12815 disk_abort = not _WaitForSync(self, instance,
12816 oneshot=not self.op.wait_for_sync)
12817 if disk_abort:
12818 raise errors.OpExecError("There are some degraded disks for"
12819 " this instance, please cleanup manually")
12821 # Node resource locks will be released by caller
12823 def _ConvertDrbdToPlain(self, feedback_fn):
12824 """Converts an instance from drbd to plain.
12827 instance = self.instance
12829 assert len(instance.secondary_nodes) == 1
12830 assert instance.disk_template == constants.DT_DRBD8
12832 pnode = instance.primary_node
12833 snode = instance.secondary_nodes[0]
12834 feedback_fn("Converting template to plain")
12836 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12837 new_disks = [d.children[0] for d in instance.disks]
12839 # copy over size and mode
12840 for parent, child in zip(old_disks, new_disks):
12841 child.size = parent.size
12842 child.mode = parent.mode
12844 # this is a DRBD disk, return its port to the pool
12845 # NOTE: this must be done right before the call to cfg.Update!
12846 for disk in old_disks:
12847 tcp_port = disk.logical_id[2]
12848 self.cfg.AddTcpUdpPort(tcp_port)
12850 # update instance structure
12851 instance.disks = new_disks
12852 instance.disk_template = constants.DT_PLAIN
12853 self.cfg.Update(instance, feedback_fn)
12855 # Release locks in case removing disks takes a while
12856 _ReleaseLocks(self, locking.LEVEL_NODE)
12858 feedback_fn("Removing volumes on the secondary node...")
12859 for disk in old_disks:
12860 self.cfg.SetDiskID(disk, snode)
12861 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12862 if msg:
12863 self.LogWarning("Could not remove block device %s on node %s,"
12864 " continuing anyway: %s", disk.iv_name, snode, msg)
12866 feedback_fn("Removing unneeded volumes on the primary node...")
12867 for idx, disk in enumerate(old_disks):
12868 meta = disk.children[1]
12869 self.cfg.SetDiskID(meta, pnode)
12870 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12871 if msg:
12872 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12873 " continuing anyway: %s", idx, pnode, msg)
12875 def _CreateNewDisk(self, idx, params, _):
12876 """Creates a new disk.
12879 instance = self.instance
12882 if instance.disk_template in constants.DTS_FILEBASED:
12883 (file_driver, file_path) = instance.disks[0].logical_id
12884 file_path = os.path.dirname(file_path)
12885 else:
12886 file_driver = file_path = None
12888 disk = \
12889 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12890 instance.primary_node, instance.secondary_nodes,
12891 [params], file_path, file_driver, idx,
12892 self.Log, self.diskparams)[0]
12894 info = _GetInstanceInfoText(instance)
12896 logging.info("Creating volume %s for instance %s",
12897 disk.iv_name, instance.name)
12898 # Note: this needs to be kept in sync with _CreateDisks
12900 for node in instance.all_nodes:
12901 f_create = (node == instance.primary_node)
12902 try:
12903 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12904 except errors.OpExecError, err:
12905 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12906 disk.iv_name, disk, node, err)
12909 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12912 @staticmethod
12913 def _ModifyDisk(idx, disk, params, _):
12914 """Modifies a disk.
12916 """
12917 disk.mode = params[constants.IDISK_MODE]
12919 return [
12920 ("disk.mode/%d" % idx, disk.mode),
12921 ]
12923 def _RemoveDisk(self, idx, root, _):
12924 """Removes a disk.
12926 """
12927 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
12928 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
12929 self.cfg.SetDiskID(disk, node)
12930 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12931 if msg:
12932 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12933 " continuing anyway", idx, node, msg)
12935 # if this is a DRBD disk, return its port to the pool
12936 if root.dev_type in constants.LDS_DRBD:
12937 self.cfg.AddTcpUdpPort(root.logical_id[2])
12939 @staticmethod
12940 def _CreateNewNic(idx, params, private):
12941 """Creates data structure for a new network interface.
12943 """
12944 mac = params[constants.INIC_MAC]
12945 ip = params.get(constants.INIC_IP, None)
12946 nicparams = private.params
12948 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12949 ("nic.%d" % idx,
12950 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12951 (mac, ip, private.filled[constants.NIC_MODE],
12952 private.filled[constants.NIC_LINK])),
12953 ])
12955 @staticmethod
12956 def _ApplyNicMods(idx, nic, params, private):
12957 """Modifies a network interface.
12959 """
12960 changes = []
12962 for key in [constants.INIC_MAC, constants.INIC_IP]:
12963 if key in params:
12964 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12965 setattr(nic, key, params[key])
12967 if private.params:
12968 nic.nicparams = private.params
12970 for (key, val) in params.items():
12971 changes.append(("nic.%s/%d" % (key, idx), val))
12973 return changes
12975 def Exec(self, feedback_fn):
12976 """Modifies an instance.
12978 All parameters take effect only at the next restart of the instance.
12980 """
12981 # Process here the warnings from CheckPrereq, as we don't have a
12982 # feedback_fn there.
12983 # TODO: Replace with self.LogWarning
12984 for warn in self.warn:
12985 feedback_fn("WARNING: %s" % warn)
12987 assert ((self.op.disk_template is None) ^
12988 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12989 "Not owning any node resource locks"
12991 result = []
12992 instance = self.instance
12995 if self.op.runtime_mem:
12996 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12997 instance,
12998 self.op.runtime_mem)
12999 rpcres.Raise("Cannot modify instance runtime memory")
13000 result.append(("runtime_memory", self.op.runtime_mem))
13002 # Apply disk changes
13003 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13004 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13005 _UpdateIvNames(0, instance.disks)
13007 if self.op.disk_template:
13008 if __debug__:
13009 check_nodes = set(instance.all_nodes)
13010 if self.op.remote_node:
13011 check_nodes.add(self.op.remote_node)
13012 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13013 owned = self.owned_locks(level)
13014 assert not (check_nodes - owned), \
13015 ("Not owning the correct locks, owning %r, expected at least %r" %
13016 (owned, check_nodes))
13018 r_shut = _ShutdownInstanceDisks(self, instance)
13019 if not r_shut:
13020 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13021 " proceed with disk template conversion")
13022 mode = (instance.disk_template, self.op.disk_template)
13023 try:
13024 self._DISK_CONVERSIONS[mode](self, feedback_fn)
13025 except:
13026 self.cfg.ReleaseDRBDMinors(instance.name)
13027 raise
13028 result.append(("disk_template", self.op.disk_template))
13030 assert instance.disk_template == self.op.disk_template, \
13031 ("Expected disk template '%s', found '%s'" %
13032 (self.op.disk_template, instance.disk_template))
13034 # Release node and resource locks if there are any (they might already have
13035 # been released during disk conversion)
13036 _ReleaseLocks(self, locking.LEVEL_NODE)
13037 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13039 # Apply NIC changes
13040 if self._new_nics is not None:
13041 instance.nics = self._new_nics
13042 result.extend(self._nic_chgdesc)
13045 if self.op.hvparams:
13046 instance.hvparams = self.hv_inst
13047 for key, val in self.op.hvparams.iteritems():
13048 result.append(("hv/%s" % key, val))
13051 if self.op.beparams:
13052 instance.beparams = self.be_inst
13053 for key, val in self.op.beparams.iteritems():
13054 result.append(("be/%s" % key, val))
13057 if self.op.os_name:
13058 instance.os = self.op.os_name
13061 if self.op.osparams:
13062 instance.osparams = self.os_inst
13063 for key, val in self.op.osparams.iteritems():
13064 result.append(("os/%s" % key, val))
13066 if self.op.offline is None:
13067 # Ignore
13068 pass
13069 elif self.op.offline:
13070 # Mark instance as offline
13071 self.cfg.MarkInstanceOffline(instance.name)
13072 result.append(("admin_state", constants.ADMINST_OFFLINE))
13074 # Mark instance as online, but stopped
13075 self.cfg.MarkInstanceDown(instance.name)
13076 result.append(("admin_state", constants.ADMINST_DOWN))
13078 self.cfg.Update(instance, feedback_fn)
13080 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13081 self.owned_locks(locking.LEVEL_NODE)), \
13082 "All node locks should have been released by now"
13086 _DISK_CONVERSIONS = {
13087 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13088 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13089 }
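# _DISK_CONVERSIONS is a plain dict keyed by (old_template, new_template)
# tuples; Exec looks the pair up and calls the matching conversion routine.
# A self-contained sketch of the same dispatch idiom (names made up):
def _ExampleDispatch(old_state, new_state, handlers):
  """Dispatches on an (old, new) state pair; raises KeyError if unsupported.

  """
  return handlers[(old_state, new_state)]()

# _ExampleDispatch("plain", "drbd", {("plain", "drbd"): lambda: "ok"})
# returns "ok"; any unlisted pair fails loudly with KeyError.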
13092 class LUInstanceChangeGroup(LogicalUnit):
13093 HPATH = "instance-change-group"
13094 HTYPE = constants.HTYPE_INSTANCE
13096 REQ_BGL = False
13097 def ExpandNames(self):
13098 self.share_locks = _ShareAll()
13099 self.needed_locks = {
13100 locking.LEVEL_NODEGROUP: [],
13101 locking.LEVEL_NODE: [],
13102 }
13104 self._ExpandAndLockInstance()
13106 if self.op.target_groups:
13107 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13108 self.op.target_groups)
13109 else:
13110 self.req_target_uuids = None
13112 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13114 def DeclareLocks(self, level):
13115 if level == locking.LEVEL_NODEGROUP:
13116 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13118 if self.req_target_uuids:
13119 lock_groups = set(self.req_target_uuids)
13121 # Lock all groups used by instance optimistically; this requires going
13122 # via the node before it's locked, requiring verification later on
13123 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13124 lock_groups.update(instance_groups)
13125 else:
13126 # No target groups, need to lock all of them
13127 lock_groups = locking.ALL_SET
13129 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13131 elif level == locking.LEVEL_NODE:
13132 if self.req_target_uuids:
13133 # Lock all nodes used by instances
13134 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13135 self._LockInstancesNodes()
13137 # Lock all nodes in all potential target groups
13138 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13139 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13140 member_nodes = [node_name
13141 for group in lock_groups
13142 for node_name in self.cfg.GetNodeGroup(group).members]
13143 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13144 else:
13145 # Lock all nodes as all groups are potential targets
13146 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13148 def CheckPrereq(self):
13149 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13150 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13151 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13153 assert (self.req_target_uuids is None or
13154 owned_groups.issuperset(self.req_target_uuids))
13155 assert owned_instances == set([self.op.instance_name])
13157 # Get instance information
13158 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13160 # Check if node groups for locked instance are still correct
13161 assert owned_nodes.issuperset(self.instance.all_nodes), \
13162 ("Instance %s's nodes changed while we kept the lock" %
13163 self.op.instance_name)
13165 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13166 owned_groups)
13168 if self.req_target_uuids:
13169 # User requested specific target groups
13170 self.target_uuids = frozenset(self.req_target_uuids)
13171 else:
13172 # All groups except those used by the instance are potential targets
13173 self.target_uuids = owned_groups - inst_groups
13175 conflicting_groups = self.target_uuids & inst_groups
13176 if conflicting_groups:
13177 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13178 " used by the instance '%s'" %
13179 (utils.CommaJoin(conflicting_groups),
13180 self.op.instance_name),
13181 errors.ECODE_INVAL)
13183 if not self.target_uuids:
13184 raise errors.OpPrereqError("There are no possible target groups",
13185 errors.ECODE_INVAL)
13187 def BuildHooksEnv(self):
13188 """Build hooks env.
13191 assert self.target_uuids
13194 "TARGET_GROUPS": " ".join(self.target_uuids),
13197 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13201 def BuildHooksNodes(self):
13202 """Build hooks nodes.
13205 mn = self.cfg.GetMasterNode()
13206 return ([mn], [mn])
13208 def Exec(self, feedback_fn):
13209 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13211 assert instances == [self.op.instance_name], "Instance not locked"
13213 req = iallocator.IAReqGroupChange(instances=instances,
13214 target_groups=list(self.target_uuids))
13215 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
13217 ial.Run(self.op.iallocator)
13219 if not ial.success:
13220 raise errors.OpPrereqError("Can't compute solution for changing group of"
13221 " instance '%s' using iallocator '%s': %s" %
13222 (self.op.instance_name, self.op.iallocator,
13223 ial.info), errors.ECODE_NORES)
13225 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13227 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13228 " instance '%s'", len(jobs), self.op.instance_name)
13230 return ResultWithJobs(jobs)
13233 class LUBackupQuery(NoHooksLU):
13234 """Query the exports list
13239 def CheckArguments(self):
13240 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13241 ["node", "export"], self.op.use_locking)
13243 def ExpandNames(self):
13244 self.expq.ExpandNames(self)
13246 def DeclareLocks(self, level):
13247 self.expq.DeclareLocks(self, level)
13249 def Exec(self, feedback_fn):
13250 result = {}
13252 for (node, expname) in self.expq.OldStyleQuery(self):
13253 if expname is None:
13254 result[node] = False
13255 else:
13256 result.setdefault(node, []).append(expname)
13258 return result
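# The old-style result above maps each node name either to False (the node
# could not be queried) or to the list of export names it holds. A purely
# illustrative example of the shape, with made-up names:
_EXAMPLE_EXPORTS_RESULT = {
  "node1.example.com": ["instance1.example.com"],
  "node2.example.com": False,
  }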
13261 class _ExportQuery(_QueryBase):
13262 FIELDS = query.EXPORT_FIELDS
13264 #: The node name is not a unique key for this query
13265 SORT_FIELD = "node"
13267 def ExpandNames(self, lu):
13268 lu.needed_locks = {}
13270 # The following variables interact with _QueryBase._GetNames
13271 if self.names:
13272 self.wanted = _GetWantedNodes(lu, self.names)
13273 else:
13274 self.wanted = locking.ALL_SET
13276 self.do_locking = self.use_locking
13278 if self.do_locking:
13279 lu.share_locks = _ShareAll()
13280 lu.needed_locks = {
13281 locking.LEVEL_NODE: self.wanted,
13282 }
13284 def DeclareLocks(self, lu, level):
13285 pass
13287 def _GetQueryData(self, lu):
13288 """Computes the list of nodes and their attributes.
13291 # Locking is not used
13293 assert not (compat.any(lu.glm.is_owned(level)
13294 for level in locking.LEVELS
13295 if level != locking.LEVEL_CLUSTER) or
13296 self.do_locking or self.use_locking)
13298 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13300 result = []
13302 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13303 if nres.fail_msg:
13304 result.append((node, None))
13305 else:
13306 result.extend((node, expname) for expname in nres.payload)
13308 return result
13311 class LUBackupPrepare(NoHooksLU):
13312 """Prepares an instance for an export and returns useful information.
13317 def ExpandNames(self):
13318 self._ExpandAndLockInstance()
13320 def CheckPrereq(self):
13321 """Check prerequisites.
13324 instance_name = self.op.instance_name
13326 self.instance = self.cfg.GetInstanceInfo(instance_name)
13327 assert self.instance is not None, \
13328 "Cannot retrieve locked instance %s" % self.op.instance_name
13329 _CheckNodeOnline(self, self.instance.primary_node)
13331 self._cds = _GetClusterDomainSecret()
13333 def Exec(self, feedback_fn):
13334 """Prepares an instance for an export.
13337 instance = self.instance
13339 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13340 salt = utils.GenerateSecret(8)
13342 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13343 result = self.rpc.call_x509_cert_create(instance.primary_node,
13344 constants.RIE_CERT_VALIDITY)
13345 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13347 (name, cert_pem) = result.payload
13349 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13350 cert_pem)
13352 return {
13353 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13354 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13355 salt),
13356 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13357 }
13359 return None
13362 class LUBackupExport(LogicalUnit):
13363 """Export an instance to an image in the cluster.
13366 HPATH = "instance-export"
13367 HTYPE = constants.HTYPE_INSTANCE
13370 def CheckArguments(self):
13371 """Check the arguments.
13374 self.x509_key_name = self.op.x509_key_name
13375 self.dest_x509_ca_pem = self.op.destination_x509_ca
13377 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13378 if not self.x509_key_name:
13379 raise errors.OpPrereqError("Missing X509 key name for encryption",
13380 errors.ECODE_INVAL)
13382 if not self.dest_x509_ca_pem:
13383 raise errors.OpPrereqError("Missing destination X509 CA",
13384 errors.ECODE_INVAL)
13386 def ExpandNames(self):
13387 self._ExpandAndLockInstance()
13389 # Lock all nodes for local exports
13390 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13391 # FIXME: lock only instance primary and destination node
13393 # Sad but true, for now we have to lock all nodes, as we don't know where
13394 # the previous export might be, and in this LU we search for it and
13395 # remove it from its current node. In the future we could fix this by:
13396 # - making a tasklet to search (share-lock all), then create the
13397 # new one, then one to remove, after
13398 # - removing the removal operation altogether
13399 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13401 def DeclareLocks(self, level):
13402 """Last minute lock declaration."""
13403 # All nodes are locked anyway, so nothing to do here.
13405 def BuildHooksEnv(self):
13406 """Build hooks env.
13408 This will run on the master, primary node and target node.
13412 "EXPORT_MODE": self.op.mode,
13413 "EXPORT_NODE": self.op.target_node,
13414 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13415 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13416 # TODO: Generic function for boolean env variables
13417 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13420 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13424 def BuildHooksNodes(self):
13425 """Build hooks nodes.
13428 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13430 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13431 nl.append(self.op.target_node)
13435 def CheckPrereq(self):
13436 """Check prerequisites.
13438 This checks that the instance and node names are valid.
13440 """
13441 instance_name = self.op.instance_name
13443 self.instance = self.cfg.GetInstanceInfo(instance_name)
13444 assert self.instance is not None, \
13445 "Cannot retrieve locked instance %s" % self.op.instance_name
13446 _CheckNodeOnline(self, self.instance.primary_node)
13448 if (self.op.remove_instance and
13449 self.instance.admin_state == constants.ADMINST_UP and
13450 not self.op.shutdown):
13451 raise errors.OpPrereqError("Can not remove instance without shutting it"
13452 " down before", errors.ECODE_STATE)
13454 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13455 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13456 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13457 assert self.dst_node is not None
13459 _CheckNodeOnline(self, self.dst_node.name)
13460 _CheckNodeNotDrained(self, self.dst_node.name)
13463 self.dest_disk_info = None
13464 self.dest_x509_ca = None
13466 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13467 self.dst_node = None
13469 if len(self.op.target_node) != len(self.instance.disks):
13470 raise errors.OpPrereqError(("Received destination information for %s"
13471 " disks, but instance %s has %s disks") %
13472 (len(self.op.target_node), instance_name,
13473 len(self.instance.disks)),
13474 errors.ECODE_INVAL)
13476 cds = _GetClusterDomainSecret()
13478 # Check X509 key name
13479 try:
13480 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13481 except (TypeError, ValueError), err:
13482 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
13483 errors.ECODE_INVAL)
13485 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13486 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13487 errors.ECODE_INVAL)
13489 # Load and verify CA
13490 try:
13491 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13492 except OpenSSL.crypto.Error, err:
13493 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13494 (err, ), errors.ECODE_INVAL)
13496 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13497 if errcode is not None:
13498 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13499 (msg, ), errors.ECODE_INVAL)
13501 self.dest_x509_ca = cert
13503 # Verify target information
13504 disk_info = []
13505 for idx, disk_data in enumerate(self.op.target_node):
13506 try:
13507 (host, port, magic) = \
13508 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13509 except errors.GenericError, err:
13510 raise errors.OpPrereqError("Target info for disk %s: %s" %
13511 (idx, err), errors.ECODE_INVAL)
13513 disk_info.append((host, port, magic))
13515 assert len(disk_info) == len(self.op.target_node)
13516 self.dest_disk_info = disk_info
13519 raise errors.ProgrammerError("Unhandled export mode %r" %
13522 # instance disk type verification
13523 # TODO: Implement export support for file-based disks
13524 for disk in self.instance.disks:
13525 if disk.dev_type == constants.LD_FILE:
13526 raise errors.OpPrereqError("Export not supported for instances with"
13527 " file-based disks", errors.ECODE_INVAL)
13529 def _CleanupExports(self, feedback_fn):
13530 """Removes exports of current instance from all other nodes.
13532 If an instance in a cluster with nodes A..D was exported to node C, its
13533 exports will be removed from the nodes A, B and D.
13535 """
13536 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13538 nodelist = self.cfg.GetNodeList()
13539 nodelist.remove(self.dst_node.name)
13541 # on one-node clusters nodelist will be empty after the removal
13542 # if we proceed the backup would be removed because OpBackupQuery
13543 # substitutes an empty list with the full cluster node list.
13544 iname = self.instance.name
13545 if nodelist:
13546 feedback_fn("Removing old exports for instance %s" % iname)
13547 exportlist = self.rpc.call_export_list(nodelist)
13548 for node in exportlist:
13549 if exportlist[node].fail_msg:
13550 continue
13551 if iname in exportlist[node].payload:
13552 msg = self.rpc.call_export_remove(node, iname).fail_msg
13553 if msg:
13554 self.LogWarning("Could not remove older export for instance %s"
13555 " on node %s: %s", iname, node, msg)
13557 def Exec(self, feedback_fn):
13558 """Export an instance to an image in the cluster.
13561 assert self.op.mode in constants.EXPORT_MODES
13563 instance = self.instance
13564 src_node = instance.primary_node
13566 if self.op.shutdown:
13567 # shutdown the instance, but not the disks
13568 feedback_fn("Shutting down instance %s" % instance.name)
13569 result = self.rpc.call_instance_shutdown(src_node, instance,
13570 self.op.shutdown_timeout)
13571 # TODO: Maybe ignore failures if ignore_remove_failures is set
13572 result.Raise("Could not shutdown instance %s on"
13573 " node %s" % (instance.name, src_node))
13575 # set the disks ID correctly since call_instance_start needs the
13576 # correct drbd minor to create the symlinks
13577 for disk in instance.disks:
13578 self.cfg.SetDiskID(disk, src_node)
13580 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13582 if activate_disks:
13583 # Activate the instance disks if we're exporting a stopped instance
13584 feedback_fn("Activating disks for %s" % instance.name)
13585 _StartInstanceDisks(self, instance, None)
13587 try:
13588 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13589 instance)
13591 helper.CreateSnapshots()
13592 try:
13593 if (self.op.shutdown and
13594 instance.admin_state == constants.ADMINST_UP and
13595 not self.op.remove_instance):
13596 assert not activate_disks
13597 feedback_fn("Starting instance %s" % instance.name)
13598 result = self.rpc.call_instance_start(src_node,
13599 (instance, None, None), False)
13600 msg = result.fail_msg
13601 if msg:
13602 feedback_fn("Failed to start instance: %s" % msg)
13603 _ShutdownInstanceDisks(self, instance)
13604 raise errors.OpExecError("Could not start instance: %s" % msg)
13606 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13607 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13608 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13609 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13610 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13612 (key_name, _, _) = self.x509_key_name
13614 dest_ca_pem = \
13615 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13616 self.dest_x509_ca)
13618 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13619 key_name, dest_ca_pem,
13620 timeouts)
13621 finally:
13622 helper.Cleanup()
13624 # Check for backwards compatibility
13625 assert len(dresults) == len(instance.disks)
13626 assert compat.all(isinstance(i, bool) for i in dresults), \
13627 "Not all results are boolean: %r" % dresults
13631 feedback_fn("Deactivating disks for %s" % instance.name)
13632 _ShutdownInstanceDisks(self, instance)
13634 if not (compat.all(dresults) and fin_resu):
13635 failures = []
13636 if not fin_resu:
13637 failures.append("export finalization")
13638 if not compat.all(dresults):
13639 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13640 if not dsk)
13641 failures.append("disk export: disk(s) %s" % fdsk)
13643 raise errors.OpExecError("Export failed, errors in %s" %
13644 utils.CommaJoin(failures))
13646 # At this point, the export was successful, we can cleanup/finish
13648 # Remove instance if requested
13649 if self.op.remove_instance:
13650 feedback_fn("Removing instance %s" % instance.name)
13651 _RemoveInstance(self, feedback_fn, instance,
13652 self.op.ignore_remove_failures)
13654 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13655 self._CleanupExports(feedback_fn)
13657 return fin_resu, dresults
13660 class LUBackupRemove(NoHooksLU):
13661 """Remove exports related to the named instance.
13666 def ExpandNames(self):
13667 self.needed_locks = {}
13668 # We need all nodes to be locked in order for RemoveExport to work, but we
13669 # don't need to lock the instance itself, as nothing will happen to it (and
13670 # we can remove exports also for a removed instance)
13671 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13673 def Exec(self, feedback_fn):
13674 """Remove any export.
13677 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13678 # If the instance was not found we'll try with the name that was passed in.
13679 # This will only work if it was an FQDN, though.
13680 fqdn_warn = False
13681 if not instance_name:
13682 fqdn_warn = True
13683 instance_name = self.op.instance_name
13685 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13686 exportlist = self.rpc.call_export_list(locked_nodes)
13687 found = False
13688 for node in exportlist:
13689 msg = exportlist[node].fail_msg
13690 if msg:
13691 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13692 continue
13693 if instance_name in exportlist[node].payload:
13694 found = True
13695 result = self.rpc.call_export_remove(node, instance_name)
13696 msg = result.fail_msg
13697 if msg:
13698 logging.error("Could not remove export for instance %s"
13699 " on node %s: %s", instance_name, node, msg)
13701 if fqdn_warn and not found:
13702 feedback_fn("Export not found. If trying to remove an export belonging"
13703 " to a deleted instance please use its Fully Qualified"
13707 class LUGroupAdd(LogicalUnit):
13708 """Logical unit for creating node groups.
13711 HPATH = "group-add"
13712 HTYPE = constants.HTYPE_GROUP
13715 def ExpandNames(self):
13716 # We need the new group's UUID here so that we can create and acquire the
13717 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13718 # that it should not check whether the UUID exists in the configuration.
13719 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13720 self.needed_locks = {}
13721 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13723 def CheckPrereq(self):
13724 """Check prerequisites.
13726 This checks that the given group name is not an existing node group
13727 already.
13729 """
13730 try:
13731 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13732 except errors.OpPrereqError:
13733 pass
13734 else:
13735 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13736 " node group (UUID: %s)" %
13737 (self.op.group_name, existing_uuid),
13738 errors.ECODE_EXISTS)
13740 if self.op.ndparams:
13741 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13743 if self.op.hv_state:
13744 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13745 else:
13746 self.new_hv_state = None
13748 if self.op.disk_state:
13749 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13750 else:
13751 self.new_disk_state = None
13753 if self.op.diskparams:
13754 for templ in constants.DISK_TEMPLATES:
13755 if templ in self.op.diskparams:
13756 utils.ForceDictType(self.op.diskparams[templ],
13757 constants.DISK_DT_TYPES)
13758 self.new_diskparams = self.op.diskparams
13759 try:
13760 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13761 except errors.OpPrereqError, err:
13762 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
13763 errors.ECODE_INVAL)
13764 else:
13765 self.new_diskparams = {}
13767 if self.op.ipolicy:
13768 cluster = self.cfg.GetClusterInfo()
13769 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13770 try:
13771 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13772 except errors.ConfigurationError, err:
13773 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13774 errors.ECODE_INVAL)
13776 def BuildHooksEnv(self):
13777 """Build hooks env.
13781 "GROUP_NAME": self.op.group_name,
13784 def BuildHooksNodes(self):
13785 """Build hooks nodes.
13788 mn = self.cfg.GetMasterNode()
13789 return ([mn], [mn])
13791 def Exec(self, feedback_fn):
13792 """Add the node group to the cluster.
13795 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13796 uuid=self.group_uuid,
13797 alloc_policy=self.op.alloc_policy,
13798 ndparams=self.op.ndparams,
13799 diskparams=self.new_diskparams,
13800 ipolicy=self.op.ipolicy,
13801 hv_state_static=self.new_hv_state,
13802 disk_state_static=self.new_disk_state)
13804 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13805 del self.remove_locks[locking.LEVEL_NODEGROUP]
13808 class LUGroupAssignNodes(NoHooksLU):
13809 """Logical unit for assigning nodes to groups.
13814 def ExpandNames(self):
13815 # These raise errors.OpPrereqError on their own:
13816 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13817 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13819 # We want to lock all the affected nodes and groups. We have readily
13820 # available the list of nodes, and the *destination* group. To gather the
13821 # list of "source" groups, we need to fetch node information later on.
13822 self.needed_locks = {
13823 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13824 locking.LEVEL_NODE: self.op.nodes,
13825 }
13827 def DeclareLocks(self, level):
13828 if level == locking.LEVEL_NODEGROUP:
13829 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13831 # Try to get all affected nodes' groups without having the group or node
13832 # lock yet. Needs verification later in the code flow.
13833 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13835 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13837 def CheckPrereq(self):
13838 """Check prerequisites.
13841 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13842 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13843 frozenset(self.op.nodes))
13845 expected_locks = (set([self.group_uuid]) |
13846 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13847 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13848 if actual_locks != expected_locks:
13849 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13850 " current groups are '%s', used to be '%s'" %
13851 (utils.CommaJoin(expected_locks),
13852 utils.CommaJoin(actual_locks)))
13854 self.node_data = self.cfg.GetAllNodesInfo()
13855 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13856 instance_data = self.cfg.GetAllInstancesInfo()
13858 if self.group is None:
13859 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13860 (self.op.group_name, self.group_uuid))
13862 (new_splits, previous_splits) = \
13863 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13864 for node in self.op.nodes],
13865 self.node_data, instance_data)
13867 if new_splits:
13868 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13870 if not self.op.force:
13871 raise errors.OpExecError("The following instances get split by this"
13872 " change and --force was not given: %s" %
13873 fmt_new_splits)
13874 else:
13875 self.LogWarning("This operation will split the following instances: %s",
13876 fmt_new_splits)
13878 if previous_splits:
13879 self.LogWarning("In addition, these already-split instances continue"
13880 " to be split across groups: %s",
13881 utils.CommaJoin(utils.NiceSort(previous_splits)))
13883 def Exec(self, feedback_fn):
13884 """Assign nodes to a new group.
13887 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13889 self.cfg.AssignGroupNodes(mods)
13891 @staticmethod
13892 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13893 """Check for split instances after a node assignment.
13895 This method considers a series of node assignments as an atomic operation,
13896 and returns information about split instances after applying the set of
13897 changes.
13899 In particular, it returns information about newly split instances, and
13900 instances that were already split, and remain so after the change.
13902 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13903 considered.
13905 @type changes: list of (node_name, new_group_uuid) pairs.
13906 @param changes: list of node assignments to consider.
13907 @param node_data: a dict with data for all nodes
13908 @param instance_data: a dict with all instances to consider
13909 @rtype: a two-tuple
13910 @return: a list of instances that were previously okay and result split as a
13911 consequence of this change, and a list of instances that were previously
13912 split and this change does not fix.
13914 """
13915 changed_nodes = dict((node, group) for node, group in changes
13916 if node_data[node].group != group)
13918 all_split_instances = set()
13919 previously_split_instances = set()
13921 def InstanceNodes(instance):
13922 return [instance.primary_node] + list(instance.secondary_nodes)
13924 for inst in instance_data.values():
13925 if inst.disk_template not in constants.DTS_INT_MIRROR:
13926 continue
13928 instance_nodes = InstanceNodes(inst)
13930 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13931 previously_split_instances.add(inst.name)
13933 if len(set(changed_nodes.get(node, node_data[node].group)
13934 for node in instance_nodes)) > 1:
13935 all_split_instances.add(inst.name)
13937 return (list(all_split_instances - previously_split_instances),
13938 list(previously_split_instances & all_split_instances))
13941 class _GroupQuery(_QueryBase):
13942 FIELDS = query.GROUP_FIELDS
13944 def ExpandNames(self, lu):
13945 lu.needed_locks = {}
13947 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13948 self._cluster = lu.cfg.GetClusterInfo()
13949 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13951 if not self.names:
13952 self.wanted = [name_to_uuid[name]
13953 for name in utils.NiceSort(name_to_uuid.keys())]
13954 else:
13955 # Accept names to be either names or UUIDs.
13956 missing = []
13957 self.wanted = []
13958 all_uuid = frozenset(self._all_groups.keys())
13960 for name in self.names:
13961 if name in all_uuid:
13962 self.wanted.append(name)
13963 elif name in name_to_uuid:
13964 self.wanted.append(name_to_uuid[name])
13965 else:
13966 missing.append(name)
13968 if missing:
13969 raise errors.OpPrereqError("Some groups do not exist: %s" %
13970 utils.CommaJoin(missing),
13971 errors.ECODE_NOENT)
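# The lookup above accepts group names and UUIDs interchangeably. A compact,
# self-contained sketch of the same resolution logic (data made up):
def _ExampleResolveGroups(names, name_to_uuid):
  """Maps mixed names/UUIDs to UUIDs, collecting unknown entries.

  """
  all_uuids = frozenset(name_to_uuid.values())
  wanted = []
  missing = []
  for name in names:
    if name in all_uuids:
      wanted.append(name)
    elif name in name_to_uuid:
      wanted.append(name_to_uuid[name])
    else:
      missing.append(name)
  return (wanted, missing)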
13973 def DeclareLocks(self, lu, level):
13974 pass
13976 def _GetQueryData(self, lu):
13977 """Computes the list of node groups and their attributes.
13980 do_nodes = query.GQ_NODE in self.requested_data
13981 do_instances = query.GQ_INST in self.requested_data
13983 group_to_nodes = None
13984 group_to_instances = None
13986 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13987 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13988 # latter GetAllInstancesInfo() is not enough, for we have to go through
13989 # instance->node. Hence, we will need to process nodes even if we only need
13990 # instance information.
13991 if do_nodes or do_instances:
13992 all_nodes = lu.cfg.GetAllNodesInfo()
13993 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13994 node_to_group = {}
13996 for node in all_nodes.values():
13997 if node.group in group_to_nodes:
13998 group_to_nodes[node.group].append(node.name)
13999 node_to_group[node.name] = node.group
14001 if do_instances:
14002 all_instances = lu.cfg.GetAllInstancesInfo()
14003 group_to_instances = dict((uuid, []) for uuid in self.wanted)
14005 for instance in all_instances.values():
14006 node = instance.primary_node
14007 if node in node_to_group:
14008 group_to_instances[node_to_group[node]].append(instance.name)
14010 if not do_nodes:
14011 # Do not pass on node information if it was not requested.
14012 group_to_nodes = None
14014 return query.GroupQueryData(self._cluster,
14015 [self._all_groups[uuid]
14016 for uuid in self.wanted],
14017 group_to_nodes, group_to_instances,
14018 query.GQ_DISKPARAMS in self.requested_data)
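# A minimal sketch of the two-step mapping described in the comment above:
# nodes map directly to their group, while instances reach a group only via
# their primary node. Inputs below are plain dicts, purely illustrative.
def _ExampleGroupToInstances(node_to_group, instance_to_pnode):
  """Buckets instance names by the group of their primary node.

  """
  group_to_instances = {}
  for (inst, pnode) in instance_to_pnode.items():
    group = node_to_group.get(pnode)
    if group is not None:
      group_to_instances.setdefault(group, []).append(inst)
  return group_to_instances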
14021 class LUGroupQuery(NoHooksLU):
14022 """Logical unit for querying node groups.
14027 def CheckArguments(self):
14028 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14029 self.op.output_fields, False)
14031 def ExpandNames(self):
14032 self.gq.ExpandNames(self)
14034 def DeclareLocks(self, level):
14035 self.gq.DeclareLocks(self, level)
14037 def Exec(self, feedback_fn):
14038 return self.gq.OldStyleQuery(self)
14041 class LUGroupSetParams(LogicalUnit):
14042 """Modifies the parameters of a node group.
14045 HPATH = "group-modify"
14046 HTYPE = constants.HTYPE_GROUP
14049 def CheckArguments(self):
14050 all_changes = [
14051 self.op.ndparams,
14052 self.op.diskparams,
14053 self.op.alloc_policy,
14054 self.op.hv_state,
14055 self.op.disk_state,
14056 self.op.ipolicy,
14057 ]
14059 if all_changes.count(None) == len(all_changes):
14060 raise errors.OpPrereqError("Please pass at least one modification",
14061 errors.ECODE_INVAL)
14063 def ExpandNames(self):
14064 # This raises errors.OpPrereqError on its own:
14065 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14067 self.needed_locks = {
14068 locking.LEVEL_INSTANCE: [],
14069 locking.LEVEL_NODEGROUP: [self.group_uuid],
14070 }
14072 self.share_locks[locking.LEVEL_INSTANCE] = 1
14074 def DeclareLocks(self, level):
14075 if level == locking.LEVEL_INSTANCE:
14076 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14078 # Lock instances optimistically, needs verification once group lock has
14079 # been acquired
14080 self.needed_locks[locking.LEVEL_INSTANCE] = \
14081 self.cfg.GetNodeGroupInstances(self.group_uuid)
14083 @staticmethod
14084 def _UpdateAndVerifyDiskParams(old, new):
14085 """Updates and verifies disk parameters.
14087 """
14088 new_params = _GetUpdatedParams(old, new)
14089 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14090 return new_params
14092 def CheckPrereq(self):
14093 """Check prerequisites.
14096 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14098 # Check if locked instances are still correct
14099 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14101 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14102 cluster = self.cfg.GetClusterInfo()
14104 if self.group is None:
14105 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14106 (self.op.group_name, self.group_uuid))
14108 if self.op.ndparams:
14109 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14110 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14111 self.new_ndparams = new_ndparams
14113 if self.op.diskparams:
14114 diskparams = self.group.diskparams
14115 uavdp = self._UpdateAndVerifyDiskParams
14116 # For each disktemplate subdict update and verify the values
14117 new_diskparams = dict((dt,
14118 uavdp(diskparams.get(dt, {}),
14119 self.op.diskparams[dt]))
14120 for dt in constants.DISK_TEMPLATES
14121 if dt in self.op.diskparams)
14122 # Now that all the subdicts of diskparams are ready, merge the
14123 # actual dict with all updated subdicts
14124 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14125 try:
14126 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14127 except errors.OpPrereqError, err:
14128 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14129 errors.ECODE_INVAL)
14131 if self.op.hv_state:
14132 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14133 self.group.hv_state_static)
14135 if self.op.disk_state:
14136 self.new_disk_state = \
14137 _MergeAndVerifyDiskState(self.op.disk_state,
14138 self.group.disk_state_static)
14140 if self.op.ipolicy:
14141 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14142 self.op.ipolicy,
14143 group_policy=True)
14145 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14146 inst_filter = lambda inst: inst.name in owned_instances
14147 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14148 gmi = ganeti.masterd.instance
14149 violations = \
14150 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14151 self.group),
14152 new_ipolicy, instances)
14154 if violations:
14155 self.LogWarning("After the ipolicy change the following instances"
14156 " violate them: %s",
14157 utils.CommaJoin(violations))
14159 def BuildHooksEnv(self):
14160 """Build hooks env.
14164 "GROUP_NAME": self.op.group_name,
14165 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14168 def BuildHooksNodes(self):
14169 """Build hooks nodes.
14172 mn = self.cfg.GetMasterNode()
14173 return ([mn], [mn])
14175 def Exec(self, feedback_fn):
14176 """Modifies the node group.
14181 if self.op.ndparams:
14182 self.group.ndparams = self.new_ndparams
14183 result.append(("ndparams", str(self.group.ndparams)))
14185 if self.op.diskparams:
14186 self.group.diskparams = self.new_diskparams
14187 result.append(("diskparams", str(self.group.diskparams)))
14189 if self.op.alloc_policy:
14190 self.group.alloc_policy = self.op.alloc_policy
14192 if self.op.hv_state:
14193 self.group.hv_state_static = self.new_hv_state
14195 if self.op.disk_state:
14196 self.group.disk_state_static = self.new_disk_state
14198 if self.op.ipolicy:
14199 self.group.ipolicy = self.new_ipolicy
14201 self.cfg.Update(self.group, feedback_fn)
14203 return result
14205 class LUGroupRemove(LogicalUnit):
14206 HPATH = "group-remove"
14207 HTYPE = constants.HTYPE_GROUP
14208 REQ_BGL = False
14210 def ExpandNames(self):
14211 # This raises errors.OpPrereqError on its own:
14212 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14213 self.needed_locks = {
14214 locking.LEVEL_NODEGROUP: [self.group_uuid],
14215 }
14217 def CheckPrereq(self):
14218 """Check prerequisites.
14220 This checks that the given group name exists as a node group, that is
14221 empty (i.e., contains no nodes), and that is not the last group of the
14222 cluster.
14224 """
14225 # Verify that the group is empty.
14226 group_nodes = [node.name
14227 for node in self.cfg.GetAllNodesInfo().values()
14228 if node.group == self.group_uuid]
14230 if group_nodes:
14231 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14232 " nodes: %s" %
14233 (self.op.group_name,
14234 utils.CommaJoin(utils.NiceSort(group_nodes))),
14235 errors.ECODE_STATE)
14237 # Verify the cluster would not be left group-less.
14238 if len(self.cfg.GetNodeGroupList()) == 1:
14239 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
14240 " removed" % self.op.group_name,
14241 errors.ECODE_STATE)
14243 def BuildHooksEnv(self):
14244 """Build hooks env.
14248 "GROUP_NAME": self.op.group_name,
14251 def BuildHooksNodes(self):
14252 """Build hooks nodes.
14255 mn = self.cfg.GetMasterNode()
14256 return ([mn], [mn])
14258 def Exec(self, feedback_fn):
14259 """Remove the node group.
14263 self.cfg.RemoveNodeGroup(self.group_uuid)
14264 except errors.ConfigurationError:
14265 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14266 (self.op.group_name, self.group_uuid))
14268 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14271 class LUGroupRename(LogicalUnit):
14272 HPATH = "group-rename"
14273 HTYPE = constants.HTYPE_GROUP
14274 REQ_BGL = False
14276 def ExpandNames(self):
14277 # This raises errors.OpPrereqError on its own:
14278 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14280 self.needed_locks = {
14281 locking.LEVEL_NODEGROUP: [self.group_uuid],
14282 }
14284 def CheckPrereq(self):
14285 """Check prerequisites.
14287 Ensures requested new name is not yet used.
14289 """
14290 try:
14291 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14292 except errors.OpPrereqError:
14293 pass
14294 else:
14295 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14296 " node group (UUID: %s)" %
14297 (self.op.new_name, new_name_uuid),
14298 errors.ECODE_EXISTS)
14300 def BuildHooksEnv(self):
14301 """Build hooks env.
14305 "OLD_NAME": self.op.group_name,
14306 "NEW_NAME": self.op.new_name,
14309 def BuildHooksNodes(self):
14310 """Build hooks nodes.
14313 mn = self.cfg.GetMasterNode()
14315 all_nodes = self.cfg.GetAllNodesInfo()
14316 all_nodes.pop(mn, None)
14319 run_nodes.extend(node.name for node in all_nodes.values()
14320 if node.group == self.group_uuid)
14322 return (run_nodes, run_nodes)
14324 def Exec(self, feedback_fn):
14325 """Rename the node group.
14328 group = self.cfg.GetNodeGroup(self.group_uuid)
14331 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14332 (self.op.group_name, self.group_uuid))
14334 group.name = self.op.new_name
14335 self.cfg.Update(group, feedback_fn)
14337 return self.op.new_name
14340 class LUGroupEvacuate(LogicalUnit):
14341 HPATH = "group-evacuate"
14342 HTYPE = constants.HTYPE_GROUP
14343 REQ_BGL = False
14345 def ExpandNames(self):
14346 # This raises errors.OpPrereqError on its own:
14347 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14349 if self.op.target_groups:
14350 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14351 self.op.target_groups)
14352 else:
14353 self.req_target_uuids = []
14355 if self.group_uuid in self.req_target_uuids:
14356 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14357 " as a target group (targets are %s)" %
14359 utils.CommaJoin(self.req_target_uuids)),
14360 errors.ECODE_INVAL)
14362 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14364 self.share_locks = _ShareAll()
14365 self.needed_locks = {
14366 locking.LEVEL_INSTANCE: [],
14367 locking.LEVEL_NODEGROUP: [],
14368 locking.LEVEL_NODE: [],
14369 }
14371 def DeclareLocks(self, level):
14372 if level == locking.LEVEL_INSTANCE:
14373 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14375 # Lock instances optimistically, needs verification once node and group
14376 # locks have been acquired
14377 self.needed_locks[locking.LEVEL_INSTANCE] = \
14378 self.cfg.GetNodeGroupInstances(self.group_uuid)
14380 elif level == locking.LEVEL_NODEGROUP:
14381 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14383 if self.req_target_uuids:
14384 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14385 else:
14386 # Lock all groups used by instances optimistically; this requires going
14387 # via the node before it's locked, requiring verification later on
14388 lock_groups.update(group_uuid
14389 for instance_name in
14390 self.owned_locks(locking.LEVEL_INSTANCE)
14391 for group_uuid in
14392 self.cfg.GetInstanceNodeGroups(instance_name))
14393 else:
14394 # No target groups, need to lock all of them
14395 lock_groups = locking.ALL_SET
14397 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14399 elif level == locking.LEVEL_NODE:
14400 # This will only lock the nodes in the group to be evacuated which
14401 # contain actual instances
14402 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14403 self._LockInstancesNodes()
14405 # Lock all nodes in group to be evacuated and target groups
14406 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14407 assert self.group_uuid in owned_groups
14408 member_nodes = [node_name
14409 for group in owned_groups
14410 for node_name in self.cfg.GetNodeGroup(group).members]
14411 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14413 def CheckPrereq(self):
14414 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14415 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14416 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14418 assert owned_groups.issuperset(self.req_target_uuids)
14419 assert self.group_uuid in owned_groups
14421 # Check if locked instances are still correct
14422 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14424 # Get instance information
14425 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14427 # Check if node groups for locked instances are still correct
14428 _CheckInstancesNodeGroups(self.cfg, self.instances,
14429 owned_groups, owned_nodes, self.group_uuid)
14431 if self.req_target_uuids:
14432 # User requested specific target groups
14433 self.target_uuids = self.req_target_uuids
14434 else:
14435 # All groups except the one to be evacuated are potential targets
14436 self.target_uuids = [group_uuid for group_uuid in owned_groups
14437 if group_uuid != self.group_uuid]
14439 if not self.target_uuids:
14440 raise errors.OpPrereqError("There are no possible target groups",
14441 errors.ECODE_INVAL)
14443 def BuildHooksEnv(self):
14444 """Build hooks env.
14448 "GROUP_NAME": self.op.group_name,
14449 "TARGET_GROUPS": " ".join(self.target_uuids),
14452 def BuildHooksNodes(self):
14453 """Build hooks nodes.
14456 mn = self.cfg.GetMasterNode()
14458 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14460 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14462 return (run_nodes, run_nodes)
14464 def Exec(self, feedback_fn):
14465 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14467 assert self.group_uuid not in self.target_uuids
14469 req = iallocator.IAReqGroupChange(instances=instances,
14470 target_groups=self.target_uuids)
14471 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14473 ial.Run(self.op.iallocator)
14475 if not ial.success:
14476 raise errors.OpPrereqError("Can't compute group evacuation using"
14477 " iallocator '%s': %s" %
14478 (self.op.iallocator, ial.info),
14479 errors.ECODE_NORES)
14481 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14483 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14484 len(jobs), self.op.group_name)
14486 return ResultWithJobs(jobs)
14489 class TagsLU(NoHooksLU): # pylint: disable=W0223
14490 """Generic tags LU.
14492 This is an abstract class which is the parent of all the other tags LUs.
14494 """
14495 def ExpandNames(self):
14496 self.group_uuid = None
14497 self.needed_locks = {}
14499 if self.op.kind == constants.TAG_NODE:
14500 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14501 lock_level = locking.LEVEL_NODE
14502 lock_name = self.op.name
14503 elif self.op.kind == constants.TAG_INSTANCE:
14504 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14505 lock_level = locking.LEVEL_INSTANCE
14506 lock_name = self.op.name
14507 elif self.op.kind == constants.TAG_NODEGROUP:
14508 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14509 lock_level = locking.LEVEL_NODEGROUP
14510 lock_name = self.group_uuid
14511 else:
14512 lock_level = None
14513 lock_name = None
14515 if lock_level and getattr(self.op, "use_locking", True):
14516 self.needed_locks[lock_level] = lock_name
14518 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14519 # not possible to acquire the BGL based on opcode parameters)
14521 def CheckPrereq(self):
14522 """Check prerequisites.
14525 if self.op.kind == constants.TAG_CLUSTER:
14526 self.target = self.cfg.GetClusterInfo()
14527 elif self.op.kind == constants.TAG_NODE:
14528 self.target = self.cfg.GetNodeInfo(self.op.name)
14529 elif self.op.kind == constants.TAG_INSTANCE:
14530 self.target = self.cfg.GetInstanceInfo(self.op.name)
14531 elif self.op.kind == constants.TAG_NODEGROUP:
14532 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14533 else:
14534 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14535 str(self.op.kind), errors.ECODE_INVAL)
14538 class LUTagsGet(TagsLU):
14539 """Returns the tags of a given object.
14544 def ExpandNames(self):
14545 TagsLU.ExpandNames(self)
14547 # Share locks as this is only a read operation
14548 self.share_locks = _ShareAll()
14550 def Exec(self, feedback_fn):
14551 """Returns the tag list.
14554 return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
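

# A self-contained illustration (with made-up paths and tags) of the scan
# performed in LUTagsSearch.Exec above: every (path, taggable) pair is
# checked tag by tag against the compiled pattern.
#
#   import re
#   tgts = [("/instances/web1", ["prod", "web"]),
#           ("/nodes/node1", ["prod"])]
#   rx = re.compile("^prod$")
#   results = [(path, tag)
#              for (path, tags) in tgts
#              for tag in tags
#              if rx.search(tag)]
#   # results == [("/instances/web1", "prod"), ("/nodes/node1", "prod")]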


class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)
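

# The "tag not found" check in LUTagsDel.CheckPrereq is a plain set
# difference; a minimal standalone example with hypothetical tags:
#
#   del_tags = frozenset(["prod", "legacy"])
#   cur_tags = frozenset(["prod", "web"])
#   diff_tags = del_tags - cur_tags
#   # diff_tags == frozenset(["legacy"]) -> OpPrereqError("Tag(s) 'legacy'
#   # not found") is raised before anything is removed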


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
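

# Note the repeat semantics above: repeat == 0 still performs exactly one
# delay (just without per-iteration logging), while repeat == N performs N
# delays, logged as iterations 0/N-1 through N-1/N-1.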


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()
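
  # An illustrative sketch of the client side of this rendezvous
  # (hypothetical code, not shipped with this module): the test client learns
  # the socket path from the notification sent via the callback, connects
  # within _CLIENT_CONNECT_TIMEOUT, and finally writes a byte or closes the
  # connection, which unblocks conn.recv(1) above.
  #
  #   import socket
  #   sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  #   sock.connect(sockname)  # path announced via the callback
  #   ...                     # perform the client-side checks
  #   sock.close()            # confirms the notification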

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True
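

# When all notification options are enabled, the sequence emitted by
# LUTestJqueue (as implemented above) is: JQT_EXPANDNAMES while expanding
# names, JQT_EXEC at the start of Exec, JQT_STARTMSG with the message count,
# then, per log message, one JQT_MSGPREFIX-prefixed feedback line followed by
# a JQT_LOGMSG notification reporting how many messages were sent so far.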


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
                        constants.IALLOCATOR_MODE_MULTI_ALLOC):
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)
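
  # For illustration, a disks entry that satisfies the validation above
  # (the values are hypothetical):
  #
  #   {constants.IDISK_SIZE: 1024,                 # size in MiB, must be int
  #    constants.IDISK_MODE: constants.DISK_RDWR}  # must be in DISK_ACCESS_SET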

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      req = iallocator.IAReqInstanceAlloc(name=self.op.name,
                                          memory=self.op.memory,
                                          disks=self.op.disks,
                                          disk_template=self.op.disk_template,
                                          os=self.op.os,
                                          tags=self.op.tags,
                                          nics=self.op.nics,
                                          vcpus=self.op.vcpus,
                                          spindle_use=self.op.spindle_use,
                                          hypervisor=self.op.hypervisor)
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      req = iallocator.IAReqRelocate(name=self.op.name,
                                     relocate_from=list(self.relocate_from))
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      req = iallocator.IAReqGroupChange(instances=self.op.instances,
                                        target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      req = iallocator.IAReqNodeEvac(instances=self.op.instances,
                                     evac_mode=self.op.evac_mode)
    elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
      disk_template = self.op.disk_template
      insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
                                             memory=self.op.memory,
                                             disks=self.op.disks,
                                             disk_template=disk_template,
                                             os=self.op.os,
                                             tags=self.op.tags,
                                             nics=self.op.nics,
                                             vcpus=self.op.vcpus,
                                             spindle_use=self.op.spindle_use,
                                             hypervisor=self.op.hypervisor)
               for idx in range(self.op.count)]
      req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    ial = iallocator.IAllocator(self.cfg, self.rpc, req)
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
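

# Resolution through _QUERY_IMPL is a plain dictionary lookup, e.g.:
#
#   impl = _GetQueryImplementation(constants.QR_NODE)  # -> _NodeQuery
#
# Unknown resource names are translated from KeyError into OpPrereqError so
# that callers see a regular opcode-level failure instead of a crash.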