4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
56 from ganeti import query
57 from ganeti import qlang
58 from ganeti import opcodes
60 from ganeti import rpc
61 from ganeti import runtime
62 from ganeti import pathutils
63 from ganeti.masterd import iallocator
65 import ganeti.masterd.instance # pylint: disable=W0611
69 INSTANCE_DOWN = [constants.ADMINST_DOWN]
70 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
71 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
73 #: Instance status in which an instance can be marked as offline/online
74 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
75 constants.ADMINST_OFFLINE,
80 """Data container for LU results with jobs.
82 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
83 by L{mcpu._ProcessResult}. The latter will then submit the jobs
84 contained in the C{jobs} attribute and include the job IDs in the opcode
88 def __init__(self, jobs, **kwargs):
89 """Initializes this class.
91 Additional return values can be specified as keyword arguments.
93 @type jobs: list of lists of L{opcode.OpCode}
94 @param jobs: A list of lists of opcode objects
101 class LogicalUnit(object):
102 """Logical Unit base class.
104 Subclasses must follow these rules:
105 - implement ExpandNames
106 - implement CheckPrereq (except when tasklets are used)
107 - implement Exec (except when tasklets are used)
108 - implement BuildHooksEnv
109 - implement BuildHooksNodes
110 - redefine HPATH and HTYPE
111 - optionally redefine their run requirements:
112 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
114 Note that all commands require root permissions.
116 @ivar dry_run_result: the value (if any) that will be returned to the caller
117 in dry-run mode (signalled by opcode dry_run parameter)
124 def __init__(self, processor, op, context, rpc_runner):
125 """Constructor for LogicalUnit.
127 This needs to be overridden in derived classes in order to check op
131 self.proc = processor
133 self.cfg = context.cfg
134 self.glm = context.glm
136 self.owned_locks = context.glm.list_owned
137 self.context = context
138 self.rpc = rpc_runner
139 # Dicts used to declare locking needs to mcpu
140 self.needed_locks = None
141 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
143 self.remove_locks = {}
144 # Used to force good behavior when calling helper functions
145 self.recalculate_locks = {}
147 self.Log = processor.Log # pylint: disable=C0103
148 self.LogWarning = processor.LogWarning # pylint: disable=C0103
149 self.LogInfo = processor.LogInfo # pylint: disable=C0103
150 self.LogStep = processor.LogStep # pylint: disable=C0103
151 # support for dry-run
152 self.dry_run_result = None
153 # support for generic debug attribute
154 if (not hasattr(self.op, "debug_level") or
155 not isinstance(self.op.debug_level, int)):
156 self.op.debug_level = 0
161 # Validate opcode parameters and set defaults
162 self.op.Validate(True)
164 self.CheckArguments()
166 def CheckArguments(self):
167 """Check syntactic validity for the opcode arguments.
169 This method is for doing a simple syntactic check and ensure
170 validity of opcode parameters, without any cluster-related
171 checks. While the same can be accomplished in ExpandNames and/or
172 CheckPrereq, doing these separate is better because:
174 - ExpandNames is left as as purely a lock-related function
175 - CheckPrereq is run after we have acquired locks (and possible
178 The function is allowed to change the self.op attribute so that
179 later methods can no longer worry about missing parameters.
184 def ExpandNames(self):
185 """Expand names for this LU.
187 This method is called before starting to execute the opcode, and it should
188 update all the parameters of the opcode to their canonical form (e.g. a
189 short node name must be fully expanded after this method has successfully
190 completed). This way locking, hooks, logging, etc. can work correctly.
192 LUs which implement this method must also populate the self.needed_locks
193 member, as a dict with lock levels as keys, and a list of needed lock names
196 - use an empty dict if you don't need any lock
197 - if you don't need any lock at a particular level omit that
198 level (note that in this case C{DeclareLocks} won't be called
199 at all for that level)
200 - if you need locks at a level, but you can't calculate it in
201 this function, initialise that level with an empty list and do
202 further processing in L{LogicalUnit.DeclareLocks} (see that
203 function's docstring)
204 - don't put anything for the BGL level
205 - if you want all locks at a level use L{locking.ALL_SET} as a value
207 If you need to share locks (rather than acquire them exclusively) at one
208 level you can modify self.share_locks, setting a true value (usually 1) for
209 that level. By default locks are not shared.
211 This function can also define a list of tasklets, which then will be
212 executed in order instead of the usual LU-level CheckPrereq and Exec
213 functions, if those are not defined by the LU.
217 # Acquire all nodes and one instance
218 self.needed_locks = {
219 locking.LEVEL_NODE: locking.ALL_SET,
220 locking.LEVEL_INSTANCE: ['instance1.example.com'],
222 # Acquire just two nodes
223 self.needed_locks = {
224 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
227 self.needed_locks = {} # No, you can't leave it to the default value None
230 # The implementation of this method is mandatory only if the new LU is
231 # concurrent, so that old LUs don't need to be changed all at the same
234 self.needed_locks = {} # Exclusive LUs don't need locks.
236 raise NotImplementedError
238 def DeclareLocks(self, level):
239 """Declare LU locking needs for a level
241 While most LUs can just declare their locking needs at ExpandNames time,
242 sometimes there's the need to calculate some locks after having acquired
243 the ones before. This function is called just before acquiring locks at a
244 particular level, but after acquiring the ones at lower levels, and permits
245 such calculations. It can be used to modify self.needed_locks, and by
246 default it does nothing.
248 This function is only called if you have something already set in
249 self.needed_locks for the level.
251 @param level: Locking level which is going to be locked
252 @type level: member of L{ganeti.locking.LEVELS}
256 def CheckPrereq(self):
257 """Check prerequisites for this LU.
259 This method should check that the prerequisites for the execution
260 of this LU are fulfilled. It can do internode communication, but
261 it should be idempotent - no cluster or system changes are
264 The method should raise errors.OpPrereqError in case something is
265 not fulfilled. Its return value is ignored.
267 This method should also update all the parameters of the opcode to
268 their canonical form if it hasn't been done by ExpandNames before.
271 if self.tasklets is not None:
272 for (idx, tl) in enumerate(self.tasklets):
273 logging.debug("Checking prerequisites for tasklet %s/%s",
274 idx + 1, len(self.tasklets))
279 def Exec(self, feedback_fn):
282 This method should implement the actual work. It should raise
283 errors.OpExecError for failures that are somewhat dealt with in
287 if self.tasklets is not None:
288 for (idx, tl) in enumerate(self.tasklets):
289 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
292 raise NotImplementedError
294 def BuildHooksEnv(self):
295 """Build hooks environment for this LU.
298 @return: Dictionary containing the environment that will be used for
299 running the hooks for this LU. The keys of the dict must not be prefixed
300 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
301 will extend the environment with additional variables. If no environment
302 should be defined, an empty dictionary should be returned (not C{None}).
303 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
307 raise NotImplementedError
309 def BuildHooksNodes(self):
310 """Build list of nodes to run LU's hooks.
312 @rtype: tuple; (list, list)
313 @return: Tuple containing a list of node names on which the hook
314 should run before the execution and a list of node names on which the
315 hook should run after the execution. No nodes should be returned as an
316 empty list (and not None).
317 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
321 raise NotImplementedError
323 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
324 """Notify the LU about the results of its hooks.
326 This method is called every time a hooks phase is executed, and notifies
327 the Logical Unit about the hooks' result. The LU can then use it to alter
328 its result based on the hooks. By default the method does nothing and the
329 previous result is passed back unchanged but any LU can define it if it
330 wants to use the local cluster hook-scripts somehow.
332 @param phase: one of L{constants.HOOKS_PHASE_POST} or
333 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
334 @param hook_results: the results of the multi-node hooks rpc call
335 @param feedback_fn: function used send feedback back to the caller
336 @param lu_result: the previous Exec result this LU had, or None
338 @return: the new Exec result, based on the previous result
342 # API must be kept, thus we ignore the unused argument and could
343 # be a function warnings
344 # pylint: disable=W0613,R0201
347 def _ExpandAndLockInstance(self):
348 """Helper function to expand and lock an instance.
350 Many LUs that work on an instance take its name in self.op.instance_name
351 and need to expand it and then declare the expanded name for locking. This
352 function does it, and then updates self.op.instance_name to the expanded
353 name. It also initializes needed_locks as a dict, if this hasn't been done
357 if self.needed_locks is None:
358 self.needed_locks = {}
360 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
361 "_ExpandAndLockInstance called with instance-level locks set"
362 self.op.instance_name = _ExpandInstanceName(self.cfg,
363 self.op.instance_name)
364 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
366 def _LockInstancesNodes(self, primary_only=False,
367 level=locking.LEVEL_NODE):
368 """Helper function to declare instances' nodes for locking.
370 This function should be called after locking one or more instances to lock
371 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
372 with all primary or secondary nodes for instances already locked and
373 present in self.needed_locks[locking.LEVEL_INSTANCE].
375 It should be called from DeclareLocks, and for safety only works if
376 self.recalculate_locks[locking.LEVEL_NODE] is set.
378 In the future it may grow parameters to just lock some instance's nodes, or
379 to just lock primaries or secondary nodes, if needed.
381 If should be called in DeclareLocks in a way similar to::
383 if level == locking.LEVEL_NODE:
384 self._LockInstancesNodes()
386 @type primary_only: boolean
387 @param primary_only: only lock primary nodes of locked instances
388 @param level: Which lock level to use for locking nodes
391 assert level in self.recalculate_locks, \
392 "_LockInstancesNodes helper function called with no nodes to recalculate"
394 # TODO: check if we're really been called with the instance locks held
396 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
397 # future we might want to have different behaviors depending on the value
398 # of self.recalculate_locks[locking.LEVEL_NODE]
400 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
401 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
402 wanted_nodes.append(instance.primary_node)
404 wanted_nodes.extend(instance.secondary_nodes)
406 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
407 self.needed_locks[level] = wanted_nodes
408 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
409 self.needed_locks[level].extend(wanted_nodes)
411 raise errors.ProgrammerError("Unknown recalculation mode")
413 del self.recalculate_locks[level]
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
442 """Tasklet base class.
444 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
445 they can mix legacy code with tasklets. Locking needs to be done in the LU,
446 tasklets know nothing about locks.
448 Subclasses must follow these rules:
449 - Implement CheckPrereq
453 def __init__(self, lu):
460 def CheckPrereq(self):
461 """Check prerequisites for this tasklets.
463 This method should check whether the prerequisites for the execution of
464 this tasklet are fulfilled. It can do internode communication, but it
465 should be idempotent - no cluster or system changes are allowed.
467 The method should raise errors.OpPrereqError in case something is not
468 fulfilled. Its return value is ignored.
470 This method should also update all parameters to their canonical form if it
471 hasn't been done before.
476 def Exec(self, feedback_fn):
477 """Execute the tasklet.
479 This method should implement the actual work. It should raise
480 errors.OpExecError for failures that are somewhat dealt with in code, or
484 raise NotImplementedError
488 """Base for query utility classes.
491 #: Attribute holding field definitions
497 def __init__(self, qfilter, fields, use_locking):
498 """Initializes this class.
501 self.use_locking = use_locking
503 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
504 namefield=self.SORT_FIELD)
505 self.requested_data = self.query.RequestedData()
506 self.names = self.query.RequestedNames()
508 # Sort only if no names were requested
509 self.sort_by_name = not self.names
511 self.do_locking = None
514 def _GetNames(self, lu, all_names, lock_level):
515 """Helper function to determine names asked for in the query.
519 names = lu.owned_locks(lock_level)
523 if self.wanted == locking.ALL_SET:
524 assert not self.names
525 # caller didn't specify names, so ordering is not important
526 return utils.NiceSort(names)
528 # caller specified names and we must keep the same order
530 assert not self.do_locking or lu.glm.is_owned(lock_level)
532 missing = set(self.wanted).difference(names)
534 raise errors.OpExecError("Some items were removed before retrieving"
535 " their data: %s" % missing)
537 # Return expanded names
540 def ExpandNames(self, lu):
541 """Expand names for this query.
543 See L{LogicalUnit.ExpandNames}.
546 raise NotImplementedError()
548 def DeclareLocks(self, lu, level):
549 """Declare locks for this query.
551 See L{LogicalUnit.DeclareLocks}.
554 raise NotImplementedError()
556 def _GetQueryData(self, lu):
557 """Collects all data for this query.
559 @return: Query data object
562 raise NotImplementedError()
564 def NewStyleQuery(self, lu):
565 """Collect data and execute query.
568 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
569 sort_by_name=self.sort_by_name)
571 def OldStyleQuery(self, lu):
572 """Collect data and execute query.
575 return self.query.OldStyleQuery(self._GetQueryData(lu),
576 sort_by_name=self.sort_by_name)
580 """Returns a dict declaring all lock levels shared.
583 return dict.fromkeys(locking.LEVELS, 1)
def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of its children!)
  @param cfg: The config object
  @returns The annotated disk copies
  @see L{rpc.AnnotateDiskParams}

  """
  # Look up the instance's disk parameters once and delegate the actual
  # annotation to the rpc helper
  disk_params = cfg.GetInstanceDiskParams(instance)
  return rpc.AnnotateDiskParams(instance.disk_template, devs, disk_params)
def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
                              cur_group_uuid):
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  """
  for (name, instance) in instances.items():
    # All of the instance's nodes must still be held
    assert owned_nodes.issuperset(instance.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    # If a specific group was requested, the instance must live in it
    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
                             primary_only=False):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  @type primary_only: boolean
  @param primary_only: Whether to check node groups for only the primary node
  @rtype: frozenset
  @return: The node groups the instance currently belongs to
  @raise errors.OpPrereqError: if the instance's node groups are no longer
      a subset of the owned groups (locks were acquired against a stale view)

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)

  if not owned_groups.issuperset(inst_groups):
    # FIX: the original message duplicated the word "are" ("current groups
    # are are '%s'")
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)

  # The set of instances in the group must be exactly the set we locked
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances
def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  nd_params = cfg.GetNdParams(node)
  return nd_params[constants.ND_OOB_PROGRAM]
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  # No explicit list given: operate on every node in the cluster
  if not nodes:
    return utils.NiceSort(lu.cfg.GetNodeList())

  return [_ExpandNodeName(lu.cfg, name) for name in nodes]
def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    return [_ExpandInstanceName(lu.cfg, name) for name in instances]

  # No names given: all instances, nicely sorted
  return utils.NiceSort(lu.cfg.GetInstanceList())
def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  # FIX: the @type/@param field contents for use_default and use_none were
  # swapped in the original docstring ("@param use_default: boolean")
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      # Value marks the parameter for deletion; tolerate it being absent
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
764 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
765 """Return the new version of a instance policy.
767 @param group_policy: whether this policy applies to a group and thus
768 we should support removal of policy entries
771 use_none = use_default = group_policy
772 ipolicy = copy.deepcopy(old_ipolicy)
773 for key, value in new_ipolicy.items():
774 if key not in constants.IPOLICY_ALL_KEYS:
775 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
777 if key in constants.IPOLICY_ISPECS:
778 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
779 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
781 use_default=use_default)
783 if (not value or value == [constants.VALUE_DEFAULT] or
784 value == constants.VALUE_DEFAULT):
788 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
789 " on the cluster'" % key,
792 if key in constants.IPOLICY_PARAMETERS:
793 # FIXME: we assume all such values are float
795 ipolicy[key] = float(value)
796 except (TypeError, ValueError), err:
797 raise errors.OpPrereqError("Invalid value for attribute"
798 " '%s': '%s', error: %s" %
799 (key, value, err), errors.ECODE_INVAL)
801 # FIXME: we assume all others are lists; this should be redone
803 ipolicy[key] = list(value)
805 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
806 except errors.ConfigurationError, err:
807 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values

  """
  def _MergeOne(old, value):
    # Merge one sub-dict and verify the resulting parameter types
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, _MergeOne(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret
def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  # Nothing requested by the opcode: keep whatever the object has
  if not op_input:
    return None

  invalid_hvs = set(op_input) - constants.HYPER_TYPES
  if invalid_hvs:
    raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                               " %s" % utils.CommaJoin(invalid_hvs),
                               errors.ECODE_INVAL)
  if obj_input is None:
    obj_input = {}
  return _UpdateAndVerifySubDict(obj_input, op_input,
                                 constants.HVSTS_PARAMETER_TYPES)
def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  # Nothing requested by the opcode: keep whatever the object has
  if not op_input:
    return None

  invalid_dst = set(op_input) - constants.DS_VALID_TYPES
  if invalid_dst:
    raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                               utils.CommaJoin(invalid_dst),
                               errors.ECODE_INVAL)
  type_check = constants.DSS_PARAMETER_TYPES
  if obj_input is None:
    obj_input = {}
  return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                            type_check))
              for key, value in op_input.items())
def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass
  elif should_release:
    # Partition owned locks into those to release and those to keep
    release = [name for name in owned if should_release(name)]
    retain = [name for name in owned if not should_release(name)]

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything at this level
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
927 def _MapInstanceDisksToNodes(instances):
928 """Creates a map from (node, volume) to instance name.
930 @type instances: list of L{objects.Instance}
931 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
934 return dict(((node, vol), inst.name)
935 for inst in instances
936 for (node, vols) in inst.MapLVsByNode().items()
940 def _RunPostHook(lu, node_name):
941 """Runs the post-hook for an opcode on a single node.
944 hm = lu.proc.BuildHooksManager(lu)
946 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
947 except Exception, err: # pylint: disable=W0703
948 lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  # Build the union of all known fields, then look for unknown selections
  all_fields = utils.FieldSet()
  all_fields.Extend(static)
  all_fields.Extend(dynamic)

  delta = all_fields.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if not used_globals:
    return
  msg = ("The following hypervisor parameters are global and cannot"
         " be customized at instance level, please modify them at"
         " cluster level: %s" % utils.CommaJoin(used_globals))
  raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  node_info = lu.cfg.GetNodeInfo(node)
  if node_info.offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  node_info = lu.cfg.GetNodeInfo(node)
  if node_info.drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)
def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  node_info = lu.cfg.GetNodeInfo(node)
  if not node_info.vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  os_result = lu.rpc.call_os_get(node, os_name)
  os_result.Raise("OS '%s' not in supported OS list for node %s" %
                  (os_name, node),
                  prereq=True, ecode=errors.ECODE_INVAL)
  # Unless variant errors are explicitly ignored, verify the variant too
  if not force_variant:
    _CheckOSVariant(os_result.payload, os_name)
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if result.payload:
    return
  msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
         " please fix and re-run this command" % secondary_ip)
  # Error class depends on the phase the caller is in
  if prereq:
    raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
  raise errors.OpExecError(msg)
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  # NOTE(review): second argument reconstructed as strict=True — confirm
  return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)
def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  # If "up" is not an accepted state, also verify the instance is not
  # actually running on its primary node
  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    if not lu.cfg.GetNodeInfo(pnode).offline:
      running = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
      running.Raise("Can't contact node %s for instance information" % pnode,
                    prereq=True, ecode=errors.ECODE_ENVIRON)
      if instance.name in running.payload:
        raise errors.OpPrereqError("Instance %s is running, %s" %
                                   (instance.name, msg), errors.ECODE_STATE)
    else:
      lu.LogWarning("Primary node offline, ignoring check that instance"
                    " is down")
def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
      not just 'disk')
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or element not meeting the criteria

  """
  # None / "auto" values are never range-checked
  if value in [None, constants.VALUE_AUTO]:
    return None
  # missing policy entries default to the value itself, i.e. always pass
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    if qualifier:
      fqn = "%s/%s" % (name, qualifier)
    else:
      fqn = name
    return ("%s value %s is not in range [%s, %s]" %
            (fqn, value, min_v, max_v))
  return None
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list of no violations are found

  """
  assert disk_count == len(disk_sizes)

  # each entry is (spec name, qualifier for the message, actual value)
  test_settings = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]

  # _compute_fn returns None when in range; filter keeps only the messages
  return filter(None,
                (_compute_fn(name, qualifier, ipolicy, value)
                 for (name, qualifier, value) in test_settings))
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  # pull the relevant figures out of the backend parameters and disk/nic lists
  be = instance.beparams
  sizes = [d.size for d in instance.disks]
  return _compute_fn(ipolicy,
                     be.get(constants.BE_MAXMEM, None),
                     be.get(constants.BE_VCPUS, None),
                     len(instance.disks),
                     len(instance.nics),
                     sizes,
                     be.get(constants.BE_SPINDLE_USE, None))
def _ComputeIPolicyInstanceSpecViolation(
  ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  # missing counts default to 0, missing sizes to an empty list
  get = instance_spec.get
  return _compute_fn(ipolicy,
                     get(constants.ISPEC_MEM_SIZE, None),
                     get(constants.ISPEC_CPU_COUNT, None),
                     get(constants.ISPEC_DISK_COUNT, 0),
                     get(constants.ISPEC_NIC_COUNT, 0),
                     get(constants.ISPEC_DISK_SIZE, []),
                     get(constants.ISPEC_SPINDLE_USE, None))
def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  # moving within the same group can never introduce a policy violation
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)
def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    # with ignore=True the violation is only logged, not fatal
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violates the new ipolicy but
      did not violate the old one

  """
  # only report *new* offenders: those violating the new policy that were
  # already clean under the old one
  new_offenders = _ComputeViolatingInstances(new_ipolicy, instances)
  old_offenders = _ComputeViolatingInstances(old_ipolicy, instances)
  return new_offenders - old_offenders
1277 def _ExpandItemName(fn, name, kind):
1278 """Expand an item name.
1280 @param fn: the function to use for expansion
1281 @param name: requested item name
1282 @param kind: text description ('Node' or 'Instance')
1283 @return: the resolved (full) name
1284 @raise errors.OpPrereqError: if the item is not found
1287 full_name = fn(name)
1288 if full_name is None:
1289 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
def _ExpandNodeName(cfg, name):
  """Resolve a (possibly shortened) node name via L{_ExpandItemName}."""
  expander = cfg.ExpandNodeName
  return _ExpandItemName(expander, name, "Node")
def _ExpandInstanceName(cfg, name):
  """Resolve a (possibly shortened) instance name via L{_ExpandItemName}."""
  expander = cfg.ExpandInstanceName
  return _ExpandItemName(expander, name, "Instance")
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }
  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        # for bridged NICs the link is the bridge name
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    # fill in cluster defaults for mode/link
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  @param lu: the LU on whose behalf the adjustment happens
  @param exceptions: list of node names to be excluded from the
      candidate-pool computation

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    # promoted nodes need a config redistribution
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))
1479 def _DecideSelfPromotion(lu, exceptions=None):
1480 """Decide whether I should promote myself as a master candidate.
1483 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1484 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1485 # the new node will increase mc_max with one, so:
1486 mc_should = min(mc_should + 1, cp_size)
1487 return mc_now < mc_should
def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances who violates given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: object.Instance
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  offenders = set()
  for inst in instances:
    if _ComputeIPolicyInstanceViolation(ipolicy, inst):
      offenders.add(inst.name)
  return frozenset(offenders)
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  @raise errors.OpPrereqError: if a bridge is missing on the target node

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  # only make the RPC call if there is at least one bridged NIC to check
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  @param node: node to check on; defaults to the instance's primary node

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    # the OS declares no variants: passing one is an error, none is fine
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1550 def _GetNodeInstancesInner(cfg, fn):
1551 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  def _on_node(inst):
    return node_name in inst.all_nodes
  return _GetNodeInstancesInner(cfg, _on_node)
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  def _is_primary(inst):
    return inst.primary_node == node_name
  return _GetNodeInstancesInner(cfg, _is_primary)
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  def _is_secondary(inst):
    return node_name in inst.secondary_nodes
  return _GetNodeInstancesInner(cfg, _is_secondary)
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  # all other storage types take no extra arguments
  return []
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  """Returns the indices of an instance's faulty disks on a node.

  @param prereq: whether RPC failures raise OpPrereqError or OpExecError
  @return: list of disk indices whose local disk status is faulty

  """
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
                                                                instance))
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty
1608 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1609 """Check the sanity of iallocator and node arguments and use the
1610 cluster-wide iallocator if appropriate.
1612 Check that at most one of (iallocator, node) is specified. If none is
1613 specified, then the LU's opcode's iallocator slot is filled with the
1614 cluster-wide default iallocator.
1616 @type iallocator_slot: string
1617 @param iallocator_slot: the name of the opcode iallocator slot
1618 @type node_slot: string
1619 @param node_slot: the name of the opcode target node slot
1622 node = getattr(lu.op, node_slot, None)
1623 ialloc = getattr(lu.op, iallocator_slot, None)
1625 if node is not None and ialloc is not None:
1626 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1628 elif node is None and ialloc is None:
1629 default_iallocator = lu.cfg.GetDefaultIAllocator()
1630 if default_iallocator:
1631 setattr(lu.op, iallocator_slot, default_iallocator)
1633 raise errors.OpPrereqError("No iallocator or node given and no"
1634 " cluster-wide default iallocator found;"
1635 " please specify either an iallocator or a"
1636 " node, or set a cluster-wide default"
1637 " iallocator", errors.ECODE_INVAL)
1640 def _GetDefaultIAllocator(cfg, ialloc):
1641 """Decides on which iallocator to use.
1643 @type cfg: L{config.ConfigWriter}
1644 @param cfg: Cluster configuration object
1645 @type ialloc: string or None
1646 @param ialloc: Iallocator specified in opcode
1648 @return: Iallocator name
1652 # Use default iallocator
1653 ialloc = cfg.GetDefaultIAllocator()
1656 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1657 " opcode nor as a cluster-wide default",
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    # NOTE(review): this copy is missing the method docstring and the
    # enclosing "return { ... }" around the dict entry below -- restore
    # from upstream before use
    "OP_TARGET": self.cfg.GetClusterName(),

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # no pre-hooks; post-hooks run on the master node only
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    # NOTE(review): the method body appears to have been dropped from this
    # copy -- restore from upstream
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    # NOTE(review): missing docstring and the enclosing "return { ... }"
    # in this copy -- restore from upstream
    "OP_TARGET": self.cfg.GetClusterName(),

  def BuildHooksNodes(self):
    """Build hooks nodes.

    # NOTE(review): "return ([], [])" body appears missing in this copy

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    # only the master node itself may remain
    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
    # NOTE(review): the "errors.ECODE_INVAL)" continuation line and the
    # "if instancelist:" guard appear missing in this copy
    instancelist = self.cfg.GetInstanceList()
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
    # NOTE(review): the call continuation, "msg = result.fail_msg" and the
    # "if msg:" guard appear missing in this copy
    self.LogWarning("Error disabling the master IP address: %s",
    return master_params.name
1752 def _VerifyCertificate(filename):
1753 """Verifies a certificate for L{LUClusterVerifyConfig}.
1755 @type filename: string
1756 @param filename: Path to PEM file
1760 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1761 utils.ReadFile(filename))
1762 except Exception, err: # pylint: disable=W0703
1763 return (LUClusterVerifyConfig.ETYPE_ERROR,
1764 "Failed to load X509 certificate %s: %s" % (filename, err))
1767 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1768 constants.SSL_CERT_EXPIRATION_ERROR)
1771 fnamemsg = "While verifying %s: %s" % (filename, msg)
1776 return (None, fnamemsg)
1777 elif errcode == utils.CERT_WARNING:
1778 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1779 elif errcode == utils.CERT_ERROR:
1780 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1782 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1785 def _GetAllHypervisorParameters(cluster, instances):
1786 """Compute the set of all hypervisor parameters.
1788 @type cluster: L{objects.Cluster}
1789 @param cluster: the cluster object
1790 @param instances: list of L{objects.Instance}
1791 @param instances: additional instances from which to obtain parameters
1792 @rtype: list of (origin, hypervisor, parameters)
1793 @return: a list with all parameters found, indicating the hypervisor they
1794 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1799 for hv_name in cluster.enabled_hypervisors:
1800 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1802 for os_name, os_hvp in cluster.os_hvp.items():
1803 for hv_name, hv_params in os_hvp.items():
1805 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1806 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1808 # TODO: collapse identical parameter values in a single one
1809 for instance in instances:
1810 if instance.hvparams:
1811 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1812 cluster.FillHV(instance)))
class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """
  # key under which the message type is passed in kwargs
  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    # NOTE(review): the "if args: msg = msg % args" interpolation and the
    # item/empty-string handling lines appear missing in this copy
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
      # NOTE(review): the "else:" branch header appears missing in this copy
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    # NOTE(review): the "cond = (bool(cond)" start of this expression
    # appears missing in this copy
        or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors: # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    # NOTE(review): the "if cond:" guard appears missing in this copy
    self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  # NOTE(review): docstring terminator and "REQ_BGL = False" appear to be
  # missing from this copy

  def ExpandNames(self):
    # no locks needed; this LU only submits other jobs
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    # NOTE(review): the "jobs = []" initialization appears missing here
    if self.op.group_name:
      # verify only the requested group
      groups = [self.op.group_name]
      depends_fn = lambda: None
      # NOTE(review): "else:" branch header missing; the following runs for
      # the whole-cluster case
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      # NOTE(review): the "jobs.append([" line appears missing here
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    # NOTE(review): the "jobs.extend(" line appears missing here
      [opcodes.OpClusterVerifyGroup(group_name=group,
                                    ignore_errors=self.op.ignore_errors,
                                    depends=depends_fn())]
      for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      # NOTE(review): the "try:" header appears missing here; skip_checks
      # only exists on OpClusterVerifyGroup
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  # NOTE(review): "REQ_BGL = False" appears to be missing from this copy

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
      # NOTE(review): the "(hv_name, item))" continuation and the "try:"
      # header appear missing in this copy
      hv_class = hypervisor.GetHypervisor(hv_name)
      utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
      hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # verification touches everything, but only in shared mode
    self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
    self.share_locks = _ShareAll()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Retrieve all information
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    """
    # self.bad tracking and feedback_fn are required by the _VerifyErrors
    # mix-in
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in pathutils.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    # NOTE(review): the "pretty_dangling = [" list construction header and
    # format-string line appear missing in this copy
                          utils.CommaJoin(dangling_instances.get(node.name,
      for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
    # NOTE(review): the "None," argument line appears missing here
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    # NOTE(review): "return not self.bad" appears missing in this copy
2015 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2016 """Verifies the status of a node group.
2019 HPATH = "cluster-verify"
2020 HTYPE = constants.HTYPE_CLUSTER
2023 _HOOKS_INDENT_RE = re.compile("^", re.M)
  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successfull (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      # NOTE(review): the initializations of name, volumes, instances,
      # pinst, sinst, sbp, mfree, dfree, ghost and oslist documented above
      # appear to have been dropped from this copy -- restore from upstream
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.os_fail = False
  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    # NOTE(review): the "inst_names = \" assignment header appears missing
    # in this copy
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      # NOTE(review): the dict-closing "}" line appears missing in this copy

    self.share_locks = _ShareAll()
  def DeclareLocks(self, level):
    # Only the node level needs extra lock computation here
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes
  def CheckPrereq(self):
    # the group lock must have been acquired in ExpandNames
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_nodes = set(self.group_info.members)
    # NOTE(review): the "group_instances = \" assignment header appears
    # missing in this copy
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)

    # NOTE(review): the "unlocked_nodes = \" header appears missing here
      group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
      group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    # NOTE(review): the "if unlocked_nodes:" guard appears missing here
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes),
    # NOTE(review): the "errors.ECODE_STATE)" continuation appears missing

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances),
    # NOTE(review): the "errors.ECODE_STATE)" continuation appears missing

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        for nname in inst.all_nodes:
          if self.all_node_info[nname].group != self.group_uuid:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
      extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("Missing node locks for LV check: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes),
    # NOTE(review): the "errors.ECODE_STATE)" continuation appears missing
    self.extra_lv_nodes = list(extra_lv_nodes)
# Sanity-check the node_verify RPC result for one node: result shape, protocol
# and release version, per-hypervisor verification output and node setup.
2165 def _VerifyNode(self, ninfo, nresult):
2166 """Perform some basic validation on data returned from a node.
2168 - check the result data structure is well formed and has all the
2170 - check ganeti version
2172 @type ninfo: L{objects.Node}
2173 @param ninfo: the node to check
2174 @param nresult: the results from the node
2176 @return: whether overall this call was successful (and we can expect
2177 reasonable values in the response)
2181 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2183 # main result, nresult should be a non-empty dict
2184 test = not nresult or not isinstance(nresult, dict)
2185 _ErrorIf(test, constants.CV_ENODERPC, node,
2186 "unable to verify node: no data returned")
# NOTE(review): "if test: return False" guards appear elided from this
# excerpt (numbering gaps after the _ErrorIf calls) -- verify upstream.
2190 # compares ganeti version
2191 local_version = constants.PROTOCOL_VERSION
2192 remote_version = nresult.get("version", None)
2193 test = not (remote_version and
2194 isinstance(remote_version, (list, tuple)) and
2195 len(remote_version) == 2)
2196 _ErrorIf(test, constants.CV_ENODERPC, node,
2197 "connection to node returned invalid data")
# Protocol version mismatch is fatal for this node's verification ...
2201 test = local_version != remote_version[0]
2202 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2203 "incompatible protocol versions: master %s,"
2204 " node %s", local_version, remote_version[0])
2208 # node seems compatible, we can actually try to look into its results
2210 # full package version
# ... whereas a differing full release version is only a warning.
2211 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2212 constants.CV_ENODEVERSION, node,
2213 "software version mismatch: master %s, node %s",
2214 constants.RELEASE_VERSION, remote_version[1],
2215 code=self.ETYPE_WARNING)
2217 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2218 if ninfo.vm_capable and isinstance(hyp_result, dict):
2219 for hv_name, hv_result in hyp_result.iteritems():
# A non-None per-hypervisor result is the error message from the node.
2220 test = hv_result is not None
2221 _ErrorIf(test, constants.CV_ENODEHV, node,
2222 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2224 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2225 if ninfo.vm_capable and isinstance(hvp_result, list):
2226 for item, hv_name, hv_result in hvp_result:
2227 _ErrorIf(True, constants.CV_ENODEHV, node,
2228 "hypervisor %s parameter verify failure (source %s): %s",
2229 hv_name, item, hv_result)
2231 test = nresult.get(constants.NV_NODESETUP,
2232 ["Missing NODESETUP results"])
2233 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
# Compare the node's reported clock against the RPC start/end window and flag
# nodes whose clock diverges by more than NODE_MAX_CLOCK_SKEW.
2238 def _VerifyNodeTime(self, ninfo, nresult,
2239 nvinfo_starttime, nvinfo_endtime):
2240 """Check the node time.
2242 @type ninfo: L{objects.Node}
2243 @param ninfo: the node to check
2244 @param nresult: the remote results for the node
2245 @param nvinfo_starttime: the start time of the RPC call
2246 @param nvinfo_endtime: the end time of the RPC call
2250 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2252 ntime = nresult.get(constants.NV_TIME, None)
# MergeTime rejects malformed (seconds, microseconds) tuples.
2254 ntime_merged = utils.MergeTime(ntime)
2255 except (ValueError, TypeError):
2256 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
# Only a skew beyond the RPC-duration window counts as divergence.
2259 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2260 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2261 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2262 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2266 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2267 "Node time diverges by at least %s from master node time",
# Validate the node's volume-group and physical-volume reports: VG presence
# and minimum size, plus PV names usable by lvcreate.
2270 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2271 """Check the node LVM results.
2273 @type ninfo: L{objects.Node}
2274 @param ninfo: the node to check
2275 @param nresult: the remote results for the node
2276 @param vg_name: the configured VG name
2283 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2285 # checks vg existence and size > 20G
2286 vglist = nresult.get(constants.NV_VGLIST, None)
# NOTE(review): the "test = ..." assignment for this check is elided in this
# excerpt (numbering gap before 2288) -- verify against the full source.
2288 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
# CheckVolumeGroupSize returns an error string on failure, None when OK.
2290 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2291 constants.MIN_VG_SIZE)
2292 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2295 pvlist = nresult.get(constants.NV_PVLIST, None)
2296 test = pvlist is None
2297 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2299 # check that ':' is not present in PV names, since it's a
2300 # special character for lvcreate (denotes the range of PEs to
2302 for _, pvname, owner_vg in pvlist:
2303 test = ":" in pvname
2304 _ErrorIf(test, constants.CV_ENODELVM, node,
2305 "Invalid character ':' in PV '%s' of VG '%s'",
# Check that every bridge the node was asked to verify exists; the node
# returns the list of bridges it found missing.
2308 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2309 """Check the node bridges.
2311 @type ninfo: L{objects.Node}
2312 @param ninfo: the node to check
2313 @param nresult: the remote results for the node
2314 @param bridges: the expected list of bridges
2321 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2323 missing = nresult.get(constants.NV_BRIDGES, None)
2324 test = not isinstance(missing, list)
2325 _ErrorIf(test, constants.CV_ENODENET, node,
2326 "did not return valid bridge information")
# An empty list means all expected bridges are present.
2328 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2329 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
# Report user/hook scripts that the node found missing or non-executable.
2331 def _VerifyNodeUserScripts(self, ninfo, nresult):
2332 """Check the results of user scripts presence and executability on the node
2334 @type ninfo: L{objects.Node}
2335 @param ninfo: the node to check
2336 @param nresult: the remote results for the node
2341 test = not constants.NV_USERSCRIPTS in nresult
2342 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2343 "did not return user scripts information")
# A non-empty list is the set of broken script paths reported by the node.
2345 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2347 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2348 "user scripts not present or not executable: %s" %
2349 utils.CommaJoin(sorted(broken_scripts)))
# Check the node's connectivity results: SSH to other nodes, TCP node-to-node
# tests, and reachability of the master IP.
2351 def _VerifyNodeNetwork(self, ninfo, nresult):
2352 """Check the node network connectivity results.
2354 @type ninfo: L{objects.Node}
2355 @param ninfo: the node to check
2356 @param nresult: the remote results for the node
2360 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2362 test = constants.NV_NODELIST not in nresult
2363 _ErrorIf(test, constants.CV_ENODESSH, node,
2364 "node hasn't returned node ssh connectivity data")
# Non-empty NV_NODELIST maps peer node name -> SSH failure message.
2366 if nresult[constants.NV_NODELIST]:
2367 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2368 _ErrorIf(True, constants.CV_ENODESSH, node,
2369 "ssh communication with node '%s': %s", a_node, a_msg)
2371 test = constants.NV_NODENETTEST not in nresult
2372 _ErrorIf(test, constants.CV_ENODENET, node,
2373 "node hasn't returned node tcp connectivity data")
2375 if nresult[constants.NV_NODENETTEST]:
2376 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
# NOTE(review): the "for anode in nlist:" line appears elided in this
# excerpt (numbering gap before 2378) -- verify against the full source.
2378 _ErrorIf(True, constants.CV_ENODENET, node,
2379 "tcp communication with node '%s': %s",
2380 anode, nresult[constants.NV_NODENETTEST][anode])
2382 test = constants.NV_MASTERIP not in nresult
2383 _ErrorIf(test, constants.CV_ENODENET, node,
2384 "node hasn't returned node master IP reachability data")
# On the master node itself an unreachable master IP usually means it is
# not configured at all, hence the more specific message.
2386 if not nresult[constants.NV_MASTERIP]:
2387 if node == self.master_node:
2388 msg = "the master node cannot reach the master IP (not configured?)"
2390 msg = "cannot reach the master IP"
2391 _ErrorIf(True, constants.CV_ENODENET, node, msg)
# Per-instance verification: instance-policy compliance, presence of its LVs
# on each node, admin-up instances actually running on their primary node,
# and per-disk mirror status.
2393 def _VerifyInstance(self, instance, instanceconfig, node_image,
2395 """Verify an instance.
2397 This function checks to see if the required block devices are
2398 available on the instance's node.
2401 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2402 node_current = instanceconfig.primary_node
2404 node_vol_should = {}
2405 instanceconfig.MapLVsByNode(node_vol_should)
2407 cluster = self.cfg.GetClusterInfo()
2408 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
# _ComputeIPolicyInstanceViolation returns a list of violation messages.
2410 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2411 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2413 for node in node_vol_should:
2414 n_img = node_image[node]
2415 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2416 # ignore missing volumes on offline or broken nodes
2418 for volume in node_vol_should[node]:
2419 test = volume not in n_img.volumes
2420 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2421 "volume %s missing on node %s", volume, node)
2423 if instanceconfig.admin_state == constants.ADMINST_UP:
2424 pri_img = node_image[node_current]
2425 test = instance not in pri_img.instances and not pri_img.offline
2426 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2427 "instance not running on its primary node %s",
# Flatten the per-node disk status into (node, success, status, disk index).
2430 diskdata = [(nname, success, status, idx)
2431 for (nname, disks) in diskstatus.items()
2432 for idx, (success, status) in enumerate(disks)]
2434 for nname, success, bdev_status, idx in diskdata:
2435 # the 'ghost node' construction in Exec() ensures that we have a
2437 snode = node_image[nname]
2438 bad_snode = snode.ghost or snode.offline
2439 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2440 not success and not bad_snode,
2441 constants.CV_EINSTANCEFAULTYDISK, instance,
2442 "couldn't retrieve status for disk/%s on %s: %s",
2443 idx, nname, bdev_status)
2444 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2445 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2446 constants.CV_EINSTANCEFAULTYDISK, instance,
2447 "disk/%s on %s is faulty", idx, nname)
# Flag LVs that exist on healthy in-group nodes but belong to no known
# instance and match no reserved-name pattern.
2449 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2450 """Verify if there are any unknown volumes in the cluster.
2452 The .os, .swap and backup volumes are ignored. All other volumes are
2453 reported as unknown.
2455 @type reserved: L{ganeti.utils.FieldSet}
2456 @param reserved: a FieldSet of reserved volume names
2459 for node, n_img in node_image.items():
2460 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2461 self.all_node_info[node].group != self.group_uuid):
2462 # skip non-healthy nodes
2464 for volume in n_img.volumes:
2465 test = ((node not in node_vol_should or
2466 volume not in node_vol_should[node]) and
2467 not reserved.Matches(volume))
2468 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2469 "volume %s is unknown", volume)
# N+1 check: for every node, sum the memory of auto-balanced instances that
# would fail over to it from each primary and compare against its free memory.
2471 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2472 """Verify N+1 Memory Resilience.
2474 Check that if one single node dies we can still start all the
2475 instances it was primary for.
2478 cluster_info = self.cfg.GetClusterInfo()
2479 for node, n_img in node_image.items():
2480 # This code checks that every node which is now listed as
2481 # secondary has enough memory to host all instances it is
2482 # supposed to should a single other node in the cluster fail.
2483 # FIXME: not ready for failover to an arbitrary node
2484 # FIXME: does not support file-backed instances
2485 # WARNING: we currently take into account down instances as well
2486 # as up ones, considering that even if they're down someone
2487 # might want to start them even in the event of a node failure.
2488 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2489 # we're skipping nodes marked offline and nodes in other groups from
2490 # the N+1 warning, since most likely we don't have good memory
2491 # information from them; we already list instances living on such
2492 # nodes, and that's enough warning
2494 #TODO(dynmem): also consider ballooning out other instances
# n_img.sbp maps primary node -> instances secondary on this node.
2495 for prinode, instances in n_img.sbp.items():
2497 for instance in instances:
2498 bep = cluster_info.FillBE(instance_cfg[instance])
2499 if bep[constants.BE_AUTO_BALANCE]:
2500 needed_mem += bep[constants.BE_MINMEM]
2501 test = n_img.mfree < needed_mem
2502 self._ErrorIf(test, constants.CV_ENODEN1, node,
2503 "not enough memory to accomodate instance failovers"
2504 " should node %s fail (%dMiB needed, %dMiB available)",
2505 prinode, needed_mem, n_img.mfree)
# Cross-node file consistency check: builds the expected node set per file,
# then compares the checksums every node reported, flagging missing files,
# unexpected files and checksum variants.
2508 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2509 (files_all, files_opt, files_mc, files_vm)):
2510 """Verifies file checksums collected from all nodes.
2512 @param errorif: Callback for reporting errors
2513 @param nodeinfo: List of L{objects.Node} objects
2514 @param master_node: Name of master node
2515 @param all_nvinfo: RPC results
2518 # Define functions determining which nodes to consider for a file
# NOTE(review): the "files2nodefn = [" opening (with the files_all entry)
# appears elided in this excerpt -- verify against the full source.
2521 (files_mc, lambda node: (node.master_candidate or
2522 node.name == master_node)),
2523 (files_vm, lambda node: node.vm_capable),
2526 # Build mapping from filename to list of nodes which should have the file
2528 for (files, fn) in files2nodefn:
2530 filenodes = nodeinfo
2532 filenodes = filter(fn, nodeinfo)
2533 nodefiles.update((filename,
2534 frozenset(map(operator.attrgetter("name"), filenodes)))
2535 for filename in files)
2537 assert set(nodefiles) == (files_all | files_mc | files_vm)
2539 fileinfo = dict((filename, {}) for filename in nodefiles)
2540 ignore_nodes = set()
2542 for node in nodeinfo:
2544 ignore_nodes.add(node.name)
2547 nresult = all_nvinfo[node.name]
2549 if nresult.fail_msg or not nresult.payload:
2552 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2554 test = not (node_files and isinstance(node_files, dict))
2555 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2556 "Node did not return file checksum data")
# A node with unusable data is excluded from all per-file comparisons.
2558 ignore_nodes.add(node.name)
2561 # Build per-checksum mapping from filename to nodes having it
2562 for (filename, checksum) in node_files.items():
2563 assert filename in nodefiles
2564 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2566 for (filename, checksums) in fileinfo.items():
2567 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2569 # Nodes having the file
2570 with_file = frozenset(node_name
2571 for nodes in fileinfo[filename].values()
2572 for node_name in nodes) - ignore_nodes
2574 expected_nodes = nodefiles[filename] - ignore_nodes
2576 # Nodes missing file
2577 missing_file = expected_nodes - with_file
2579 if filename in files_opt:
# Optional files may be absent, but must then be absent everywhere.
2581 errorif(missing_file and missing_file != expected_nodes,
2582 constants.CV_ECLUSTERFILECHECK, None,
2583 "File %s is optional, but it must exist on all or no"
2584 " nodes (not found on %s)",
2585 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2587 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2588 "File %s is missing from node(s) %s", filename,
2589 utils.CommaJoin(utils.NiceSort(missing_file)))
2591 # Warn if a node has a file it shouldn't
2592 unexpected = with_file - expected_nodes
2594 constants.CV_ECLUSTERFILECHECK, None,
2595 "File %s should not exist on node(s) %s",
2596 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2598 # See if there are multiple versions of the file
2599 test = len(checksums) > 1
2601 variants = ["variant %s on %s" %
2602 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2603 for (idx, (checksum, nodes)) in
2604 enumerate(sorted(checksums.items()))]
2608 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2609 "File %s found with %s different checksums (%s)",
2610 filename, len(checksums), "; ".join(variants))
# Validate the node's DRBD usermode helper and compare the in-use DRBD minors
# against the minors the cluster configuration allocated to instances.
2612 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2614 """Verifies the node DRBD status.
2616 @type ninfo: L{objects.Node}
2617 @param ninfo: the node to check
2618 @param nresult: the remote results for the node
2619 @param instanceinfo: the dict of instances
2620 @param drbd_helper: the configured DRBD usermode helper
2621 @param drbd_map: the DRBD map as returned by
2622 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2626 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2629 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2630 test = (helper_result is None)
2631 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2632 "no drbd usermode helper returned")
# helper_result is a (status, payload) pair; payload is the helper path on
# success or an error message on failure.
2634 status, payload = helper_result
2636 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2637 "drbd usermode helper check unsuccessful: %s", payload)
2638 test = status and (payload != drbd_helper)
2639 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2640 "wrong drbd usermode helper: %s", payload)
2642 # compute the DRBD minors
2644 for minor, instance in drbd_map[node].items():
2645 test = instance not in instanceinfo
2646 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2647 "ghost instance '%s' in temporary DRBD map", instance)
2648 # ghost instance should not be running, but otherwise we
2649 # don't give double warnings (both ghost instance and
2650 # unallocated minor in use)
2652 node_drbd[minor] = (instance, False)
2654 instance = instanceinfo[instance]
2655 node_drbd[minor] = (instance.name,
2656 instance.admin_state == constants.ADMINST_UP)
2658 # and now check them
2659 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2660 test = not isinstance(used_minors, (tuple, list))
2661 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2662 "cannot parse drbd status file: %s", str(used_minors))
2664 # we cannot check drbd status
# An allocated minor for a running instance must be in use on the node, and
# every in-use minor must be allocated in the configuration.
2667 for minor, (iname, must_exist) in node_drbd.items():
2668 test = minor not in used_minors and must_exist
2669 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2670 "drbd minor %d of instance %s is not active", minor, iname)
2671 for minor in used_minors:
2672 test = minor not in node_drbd
2673 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2674 "unallocated drbd minor %d is in use", minor)
# Parse the node's NV_OSLIST reply into nimg.oslist: a dict mapping OS name
# to the list of (path, status, diagnose, variants, parameters, api_versions)
# entries reported by the node.
2676 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2677 """Builds the node OS structures.
2679 @type ninfo: L{objects.Node}
2680 @param ninfo: the node to check
2681 @param nresult: the remote results for the node
2682 @param nimg: the node image object
2686 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2688 remote_os = nresult.get(constants.NV_OSLIST, None)
# Each OS entry must be a 7-element list; anything else marks the whole
# OS report as unusable.
2689 test = (not isinstance(remote_os, list) or
2690 not compat.all(isinstance(v, list) and len(v) == 7
2691 for v in remote_os))
2693 _ErrorIf(test, constants.CV_ENODEOS, node,
2694 "node hasn't returned valid OS data")
2703 for (name, os_path, status, diagnose,
2704 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2706 if name not in os_dict:
2709 # parameters is a list of lists instead of list of tuples due to
2710 # JSON lacking a real tuple type, fix it:
2711 parameters = [tuple(v) for v in parameters]
2712 os_dict[name].append((os_path, status, diagnose,
2713 set(variants), set(parameters), set(api_ver)))
2715 nimg.oslist = os_dict
# Compare this node's OS list (built by _UpdateNodeOS) against a reference
# node: per-OS validity, duplicates, and API/variant/parameter differences.
2717 def _VerifyNodeOS(self, ninfo, nimg, base):
2718 """Verifies the node OS list.
2720 @type ninfo: L{objects.Node}
2721 @param ninfo: the node to check
2722 @param nimg: the node image object
2723 @param base: the 'template' node we match against (e.g. from the master)
2727 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2729 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
# Render (key, value) parameter pairs as "key: value" strings for messages.
2731 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2732 for os_name, os_data in nimg.oslist.items():
2733 assert os_data, "Empty OS status for OS %s?!" % os_name
2734 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2735 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2736 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2737 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2738 "OS '%s' has multiple entries (first one shadows the rest): %s",
2739 os_name, utils.CommaJoin([v[0] for v in os_data]))
2740 # comparisons with the 'base' image
2741 test = os_name not in base.oslist
2742 _ErrorIf(test, constants.CV_ENODEOS, node,
2743 "Extra OS %s not present on reference node (%s)",
2747 assert base.oslist[os_name], "Base node has empty OS status?"
2748 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2750 # base OS is invalid, skipping
2752 for kind, a, b in [("API version", f_api, b_api),
2753 ("variants list", f_var, b_var),
2754 ("parameters", beautify_params(f_param),
2755 beautify_params(b_param))]:
2756 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2757 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2758 kind, os_name, base.name,
2759 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2761 # check any missing OSes
2762 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2763 _ErrorIf(missing, constants.CV_ENODEOS, node,
2764 "OSes present on reference node %s but missing on this node: %s",
2765 base.name, utils.CommaJoin(missing))
# Report out-of-band helper path problems; only meaningful on the master and
# master candidates, where the OOB helper is actually invoked.
2767 def _VerifyOob(self, ninfo, nresult):
2768 """Verifies out of band functionality of a node.
2770 @type ninfo: L{objects.Node}
2771 @param ninfo: the node to check
2772 @param nresult: the remote results for the node
2776 # We just have to verify the paths on master and/or master candidates
2777 # as the oob helper is invoked on the master
2778 if ((ninfo.master_candidate or ninfo.master_capable) and
2779 constants.NV_OOB_PATHS in nresult):
2780 for path_result in nresult[constants.NV_OOB_PATHS]:
# A non-empty path_result is the error message for that OOB path.
2781 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
# Fill nimg.volumes from the node's NV_LVLIST reply, marking nimg.lvm_fail
# pessimistically and clearing it only when valid LV data was received.
2783 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2784 """Verifies and updates the node volume data.
2786 This function will update a L{NodeImage}'s internal structures
2787 with data from the remote call.
2789 @type ninfo: L{objects.Node}
2790 @param ninfo: the node to check
2791 @param nresult: the remote results for the node
2792 @param nimg: the node image object
2793 @param vg_name: the configured VG name
2797 _ErrorIf = self._ErrorIf # pylint: disable=C0103
# Assume failure until the reply proves otherwise.
2799 nimg.lvm_fail = True
2800 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
# A string reply is an error message from the node, not LV data.
2803 elif isinstance(lvdata, basestring):
2804 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2805 utils.SafeEncode(lvdata))
2806 elif not isinstance(lvdata, dict):
2807 _ErrorIf(True, constants.CV_ENODELVM, node,
2808 "rpc call to node failed (lvlist)")
2810 nimg.volumes = lvdata
2811 nimg.lvm_fail = False
# Fill nimg.instances from the node's NV_INSTANCELIST reply, or set
# nimg.hyp_fail when the reply is not a list.
2813 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2814 """Verifies and updates the node instance list.
2816 If the listing was successful, then updates this node's instance
2817 list. Otherwise, it marks the RPC call as failed for the instance
2820 @type ninfo: L{objects.Node}
2821 @param ninfo: the node to check
2822 @param nresult: the remote results for the node
2823 @param nimg: the node image object
2826 idata = nresult.get(constants.NV_INSTANCELIST, None)
2827 test = not isinstance(idata, list)
2828 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2829 "rpc call to node failed (instancelist): %s",
2830 utils.SafeEncode(str(idata)))
2832 nimg.hyp_fail = True
2834 nimg.instances = idata
# Extract free memory (from hypervisor info) and free VG space from the
# node's reply into nimg.mfree / nimg.dfree, flagging malformed data.
2836 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2837 """Verifies and computes a node information map
2839 @type ninfo: L{objects.Node}
2840 @param ninfo: the node to check
2841 @param nresult: the remote results for the node
2842 @param nimg: the node image object
2843 @param vg_name: the configured VG name
2847 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2849 # try to read free memory (from the hypervisor)
2850 hv_info = nresult.get(constants.NV_HVINFO, None)
2851 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2852 _ErrorIf(test, constants.CV_ENODEHV, node,
2853 "rpc call to node failed (hvinfo)")
2856 nimg.mfree = int(hv_info["memory_free"])
2857 except (ValueError, TypeError):
2858 _ErrorIf(True, constants.CV_ENODERPC, node,
2859 "node returned invalid nodeinfo, check hypervisor")
2861 # FIXME: devise a free space model for file based instances as well
# vg_name is None on non-LVM clusters; skip the VG free-space check then.
2862 if vg_name is not None:
2863 test = (constants.NV_VGLIST not in nresult or
2864 vg_name not in nresult[constants.NV_VGLIST])
2865 _ErrorIf(test, constants.CV_ENODELVM, node,
2866 "node didn't return data for the volume group '%s'"
2867 " - it is either missing or broken", vg_name)
2870 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2871 except (ValueError, TypeError):
2872 _ErrorIf(True, constants.CV_ENODERPC, node,
2873 "node returned invalid LVM info, check LVM status")
# Gather per-disk mirror status for every instance on the given nodes via a
# single multi-node blockdev_getmirrorstatus RPC, normalizing failures into
# (False, message) entries and adding empty entries for diskless instances.
2875 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2876 """Gets per-disk status information for all instances.
2878 @type nodelist: list of strings
2879 @param nodelist: Node names
2880 @type node_image: dict of (name, L{objects.Node})
2881 @param node_image: Node objects
2882 @type instanceinfo: dict of (name, L{objects.Instance})
2883 @param instanceinfo: Instance objects
2884 @rtype: {instance: {node: [(success, payload)]}}
2885 @return: a dictionary of per-instance dictionaries with nodes as
2886 keys and disk information as values; the disk information is a
2887 list of tuples (success, payload)
2890 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2893 node_disks_devonly = {}
2894 diskless_instances = set()
2895 diskless = constants.DT_DISKLESS
2897 for nname in nodelist:
# Consider both primary and secondary instances of the node.
2898 node_instances = list(itertools.chain(node_image[nname].pinst,
2899 node_image[nname].sinst))
2900 diskless_instances.update(inst for inst in node_instances
2901 if instanceinfo[inst].disk_template == diskless)
2902 disks = [(inst, disk)
2903 for inst in node_instances
2904 for disk in instanceinfo[inst].disks]
2907 # No need to collect data
2910 node_disks[nname] = disks
2912 # _AnnotateDiskParams makes already copies of the disks
2914 for (inst, dev) in disks:
2915 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2916 self.cfg.SetDiskID(anno_disk, nname)
2917 devonly.append(anno_disk)
2919 node_disks_devonly[nname] = devonly
2921 assert len(node_disks) == len(node_disks_devonly)
2923 # Collect data from all nodes with disks
2924 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2927 assert len(result) == len(node_disks)
2931 for (nname, nres) in result.items():
2932 disks = node_disks[nname]
2935 # No data from this node
2936 data = len(disks) * [(False, "node offline")]
2939 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2940 "while getting disk information: %s", msg)
2942 # No data from this node
2943 data = len(disks) * [(False, msg)]
2946 for idx, i in enumerate(nres.payload):
# Accept only well-formed (success, payload) pairs from the node.
2947 if isinstance(i, (tuple, list)) and len(i) == 2:
2950 logging.warning("Invalid result from node %s, entry %d: %s",
2952 data.append((False, "Invalid result from the remote node"))
2954 for ((inst, _), status) in zip(disks, data):
2955 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2957 # Add empty entries for diskless instances.
2958 for inst in diskless_instances:
2959 assert inst not in instdisk
# Consistency check: one status per configured disk, nodes within the
# instance's node set, and every status a (success, payload) pair.
2962 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2963 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2964 compat.all(isinstance(s, (tuple, list)) and
2965 len(s) == 2 for s in statuses)
2966 for inst, nnames in instdisk.items()
2967 for nname, statuses in nnames.items())
2968 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
# Build one endless (itertools.cycle) iterator per foreign node group, each
# cycling over that group's node names sorted alphabetically; used to pick
# SSH check targets outside the group being verified.
2973 def _SshNodeSelector(group_uuid, all_nodes):
2974 """Create endless iterators for all potential SSH check hosts.
2977 nodes = [node for node in all_nodes
2978 if (node.group != group_uuid and
2980 keyfunc = operator.attrgetter("group")
2982 return map(itertools.cycle,
2983 [sorted(map(operator.attrgetter("name"), names))
2984 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
# Decide the node-to-node SSH test matrix: every online group node contacts
# one node from each other group (round-robin via _SshNodeSelector).
2988 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2989 """Choose which nodes should talk to which other nodes.
2991 We will make nodes contact all nodes in their group, and one node from
2994 @warning: This algorithm has a known issue if one node group is much
2995 smaller than others (e.g. just one node). In such a case all other
2996 nodes will talk to the single node.
2999 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3000 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3002 return (online_nodes,
3003 dict((name, sorted([i.next() for i in sel]))
3004 for name in online_nodes))
# Build the hook environment: cluster tags plus a NODE_TAGS_<name> entry for
# every node in this group.
3006 def BuildHooksEnv(self):
3009 Cluster-Verify hooks just ran in the post phase and their failure makes
3010 the output be logged in the verify output and the verification to fail.
3014 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3017 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3018 for node in self.my_node_info.values())
# Hooks run on this group's nodes only (post phase); no pre-phase nodes.
3022 def BuildHooksNodes(self):
3023 """Build hooks nodes.
3026 return ([], self.my_node_names)
3028 def Exec(self, feedback_fn):
3029 """Verify integrity of the node group, performing various test on nodes.
3032 # This method has too many local variables. pylint: disable=R0914
3033 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3035 if not self.my_node_names:
3037 feedback_fn("* Empty node group, skipping verification")
3041 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3042 verbose = self.op.verbose
3043 self._feedback_fn = feedback_fn
3045 vg_name = self.cfg.GetVGName()
3046 drbd_helper = self.cfg.GetDRBDHelper()
3047 cluster = self.cfg.GetClusterInfo()
3048 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3049 hypervisors = cluster.enabled_hypervisors
3050 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3052 i_non_redundant = [] # Non redundant instances
3053 i_non_a_balanced = [] # Non auto-balanced instances
3054 i_offline = 0 # Count of offline instances
3055 n_offline = 0 # Count of offline nodes
3056 n_drained = 0 # Count of nodes being drained
3057 node_vol_should = {}
3059 # FIXME: verify OS list
3062 filemap = _ComputeAncillaryFiles(cluster, False)
3064 # do local checksums
3065 master_node = self.master_node = self.cfg.GetMasterNode()
3066 master_ip = self.cfg.GetMasterIP()
3068 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3071 if self.cfg.GetUseExternalMipScript():
3072 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3074 node_verify_param = {
3075 constants.NV_FILELIST:
3076 utils.UniqueSequence(filename
3077 for files in filemap
3078 for filename in files),
3079 constants.NV_NODELIST:
3080 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3081 self.all_node_info.values()),
3082 constants.NV_HYPERVISOR: hypervisors,
3083 constants.NV_HVPARAMS:
3084 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3085 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3086 for node in node_data_list
3087 if not node.offline],
3088 constants.NV_INSTANCELIST: hypervisors,
3089 constants.NV_VERSION: None,
3090 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3091 constants.NV_NODESETUP: None,
3092 constants.NV_TIME: None,
3093 constants.NV_MASTERIP: (master_node, master_ip),
3094 constants.NV_OSLIST: None,
3095 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3096 constants.NV_USERSCRIPTS: user_scripts,
3099 if vg_name is not None:
3100 node_verify_param[constants.NV_VGLIST] = None
3101 node_verify_param[constants.NV_LVLIST] = vg_name
3102 node_verify_param[constants.NV_PVLIST] = [vg_name]
3103 node_verify_param[constants.NV_DRBDLIST] = None
3106 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3109 # FIXME: this needs to be changed per node-group, not cluster-wide
3111 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3112 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3113 bridges.add(default_nicpp[constants.NIC_LINK])
3114 for instance in self.my_inst_info.values():
3115 for nic in instance.nics:
3116 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3117 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3118 bridges.add(full_nic[constants.NIC_LINK])
3121 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3123 # Build our expected cluster state
3124 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3126 vm_capable=node.vm_capable))
3127 for node in node_data_list)
3131 for node in self.all_node_info.values():
3132 path = _SupportsOob(self.cfg, node)
3133 if path and path not in oob_paths:
3134 oob_paths.append(path)
3137 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3139 for instance in self.my_inst_names:
3140 inst_config = self.my_inst_info[instance]
3141 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3144 for nname in inst_config.all_nodes:
3145 if nname not in node_image:
3146 gnode = self.NodeImage(name=nname)
3147 gnode.ghost = (nname not in self.all_node_info)
3148 node_image[nname] = gnode
3150 inst_config.MapLVsByNode(node_vol_should)
3152 pnode = inst_config.primary_node
3153 node_image[pnode].pinst.append(instance)
3155 for snode in inst_config.secondary_nodes:
3156 nimg = node_image[snode]
3157 nimg.sinst.append(instance)
3158 if pnode not in nimg.sbp:
3159 nimg.sbp[pnode] = []
3160 nimg.sbp[pnode].append(instance)
3162 # At this point, we have the in-memory data structures complete,
3163 # except for the runtime information, which we'll gather next
3165 # Due to the way our RPC system works, exact response times cannot be
3166 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3167 # time before and after executing the request, we can at least have a time
3169 nvinfo_starttime = time.time()
3170 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3172 self.cfg.GetClusterName())
3173 nvinfo_endtime = time.time()
3175 if self.extra_lv_nodes and vg_name is not None:
3177 self.rpc.call_node_verify(self.extra_lv_nodes,
3178 {constants.NV_LVLIST: vg_name},
3179 self.cfg.GetClusterName())
3181 extra_lv_nvinfo = {}
3183 all_drbd_map = self.cfg.ComputeDRBDMap()
3185 feedback_fn("* Gathering disk information (%s nodes)" %
3186 len(self.my_node_names))
3187 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3190 feedback_fn("* Verifying configuration file consistency")
3192 # If not all nodes are being checked, we need to make sure the master node
3193 # and a non-checked vm_capable node are in the list.
3194 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3196 vf_nvinfo = all_nvinfo.copy()
3197 vf_node_info = list(self.my_node_info.values())
3198 additional_nodes = []
3199 if master_node not in self.my_node_info:
3200 additional_nodes.append(master_node)
3201 vf_node_info.append(self.all_node_info[master_node])
3202 # Add the first vm_capable node we find which is not included,
3203 # excluding the master node (which we already have)
3204 for node in absent_nodes:
3205 nodeinfo = self.all_node_info[node]
3206 if (nodeinfo.vm_capable and not nodeinfo.offline and
3207 node != master_node):
3208 additional_nodes.append(node)
3209 vf_node_info.append(self.all_node_info[node])
3211 key = constants.NV_FILELIST
3212 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3213 {key: node_verify_param[key]},
3214 self.cfg.GetClusterName()))
3216 vf_nvinfo = all_nvinfo
3217 vf_node_info = self.my_node_info.values()
3219 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3221 feedback_fn("* Verifying node status")
3225 for node_i in node_data_list:
3227 nimg = node_image[node]
3231 feedback_fn("* Skipping offline node %s" % (node,))
3235 if node == master_node:
3237 elif node_i.master_candidate:
3238 ntype = "master candidate"
3239 elif node_i.drained:
3245 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3247 msg = all_nvinfo[node].fail_msg
3248 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3251 nimg.rpc_fail = True
3254 nresult = all_nvinfo[node].payload
3256 nimg.call_ok = self._VerifyNode(node_i, nresult)
3257 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3258 self._VerifyNodeNetwork(node_i, nresult)
3259 self._VerifyNodeUserScripts(node_i, nresult)
3260 self._VerifyOob(node_i, nresult)
3263 self._VerifyNodeLVM(node_i, nresult, vg_name)
3264 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3267 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3268 self._UpdateNodeInstances(node_i, nresult, nimg)
3269 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3270 self._UpdateNodeOS(node_i, nresult, nimg)
3272 if not nimg.os_fail:
3273 if refos_img is None:
3275 self._VerifyNodeOS(node_i, nimg, refos_img)
3276 self._VerifyNodeBridges(node_i, nresult, bridges)
3278 # Check whether all running instancies are primary for the node. (This
3279 # can no longer be done from _VerifyInstance below, since some of the
3280 # wrong instances could be from other node groups.)
3281 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3283 for inst in non_primary_inst:
3284 test = inst in self.all_inst_info
3285 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3286 "instance should not run on node %s", node_i.name)
3287 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3288 "node is running unknown instance %s", inst)
3290 for node, result in extra_lv_nvinfo.items():
3291 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3292 node_image[node], vg_name)
3294 feedback_fn("* Verifying instance status")
3295 for instance in self.my_inst_names:
3297 feedback_fn("* Verifying instance %s" % instance)
3298 inst_config = self.my_inst_info[instance]
3299 self._VerifyInstance(instance, inst_config, node_image,
3301 inst_nodes_offline = []
3303 pnode = inst_config.primary_node
3304 pnode_img = node_image[pnode]
3305 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3306 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3307 " primary node failed", instance)
3309 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3311 constants.CV_EINSTANCEBADNODE, instance,
3312 "instance is marked as running and lives on offline node %s",
3313 inst_config.primary_node)
3315 # If the instance is non-redundant we cannot survive losing its primary
3316 # node, so we are not N+1 compliant. On the other hand we have no disk
3317 # templates with more than one secondary so that situation is not well
3319 # FIXME: does not support file-backed instances
3320 if not inst_config.secondary_nodes:
3321 i_non_redundant.append(instance)
3323 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3324 constants.CV_EINSTANCELAYOUT,
3325 instance, "instance has multiple secondary nodes: %s",
3326 utils.CommaJoin(inst_config.secondary_nodes),
3327 code=self.ETYPE_WARNING)
3329 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3330 pnode = inst_config.primary_node
3331 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3332 instance_groups = {}
3334 for node in instance_nodes:
3335 instance_groups.setdefault(self.all_node_info[node].group,
3339 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3340 # Sort so that we always list the primary node first.
3341 for group, nodes in sorted(instance_groups.items(),
3342 key=lambda (_, nodes): pnode in nodes,
3345 self._ErrorIf(len(instance_groups) > 1,
3346 constants.CV_EINSTANCESPLITGROUPS,
3347 instance, "instance has primary and secondary nodes in"
3348 " different groups: %s", utils.CommaJoin(pretty_list),
3349 code=self.ETYPE_WARNING)
3351 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3352 i_non_a_balanced.append(instance)
3354 for snode in inst_config.secondary_nodes:
3355 s_img = node_image[snode]
3356 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3357 snode, "instance %s, connection to secondary node failed",
3361 inst_nodes_offline.append(snode)
3363 # warn that the instance lives on offline nodes
3364 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3365 "instance has offline secondary node(s) %s",
3366 utils.CommaJoin(inst_nodes_offline))
3367 # ... or ghost/non-vm_capable nodes
3368 for node in inst_config.all_nodes:
3369 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3370 instance, "instance lives on ghost node %s", node)
3371 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3372 instance, "instance lives on non-vm_capable node %s", node)
3374 feedback_fn("* Verifying orphan volumes")
3375 reserved = utils.FieldSet(*cluster.reserved_lvs)
3377 # We will get spurious "unknown volume" warnings if any node of this group
3378 # is secondary for an instance whose primary is in another group. To avoid
3379 # them, we find these instances and add their volumes to node_vol_should.
3380 for inst in self.all_inst_info.values():
3381 for secondary in inst.secondary_nodes:
3382 if (secondary in self.my_node_info
3383 and inst.name not in self.my_inst_info):
3384 inst.MapLVsByNode(node_vol_should)
3387 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3389 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3390 feedback_fn("* Verifying N+1 Memory redundancy")
3391 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3393 feedback_fn("* Other Notes")
3395 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3396 % len(i_non_redundant))
3398 if i_non_a_balanced:
3399 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3400 % len(i_non_a_balanced))
3403 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3406 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3409 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      # A POST-phase invocation must always carry per-node results
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        # NOTE(review): "msg" is presumably res.fail_msg; its assignment is
        # elided in this excerpt — confirm against the full file
        # Only flag a communication failure for nodes that are not offline
        test = msg and not res.offline
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an RPC error
        for script, hkr, output in res.payload:
          # A hook script failed iff its status is HKR_FAIL
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          # Re-indent the hook's output before feeding it back to the user
          output = self._HOOKS_INDENT_RE.sub(" ", output)
          feedback_fn("%s" % output)
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  This LU does no verification work itself; it merely schedules one
  L{opcodes.OpGroupVerifyDisks} job per node group.

  """
  def ExpandNames(self):
    # All locks are shared: node groups are only read, never modified
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    """Submit one group-verification job per owned node group.

    @return: a L{ResultWithJobs} wrapping one single-opcode job per group

    """
    jobs = []
    for group in self.owned_locks(locking.LEVEL_NODEGROUP):
      jobs.append([opcodes.OpGroupVerifyDisks(group_name=group)])
    return ResultWithJobs(jobs)
class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Everything is read-only here, so all locks can be shared
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],

  def DeclareLocks(self, level):
    # Locks are declared level by level as the workflow descends
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    # Snapshot the locks actually owned at each level
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              owned_groups, owned_nodes, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for

    """
    res_instances = set()

    # Map (node, lv_name) pairs to the instances whose disks should be
    # active; only instances administratively "up" are considered
    nv_dict = _MapInstanceDisksToNodes(
      [inst for inst in self.instances.values()
       if inst.admin_state == constants.ADMINST_UP])

    # Query only the owned nodes that are vm_capable
    nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                           set(self.cfg.GetVmCapableNodeList()))

    node_lvs = self.rpc.call_lv_list(nodes, [])

    for (node, node_res) in node_lvs.items():
      if node_res.offline:

        msg = node_res.fail_msg
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg

      for lv_name, (_, _, lv_online) in node_res.payload.items():
        inst = nv_dict.pop((node, lv_name), None)
        # An expected LV that exists but is offline means its owning
        # instance needs activate-disks
        if not (lv_online or inst is None):
          res_instances.add(inst)

    # any leftover items in nv_dict are missing LVs, let's arrange the data
    for key, inst in nv_dict.iteritems():
      res_missing.setdefault(inst, []).append(list(key))

    return (res_nodes, list(res_instances), res_missing)
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  def ExpandNames(self):
    # With an explicit instance list, lock only those instances and
    # (later, via recalculation) their primary nodes
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
      # No instance list given: operate on the whole cluster
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE_RES: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
    # Node resource locks are shared; instance locks are exclusive
    self.share_locks = {
      locking.LEVEL_NODE_RES: 1,
      locking.LEVEL_INSTANCE: 0,

  def DeclareLocks(self, level):
    # Only the primary nodes of the selected instances are needed
    if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True, level=level)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
      map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      # The first child is the data device; grow it to the parent's size
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    # Group disks per primary node so each node is queried only once
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    assert not (frozenset(per_node_disks.keys()) -
                self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)

    for node, dskl in per_node_disks.items():
      # Work on copies so config objects are not mutated by SetDiskID
      newl = [v[2].Copy() for v in dskl]
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)

      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",

      for ((instance, idx, disk), size) in zip(dskl, result.payload):
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)

        # Recorded size differs from the actual size: fix the config
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        # DRBD children may also need to be grown to the parent size
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
      # Hooks get the old name as target and the new one as NEW_NAME
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    # Resolve the new name using the cluster's primary IP family
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
    if new_ip != old_ip:
      # The new master IP must not be in use anywhere on the network
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    # Store the fully-resolved name for Exec
    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
    result.Raise("Could not disable the master role")

      # NOTE(review): "new_ip" is presumably self.ip (set in CheckPrereq);
      # its assignment is elided in this excerpt — confirm in the full file
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
        node_list.remove(master_params.name)
      _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
      # Re-activate the master IP under the new address
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.name,
      msg = result.fail_msg
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)
def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  # Map the cluster's primary IP family to the matching address class;
  # an unknown family is reported as an invalid request
  try:
    address_class = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family, errors.ECODE_INVAL)
  if address_class.ValidateNetmask(netmask):
    return
  raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                             (netmask), errors.ECODE_INVAL)
3820 class LUClusterSetParams(LogicalUnit):
3821 """Change the parameters of the cluster.
3824 HPATH = "cluster-modify"
3825 HTYPE = constants.HTYPE_CLUSTER
3828 def CheckArguments(self):
3832 if self.op.uid_pool:
3833 uidpool.CheckUidPool(self.op.uid_pool)
3835 if self.op.add_uids:
3836 uidpool.CheckUidPool(self.op.add_uids)
3838 if self.op.remove_uids:
3839 uidpool.CheckUidPool(self.op.remove_uids)
3841 if self.op.master_netmask is not None:
3842 _ValidateNetmask(self.cfg, self.op.master_netmask)
3844 if self.op.diskparams:
3845 for dt_params in self.op.diskparams.values():
3846 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3848 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3849 except errors.OpPrereqError, err:
3850 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
3853 def ExpandNames(self):
3854 # FIXME: in the future maybe other cluster params won't require checking on
3855 # all nodes to be modified.
3856 self.needed_locks = {
3857 locking.LEVEL_NODE: locking.ALL_SET,
3858 locking.LEVEL_INSTANCE: locking.ALL_SET,
3859 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3861 self.share_locks = {
3862 locking.LEVEL_NODE: 1,
3863 locking.LEVEL_INSTANCE: 1,
3864 locking.LEVEL_NODEGROUP: 1,
  def BuildHooksEnv(self):
      # Hooks environment: old cluster name as target, plus the new VG name
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Hooks for cluster modification involve the master node
    mn = self.cfg.GetMasterNode()
  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    # Disabling LVM (empty vg_name) is refused while LV-backed disks exist
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    # Likewise, the DRBD helper cannot be disabled while DRBD disks exist
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",

    node_list = self.owned_locks(locking.LEVEL_NODE)

    # if vg_name not None, checks given volume group on all nodes
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              constants.MIN_VG_SIZE)
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
          self.LogInfo("Not checking drbd helper on offline node %s", node)
        msg = helpers[node].fail_msg
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        # The helper reported by the node must match the requested one
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      objects.UpgradeBeParams(self.op.beparams)
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
          constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.hv_state:
      new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                            self.cluster.hv_state_static)
      self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
                               for hv, values in new_hv_state.items())

    if self.op.disk_state:
      new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
                                                self.cluster.disk_state_static)
      # Nested fill: per storage type, then per named entry
      self.new_disk_state = \
        dict((storage, dict((name, cluster.SimpleFillDiskState(values))
                            for name, values in svalues.items()))
             for storage, svalues in new_disk_state.items())

      self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,

      # Warn (but do not fail) about instances that would newly violate
      # the instance policy of their group after the change
      all_instances = self.cfg.GetAllInstancesInfo().values()
      for group in self.cfg.GetAllNodeGroupsInfo().values():
        instances = frozenset([inst for inst in all_instances
                               if compat.any(node in group.members
                                             for node in inst.all_nodes)])
        new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
        ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
        new = _ComputeNewInstanceViolations(ipol,
                                            new_ipolicy, instances)
        violations.update(new)

        self.LogWarning("After the ipolicy change the following instances"
                        " violate them: %s",
                        utils.CommaJoin(utils.NiceSort(violations)))

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          # Deep-copy so the instance's stored nicparams are not mutated
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))

        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors), errors.ECODE_INVAL)

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
          self.new_hvparams[hv_name].update(hv_dict)

    # disk template parameters
    self.new_diskparams = objects.FillDict(cluster.diskparams, {})
    if self.op.diskparams:
      for dt_name, dt_params in self.op.diskparams.items():
        if dt_name not in self.op.diskparams:
          self.new_diskparams[dt_name] = dt_params
          self.new_diskparams[dt_name].update(dt_params)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
          # Merge per-hypervisor dicts into the existing per-OS entry
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        if hv not in new_hvp:
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      # The default iallocator must exist on the search path
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    Applies each parameter carried by the opcode to the in-memory
    cluster object; the modified configuration is saved via
    self.cfg.Update further down.

    """
    # LVM volume group name
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    # DRBD usermode helper
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
        feedback_fn("Cluster DRBD helper already in desired state,"
    # Hypervisor parameters; self.new_hvparams/self.new_os_hvp are
    # precomputed (presumably in CheckPrereq — not visible here)
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    # Backend, NIC, instance policy, OS, node and disk parameters
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
      self.cluster.ipolicy = self.new_ipolicy
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams
    if self.op.diskparams:
      self.cluster.diskparams = self.new_diskparams
    if self.op.hv_state:
      self.cluster.hv_state_static = self.new_hv_state
    if self.op.disk_state:
      self.cluster.disk_state_static = self.new_disk_state
    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])
    if self.op.maintain_node_health is not None:
      if self.op.maintain_node_health and not constants.ENABLE_CONFD:
        feedback_fn("Note: CONFD was disabled at build time, node health"
                    " maintenance is not useful (still enabling it)")
      self.cluster.maintain_node_health = self.op.maintain_node_health
    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
    # UID pool modifications: add, remove or replace wholesale
    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool
    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator
    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs
    if self.op.use_external_mip_script is not None:
      self.cluster.use_external_mip_script = self.op.use_external_mip_script
    def helper_os(aname, mods, desc):
      # Applies DDM_ADD/DDM_REMOVE modifications to the OS name list
      # stored in the cluster attribute named by aname
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
        elif key == constants.DDM_REMOVE:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")
    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
    # Changing the master netdev requires turning the master IP down
    # first and bringing it up again (after cfg.Update) on the new device
    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      ems = self.cfg.GetUseExternalMipScript()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (master_params.netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev
    if self.op.master_netmask:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
      result = self.rpc.call_node_change_master_netmask(master_params.name,
                                                        master_params.netmask,
                                                        self.op.master_netmask,
                                                        master_params.netdev)
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
      self.cluster.master_netmask = self.op.master_netmask
    # Persist all of the above changes
    self.cfg.Update(self.cluster, feedback_fn)
    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      ems = self.cfg.GetUseExternalMipScript()
      result = self.rpc.call_node_activate_master_ip(master_params.name,
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
4259 def _UploadHelper(lu, nodes, fname):
4260 """Helper for uploading a file and showing warnings.
4263 if os.path.exists(fname):
4264 result = lu.rpc.call_upload_file(nodes, fname)
4265 for to_node, to_result in result.items():
4266 msg = to_result.fail_msg
4268 msg = ("Copy of file %s to node %s failed: %s" %
4269 (fname, to_node, msg))
4270 lu.proc.LogWarning(msg)
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed
  @return: tuple of (files_all, files_opt, files_mc, files_vm) file sets

  """
  # Compute files for all nodes
    pathutils.SSH_KNOWN_HOSTS_FILE,
    pathutils.CONFD_HMAC_KEY,
    pathutils.CLUSTER_DOMAIN_SECRET_FILE,
    pathutils.SPICE_CERT_FILE,
    pathutils.SPICE_CACERT_FILE,
    pathutils.RAPI_USERS_FILE,
    files_all.update(pathutils.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
    # we need to ship at least the RAPI certificate
    files_all.add(pathutils.RAPI_CERT_FILE)
  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)
  if cluster.use_external_mip_script:
    files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
    pathutils.RAPI_USERS_FILE,
  # Files which should only be on master candidates
    files_mc.add(pathutils.CLUSTER_CONF_FILE)
  # Files which should only be on VM-capable nodes
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          sum(map(len, [files_all, files_mc, files_vm]))), \
    "Found file listed in more than one file list"
  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
    "Optional file not in a different required list"
  return (files_all, files_opt, files_mc, files_vm)
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  online_set = frozenset(online_nodes)
  vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    # only treat the extra nodes as VM-capable when the caller says so
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (pathutils.CLUSTER_CONF_FILE in files_all or
              pathutils.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  # no big ganeti lock needed; node locks are taken in shared mode
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    # writing the config also pushes it (and ssconf) to all nodes
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)
class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    # whether an external script manages the master IP
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                   master_params, ems)
    result.Raise("Could not activate the master IP")
class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    # whether an external script manages the master IP
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not deactivate the master IP")
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  @param lu: the calling logical unit
  @param instance: the instance whose disks are polled
  @param disks: optional subset of the instance's disks to wait for
  @type oneshot: boolean
  @param oneshot: presumably poll only once instead of looping — confirm
      against the loop logic (partially elided here)
  @return: True if the disks ended up not degraded, False otherwise

  """
  if not instance.disks or disks is not None and not disks:
  disks = _ExpandCheckDisks(instance, disks)
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
  node = instance.primary_node
    lu.cfg.SetDiskID(dev, node)
  # TODO: Convert to utils.Retry
  degr_retries = 10 # in seconds, as we sleep 1 second each time
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
    msg = rstats.fail_msg
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      raise errors.RemoteError("Can't contact node %s for mirror data,"
                               " aborting." % node)
    rstats = rstats.payload
    for i, mstat in enumerate(rstats):
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
      # degraded with no sync percentage means a transiently-unusable mirror
      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
  # if we're done but degraded, let's do a few small retries, to
  # make sure we see a stable and not transient situation; therefore
  # we force restart of the loop
  if (done or oneshot) and cumul_degraded and degr_retries > 0:
    logging.info("Degraded disks found, %d retries left", degr_retries)
      time.sleep(min(60, max_time))
  lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
def _BlockdevFind(lu, node, dev, instance):
  """Wrapper around call_blockdev_find to annotate diskparams.

  @param lu: A reference to the lu object
  @param node: The node to call out
  @param dev: The device to find
  @param instance: The instance object the device belongs to
  @returns The result of the rpc call

  """
  # annotate the single device with the instance's disk parameters
  # before handing it to the RPC layer
  anno_disks = _AnnotateDiskParams(instance, [dev], lu.cfg)
  return lu.rpc.call_blockdev_find(node, anno_disks[0])
def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
  """Wrapper around L{_CheckDiskConsistencyInner}.

  Annotates the disk parameters of C{dev} before delegating the actual
  consistency check; the C{ldisk} flag is forwarded unchanged.

  """
  (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
  return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
                                    ldisk=ldisk)
4541 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4543 """Check that mirrors are not degraded.
4545 @attention: The device has to be annotated already.
4547 The ldisk parameter, if True, will change the test from the
4548 is_degraded attribute (which represents overall non-ok status for
4549 the device(s)) to the ldisk (representing the local storage status).
4552 lu.cfg.SetDiskID(dev, node)
4556 if on_primary or dev.AssembleOnSecondary():
4557 rstats = lu.rpc.call_blockdev_find(node, dev)
4558 msg = rstats.fail_msg
4560 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4562 elif not rstats.payload:
4563 lu.LogWarning("Can't find disk on node %s", node)
4567 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4569 result = result and not rstats.payload.is_degraded
4572 for child in dev.children:
4573 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  # commands for which the master node must never be a target
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
      lock_names = locking.ALL_SET
    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
  def CheckPrereq(self):
    """Check prerequisites.

    This checks, among others, that:
    - the node exists in the configuration

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.master_node = self.cfg.GetMasterNode()
    # power_delay is expected non-negative (validated at opcode level)
    assert self.op.power_delay >= 0.0
    if self.op.node_names:
      # refuse power-off/cycle of the master node; the error message
      # explains how to force it when the master supports OOB at all
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
          additional_text = "it does not support out-of-band operations"
        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)
    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names
    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
        raise errors.OpPrereqError("Node %s not found" % node_name,
        self.nodes.append(node)
      # powering off an online node would kill running instances
      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      # one result entry per node, starting with the node name
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)
      oob_program = _SupportsOob(self.cfg, node)
        node_entry.append((constants.RS_UNAVAIL, None))
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      # the OOB program is always executed from the master node
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
          node_entry.append((constants.RS_NODATA, None))
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)
          if self.op.command == constants.OOB_POWER_ON:
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)
          node_entry.append((constants.RS_NORMAL, result.payload))
          # stagger power-on of several nodes by op.power_delay seconds
          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)
  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))
    # power on/off/cycle are expected to return no payload at all
    if self.op.command in [
      constants.OOB_POWER_ON,
      constants.OOB_POWER_OFF,
      constants.OOB_POWER_CYCLE,
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))
class _OsQuery(_QueryBase):
  # field definitions used by the query framework for OS queries
  FIELDS = query.OS_FIELDS
  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    # The following variables interact with _QueryBase._GetNames
      self.wanted = self.names
      self.wanted = locking.ALL_SET
    self.do_locking = self.use_locking
  def DeclareLocks(self, lu, level):
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into an a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}

    """
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      # skip nodes whose RPC failed or returned nothing
      if nr.fail_msg or not nr.payload:
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)
    # only diagnose OSes on online, vm-capable nodes
    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()
    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))
      api_versions = set()
      for idx, osl in enumerate(os_data.values()):
        # an OS is only valid if it is valid on every node
        info.valid = bool(info.valid and osl and osl[0][1])
        (node_variants, node_params, node_api) = osl[0][3:6]
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)
      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)
      data[os_name] = info
    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    @param fields: requested output fields
    @param names: optional list of OS names to restrict the query to
    @return: a qlang filter expression, or None for no filtering

    """
    name_filter = qlang.MakeSimpleFilter("name", names)
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])
      status_filter.insert(0, qlang.OP_AND)
      status_filter = None
    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
      return status_filter
  def CheckArguments(self):
    # queries never use locking here (last argument False)
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)
  def ExpandNames(self):
    self.oq.ExpandNames(self)
  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  def BuildHooksEnv(self):
    # hook environment: only the target node name is exported
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
  def BuildHooksNodes(self):
    """Build hooks nodes.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    all_nodes = self.cfg.GetNodeList()
      all_nodes.remove(self.op.node_name)
    return (all_nodes, all_nodes)
  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
    - the node exists in the configuration
    - it does not have primary or secondary instances
    - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None
    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)
    # refuse removal while any instance still uses this node
    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
    self.op.node_name = node.name
  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    logging.info("Stopping the node daemon and removing configs from node %s",
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)
    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)
    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)
class _NodeQuery(_QueryBase):
  # field definitions used by the query framework for node queries
  FIELDS = query.NODE_FIELDS
  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()
      self.wanted = _GetWantedNodes(lu, self.names)
      self.wanted = locking.ALL_SET
    # live data requires locking the queried nodes
    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
  def DeclareLocks(self, lu, level):
  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
                                        [lu.cfg.GetHypervisorType()])
      # only keep successful results with an actual payload
      live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    if query.NQ_INST in self.requested_data:
      # map every node to its primary and secondary instances
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])
      inst_data = lu.cfg.GetAllInstancesInfo()
      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
      node_to_primary = None
      node_to_secondary = None
    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  All real work is delegated to a L{_NodeQuery} instance.

  """
  # pylint: disable=W0142
  def CheckArguments(self):
    # build the query helper from the opcode arguments
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)
  def ExpandNames(self):
    self.nq.ExpandNames(self)
  def DeclareLocks(self, level):
    self.nq.DeclareLocks(self, level)
  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  # dynamic fields are computed from the per-node RPC results
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")
  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)
  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}
    # no node list means all nodes
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)
    ilist = self.cfg.GetAllInstancesInfo()
    # map of (node, volume) -> instance name
    vol2inst = _MapInstanceDisksToNodes(ilist.values())
    for node in nodenames:
      nresult = volumes[node]
      msg = nresult.fail_msg
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))
      for vol in node_vols:
        for field in self.op.output_fields:
          elif field == "phys":
          elif field == "name":
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            # "-" when the volume belongs to no known instance
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
            raise errors.ParameterError(field)
          node_output.append(str(val))
        output.append(node_output)
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)
  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
      fields = [constants.SF_NAME] + self.op.output_fields
    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)
    # index of every requested field inside the RPC result rows
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)
    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      msg = nresult.fail_msg
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
      # index rows by storage unit name for sorted output
      rows = dict([(row[name_idx], row) for row in nresult.payload])
      for name in utils.NiceSort(rows.keys()):
        for field in self.op.output_fields:
          if field == constants.SF_NODE:
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
            raise errors.ParameterError(field)
5264 class _InstanceQuery(_QueryBase):
5265 FIELDS = query.INSTANCE_FIELDS
  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()
      self.wanted = _GetWantedInstances(lu, self.names)
      self.wanted = locking.ALL_SET
    # live data requires locking the queried instances
    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
      # lock instances, with node/group locks computed later via
      # DeclareLocks and _LockInstancesNodes
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)
  def DeclareLocks(self, lu, level):
    if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
      assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
      # Lock all groups used by instances optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
          for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
    elif level == locking.LEVEL_NODE:
      lu._LockInstancesNodes() # pylint: disable=W0212
5302 def _CheckGroupLocks(lu):
5303 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5304 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5306 # Check if node groups for locked instances are still correct
5307 for instance_name in owned_instances:
5308 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5310 def _GetQueryData(self, lu):
5311 """Computes the list of instances and their attributes.
5314 if self.do_grouplocks:
5315 self._CheckGroupLocks(lu)
5317 cluster = lu.cfg.GetClusterInfo()
5318 all_info = lu.cfg.GetAllInstancesInfo()
5320 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5322 instance_list = [all_info[name] for name in instance_names]
5323 nodes = frozenset(itertools.chain(*(inst.all_nodes
5324 for inst in instance_list)))
5325 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5328 wrongnode_inst = set()
5330 # Gather data as requested
5331 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5333 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5335 result = node_data[name]
5337 # offline nodes will be in both lists
5338 assert result.fail_msg
5339 offline_nodes.append(name)
5341 bad_nodes.append(name)
5342 elif result.payload:
5343 for inst in result.payload:
5344 if inst in all_info:
5345 if all_info[inst].primary_node == name:
5346 live_data.update(result.payload)
5348 wrongnode_inst.add(inst)
5350 # orphan instance; we don't list it here as we don't
5351 # handle this case yet in the output of instance listing
5352 logging.warning("Orphan instance '%s' found on node %s",
5354 # else no instance is alive
5358 if query.IQ_DISKUSAGE in self.requested_data:
5359 gmi = ganeti.masterd.instance
5360 disk_usage = dict((inst.name,
5361 gmi.ComputeDiskSize(inst.disk_template,
5362 [{constants.IDISK_SIZE: disk.size}
5363 for disk in inst.disks]))
5364 for inst in instance_list)
5368 if query.IQ_CONSOLE in self.requested_data:
5370 for inst in instance_list:
5371 if inst.name in live_data:
5372 # Instance is running
5373 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5375 consinfo[inst.name] = None
5376 assert set(consinfo.keys()) == set(instance_names)
5380 if query.IQ_NODES in self.requested_data:
5381 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5383 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5384 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5385 for uuid in set(map(operator.attrgetter("group"),
5391 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5392 disk_usage, offline_nodes, bad_nodes,
5393 live_data, wrongnode_inst, consinfo,
class LUQuery(NoHooksLU):
  """Generic query LU: runs a new-style query for one resource kind.

  All real work is delegated to the ``_QueryBase`` implementation that
  matches ``self.op.what``; this LU only wires the opcode into it.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    # Resolve the query implementation class for the requested resource
    # kind and instantiate it with filter, field list and locking mode.
    impl_cls = _GetQueryImplementation(self.op.what)

    self.impl = impl_cls(self.op.qfilter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)
class LUQueryFields(NoHooksLU):
  """Queries the available fields for one resource kind.

  Unlike L{LUQuery}, this never touches cluster state and therefore
  needs no locks at all.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    # Only the implementation class is needed; its FIELDS attribute
    # describes every queryable field for the resource kind.
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    # Field metadata is static, so no locks are required
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  Validates that the requested storage type supports modification and
  that only modifiable fields are being changed, then forwards the
  change to the node daemon.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    # Unknown storage types have no entry in MODIFIABLE_STORAGE_FIELDS
    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    # Reject any requested change outside the modifiable field set
    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # Only the target node needs to be locked
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
# LU adding (or re-adding with --readd) a node to the cluster.
# NOTE(review): line-numbered extract with gaps; missing lines (e.g. error-code
# arguments after several OpPrereqError messages, 5480-5482, 5495) are not
# reconstructed here -- only comments are added.
5478 class LUNodeAdd(LogicalUnit):
5479 """Logical unit for adding node to the cluster.
5483 HTYPE = constants.HTYPE_NODE
# Flags copied verbatim between opcode and node object in several places below.
5484 _NFLAGS = ["master_capable", "vm_capable"]
5486 def CheckArguments(self):
5487 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5488 # validate/normalize the node name
5489 self.hostname = netutils.GetHostname(name=self.op.node_name,
5490 family=self.primary_ip_family)
5491 self.op.node_name = self.hostname.name
5493 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5494 raise errors.OpPrereqError("Cannot readd the master node",
5497 if self.op.readd and self.op.group:
5498 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5499 " being readded", errors.ECODE_INVAL)
5501 def BuildHooksEnv(self):
5504 This will run on all nodes before, and on all nodes + the new node after.
5508 "OP_TARGET": self.op.node_name,
5509 "NODE_NAME": self.op.node_name,
5510 "NODE_PIP": self.op.primary_ip,
5511 "NODE_SIP": self.op.secondary_ip,
5512 "MASTER_CAPABLE": str(self.op.master_capable),
5513 "VM_CAPABLE": str(self.op.vm_capable),
5516 def BuildHooksNodes(self):
5517 """Build hooks nodes.
5520 # Exclude added node
5521 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5522 post_nodes = pre_nodes + [self.op.node_name, ]
5524 return (pre_nodes, post_nodes)
5526 def CheckPrereq(self):
5527 """Check prerequisites.
5530 - the new node is not already in the config
5532 - its parameters (single/dual homed) matches the cluster
5534 Any errors are signaled by raising errors.OpPrereqError.
5538 hostname = self.hostname
5539 node = hostname.name
5540 primary_ip = self.op.primary_ip = hostname.ip
# Default the secondary IP to the primary when not given (single-homed case);
# an IPv6 primary must come with an explicit IPv4 secondary.
5541 if self.op.secondary_ip is None:
5542 if self.primary_ip_family == netutils.IP6Address.family:
5543 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5544 " IPv4 address must be given as secondary",
5546 self.op.secondary_ip = primary_ip
5548 secondary_ip = self.op.secondary_ip
5549 if not netutils.IP4Address.IsValid(secondary_ip):
5550 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5551 " address" % secondary_ip, errors.ECODE_INVAL)
5553 node_list = cfg.GetNodeList()
5554 if not self.op.readd and node in node_list:
5555 raise errors.OpPrereqError("Node %s is already in the configuration" %
5556 node, errors.ECODE_EXISTS)
5557 elif self.op.readd and node not in node_list:
5558 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5561 self.changed_primary_ip = False
# On readd the IP configuration must match the stored one (except that the
# primary IP may change); for any other node, the new IPs must be unique.
5563 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5564 if self.op.readd and node == existing_node_name:
5565 if existing_node.secondary_ip != secondary_ip:
5566 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5567 " address configuration as before",
5569 if existing_node.primary_ip != primary_ip:
5570 self.changed_primary_ip = True
5574 if (existing_node.primary_ip == primary_ip or
5575 existing_node.secondary_ip == primary_ip or
5576 existing_node.primary_ip == secondary_ip or
5577 existing_node.secondary_ip == secondary_ip):
5578 raise errors.OpPrereqError("New node ip address(es) conflict with"
5579 " existing node %s" % existing_node.name,
5580 errors.ECODE_NOTUNIQUE)
5582 # After this 'if' block, None is no longer a valid value for the
5583 # _capable op attributes
# Readd branch: inherit unspecified flags from the stored node object;
# the fresh-add branch (below) defaults them to True instead.
5585 old_node = self.cfg.GetNodeInfo(node)
5586 assert old_node is not None, "Can't retrieve locked node %s" % node
5587 for attr in self._NFLAGS:
5588 if getattr(self.op, attr) is None:
5589 setattr(self.op, attr, getattr(old_node, attr))
5591 for attr in self._NFLAGS:
5592 if getattr(self.op, attr) is None:
5593 setattr(self.op, attr, True)
5595 if self.op.readd and not self.op.vm_capable:
5596 pri, sec = cfg.GetNodeInstances(node)
5598 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5599 " flag set to false, but it already holds"
5600 " instances" % node,
5603 # check that the type of the node (single versus dual homed) is the
5604 # same as for the master
5605 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5606 master_singlehomed = myself.secondary_ip == myself.primary_ip
5607 newbie_singlehomed = secondary_ip == primary_ip
5608 if master_singlehomed != newbie_singlehomed:
5609 if master_singlehomed:
5610 raise errors.OpPrereqError("The master has no secondary ip but the"
5611 " new node has one",
5614 raise errors.OpPrereqError("The master has a secondary ip but the"
5615 " new node doesn't have one",
5618 # checks reachability
5619 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5620 raise errors.OpPrereqError("Node not reachable by ping",
5621 errors.ECODE_ENVIRON)
5623 if not newbie_singlehomed:
5624 # check reachability from my secondary ip to newbie's secondary ip
5625 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5626 source=myself.secondary_ip):
5627 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5628 " based ping to node daemon port",
5629 errors.ECODE_ENVIRON)
# Decide whether the new node should become a master candidate.
5636 if self.op.master_capable:
5637 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5639 self.master_candidate = False
# Readd reuses the existing node object; fresh add builds a new one.
5642 self.new_node = old_node
5644 node_group = cfg.LookupNodeGroup(self.op.group)
5645 self.new_node = objects.Node(name=node,
5646 primary_ip=primary_ip,
5647 secondary_ip=secondary_ip,
5648 master_candidate=self.master_candidate,
5649 offline=False, drained=False,
5652 if self.op.ndparams:
5653 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5655 if self.op.hv_state:
5656 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5658 if self.op.disk_state:
5659 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5661 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5662 # it a property on the base class.
# Protocol-version handshake with the new node before accepting it.
5663 result = rpc.DnsOnlyRunner().call_version([node])[node]
5664 result.Raise("Can't get version information from node %s" % node)
5665 if constants.PROTOCOL_VERSION == result.payload:
5666 logging.info("Communication to node %s fine, sw version %s match",
5667 node, result.payload)
5669 raise errors.OpPrereqError("Version mismatch master version %s,"
5670 " node version %s" %
5671 (constants.PROTOCOL_VERSION, result.payload),
5672 errors.ECODE_ENVIRON)
5674 def Exec(self, feedback_fn):
5675 """Adds the new node to the cluster.
5678 new_node = self.new_node
5679 node = new_node.name
5681 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5684 # We adding a new node so we assume it's powered
5685 new_node.powered = True
5687 # for re-adds, reset the offline/drained/master-candidate flags;
5688 # we need to reset here, otherwise offline would prevent RPC calls
5689 # later in the procedure; this also means that if the re-add
5690 # fails, we are left with a non-offlined, broken node
5692 new_node.drained = new_node.offline = False # pylint: disable=W0201
5693 self.LogInfo("Readding a node, the offline/drained flags were reset")
5694 # if we demote the node, we do cleanup later in the procedure
5695 new_node.master_candidate = self.master_candidate
5696 if self.changed_primary_ip:
5697 new_node.primary_ip = self.op.primary_ip
5699 # copy the master/vm_capable flags
5700 for attr in self._NFLAGS:
5701 setattr(new_node, attr, getattr(self.op, attr))
5703 # notify the user about any possible mc promotion
5704 if new_node.master_candidate:
5705 self.LogInfo("Node will be a master candidate")
5707 if self.op.ndparams:
5708 new_node.ndparams = self.op.ndparams
5710 new_node.ndparams = {}
5712 if self.op.hv_state:
5713 new_node.hv_state_static = self.new_hv_state
5715 if self.op.disk_state:
5716 new_node.disk_state_static = self.new_disk_state
5718 # Add node to our /etc/hosts, and add key to known_hosts
5719 if self.cfg.GetClusterInfo().modify_etc_hosts:
5720 master_node = self.cfg.GetMasterNode()
5721 result = self.rpc.call_etc_hosts_modify(master_node,
5722 constants.ETC_HOSTS_ADD,
5725 result.Raise("Can't update hosts file with new host data")
5727 if new_node.secondary_ip != new_node.primary_ip:
5728 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
# Run node verification (ssh/hostname) from the master towards the new node.
5731 node_verify_list = [self.cfg.GetMasterNode()]
5732 node_verify_param = {
5733 constants.NV_NODELIST: ([node], {}),
5734 # TODO: do a node-net-test as well?
5737 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5738 self.cfg.GetClusterName())
5739 for verifier in node_verify_list:
5740 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5741 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5743 for failed in nl_payload:
5744 feedback_fn("ssh/hostname verification failed"
5745 " (checking from %s): %s" %
5746 (verifier, nl_payload[failed]))
5747 raise errors.OpExecError("ssh/hostname verification failed")
# Readd path: refresh config/ancillary files and demote if needed;
# fresh-add path (below): register the node in the cluster context.
5750 _RedistributeAncillaryFiles(self)
5751 self.context.ReaddNode(new_node)
5752 # make sure we redistribute the config
5753 self.cfg.Update(new_node, feedback_fn)
5754 # and make sure the new node will not have old files around
5755 if not new_node.master_candidate:
5756 result = self.rpc.call_node_demote_from_mc(new_node.name)
5757 msg = result.fail_msg
5759 self.LogWarning("Node failed to demote itself from master"
5760 " candidate status: %s" % msg)
5762 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5763 additional_vm=self.op.vm_capable)
5764 self.context.AddNode(new_node, self.proc.GetECId())
# LU changing node parameters: role flags (MC/drained/offline), capability
# flags, secondary IP, ndparams, power state, hv/disk state.
# NOTE(review): line-numbered extract with gaps; missing lines (error-code
# arguments, else branches, e.g. 5769, 5794, 5826, 5828) are left as-is.
5767 class LUNodeSetParams(LogicalUnit):
5768 """Modifies the parameters of a node.
5770 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5771 to the node role (as _ROLE_*)
5772 @cvar _R2F: a dictionary from node role to tuples of flags
5773 @cvar _FLAGS: a list of attribute names corresponding to the flags
5776 HPATH = "node-modify"
5777 HTYPE = constants.HTYPE_NODE
5779 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
# _F2R maps (master_candidate, drained, offline) flag tuples to roles; the
# dict opener line is absent from this extract.
5781 (True, False, False): _ROLE_CANDIDATE,
5782 (False, True, False): _ROLE_DRAINED,
5783 (False, False, True): _ROLE_OFFLINE,
5784 (False, False, False): _ROLE_REGULAR,
5786 _R2F = dict((v, k) for k, v in _F2R.items())
5787 _FLAGS = ["master_candidate", "drained", "offline"]
5789 def CheckArguments(self):
5790 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
# At least one modification must be requested, and at most one of the
# mutually-exclusive role flags may be set to True.
5791 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5792 self.op.master_capable, self.op.vm_capable,
5793 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5795 if all_mods.count(None) == len(all_mods):
5796 raise errors.OpPrereqError("Please pass at least one modification",
5798 if all_mods.count(True) > 1:
5799 raise errors.OpPrereqError("Can't set the node into more than one"
5800 " state at the same time",
5803 # Boolean value that tells us whether we might be demoting from MC
5804 self.might_demote = (self.op.master_candidate is False or
5805 self.op.offline is True or
5806 self.op.drained is True or
5807 self.op.master_capable is False)
5809 if self.op.secondary_ip:
5810 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5811 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5812 " address" % self.op.secondary_ip,
5815 self.lock_all = self.op.auto_promote and self.might_demote
5816 self.lock_instances = self.op.secondary_ip is not None
5818 def _InstanceFilter(self, instance):
5819 """Filter for getting affected instances.
# Only internally-mirrored (e.g. DRBD) instances touching this node are
# affected by a secondary-IP change.
5822 return (instance.disk_template in constants.DTS_INT_MIRROR and
5823 self.op.node_name in instance.all_nodes)
5825 def ExpandNames(self):
# Branch between all-node locking (auto-promote path) and single-node
# locking; the guarding condition is on a missing line.
5827 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5829 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5831 # Since modifying a node can have severe effects on currently running
5832 # operations the resource lock is at least acquired in shared mode
5833 self.needed_locks[locking.LEVEL_NODE_RES] = \
5834 self.needed_locks[locking.LEVEL_NODE]
5836 # Get node resource and instance locks in shared mode; they are not used
5837 # for anything but read-only access
5838 self.share_locks[locking.LEVEL_NODE_RES] = 1
5839 self.share_locks[locking.LEVEL_INSTANCE] = 1
5841 if self.lock_instances:
5842 self.needed_locks[locking.LEVEL_INSTANCE] = \
5843 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5845 def BuildHooksEnv(self):
5848 This runs on the master node.
5852 "OP_TARGET": self.op.node_name,
5853 "MASTER_CANDIDATE": str(self.op.master_candidate),
5854 "OFFLINE": str(self.op.offline),
5855 "DRAINED": str(self.op.drained),
5856 "MASTER_CAPABLE": str(self.op.master_capable),
5857 "VM_CAPABLE": str(self.op.vm_capable),
5860 def BuildHooksNodes(self):
5861 """Build hooks nodes.
5864 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5867 def CheckPrereq(self):
5868 """Check prerequisites.
5870 This only checks the instance list against the existing names.
5873 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5875 if self.lock_instances:
5876 affected_instances = \
5877 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5879 # Verify instance locks
5880 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5881 wanted_instances = frozenset(affected_instances.keys())
5882 if wanted_instances - owned_instances:
5883 raise errors.OpPrereqError("Instances affected by changing node %s's"
5884 " secondary IP address have changed since"
5885 " locks were acquired, wanted '%s', have"
5886 " '%s'; retry the operation" %
5888 utils.CommaJoin(wanted_instances),
5889 utils.CommaJoin(owned_instances)),
5892 affected_instances = None
5894 if (self.op.master_candidate is not None or
5895 self.op.drained is not None or
5896 self.op.offline is not None):
5897 # we can't change the master's node flags
5898 if self.op.node_name == self.cfg.GetMasterNode():
5899 raise errors.OpPrereqError("The master role can be changed"
5900 " only via master-failover",
5903 if self.op.master_candidate and not node.master_capable:
5904 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5905 " it a master candidate" % node.name,
5908 if self.op.vm_capable is False:
5909 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5911 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5912 " the vm_capable flag" % node.name,
# Guard against dropping below the required number of master candidates
# when auto-promote was not requested.
5915 if node.master_candidate and self.might_demote and not self.lock_all:
5916 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5917 # check if after removing the current node, we're missing master
5919 (mc_remaining, mc_should, _) = \
5920 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5921 if mc_remaining < mc_should:
5922 raise errors.OpPrereqError("Not enough master candidates, please"
5923 " pass auto promote option to allow"
5924 " promotion (--auto-promote or RAPI"
5925 " auto_promote=True)", errors.ECODE_STATE)
5927 self.old_flags = old_flags = (node.master_candidate,
5928 node.drained, node.offline)
5929 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5930 self.old_role = old_role = self._F2R[old_flags]
5932 # Check for ineffective changes
5933 for attr in self._FLAGS:
5934 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
5935 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5936 setattr(self.op, attr, None)
5938 # Past this point, any flag change to False means a transition
5939 # away from the respective state, as only real changes are kept
5941 # TODO: We might query the real power state if it supports OOB
5942 if _SupportsOob(self.cfg, node):
5943 if self.op.offline is False and not (node.powered or
5944 self.op.powered is True):
5945 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5946 " offline status can be reset") %
5947 self.op.node_name, errors.ECODE_STATE)
5948 elif self.op.powered is not None:
5949 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5950 " as it does not support out-of-band"
5951 " handling") % self.op.node_name,
5954 # If we're being deofflined/drained, we'll MC ourself if needed
5955 if (self.op.drained is False or self.op.offline is False or
5956 (self.op.master_capable and not node.master_capable)):
5957 if _DecideSelfPromotion(self):
5958 self.op.master_candidate = True
5959 self.LogInfo("Auto-promoting node to master candidate")
5961 # If we're no longer master capable, we'll demote ourselves from MC
5962 if self.op.master_capable is False and node.master_candidate:
5963 self.LogInfo("Demoting from master candidate")
5964 self.op.master_candidate = False
# Compute the new role from the (now-normalized) flag values.
5967 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5968 if self.op.master_candidate:
5969 new_role = self._ROLE_CANDIDATE
5970 elif self.op.drained:
5971 new_role = self._ROLE_DRAINED
5972 elif self.op.offline:
5973 new_role = self._ROLE_OFFLINE
5974 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5975 # False is still in new flags, which means we're un-setting (the
5977 new_role = self._ROLE_REGULAR
5978 else: # no new flags, nothing, keep old role
5981 self.new_role = new_role
5983 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5984 # Trying to transition out of offline status
5985 result = self.rpc.call_version([node.name])[node.name]
5987 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5988 " to report its version: %s" %
5989 (node.name, result.fail_msg),
5992 self.LogWarning("Transitioning node from offline to online state"
5993 " without using re-add. Please make sure the node"
5996 # When changing the secondary ip, verify if this is a single-homed to
5997 # multi-homed transition or vice versa, and apply the relevant
5999 if self.op.secondary_ip:
6000 # Ok even without locking, because this can't be changed by any LU
6001 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6002 master_singlehomed = master.secondary_ip == master.primary_ip
6003 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6004 if self.op.force and node.name == master.name:
6005 self.LogWarning("Transitioning from single-homed to multi-homed"
6006 " cluster. All nodes will require a secondary ip.")
6008 raise errors.OpPrereqError("Changing the secondary ip on a"
6009 " single-homed cluster requires the"
6010 " --force option to be passed, and the"
6011 " target node to be the master",
6013 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6014 if self.op.force and node.name == master.name:
6015 self.LogWarning("Transitioning from multi-homed to single-homed"
6016 " cluster. Secondary IPs will have to be removed.")
6018 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6019 " same as the primary IP on a multi-homed"
6020 " cluster, unless the --force option is"
6021 " passed, and the target node is the"
6022 " master", errors.ECODE_INVAL)
6024 assert not (frozenset(affected_instances) -
6025 self.owned_locks(locking.LEVEL_INSTANCE))
# Offline-node branch: no instance may use the node's secondary IP.
6028 if affected_instances:
6029 msg = ("Cannot change secondary IP address: offline node has"
6030 " instances (%s) configured to use it" %
6031 utils.CommaJoin(affected_instances.keys()))
6032 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6034 # On online nodes, check that no instances are running, and that
6035 # the node has the new ip and we can reach it.
6036 for instance in affected_instances.values():
6037 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6038 msg="cannot change secondary ip")
6040 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6041 if master.name != node.name:
6042 # check reachability from master secondary ip to new secondary ip
6043 if not netutils.TcpPing(self.op.secondary_ip,
6044 constants.DEFAULT_NODED_PORT,
6045 source=master.secondary_ip):
6046 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6047 " based ping to node daemon port",
6048 errors.ECODE_ENVIRON)
6050 if self.op.ndparams:
6051 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6052 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6053 self.new_ndparams = new_ndparams
6055 if self.op.hv_state:
6056 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6057 self.node.hv_state_static)
6059 if self.op.disk_state:
6060 self.new_disk_state = \
6061 _MergeAndVerifyDiskState(self.op.disk_state,
6062 self.node.disk_state_static)
6064 def Exec(self, feedback_fn):
# Applies the changes computed in CheckPrereq; `result` accumulates the
# (name, value) pairs reported back to the caller.
6069 old_role = self.old_role
6070 new_role = self.new_role
6074 if self.op.ndparams:
6075 node.ndparams = self.new_ndparams
6077 if self.op.powered is not None:
6078 node.powered = self.op.powered
6080 if self.op.hv_state:
6081 node.hv_state_static = self.new_hv_state
6083 if self.op.disk_state:
6084 node.disk_state_static = self.new_disk_state
6086 for attr in ["master_capable", "vm_capable"]:
6087 val = getattr(self.op, attr)
6089 setattr(node, attr, val)
6090 result.append((attr, str(val)))
6092 if new_role != old_role:
6093 # Tell the node to demote itself, if no longer MC and not offline
6094 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6095 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6097 self.LogWarning("Node failed to demote itself: %s", msg)
6099 new_flags = self._R2F[new_role]
6100 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6102 result.append((desc, str(nf)))
6103 (node.master_candidate, node.drained, node.offline) = new_flags
6105 # we locked all nodes, we adjust the CP before updating this node
6107 _AdjustCandidatePool(self, [node.name])
6109 if self.op.secondary_ip:
6110 node.secondary_ip = self.op.secondary_ip
6111 result.append(("secondary_ip", self.op.secondary_ip))
6113 # this will trigger configuration file update, if needed
6114 self.cfg.Update(node, feedback_fn)
6116 # this will trigger job queue propagation or cleanup if the mc
6118 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6119 self.context.ReaddNode(node)
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    # Powercycling the master is only allowed when explicitly forced
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Schedules the powercycle via the node daemon.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")

    return result.payload
# LU returning the full cluster configuration as a flat dict (old-style
# "gnt-cluster info" data). NOTE(review): line-numbered extract with gaps;
# the dict opener/closer and return statement lines are absent and left as-is.
6156 class LUClusterQuery(NoHooksLU):
6157 """Query cluster configuration.
6162 def ExpandNames(self):
# Read-only query on in-memory config; no locks are taken.
6163 self.needed_locks = {}
6165 def Exec(self, feedback_fn):
6166 """Return cluster config.
6169 cluster = self.cfg.GetClusterInfo()
6172 # Filter just for enabled hypervisors
6173 for os_name, hv_dict in cluster.os_hvp.items():
6174 os_hvp[os_name] = {}
6175 for hv_name, hv_params in hv_dict.items():
6176 if hv_name in cluster.enabled_hypervisors:
6177 os_hvp[os_name][hv_name] = hv_params
6179 # Convert ip_family to ip_version
6180 primary_ip_version = constants.IP4_VERSION
6181 if cluster.primary_ip_family == netutils.IP6Address.family:
6182 primary_ip_version = constants.IP6_VERSION
# Result dict: versions first, then identity, then per-subsystem parameters.
6185 "software_version": constants.RELEASE_VERSION,
6186 "protocol_version": constants.PROTOCOL_VERSION,
6187 "config_version": constants.CONFIG_VERSION,
6188 "os_api_version": max(constants.OS_API_VERSIONS),
6189 "export_version": constants.EXPORT_VERSION,
6190 "architecture": runtime.GetArchInfo(),
6191 "name": cluster.cluster_name,
6192 "master": cluster.master_node,
6193 "default_hypervisor": cluster.primary_hypervisor,
6194 "enabled_hypervisors": cluster.enabled_hypervisors,
6195 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6196 for hypervisor_name in cluster.enabled_hypervisors]),
6198 "beparams": cluster.beparams,
6199 "osparams": cluster.osparams,
6200 "ipolicy": cluster.ipolicy,
6201 "nicparams": cluster.nicparams,
6202 "ndparams": cluster.ndparams,
6203 "diskparams": cluster.diskparams,
6204 "candidate_pool_size": cluster.candidate_pool_size,
6205 "master_netdev": cluster.master_netdev,
6206 "master_netmask": cluster.master_netmask,
6207 "use_external_mip_script": cluster.use_external_mip_script,
6208 "volume_group_name": cluster.volume_group_name,
6209 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6210 "file_storage_dir": cluster.file_storage_dir,
6211 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6212 "maintain_node_health": cluster.maintain_node_health,
6213 "ctime": cluster.ctime,
6214 "mtime": cluster.mtime,
6215 "uuid": cluster.uuid,
6216 "tags": list(cluster.GetTags()),
6217 "uid_pool": cluster.uid_pool,
6218 "default_iallocator": cluster.default_iallocator,
6219 "reserved_lvs": cluster.reserved_lvs,
6220 "primary_ip_version": primary_ip_version,
6221 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6222 "hidden_os": cluster.hidden_os,
6223 "blacklisted_os": cluster.blacklisted_os,
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False

  def CheckArguments(self):
    # Cluster queries take no filter and never use locking
    self.cq = _ClusterQuery(None, self.op.output_fields, False)

  def ExpandNames(self):
    self.cq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.cq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    result = self.cq.OldStyleQuery(self)

    # There is exactly one cluster, hence exactly one result row
    assert len(result) == 1

    return result[0]
class _ClusterQuery(_QueryBase):
  FIELDS = query.CLUSTER_FIELDS

  #: Do not sort (there is only one item)
  SORT_FIELD = None

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    self.wanted = locking.ALL_SET
    self.do_locking = self.use_locking

    if self.do_locking:
      raise errors.OpPrereqError("Can not use locking for cluster queries",
                                 errors.ECODE_INVAL)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    # Each piece of data is only gathered when requested; NotImplemented
    # marks fields the query layer must not try to fill in
    if query.CQ_CONFIG in self.requested_data:
      cluster = lu.cfg.GetClusterInfo()
    else:
      cluster = NotImplemented

    if query.CQ_QUEUE_DRAINED in self.requested_data:
      drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
    else:
      drain_flag = NotImplemented

    if query.CQ_WATCHER_PAUSE in self.requested_data:
      watcher_pause = utils.ReadWatcherPauseFile(pathutils.WATCHER_PAUSEFILE)
    else:
      watcher_pause = NotImplemented

    return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    # The primary node must be reachable for disk assembly
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)

    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    if self.op.wait_for_sync:
      if not _WaitForSync(self, self.instance):
        raise errors.OpExecError("Some disks of the instance are degraded!")

    return disks_info
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occured, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
                                             False, idx)
      msg = result.fail_msg
      if msg:
        is_offline_secondary = (node in instance.secondary_nodes and
                                result.offline)
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not (ignore_secondaries or is_offline_secondary):
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
                                             True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  if not disks_ok:
    # roll back whatever was assembled before reporting the failure
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")
class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      # forced shutdown skips the instance-state check
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)
6497 def _ExpandCheckDisks(instance, disks):
6498 """Return the instance disks selected by the disks list
6500 @type disks: list of L{objects.Disk} or None
6501 @param disks: selected disks
6502 @rtype: list of L{objects.Disk}
6503 @return: selected instance disks to act on
6507 return instance.disks
6509 if not set(disks).issubset(instance.disks):
6510 raise errors.ProgrammerError("Can only act on disks belonging to the"
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If the ignore_primary is false, errors on the primary node are
  ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        # failures on an offline secondary are tolerated; the primary's
        # failure counts unless the caller asked to ignore it
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function check if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @rtype: integer
  @return: node current free memory
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  (_, _, (hv_info, )) = nodeinfo[node].payload

  free_mem = hv_info.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
  return free_mem
6583 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6584 """Checks if nodes have enough free disk space in the all VGs.
6586 This function check if all given nodes have the needed amount of
6587 free disk. In case any node has less disk or we cannot get the
6588 information from the node, this function raise an OpPrereqError
6591 @type lu: C{LogicalUnit}
6592 @param lu: a logical unit from which we get configuration data
6593 @type nodenames: C{list}
6594 @param nodenames: the list of node names to check
6595 @type req_sizes: C{dict}
6596 @param req_sizes: the hash of vg and corresponding amount of disk in
6598 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6599 or we cannot check the node
6602 for vg, req_size in req_sizes.items():
6603 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6606 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6607 """Checks if nodes have enough free disk space in the specified VG.
6609 This function check if all given nodes have the needed amount of
6610 free disk. In case any node has less disk or we cannot get the
6611 information from the node, this function raise an OpPrereqError
6614 @type lu: C{LogicalUnit}
6615 @param lu: a logical unit from which we get configuration data
6616 @type nodenames: C{list}
6617 @param nodenames: the list of node names to check
6619 @param vg: the volume group to check
6620 @type requested: C{int}
6621 @param requested: the amount of disk in MiB to check for
6622 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6623 or we cannot check the node
6626 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6627 for node in nodenames:
6628 info = nodeinfo[node]
6629 info.Raise("Cannot get current information from node %s" % node,
6630 prereq=True, ecode=errors.ECODE_ENVIRON)
6631 (_, (vg_info, ), _) = info.payload
6632 vg_free = vg_info.get("vg_free", None)
6633 if not isinstance(vg_free, int):
6634 raise errors.OpPrereqError("Can't compute free disk space on node"
6635 " %s for vg %s, result was '%s'" %
6636 (node, vg, vg_free), errors.ECODE_ENVIRON)
6637 if requested > vg_free:
6638 raise errors.OpPrereqError("Not enough disk space on target node %s"
6639 " vg %s: required %d MiB, available %d MiB" %
6640 (node, vg, requested, vg_free),
6644 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6645 """Checks if nodes have enough physical CPUs
6647 This function checks if all given nodes have the needed number of
6648 physical CPUs. In case any node has less CPUs or we cannot get the
6649 information from the node, this function raises an OpPrereqError
6652 @type lu: C{LogicalUnit}
6653 @param lu: a logical unit from which we get configuration data
6654 @type nodenames: C{list}
6655 @param nodenames: the list of node names to check
6656 @type requested: C{int}
6657 @param requested: the minimum acceptable number of physical CPUs
6658 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6659 or we cannot check the node
6662 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6663 for node in nodenames:
6664 info = nodeinfo[node]
6665 info.Raise("Cannot get current information from node %s" % node,
6666 prereq=True, ecode=errors.ECODE_ENVIRON)
6667 (_, _, (hv_info, )) = info.payload
6668 num_cpus = hv_info.get("cpu_total", None)
6669 if not isinstance(num_cpus, int):
6670 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6671 " on node %s, result was '%s'" %
6672 (node, num_cpus), errors.ECODE_ENVIRON)
6673 if requested > num_cpus:
6674 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6675 "required" % (node, num_cpus, requested),
class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      objects.UpgradeBeParams(self.op.beparams)
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE_RES:
      self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)
      bep.update(self.op.beparams)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MINMEM], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = \
        self.rpc.call_instance_start(node_current,
                                     (instance, self.op.hvparams,
                                      self.op.beparams),
                                     self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)
class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      # soft/hard reboots are handled by the node daemon in one call
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      # full reboot: stop (if running), tear down disks, start again
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)
class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)
class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node,
                                             (inst, self.os_inst), True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      # always deactivate the disks, even if the OS install failed
      _ShutdownInstanceDisks(self, inst)
class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  _MODIFYABLE = frozenset([
    constants.IDISK_SIZE,
    constants.IDISK_MODE,
    ])

  # New or changed disk parameters may have different semantics
  assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
    constants.IDISK_ADOPT,

    # TODO: Implement support changing VG while recreating
    constants.IDISK_VG,
    constants.IDISK_METAVG,
    ]))

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    be_full = self.cfg.GetClusterInfo().FillBE(self.instance)

    # FIXME
    # The allocator should actually run in "relocate" mode, but current
    # allocators don't support relocating all the nodes of an instance at
    # the same time. As a workaround we use "allocate" mode, but this is
    # suboptimal for two reasons:
    # - The instance name passed to the allocator is present in the list of
    #   existing instances, so there could be a conflict within the
    #   internal structures of the allocator. This doesn't happen with the
    #   current allocators, but it's a liability.
    # - The allocator counts the resources used by the instance twice: once
    #   because the instance exists already, and once because it tries to
    #   allocate a new instance.
    # The allocator could choose some of the nodes on which the instance is
    # running, but that's not a problem. If the instance nodes are broken,
    # they should be already be marked as drained or offline, and hence
    # skipped by the allocator. If instance disks have been lost for other
    # reasons, then recreating the disks on the same nodes should be fine.
    disk_template = self.instance.disk_template
    spindle_use = be_full[constants.BE_SPINDLE_USE]
    req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
                                        disk_template=disk_template,
                                        tags=list(self.instance.GetTags()),
                                        os=self.instance.os,
                                        nics=[{}],
                                        vcpus=be_full[constants.BE_VCPUS],
                                        memory=be_full[constants.BE_MAXMEM],
                                        spindle_use=spindle_use,
                                        disks=[{constants.IDISK_SIZE: d.size,
                                                constants.IDISK_MODE: d.mode}
                                               for d in self.instance.disks],
                                        hypervisor=self.instance.hypervisor)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    assert req.RequiredNodes() == len(self.instance.all_nodes)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    self.op.nodes = ial.result
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))

  def CheckArguments(self):
    if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
      # Normalize and convert deprecated list of disk indices
      self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]

    duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
    if duplicates:
      raise errors.OpPrereqError("Some disks have been specified more than"
                                 " once: %s" % utils.CommaJoin(duplicates),
                                 errors.ECODE_INVAL)

    if self.op.iallocator and self.op.nodes:
      raise errors.OpPrereqError("Give either the iallocator or the new"
                                 " nodes, not both", errors.ECODE_INVAL)

    for (idx, params) in self.op.disks:
      utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
      unsupported = frozenset(params.keys()) - self._MODIFYABLE
      if unsupported:
        raise errors.OpPrereqError("Parameters for disk %s try to change"
                                   " unmodifyable parameter(s): %s" %
                                   (idx, utils.CommaJoin(unsupported)),
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      if self.op.iallocator:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []

    self.needed_locks[locking.LEVEL_NODE_RES] = []

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.iallocator is not None
      assert not self.op.nodes
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock the primary group used by the instance optimistically; this
      # requires going via the node before it's locked, requiring
      # verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)

    elif level == locking.LEVEL_NODE:
      # If an allocator is used, then we lock all the nodes in the current
      # instance group, as we don't know yet which ones will be selected;
      # if we replace the nodes without using an allocator, locks are
      # already declared in ExpandNames; otherwise, we need to lock all the
      # instance nodes for disk re-creation
      if self.op.iallocator:
        assert not self.op.nodes
        assert not self.needed_locks[locking.LEVEL_NODE]
        assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1

        # Lock member nodes of the group of the primary node
        for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
          self.needed_locks[locking.LEVEL_NODE].extend(
            self.cfg.GetNodeGroup(group_uuid).members)
      elif not self.op.nodes:
        self._LockInstancesNodes(primary_only=False)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    if not self.op.iallocator:
      _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      # Node group locks are acquired only for the primary node (and only
      # when the allocator is used)
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
                               primary_only=True)

    # if we replace nodes *and* the old primary is offline, we don't
    # check the instance state
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                          msg="cannot recreate disks")

    if self.op.disks:
      self.disks = dict(self.op.disks)
    else:
      self.disks = dict((idx, {}) for idx in range(len(instance.disks)))

    maxidx = max(self.disks.keys())
    if maxidx >= len(instance.disks):
      raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
                                 errors.ECODE_INVAL)

    if ((self.op.nodes or self.op.iallocator) and
        sorted(self.disks.keys()) != range(len(instance.disks))):
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)

    self.instance = instance

    if self.op.iallocator:
      self._RunAllocator()
      # Release unneeded node and node resource locks
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
      _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    instance = self.instance

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    to_skip = []
    mods = [] # keeps track of needed changes

    for idx, disk in enumerate(instance.disks):
      try:
        changes = self.disks[idx]
      except KeyError:
        # Disk should not be recreated
        to_skip.append(idx)
        continue

      # update secondaries for disks, if needed
      if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
        # need to update the nodes and minors
        assert len(self.op.nodes) == 2
        assert len(disk.logical_id) == 6 # otherwise disk internals
                                         # have changed
        (_, _, old_port, _, _, old_secret) = disk.logical_id
        new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
        new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                  new_minors[0], new_minors[1], old_secret)
        assert len(disk.logical_id) == len(new_id)
      else:
        new_id = None

      mods.append((idx, new_id, changes))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id, changes in mods:
      disk = instance.disks[idx]
      if new_id is not None:
        assert disk.dev_type == constants.LD_DRBD8
        disk.logical_id = new_id
      if changes:
        disk.Update(size=changes.get(constants.IDISK_SIZE, None),
                    mode=changes.get(constants.IDISK_MODE, None))

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=to_skip)
# Logical unit implementing instance rename. Runs as a hooked operation
# (HPATH/HTYPE below) on the master plus the instance's nodes; the instance
# must be stopped (see CheckPrereq).
7343 class LUInstanceRename(LogicalUnit):
7344 """Rename an instance.
7347 HPATH = "instance-rename"
7348 HTYPE = constants.HTYPE_INSTANCE
# Validate opcode arguments: the optional IP-in-use check relies on the
# DNS resolution performed by the name check, so it cannot run alone.
7350 def CheckArguments(self):
7354 if self.op.ip_check and not self.op.name_check:
7355 # TODO: make the ip check more flexible and not depend on the name check
7356 raise errors.OpPrereqError("IP address check requires a name check",
# Build the hook environment; exposes the target name as INSTANCE_NEW_NAME
# on top of the standard per-instance variables.
7359 def BuildHooksEnv(self):
7362 This runs on master, primary and secondary nodes of the instance.
7365 env = _BuildInstanceHookEnvByObject(self, self.instance)
7366 env["INSTANCE_NEW_NAME"] = self.op.new_name
# Hooks run on the master node and on all of the instance's nodes.
7369 def BuildHooksNodes(self):
7370 """Build hooks nodes.
7373 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
# Prerequisites: instance exists, its primary node is online, and the
# instance is not running; optionally resolve/validate the new name via
# DNS and check the resolved IP is not already in use (TCP ping on the
# noded port), then ensure the new name is not already taken.
7376 def CheckPrereq(self):
7377 """Check prerequisites.
7379 This checks that the instance is in the cluster and is not running.
7382 self.op.instance_name = _ExpandInstanceName(self.cfg,
7383 self.op.instance_name)
7384 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7385 assert instance is not None
7386 _CheckNodeOnline(self, instance.primary_node)
7387 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7388 msg="cannot rename")
7389 self.instance = instance
7391 new_name = self.op.new_name
7392 if self.op.name_check:
7393 hostname = netutils.GetHostname(name=new_name)
7394 if hostname.name != new_name:
7395 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
# Reject resolutions that don't match the requested name at all (e.g.
# a wildcard/search-domain artifact).
7397 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7398 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7399 " same as given hostname '%s'") %
7400 (hostname.name, self.op.new_name),
7402 new_name = self.op.new_name = hostname.name
7403 if (self.op.ip_check and
7404 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7405 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7406 (hostname.ip, new_name),
7407 errors.ECODE_NOTUNIQUE)
7409 instance_list = self.cfg.GetInstanceList()
7410 if new_name in instance_list and new_name != instance.name:
7411 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7412 new_name, errors.ECODE_EXISTS)
# Perform the rename: update the cluster configuration, swap the instance
# lock, optionally rename the file-storage directory on the primary node,
# then run the OS rename script with the disks activated. Failures after
# the config rename are reported as warnings, not rolled back.
7414 def Exec(self, feedback_fn):
7415 """Rename the instance.
7418 inst = self.instance
7419 old_name = inst.name
7421 rename_file_storage = False
# File-based disk templates keep their data under a per-instance
# directory, which must be renamed along with the instance.
7422 if (inst.disk_template in constants.DTS_FILEBASED and
7423 self.op.new_name != inst.name):
7424 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7425 rename_file_storage = True
7427 self.cfg.RenameInstance(inst.name, self.op.new_name)
7428 # Change the instance lock. This is definitely safe while we hold the BGL.
7429 # Otherwise the new lock would have to be added in acquired mode.
7431 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7432 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7434 # re-read the instance from the configuration after rename
7435 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7437 if rename_file_storage:
7438 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7439 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7440 old_file_storage_dir,
7441 new_file_storage_dir)
7442 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7443 " (but the instance has been renamed in Ganeti)" %
7444 (inst.primary_node, old_file_storage_dir,
7445 new_file_storage_dir))
# Disks must be active for the OS rename script to run on the primary.
7447 _StartInstanceDisks(self, inst, None)
7449 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7450 old_name, self.op.debug_level)
7451 msg = result.fail_msg
# NOTE(review): the "if msg:" guard and the try/finally scaffolding
# around the script call appear elided in this listing — a script
# failure is only warned about, the rename itself stands.
7453 msg = ("Could not run OS rename script for instance %s on node %s"
7454 " (but the instance has been renamed in Ganeti): %s" %
7455 (inst.name, inst.primary_node, msg))
7456 self.proc.LogWarning(msg)
# Disks are always shut down again after the rename attempt.
7458 _ShutdownInstanceDisks(self, inst)
# Logical unit implementing instance removal: shut the instance down on its
# primary node, then delegate disk and configuration removal to the
# module-level _RemoveInstance() helper.
7463 class LUInstanceRemove(LogicalUnit):
7464 """Remove an instance.
7467 HPATH = "instance-remove"
7468 HTYPE = constants.HTYPE_INSTANCE
# Lock the instance immediately; node and node-resource locks are computed
# later (LOCKS_REPLACE) once the instance's nodes are known.
7471 def ExpandNames(self):
7472 self._ExpandAndLockInstance()
7473 self.needed_locks[locking.LEVEL_NODE] = []
7474 self.needed_locks[locking.LEVEL_NODE_RES] = []
7475 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7477 def DeclareLocks(self, level):
7478 if level == locking.LEVEL_NODE:
7479 self._LockInstancesNodes()
7480 elif level == locking.LEVEL_NODE_RES:
# Node-resource locks mirror the node locks computed above (copy, not
# alias, hence the slice).
7482 self.needed_locks[locking.LEVEL_NODE_RES] = \
7483 self.needed_locks[locking.LEVEL_NODE][:]
# Hook environment: standard instance variables plus the shutdown timeout.
7485 def BuildHooksEnv(self):
7488 This runs on master, primary and secondary nodes of the instance.
7491 env = _BuildInstanceHookEnvByObject(self, self.instance)
7492 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
# Pre-hooks run only on the master; post-hooks also on the (now former)
# instance nodes.
7495 def BuildHooksNodes(self):
7496 """Build hooks nodes.
7499 nl = [self.cfg.GetMasterNode()]
7500 nl_post = list(self.instance.all_nodes) + nl
7501 return (nl, nl_post)
7503 def CheckPrereq(self):
7504 """Check prerequisites.
7506 This checks that the instance is in the cluster.
7509 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7510 assert self.instance is not None, \
7511 "Cannot retrieve locked instance %s" % self.op.instance_name
# Shut the instance down (honoring ignore_failures), verify lock
# invariants, then remove disks and config entry via _RemoveInstance().
7513 def Exec(self, feedback_fn):
7514 """Remove the instance.
7517 instance = self.instance
7518 logging.info("Shutting down instance %s on node %s",
7519 instance.name, instance.primary_node)
7521 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7522 self.op.shutdown_timeout)
7523 msg = result.fail_msg
# A failed shutdown is fatal unless the caller asked to ignore failures,
# in which case it is downgraded to a warning.
7525 if self.op.ignore_failures:
7526 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7528 raise errors.OpExecError("Could not shutdown instance %s on"
7530 (instance.name, instance.primary_node, msg))
# Sanity: we must hold matching node/node-res locks covering all of the
# instance's nodes before touching its disks.
7532 assert (self.owned_locks(locking.LEVEL_NODE) ==
7533 self.owned_locks(locking.LEVEL_NODE_RES))
7534 assert not (set(instance.all_nodes) -
7535 self.owned_locks(locking.LEVEL_NODE)), \
7536 "Not owning correct locks"
7538 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
# Shared helper: remove an instance's block devices and its configuration
# entry, then schedule removal of the instance lock. Used by instance
# removal (and other LUs that must tear an instance down).
7541 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7542 """Utility function to remove an instance.
7545 logging.info("Removing block devices for instance %s", instance.name)
# Disk removal failure is fatal unless ignore_failures is set, in which
# case it is reported and removal proceeds.
7547 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7548 if not ignore_failures:
7549 raise errors.OpExecError("Can't remove instance's disks")
7550 feedback_fn("Warning: can't remove instance's disks")
7552 logging.info("Removing instance %s out of cluster config", instance.name)
7554 lu.cfg.RemoveInstance(instance.name)
# Only one instance-lock removal may be pending per LU.
7556 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7557 "Instance lock removal conflict"
7559 # Remove lock for the instance
7560 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
# Thin LU wrapper around the generic _InstanceQuery machinery: every stage
# (lock expansion/declaration and execution) is delegated to self.iq.
7563 class LUInstanceQuery(NoHooksLU):
7564 """Logical unit for querying instances.
7567 # pylint: disable=W0142
# Build the query helper from the opcode's name filter, requested output
# fields and locking mode.
7570 def CheckArguments(self):
7571 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7572 self.op.output_fields, self.op.use_locking)
7574 def ExpandNames(self):
7575 self.iq.ExpandNames(self)
7577 def DeclareLocks(self, level):
7578 self.iq.DeclareLocks(self, level)
# Returns results in the legacy (pre-query2) list-of-rows format.
7580 def Exec(self, feedback_fn):
7581 return self.iq.OldStyleQuery(self)
# Logical unit implementing instance failover (shutdown on the primary,
# restart on the other node). The actual work is done by a
# TLMigrateInstance tasklet created in ExpandNames.
7584 class LUInstanceFailover(LogicalUnit):
7585 """Failover an instance.
7588 HPATH = "instance-failover"
7589 HTYPE = constants.HTYPE_INSTANCE
# Copy the optional placement arguments onto the LU for later use.
7592 def CheckArguments(self):
7593 """Check the arguments.
7596 self.iallocator = getattr(self.op, "iallocator", None)
7597 self.target_node = getattr(self.op, "target_node", None)
7599 def ExpandNames(self):
7600 self._ExpandAndLockInstance()
7602 if self.op.target_node is not None:
7603 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
# Node and node-resource locks are recalculated once the instance's nodes
# are known (see DeclareLocks).
7605 self.needed_locks[locking.LEVEL_NODE] = []
7606 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7608 self.needed_locks[locking.LEVEL_NODE_RES] = []
7609 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7611 ignore_consistency = self.op.ignore_consistency
7612 shutdown_timeout = self.op.shutdown_timeout
# NOTE(review): some constructor arguments (presumably cleanup/failover
# flags) are elided in this listing — confirm against the full source.
7613 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7616 ignore_consistency=ignore_consistency,
7617 shutdown_timeout=shutdown_timeout,
7618 ignore_ipolicy=self.op.ignore_ipolicy)
7619 self.tasklets = [self._migrater]
7621 def DeclareLocks(self, level):
7622 if level == locking.LEVEL_NODE:
7623 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
# Externally-mirrored templates can fail over to an arbitrary node, so
# either lock everything (iallocator case) or just primary + target.
7624 if instance.disk_template in constants.DTS_EXT_MIRROR:
7625 if self.op.target_node is None:
7626 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7628 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7629 self.op.target_node]
7630 del self.recalculate_locks[locking.LEVEL_NODE]
# Internally-mirrored templates can only fail over to the secondary, so
# the instance's own nodes suffice.
7632 self._LockInstancesNodes()
7633 elif level == locking.LEVEL_NODE_RES:
# Node-resource locks mirror the node locks (copy, not alias).
7635 self.needed_locks[locking.LEVEL_NODE_RES] = \
7636 self.needed_locks[locking.LEVEL_NODE][:]
# Hook environment: failover-specific variables plus the standard
# per-instance set; OLD/NEW_SECONDARY are only meaningful for
# internally-mirrored (DRBD-style) templates.
7638 def BuildHooksEnv(self):
7641 This runs on master, primary and secondary nodes of the instance.
7644 instance = self._migrater.instance
7645 source_node = instance.primary_node
7646 target_node = self.op.target_node
7648 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7649 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7650 "OLD_PRIMARY": source_node,
7651 "NEW_PRIMARY": target_node,
7654 if instance.disk_template in constants.DTS_INT_MIRROR:
7655 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7656 env["NEW_SECONDARY"] = source_node
7658 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7660 env.update(_BuildInstanceHookEnvByObject(self, instance))
# Pre-hooks: master + secondaries; post-hooks additionally the old primary.
7664 def BuildHooksNodes(self):
7665 """Build hooks nodes.
7668 instance = self._migrater.instance
7669 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7670 return (nl, nl + [instance.primary_node])
# Logical unit implementing live/non-live instance migration (no shutdown,
# unlike failover). Work is delegated to a TLMigrateInstance tasklet.
7673 class LUInstanceMigrate(LogicalUnit):
7674 """Migrate an instance.
7676 This is migration without shutting down, compared to the failover,
7677 which is done with shutdown.
7680 HPATH = "instance-migrate"
7681 HTYPE = constants.HTYPE_INSTANCE
7684 def ExpandNames(self):
7685 self._ExpandAndLockInstance()
7687 if self.op.target_node is not None:
7688 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
# Node locks are recalculated in DeclareLocks once the instance's nodes
# are known.
7690 self.needed_locks[locking.LEVEL_NODE] = []
7691 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7693 self.needed_locks[locking.LEVEL_NODE] = []
7694 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
# NOTE(review): the assignment target ("self._migrater =") for this
# constructor call appears elided in this listing, as are some arguments.
7697 TLMigrateInstance(self, self.op.instance_name,
7698 cleanup=self.op.cleanup,
7700 fallback=self.op.allow_failover,
7701 allow_runtime_changes=self.op.allow_runtime_changes,
7702 ignore_ipolicy=self.op.ignore_ipolicy)
7703 self.tasklets = [self._migrater]
7705 def DeclareLocks(self, level):
7706 if level == locking.LEVEL_NODE:
7707 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
# Ext-mirror templates may migrate anywhere: lock all nodes when an
# iallocator picks the target, otherwise primary + explicit target.
7708 if instance.disk_template in constants.DTS_EXT_MIRROR:
7709 if self.op.target_node is None:
7710 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7712 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7713 self.op.target_node]
7714 del self.recalculate_locks[locking.LEVEL_NODE]
# Int-mirror templates migrate to their secondary: lock instance nodes.
7716 self._LockInstancesNodes()
7717 elif level == locking.LEVEL_NODE_RES:
# Node-resource locks mirror the node locks (copy, not alias).
7719 self.needed_locks[locking.LEVEL_NODE_RES] = \
7720 self.needed_locks[locking.LEVEL_NODE][:]
# Hook environment: migration-specific variables (live mode, cleanup,
# old/new primaries) on top of the standard per-instance set.
7722 def BuildHooksEnv(self):
7725 This runs on master, primary and secondary nodes of the instance.
7728 instance = self._migrater.instance
7729 source_node = instance.primary_node
7730 target_node = self.op.target_node
7731 env = _BuildInstanceHookEnvByObject(self, instance)
7733 "MIGRATE_LIVE": self._migrater.live,
7734 "MIGRATE_CLEANUP": self.op.cleanup,
7735 "OLD_PRIMARY": source_node,
7736 "NEW_PRIMARY": target_node,
7737 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7740 if instance.disk_template in constants.DTS_INT_MIRROR:
7741 env["OLD_SECONDARY"] = target_node
7742 env["NEW_SECONDARY"] = source_node
7744 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
# Pre-hooks: master + secondaries; post-hooks additionally the old primary.
7748 def BuildHooksNodes(self):
7749 """Build hooks nodes.
7752 instance = self._migrater.instance
7753 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7754 return (nl, nl + [instance.primary_node])
# Logical unit implementing instance move: shut the instance down, create
# fresh disks on the target node, copy the data over the network
# (blockdev export), switch the primary node, remove the old disks and
# restart the instance if it was marked up. Only plain LV/file disks can
# be moved (see CheckPrereq).
7757 class LUInstanceMove(LogicalUnit):
7758 """Move an instance by data-copying.
7761 HPATH = "instance-move"
7762 HTYPE = constants.HTYPE_INSTANCE
7765 def ExpandNames(self):
7766 self._ExpandAndLockInstance()
7767 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7768 self.op.target_node = target_node
# Lock the target node now; the primary node is appended later
# (LOCKS_APPEND + DeclareLocks).
7769 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7770 self.needed_locks[locking.LEVEL_NODE_RES] = []
7771 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7773 def DeclareLocks(self, level):
7774 if level == locking.LEVEL_NODE:
# Only the primary is needed in addition to the target locked above.
7775 self._LockInstancesNodes(primary_only=True)
7776 elif level == locking.LEVEL_NODE_RES:
# Node-resource locks mirror the node locks (copy, not alias).
7778 self.needed_locks[locking.LEVEL_NODE_RES] = \
7779 self.needed_locks[locking.LEVEL_NODE][:]
# Hook environment: target node and shutdown timeout plus the standard
# per-instance variables.
7781 def BuildHooksEnv(self):
7784 This runs on master, primary and secondary nodes of the instance.
7788 "TARGET_NODE": self.op.target_node,
7789 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7791 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
# Hooks run on master, the current primary and the target node.
7794 def BuildHooksNodes(self):
7795 """Build hooks nodes.
7799 self.cfg.GetMasterNode(),
7800 self.instance.primary_node,
7801 self.op.target_node,
# Prerequisites: target differs from primary, all disks are simple
# (LV/file), target node is online/undrained/vm-capable, the move fits the
# target group's instance policy, and (if the instance is up) the target
# has enough free memory; finally the required bridges must exist.
7805 def CheckPrereq(self):
7806 """Check prerequisites.
7808 This checks that the instance is in the cluster.
7811 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7812 assert self.instance is not None, \
7813 "Cannot retrieve locked instance %s" % self.op.instance_name
7815 node = self.cfg.GetNodeInfo(self.op.target_node)
7816 assert node is not None, \
7817 "Cannot retrieve locked node %s" % self.op.target_node
7819 self.target_node = target_node = node.name
7821 if target_node == instance.primary_node:
7822 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7823 (instance.name, target_node),
7826 bep = self.cfg.GetClusterInfo().FillBE(instance)
# Data copy only works for non-mirrored, single-device layouts.
7828 for idx, dsk in enumerate(instance.disks):
7829 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7830 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7831 " cannot copy" % idx, errors.ECODE_STATE)
7833 _CheckNodeOnline(self, target_node)
7834 _CheckNodeNotDrained(self, target_node)
7835 _CheckNodeVmCapable(self, target_node)
7836 cluster = self.cfg.GetClusterInfo()
7837 group_info = self.cfg.GetNodeGroup(node.group)
7838 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
7839 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7840 ignore=self.op.ignore_ipolicy)
7842 if instance.admin_state == constants.ADMINST_UP:
7843 # check memory requirements on the secondary node
7844 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7845 instance.name, bep[constants.BE_MAXMEM],
7846 instance.hypervisor)
7848 self.LogInfo("Not checking memory on the secondary node as"
7849 " instance will not be started")
7851 # check bridge existance
7852 _CheckInstanceBridgesExist(self, instance, node=target_node)
# The move itself. On copy errors the freshly created target disks are
# removed again (best effort) before aborting, so no half-moved instance
# is left behind.
7854 def Exec(self, feedback_fn):
7855 """Move an instance.
7857 The move is done by shutting it down on its present node, copying
7858 the data over (slow) and starting it on the new node.
7861 instance = self.instance
7863 source_node = instance.primary_node
7864 target_node = self.target_node
7866 self.LogInfo("Shutting down instance %s on source node %s",
7867 instance.name, source_node)
# Sanity: node and node-resource locks must cover the same node set.
7869 assert (self.owned_locks(locking.LEVEL_NODE) ==
7870 self.owned_locks(locking.LEVEL_NODE_RES))
7872 result = self.rpc.call_instance_shutdown(source_node, instance,
7873 self.op.shutdown_timeout)
7874 msg = result.fail_msg
# Shutdown failure is only tolerated with ignore_consistency; the
# operator is then responsible for making sure the source is down.
7876 if self.op.ignore_consistency:
7877 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7878 " Proceeding anyway. Please make sure node"
7879 " %s is down. Error details: %s",
7880 instance.name, source_node, source_node, msg)
7882 raise errors.OpExecError("Could not shutdown instance %s on"
7884 (instance.name, source_node, msg))
7886 # create the target disks
7888 _CreateDisks(self, instance, target_node=target_node)
7889 except errors.OpExecError:
# Roll back: drop whatever was created on the target and free any
# reserved DRBD minors before re-raising.
7890 self.LogWarning("Device creation failed, reverting...")
7892 _RemoveDisks(self, instance, target_node=target_node)
7894 self.cfg.ReleaseDRBDMinors(instance.name)
7897 cluster_name = self.cfg.GetClusterInfo().cluster_name
7900 # activate, get path, copy the data over
7901 for idx, disk in enumerate(instance.disks):
7902 self.LogInfo("Copying data for disk %d", idx)
7903 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7904 instance.name, True, idx)
# Per-disk failures are collected in errs and handled after the loop.
7906 self.LogWarning("Can't assemble newly created disk %d: %s",
7907 idx, result.fail_msg)
7908 errs.append(result.fail_msg)
7910 dev_path = result.payload
7911 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7912 target_node, dev_path,
7915 self.LogWarning("Can't copy data over for disk %d: %s",
7916 idx, result.fail_msg)
7917 errs.append(result.fail_msg)
7921 self.LogWarning("Some disks failed to copy, aborting")
7923 _RemoveDisks(self, instance, target_node=target_node)
7925 self.cfg.ReleaseDRBDMinors(instance.name)
7926 raise errors.OpExecError("Errors during disk copy: %s" %
# From here on the move is committed: switch the primary node in the
# config, then drop the now-stale source disks.
7929 instance.primary_node = target_node
7930 self.cfg.Update(instance, feedback_fn)
7932 self.LogInfo("Removing the disks on the original node")
7933 _RemoveDisks(self, instance, target_node=source_node)
7935 # Only start the instance if it's marked as up
7936 if instance.admin_state == constants.ADMINST_UP:
7937 self.LogInfo("Starting instance %s on node %s",
7938 instance.name, target_node)
7940 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7941 ignore_secondaries=True)
# If disks or the start itself fail, shut the disks down again before
# raising so nothing is left half-activated.
7943 _ShutdownInstanceDisks(self, instance)
7944 raise errors.OpExecError("Can't activate the instance's disks")
7946 result = self.rpc.call_instance_start(target_node,
7947 (instance, None, None), False)
7948 msg = result.fail_msg
7950 _ShutdownInstanceDisks(self, instance)
7951 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7952 (instance.name, target_node, msg))
# Logical unit migrating all primary instances off one node: it does not
# migrate anything itself, but submits one OpInstanceMigrate job per
# primary instance and returns them via ResultWithJobs.
7955 class LUNodeMigrate(LogicalUnit):
7956 """Migrate all instances from a node.
7959 HPATH = "node-migrate"
7960 HTYPE = constants.HTYPE_NODE
7963 def CheckArguments(self):
# Shared locks suffice: this LU only reads config and submits jobs.
7966 def ExpandNames(self):
7967 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7969 self.share_locks = _ShareAll()
7970 self.needed_locks = {
7971 locking.LEVEL_NODE: [self.op.node_name],
# Hook environment: the evacuated node and the runtime-changes flag.
7974 def BuildHooksEnv(self):
7977 This runs on the master, the primary and all the secondaries.
7981 "NODE_NAME": self.op.node_name,
7982 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7985 def BuildHooksNodes(self):
7986 """Build hooks nodes.
7989 nl = [self.cfg.GetMasterNode()]
7992 def CheckPrereq(self):
# Build one migration job per primary instance on the node; the actual
# placement is resolved by each job (iallocator/target_node passthrough).
7995 def Exec(self, feedback_fn):
7996 # Prepare jobs for migration instances
7997 allow_runtime_changes = self.op.allow_runtime_changes
7999 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8002 iallocator=self.op.iallocator,
8003 target_node=self.op.target_node,
8004 allow_runtime_changes=allow_runtime_changes,
8005 ignore_ipolicy=self.op.ignore_ipolicy)]
8006 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
8009 # TODO: Run iallocator in this opcode and pass correct placement options to
8010 # OpInstanceMigrate. Since other jobs can modify the cluster between
8011 # running the iallocator and the actual migration, a good consistency model
8012 # will have to be found.
# Sanity: only the evacuated node itself should be locked here.
8014 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8015 frozenset([self.op.node_name]))
8017 return ResultWithJobs(jobs)
8020 class TLMigrateInstance(Tasklet):
8021 """Tasklet class for instance migration.
8024 @ivar live: whether the migration will be done live or non-live;
8025 this variable is initalized only after CheckPrereq has run
8026 @type cleanup: boolean
8027 @ivar cleanup: Wheater we cleanup from a failed migration
8028 @type iallocator: string
8029 @ivar iallocator: The iallocator used to determine target_node
8030 @type target_node: string
8031 @ivar target_node: If given, the target_node to reallocate the instance to
8032 @type failover: boolean
8033 @ivar failover: Whether operation results in failover or migration
8034 @type fallback: boolean
8035 @ivar fallback: Whether fallback to failover is allowed if migration not
8037 @type ignore_consistency: boolean
8038 @ivar ignore_consistency: Wheter we should ignore consistency between source
8040 @type shutdown_timeout: int
8041 @ivar shutdown_timeout: In case of failover timeout of the shutdown
8042 @type ignore_ipolicy: bool
8043 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8048 _MIGRATION_POLL_INTERVAL = 1 # seconds
8049 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
# Store the migration/failover parameters on the tasklet; self.live is a
# placeholder here and is computed in CheckPrereq from the live/mode
# opcode parameters or the hypervisor default.
8051 def __init__(self, lu, instance_name, cleanup=False,
8052 failover=False, fallback=False,
8053 ignore_consistency=False,
8054 allow_runtime_changes=True,
8055 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
8056 ignore_ipolicy=False):
8057 """Initializes this class.
8060 Tasklet.__init__(self, lu)
8063 self.instance_name = instance_name
8064 self.cleanup = cleanup
8065 self.live = False # will be overridden later
8066 self.failover = failover
8067 self.fallback = fallback
8068 self.ignore_consistency = ignore_consistency
8069 self.shutdown_timeout = shutdown_timeout
8070 self.ignore_ipolicy = ignore_ipolicy
8071 self.allow_runtime_changes = allow_runtime_changes
# Prerequisite checks for migration/failover. Determines the target node
# (explicitly, via iallocator, or the DRBD secondary), validates it against
# the instance policy and free memory, may downgrade a migration to a
# failover (fallback, ALWAYS_FAILOVER, non-migratable instance), and
# finally resolves the live/non-live mode.
8073 def CheckPrereq(self):
8074 """Check prerequisites.
8076 This checks that the instance is in the cluster.
8079 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8080 instance = self.cfg.GetInstanceInfo(instance_name)
8081 assert instance is not None
8082 self.instance = instance
8083 cluster = self.cfg.GetClusterInfo()
# A down/offline instance cannot be live-migrated; switch to failover if
# the caller allowed falling back.
8085 if (not self.cleanup and
8086 not instance.admin_state == constants.ADMINST_UP and
8087 not self.failover and self.fallback):
8088 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8089 " switching to failover")
8090 self.failover = True
# Only mirrored disk templates can be migrated or failed over at all.
8092 if instance.disk_template not in constants.DTS_MIRRORED:
8097 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8098 " %s" % (instance.disk_template, text),
8101 if instance.disk_template in constants.DTS_EXT_MIRROR:
# Ext-mirror: target chosen by iallocator or given explicitly
# (exactly one of the two must be set).
8102 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8104 if self.lu.op.iallocator:
8105 self._RunAllocator()
# We set set self.target_node as it is required by
8107 # _CheckTargetNodeIPolicy (comment continues in elided lines)
8109 self.target_node = self.lu.op.target_node
8111 # Check that the target node is correct in terms of instance policy
8112 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8113 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8114 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8116 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8117 ignore=self.ignore_ipolicy)
8119 # self.target_node is already populated, either directly or by the
8121 target_node = self.target_node
8122 if self.target_node == instance.primary_node:
8123 raise errors.OpPrereqError("Cannot migrate instance %s"
8124 " to its primary (%s)" %
8125 (instance.name, instance.primary_node),
# With the target known, narrow the node locks down to the two nodes
# actually involved — but only when no other tasklet shares our locks.
8128 if len(self.lu.tasklets) == 1:
8129 # It is safe to release locks only when we're the only tasklet
8131 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8132 keep=[instance.primary_node, self.target_node])
# Int-mirror (DRBD): the target is fixed — it must be the secondary;
# explicit targets/iallocator are rejected.
8135 secondary_nodes = instance.secondary_nodes
8136 if not secondary_nodes:
8137 raise errors.ConfigurationError("No secondary node but using"
8138 " %s disk template" %
8139 instance.disk_template)
8140 target_node = secondary_nodes[0]
8141 if self.lu.op.iallocator or (self.lu.op.target_node and
8142 self.lu.op.target_node != target_node):
8144 text = "failed over"
8147 raise errors.OpPrereqError("Instances with disk template %s cannot"
8148 " be %s to arbitrary nodes"
8149 " (neither an iallocator nor a target"
8150 " node can be passed)" %
8151 (instance.disk_template, text),
8153 nodeinfo = self.cfg.GetNodeInfo(target_node)
8154 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8155 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8157 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8158 ignore=self.ignore_ipolicy)
8160 i_be = cluster.FillBE(instance)
8162 # check memory requirements on the secondary node
# Memory is only required on the target if the instance will actually be
# running there (not a failover of a down instance, not a cleanup).
8163 if (not self.cleanup and
8164 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8165 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8166 "migrating instance %s" %
8168 i_be[constants.BE_MINMEM],
8169 instance.hypervisor)
8171 self.lu.LogInfo("Not checking memory on the secondary node as"
8172 " instance will not be started")
8174 # check if failover must be forced instead of migration
8175 if (not self.cleanup and not self.failover and
8176 i_be[constants.BE_ALWAYS_FAILOVER]):
8177 self.lu.LogInfo("Instance configured to always failover; fallback"
8179 self.failover = True
8181 # check bridge existance
8182 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8184 if not self.cleanup:
8185 _CheckNodeNotDrained(self.lu, target_node)
8186 if not self.failover:
# Ask the source hypervisor whether the instance can be migrated; if
# not and fallback is allowed, downgrade to failover.
8187 result = self.rpc.call_instance_migratable(instance.primary_node,
8189 if result.fail_msg and self.fallback:
8190 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8192 self.failover = True
8194 result.Raise("Can't migrate, please use failover",
8195 prereq=True, ecode=errors.ECODE_STATE)
# Failover and cleanup are mutually exclusive by construction.
8197 assert not (self.failover and self.cleanup)
# Resolve live vs. non-live: 'live' and 'mode' are mutually exclusive;
# 'live' is translated into a mode, and an unset mode falls back to the
# hypervisor's HV_MIGRATION_MODE default.
8199 if not self.failover:
8200 if self.lu.op.live is not None and self.lu.op.mode is not None:
8201 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8202 " parameters are accepted",
8204 if self.lu.op.live is not None:
8206 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8208 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8209 # reset the 'live' parameter to None so that repeated
8210 # invocations of CheckPrereq do not raise an exception
8211 self.lu.op.live = None
8212 elif self.lu.op.mode is None:
8213 # read the default value from the hypervisor
8214 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8215 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8217 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8219 # Failover is never live
# Record current memory usage of a running instance (used later to size
# the target); only done for real migrations, not failover/cleanup.
8222 if not (self.failover or self.cleanup):
8223 remote_info = self.rpc.call_instance_info(instance.primary_node,
8225 instance.hypervisor)
8226 remote_info.Raise("Error checking instance on node %s" %
8227 instance.primary_node)
8228 instance_running = bool(remote_info.payload)
8229 if instance_running:
8230 self.current_mem = int(remote_info.payload["memory"])
# Ask the configured iallocator for a relocation target; on success the
# first returned node becomes self.target_node, on failure the operation
# is aborted as a prerequisite error.
8232 def _RunAllocator(self):
8233 """Run the allocator based on input opcode.
8236 # FIXME: add a self.ignore_ipolicy option
8237 req = iallocator.IAReqRelocate(name=self.instance_name,
8238 relocate_from=[self.instance.primary_node])
8239 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8241 ial.Run(self.lu.op.iallocator)
8244 raise errors.OpPrereqError("Can't compute nodes using"
8245 " iallocator '%s': %s" %
8246 (self.lu.op.iallocator, ial.info),
8248 self.target_node = ial.result[0]
8249 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8250 self.instance_name, self.lu.op.iallocator,
8251 utils.CommaJoin(ial.result))
# Poll all involved nodes via the drbd_wait_sync RPC until every node
# reports its disks as fully synchronized, reporting the slowest node's
# progress percentage as feedback.
8253 def _WaitUntilSync(self):
8254 """Poll with custom rpc for disk sync.
8256 This uses our own step-based rpc call.
8259 self.feedback_fn("* wait until resync is done")
8263 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8265 (self.instance.disks,
# Aggregate per-node results: all nodes must be done, and the minimum
# completion percentage drives the progress message.
8268 for node, nres in result.items():
8269 nres.Raise("Cannot resync disks on node %s" % node)
8270 node_done, node_percent = nres.payload
8271 all_done = all_done and node_done
8272 if node_percent is not None:
8273 min_percent = min(min_percent, node_percent)
8275 if min_percent < 100:
8276 self.feedback_fn(" - progress: %.1f%%" % min_percent)
# Demote the given node to DRBD secondary by closing all of the
# instance's block devices there; raises if the node refuses.
8279 def _EnsureSecondary(self, node):
8280 """Demote a node to secondary.
8283 self.feedback_fn("* switching node %s to secondary mode" % node)
# Point each disk object at the node before issuing the RPC.
8285 for dev in self.instance.disks:
8286 self.cfg.SetDiskID(dev, node)
8288 result = self.rpc.call_blockdev_close(node, self.instance.name,
8289 self.instance.disks)
8290 result.Raise("Cannot change disk to secondary on node %s" % node)
# Put the instance's DRBD devices on all involved nodes into standalone
# mode (disconnected from their peers); raises on any node failure.
8292 def _GoStandalone(self):
8293 """Disconnect from the network.
8296 self.feedback_fn("* changing into standalone mode")
8297 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8298 self.instance.disks)
8299 for node, nres in result.items():
8300 nres.Raise("Cannot disconnect disks node %s" % node)
# Reconnect the instance's DRBD devices on all involved nodes, either in
# dual-master mode (multimaster=True, needed during live migration) or in
# normal single-master mode; raises on any node failure.
8302 def _GoReconnect(self, multimaster):
8303 """Reconnect to the network.
8309 msg = "single-master"
8310 self.feedback_fn("* changing disks into %s mode" % msg)
8311 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8312 (self.instance.disks, self.instance),
8313 self.instance.name, multimaster)
8314 for node, nres in result.items():
8315 nres.Raise("Cannot change disks config on node %s" % node)
# Recover from a previously failed migration: find out where the instance
# actually runs, fix the configuration if the migration had in fact
# succeeded, then demote the other node and force the disks back into a
# clean, single-master, connected state.
8317 def _ExecCleanup(self):
8318 """Try to cleanup after a failed migration.
8320 The cleanup is done by:
8321 - check that the instance is running only on one node
8322 (and update the config if needed)
8323 - change disks on its secondary node to secondary
8324 - wait until disks are fully synchronized
8325 - disconnect from the network
8326 - change disks into single-master mode
8327 - wait again until disks are fully synchronized
8330 instance = self.instance
8331 target_node = self.target_node
8332 source_node = self.source_node
8334 # check running on only one node
8335 self.feedback_fn("* checking where the instance actually runs"
8336 " (if this hangs, the hypervisor might be in"
8338 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8339 for node, result in ins_l.items():
8340 result.Raise("Can't contact node %s" % node)
8342 runningon_source = instance.name in ins_l[source_node].payload
8343 runningon_target = instance.name in ins_l[target_node].payload
# Running on both or on neither node cannot be repaired automatically;
# require manual intervention.
8345 if runningon_source and runningon_target:
8346 raise errors.OpExecError("Instance seems to be running on two nodes,"
8347 " or the hypervisor is confused; you will have"
8348 " to ensure manually that it runs only on one"
8349 " and restart this operation")
8351 if not (runningon_source or runningon_target):
8352 raise errors.OpExecError("Instance does not seem to be running at all;"
8353 " in this case it's safer to repair by"
8354 " running 'gnt-instance stop' to ensure disk"
8355 " shutdown, and then restarting it")
8357 if runningon_target:
8358 # the migration has actually succeeded, we need to update the config
8359 self.feedback_fn("* instance running on secondary node (%s),"
8360 " updating config" % target_node)
8361 instance.primary_node = target_node
8362 self.cfg.Update(instance, self.feedback_fn)
8363 demoted_node = source_node
8365 self.feedback_fn("* instance confirmed to be running on its"
8366 " primary node (%s)" % source_node)
8367 demoted_node = target_node
# DRBD-style templates need the loser demoted and the devices cycled
# through standalone back to connected single-master mode; sync errors on
# a standalone device are expected and ignored.
8369 if instance.disk_template in constants.DTS_INT_MIRROR:
8370 self._EnsureSecondary(demoted_node)
8372 self._WaitUntilSync()
8373 except errors.OpExecError:
8374 # we ignore here errors, since if the device is standalone, it
8375 # won't be able to sync
8377 self._GoStandalone()
8378 self._GoReconnect(False)
8379 self._WaitUntilSync()
8381 self.feedback_fn("* done")
# After a failed migration, try to put the disks back into the normal
# state (target demoted, reconnected single-master, synced). Errors are
# only logged — at this point manual recovery may be needed anyway.
8383 def _RevertDiskStatus(self):
8384 """Try to revert the disk status after a failed migration.
8387 target_node = self.target_node
# Ext-mirror templates have no DRBD state to revert (the early-return
# branch for them is partially elided in this listing).
8388 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8392 self._EnsureSecondary(target_node)
8393 self._GoStandalone()
8394 self._GoReconnect(False)
8395 self._WaitUntilSync()
8396 except errors.OpExecError, err:
8397 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8398 " please try to recover the instance manually;"
8399 " error '%s'" % str(err))
8401 def _AbortMigration(self):
8402 """Call the hypervisor code to abort a started migration.
8405 instance = self.instance
8406 target_node = self.target_node
8407 source_node = self.source_node
8408 migration_info = self.migration_info
8410 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8414 abort_msg = abort_result.fail_msg
8416 logging.error("Aborting migration failed on target node %s: %s",
8417 target_node, abort_msg)
8418 # Don't raise an exception here, as we stil have to try to revert the
8419 # disk status, even if this step failed.
8421 abort_result = self.rpc.call_instance_finalize_migration_src(
8422 source_node, instance, False, self.live)
8423 abort_msg = abort_result.fail_msg
8425 logging.error("Aborting migration failed on source node %s: %s",
8426 source_node, abort_msg)
8428 def _ExecMigration(self):
8429 """Migrate an instance.
8431 The migrate is done by:
8432 - change the disks into dual-master mode
8433 - wait until disks are fully synchronized again
8434 - migrate the instance
8435 - change disks on the new secondary node (the old primary) to secondary
8436 - wait until disks are fully synchronized
8437 - change disks into single-master mode
8440 instance = self.instance
8441 target_node = self.target_node
8442 source_node = self.source_node
8444 # Check for hypervisor version mismatch and warn the user.
8445 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8446 None, [self.instance.hypervisor])
8447 for ninfo in nodeinfo.values():
8448 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8450 (_, _, (src_info, )) = nodeinfo[source_node].payload
8451 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8453 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8454 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8455 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8456 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8457 if src_version != dst_version:
8458 self.feedback_fn("* warning: hypervisor version mismatch between"
8459 " source (%s) and target (%s) node" %
8460 (src_version, dst_version))
8462 self.feedback_fn("* checking disk consistency between source and target")
8463 for (idx, dev) in enumerate(instance.disks):
8464 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8465 raise errors.OpExecError("Disk %s is degraded or not fully"
8466 " synchronized on target node,"
8467 " aborting migration" % idx)
8469 if self.current_mem > self.tgt_free_mem:
8470 if not self.allow_runtime_changes:
8471 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8472 " free memory to fit instance %s on target"
8473 " node %s (have %dMB, need %dMB)" %
8474 (instance.name, target_node,
8475 self.tgt_free_mem, self.current_mem))
8476 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8477 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8480 rpcres.Raise("Cannot modify instance runtime memory")
8482 # First get the migration information from the remote node
8483 result = self.rpc.call_migration_info(source_node, instance)
8484 msg = result.fail_msg
8486 log_err = ("Failed fetching source migration information from %s: %s" %
8488 logging.error(log_err)
8489 raise errors.OpExecError(log_err)
8491 self.migration_info = migration_info = result.payload
8493 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8494 # Then switch the disks to master/master mode
8495 self._EnsureSecondary(target_node)
8496 self._GoStandalone()
8497 self._GoReconnect(True)
8498 self._WaitUntilSync()
8500 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8501 result = self.rpc.call_accept_instance(target_node,
8504 self.nodes_ip[target_node])
8506 msg = result.fail_msg
8508 logging.error("Instance pre-migration failed, trying to revert"
8509 " disk status: %s", msg)
8510 self.feedback_fn("Pre-migration failed, aborting")
8511 self._AbortMigration()
8512 self._RevertDiskStatus()
8513 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8514 (instance.name, msg))
8516 self.feedback_fn("* migrating instance to %s" % target_node)
8517 result = self.rpc.call_instance_migrate(source_node, instance,
8518 self.nodes_ip[target_node],
8520 msg = result.fail_msg
8522 logging.error("Instance migration failed, trying to revert"
8523 " disk status: %s", msg)
8524 self.feedback_fn("Migration failed, aborting")
8525 self._AbortMigration()
8526 self._RevertDiskStatus()
8527 raise errors.OpExecError("Could not migrate instance %s: %s" %
8528 (instance.name, msg))
8530 self.feedback_fn("* starting memory transfer")
8531 last_feedback = time.time()
8533 result = self.rpc.call_instance_get_migration_status(source_node,
8535 msg = result.fail_msg
8536 ms = result.payload # MigrationStatus instance
8537 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8538 logging.error("Instance migration failed, trying to revert"
8539 " disk status: %s", msg)
8540 self.feedback_fn("Migration failed, aborting")
8541 self._AbortMigration()
8542 self._RevertDiskStatus()
8543 raise errors.OpExecError("Could not migrate instance %s: %s" %
8544 (instance.name, msg))
8546 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8547 self.feedback_fn("* memory transfer complete")
8550 if (utils.TimeoutExpired(last_feedback,
8551 self._MIGRATION_FEEDBACK_INTERVAL) and
8552 ms.transferred_ram is not None):
8553 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8554 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8555 last_feedback = time.time()
8557 time.sleep(self._MIGRATION_POLL_INTERVAL)
8559 result = self.rpc.call_instance_finalize_migration_src(source_node,
8563 msg = result.fail_msg
8565 logging.error("Instance migration succeeded, but finalization failed"
8566 " on the source node: %s", msg)
8567 raise errors.OpExecError("Could not finalize instance migration: %s" %
8570 instance.primary_node = target_node
8572 # distribute new instance config to the other nodes
8573 self.cfg.Update(instance, self.feedback_fn)
8575 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8579 msg = result.fail_msg
8581 logging.error("Instance migration succeeded, but finalization failed"
8582 " on the target node: %s", msg)
8583 raise errors.OpExecError("Could not finalize instance migration: %s" %
8586 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8587 self._EnsureSecondary(source_node)
8588 self._WaitUntilSync()
8589 self._GoStandalone()
8590 self._GoReconnect(False)
8591 self._WaitUntilSync()
8593 # If the instance's disk template is `rbd' and there was a successful
8594 # migration, unmap the device from the source node.
8595 if self.instance.disk_template == constants.DT_RBD:
8596 disks = _ExpandCheckDisks(instance, instance.disks)
8597 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8599 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8600 msg = result.fail_msg
8602 logging.error("Migration was successful, but couldn't unmap the"
8603 " block device %s on source node %s: %s",
8604 disk.iv_name, source_node, msg)
8605 logging.error("You need to unmap the device %s manually on %s",
8606 disk.iv_name, source_node)
8608 self.feedback_fn("* done")
8610 def _ExecFailover(self):
8611 """Failover an instance.
8613 The failover is done by shutting it down on its present node and
8614 starting it on the secondary.
8617 instance = self.instance
8618 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8620 source_node = instance.primary_node
8621 target_node = self.target_node
8623 if instance.admin_state == constants.ADMINST_UP:
8624 self.feedback_fn("* checking disk consistency between source and target")
8625 for (idx, dev) in enumerate(instance.disks):
8626 # for drbd, these are drbd over lvm
8627 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8629 if primary_node.offline:
8630 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8632 (primary_node.name, idx, target_node))
8633 elif not self.ignore_consistency:
8634 raise errors.OpExecError("Disk %s is degraded on target node,"
8635 " aborting failover" % idx)
8637 self.feedback_fn("* not checking disk consistency as instance is not"
8640 self.feedback_fn("* shutting down instance on source node")
8641 logging.info("Shutting down instance %s on node %s",
8642 instance.name, source_node)
8644 result = self.rpc.call_instance_shutdown(source_node, instance,
8645 self.shutdown_timeout)
8646 msg = result.fail_msg
8648 if self.ignore_consistency or primary_node.offline:
8649 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8650 " proceeding anyway; please make sure node"
8651 " %s is down; error details: %s",
8652 instance.name, source_node, source_node, msg)
8654 raise errors.OpExecError("Could not shutdown instance %s on"
8656 (instance.name, source_node, msg))
8658 self.feedback_fn("* deactivating the instance's disks on source node")
8659 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8660 raise errors.OpExecError("Can't shut down the instance's disks")
8662 instance.primary_node = target_node
8663 # distribute new instance config to the other nodes
8664 self.cfg.Update(instance, self.feedback_fn)
8666 # Only start the instance if it's marked as up
8667 if instance.admin_state == constants.ADMINST_UP:
8668 self.feedback_fn("* activating the instance's disks on target node %s" %
8670 logging.info("Starting instance %s on node %s",
8671 instance.name, target_node)
8673 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8674 ignore_secondaries=True)
8676 _ShutdownInstanceDisks(self.lu, instance)
8677 raise errors.OpExecError("Can't activate the instance's disks")
8679 self.feedback_fn("* starting the instance on the target node %s" %
8681 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8683 msg = result.fail_msg
8685 _ShutdownInstanceDisks(self.lu, instance)
8686 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8687 (instance.name, target_node, msg))
8689 def Exec(self, feedback_fn):
8690 """Perform the migration.
8693 self.feedback_fn = feedback_fn
8694 self.source_node = self.instance.primary_node
8696 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8697 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8698 self.target_node = self.instance.secondary_nodes[0]
8699 # Otherwise self.target_node has been populated either
8700 # directly, or through an iallocator.
8702 self.all_nodes = [self.source_node, self.target_node]
8703 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8704 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8707 feedback_fn("Failover instance %s" % self.instance.name)
8708 self._ExecFailover()
8710 feedback_fn("Migrating instance %s" % self.instance.name)
8713 return self._ExecCleanup()
8715 return self._ExecMigration()
def _CreateBlockDev(lu, node, instance, device, force_create, info,
                    force_open):
  """Wrapper around L{_CreateBlockDevInner}.

  This method annotates the root device first.

  """
  (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
  return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
                              force_open)
def _CreateBlockDevInner(lu, node, instance, device, force_create,
                         info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @attention: The device has to be annotated already.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be change to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passes to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDevInner(lu, node, instance, child, force_create,
                           info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passes to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  # remember the unique id the backend assigned, if this is a fresh device
  if device.physical_id is None:
    device.physical_id = result.payload
8804 def _GenerateUniqueNames(lu, exts):
8805 """Generate a suitable LV name.
8807 This will generate a logical volume name for the given instance.
8812 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8813 results.append("%s%s" % (new_id, val))
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  Builds the two LV children (data and metadata) and the DRBD8 device on
  top of them; allocates a port and a shared secret for the DRBD pair.

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]),
                          params={})
  dev_meta = objects.Disk(dev_type=constants.LD_LV,
                          size=constants.DRBD_META_SIZE,
                          logical_id=(vgnames[1], names[1]),
                          params={})
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name, params={})
  return drbd_dev
# Mapping of disk template to the prefix used for generated disk names;
# templates not listed here do not get generated names
_DISK_TEMPLATE_NAME_PREFIX = {
  constants.DT_PLAIN: "",
  constants.DT_RBD: ".rbd",
  }


# Mapping of disk template to the logical-device type used to build it
_DISK_TEMPLATE_DEVICE_TYPE = {
  constants.DT_PLAIN: constants.LD_LV,
  constants.DT_FILE: constants.LD_FILE,
  constants.DT_SHARED_FILE: constants.LD_FILE,
  constants.DT_BLOCK: constants.LD_BLOCKDEV,
  constants.DT_RBD: constants.LD_RBD,
  }
def _GenerateDiskTemplate(
  lu, template_name, instance_name, primary_node, secondary_nodes,
  disk_info, file_storage_dir, file_driver, base_index,
  feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
  _req_shr_file_storage=opcodes.RequireSharedFileStorage):
  """Generate the entire disk layout for a given template type.

  @return: list of L{objects.Disk}, one per requested disk

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []

  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
                                                       full_disk_params)
    drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1])
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
  else:
    if secondary_nodes:
      raise errors.ProgrammerError("Wrong template configuration")

    if template_name == constants.DT_FILE:
      _req_file_storage()
    elif template_name == constants.DT_SHARED_FILE:
      _req_shr_file_storage()

    name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
    if name_prefix is None:
      names = None
    else:
      names = _GenerateUniqueNames(lu, ["%s.disk%s" %
                                        (name_prefix, base_index + i)
                                        for i in range(disk_count)])

    if template_name == constants.DT_PLAIN:
      def logical_id_fn(idx, _, disk):
        vg = disk.get(constants.IDISK_VG, vgname)
        return (vg, names[idx])
    elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
      logical_id_fn = \
        lambda _, disk_index, disk: (file_driver,
                                     "%s/disk%d" % (file_storage_dir,
                                                    disk_index))
    elif template_name == constants.DT_BLOCK:
      logical_id_fn = \
        lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
                                       disk[constants.IDISK_ADOPT])
    elif template_name == constants.DT_RBD:
      logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
    else:
      raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)

    dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      size = disk[constants.IDISK_SIZE]
      feedback_fn("* disk %s, size %s" %
                  (disk_index, utils.FormatUnit(size, "h")))
      disks.append(objects.Disk(dev_type=dev_type, size=size,
                                logical_id=logical_id_fn(idx, disk_index, disk),
                                iv_name="disk/%d" % disk_index,
                                mode=disk[constants.IDISK_MODE],
                                params={}))

  return disks
8952 def _GetInstanceInfoText(instance):
8953 """Compute that text that should be added to the disk's metadata.
8956 return "originstname+%s" % instance.name
8959 def _CalcEta(time_taken, written, total_size):
8960 """Calculates the ETA based on size written and total size.
8962 @param time_taken: The time taken so far
8963 @param written: amount written so far
8964 @param total_size: The total size of data to be written
8965 @return: The remaining time in seconds
8968 avg_time = time_taken / float(written)
8969 return (total_size - written) * avg_time
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @return: the success of the wipe

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node,
                                                  (instance.disks, instance),
                                                  True)
  result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disks %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur
      wipe_chunk_size = int(wipe_chunk_size)

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
                                           wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        # give an ETA-style progress line at most once a minute
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    # always resume syncing, even if the wipe failed half-way through
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node,
                                                    (instance.disks, instance),
                                                    False)

    if result.fail_msg:
      lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
                    " please have a look at the status and troubleshoot"
                    " the issue: %s", node, result.fail_msg)
    else:
      for idx, success in enumerate(result.payload):
        if not success:
          lu.LogWarning("Resume sync of disk %d failed, please have a"
                        " look at the status and troubleshoot the issue", idx)
          logging.warn("resume-sync of instance %s for disks %d failed",
                       instance.name, idx)
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating disk %s for instance '%s'", idx, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  ports_to_release = set()
  anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
  for (idx, device) in enumerate(anno_disks):
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      result = lu.rpc.call_blockdev_remove(node, disk)
      if result.fail_msg:
        lu.LogWarning("Could not remove disk %s on node %s,"
                      " continuing anyway: %s", idx, node, result.fail_msg)
        if not (result.offline and node != instance.primary_node):
          all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      ports_to_release.add(device.logical_id[2])

  if all_result or ignore_failures:
    for port in ports_to_release:
      lu.cfg.AddTcpUdpPort(port)

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # log the node we actually talked to (tgt), not unconditionally the
      # primary node, which is wrong when target_node overrides it
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  @param disk_template: the instance's disk template
  @param disks: list of disk dicts (with IDISK_VG/IDISK_SIZE keys)
  @rtype: dict
  @return: required free space, in MiB, per volume group name

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      # BUGFIX: accumulate under the disk's actual VG name; the previous
      # code looked up vgs.get(constants.IDISK_VG, ...), i.e. the literal
      # key string, so sizes were never summed per volume group
      vg_name = disk[constants.IDISK_VG]
      vgs[vg_name] = vgs.get(vg_name, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
    }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
9188 def _FilterVmNodes(lu, nodenames):
9189 """Filters out non-vm_capable nodes from a list.
9191 @type lu: L{LogicalUnit}
9192 @param lu: the logical unit for which we check
9193 @type nodenames: list
9194 @param nodenames: the list of nodes on which we should check
9196 @return: the list of vm-capable nodes
9199 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9200 return [name for name in nodenames if name not in vm_nodes]
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstract the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)

  cluster = lu.cfg.GetClusterInfo()
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)

  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
  for node in nodenames:
    info = hvinfo[node]
    # offline nodes cannot answer; skip them rather than failing validation
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the hypervisor we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)
def _CreateInstanceAllocRequest(op, disks, nics, beparams):
  """Wrapper around IAReqInstanceAlloc.

  @param op: The instance opcode
  @param disks: The computed disks
  @param nics: The computed nics
  @param beparams: The full filled beparams

  @returns: A filled L{iallocator.IAReqInstanceAlloc}

  """
  spindle_use = beparams[constants.BE_SPINDLE_USE]
  return iallocator.IAReqInstanceAlloc(name=op.instance_name,
                                       disk_template=op.disk_template,
                                       tags=op.tags,
                                       os=op.os_type,
                                       vcpus=beparams[constants.BE_VCPUS],
                                       memory=beparams[constants.BE_MAXMEM],
                                       spindle_use=spindle_use,
                                       disks=disks,
                                       nics=[n.ToDict() for n in nics],
                                       hypervisor=op.hypervisor)
def _ComputeNics(op, cluster, default_ip, cfg, proc):
  """Computes the nics.

  @param op: The instance opcode
  @param cluster: Cluster configuration object
  @param default_ip: The default ip to assign
  @param cfg: An instance of the configuration object
  @param proc: The executer instance

  @returns: The build up nics

  """
  nics = []
  for idx, nic in enumerate(op.nics):
    nic_mode_req = nic.get(constants.INIC_MODE, None)
    nic_mode = nic_mode_req
    if nic_mode is None or nic_mode == constants.VALUE_AUTO:
      nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

    # in routed mode, for the first nic, the default ip is 'auto'
    if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
      default_ip_mode = constants.VALUE_AUTO
    else:
      default_ip_mode = constants.VALUE_NONE

    # ip validity checks
    ip = nic.get(constants.INIC_IP, default_ip_mode)
    if ip is None or ip.lower() == constants.VALUE_NONE:
      nic_ip = None
    elif ip.lower() == constants.VALUE_AUTO:
      if not op.name_check:
        raise errors.OpPrereqError("IP address set to auto but name checks"
                                   " have been skipped",
                                   errors.ECODE_INVAL)
      nic_ip = default_ip
    else:
      if not netutils.IPAddress.IsValid(ip):
        raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                   errors.ECODE_INVAL)
      nic_ip = ip

    # TODO: check the ip address for uniqueness
    if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
      raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                 errors.ECODE_INVAL)

    # MAC address verification
    mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
    if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
      mac = utils.NormalizeAndValidateMac(mac)

      try:
        # TODO: We need to factor this out
        cfg.ReserveMAC(mac, proc.GetECId())
      except errors.ReservationError:
        raise errors.OpPrereqError("MAC address %s already in use"
                                   " in cluster" % mac,
                                   errors.ECODE_NOTUNIQUE)

    # Build nic parameters
    link = nic.get(constants.INIC_LINK, None)
    if link == constants.VALUE_AUTO:
      link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]

    nicparams = {}
    if nic_mode_req:
      nicparams[constants.NIC_MODE] = nic_mode
    if link:
      nicparams[constants.NIC_LINK] = link

    check_params = cluster.SimpleFillNIC(nicparams)
    objects.NIC.CheckParameterSyntax(check_params)
    nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

  return nics
def _ComputeDisks(op, default_vg):
  """Computes the instance disks.

  @param op: The instance opcode
  @param default_vg: The default_vg to assume

  @return: The computer disks

  """
  disks = []
  for disk in op.disks:
    mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
    if mode not in constants.DISK_ACCESS_SET:
      raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                 mode, errors.ECODE_INVAL)
    size = disk.get(constants.IDISK_SIZE, None)
    if size is None:
      raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
    try:
      size = int(size)
    except (TypeError, ValueError):
      raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                 errors.ECODE_INVAL)

    data_vg = disk.get(constants.IDISK_VG, default_vg)
    new_disk = {
      constants.IDISK_SIZE: size,
      constants.IDISK_MODE: mode,
      constants.IDISK_VG: data_vg,
      }
    if constants.IDISK_METAVG in disk:
      new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
    if constants.IDISK_ADOPT in disk:
      new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
    disks.append(new_disk)

  return disks
def _ComputeFullBeParams(op, cluster):
  """Computes the full beparams.

  Backend parameters given as "auto" in the opcode are replaced by the
  cluster-level defaults before type-checking and filling.

  @param op: The instance opcode
  @param cluster: The cluster config object

  @return: The fully filled beparams

  """
  defaults = cluster.beparams[constants.PP_DEFAULT]
  # collect the "auto" parameters first, then resolve them from the defaults
  auto_params = [name for (name, value) in op.beparams.items()
                 if value == constants.VALUE_AUTO]
  for name in auto_params:
    op.beparams[name] = defaults[name]
  objects.UpgradeBeParams(op.beparams)
  utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
  return cluster.SimpleFillBE(op.beparams)
9420 class LUInstanceCreate(LogicalUnit):
9421 """Create an instance.
9424 HPATH = "instance-add"
9425 HTYPE = constants.HTYPE_INSTANCE
  def CheckArguments(self):
    """Check arguments.

    Performs purely local (no RPC) validation and normalization of the
    instance-creation opcode: instance name, NIC/disk parameter types,
    adoption consistency, file-storage and node/iallocator checks, plus
    per-mode (create / import / remote-import) checks.

    """
    # do not require name_check to ease forward/backward compatibility
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      # NOTE(review): the bodies of this branch (setting has_adopt /
      # has_no_adopt) appear truncated in this copy -- verify upstream
      if constants.IDISK_ADOPT in disk:
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
    if self.op.disk_template not in constants.DTS_MAY_ADOPT:
      raise errors.OpPrereqError("Disk adoption is not supported for the"
                                 " '%s' disk template" %
                                 self.op.disk_template,
    if self.op.iallocator is not None:
      raise errors.OpPrereqError("Disk adoption not allowed with an"
                                 " iallocator script", errors.ECODE_INVAL)
    if self.op.mode == constants.INSTANCE_IMPORT:
      raise errors.OpPrereqError("Disk adoption not allowed for"
                                 " instance import", errors.ECODE_INVAL)

    if self.op.disk_template in constants.DTS_MUST_ADOPT:
      raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                 " but no 'adopt' parameter given" %
                                 self.op.disk_template,

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
      # NOTE(review): an "else:" arm appears truncated here; as written this
      # clobbers the value assigned just above -- verify upstream
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      # NOTE(review): an "elif self.op.snode:" guard and the warning's closing
      # argument appear truncated here -- verify upstream
      self.LogWarning("Secondary node will be ignored on non-mirrored disk"
      self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",

      # NOTE(review): the trailing argument of this call and the "if errmsg:"
      # guard appear truncated here -- verify upstream
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",

        # NOTE(review): the opening "try:" of this block appears truncated
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",

      self.source_instance_name = \
        netutils.GetHostname(name=src_instance_name).name
      # NOTE(review): the final "else:" arm (unknown mode) appears truncated
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
      # specifying a group on instance creation and then selecting nodes from
      # that group
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
      # NOTE(review): an "else:" arm appears truncated here; the explicit-node
      # expansion below should only run when no iallocator is given
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidential modification)
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
        # NOTE(review): an "else:" arm appears truncated here -- verify
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(pathutils.EXPORT_DIR, src_path)
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    Fills self.op.pnode (and self.op.snode when two nodes are required)
    from the iallocator result.

    """
    req = _CreateInstanceAllocRequest(self.op, self.disks,
                                      self.nics, self.be_full)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    # NOTE(review): the "if not ial.success:" guard appears truncated here
    raise errors.OpPrereqError("Can't compute nodes using"
                               " iallocator '%s': %s" %
                               (self.op.iallocator, ial.info),

    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))

    assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"

    if req.RequiredNodes() == 2:
      self.op.snode = ial.result[1]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    # NOTE(review): the "env = {" opening of this dict appears truncated
      "ADD_MODE": self.op.mode,
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      minmem=self.be_full[constants.BE_MINMEM],
      maxmem=self.be_full[constants.BE_MAXMEM],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      hypervisor_name=self.op.hypervisor,
    # NOTE(review): the closing of this call and the "return env" statement
    # appear truncated here -- verify upstream
  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # hooks run on the master node plus the instance's primary and secondaries
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    # NOTE(review): a return statement appears to be missing here -- verify
  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      # no source node given: search all locked nodes for the export
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      for node in exp_list:
        # NOTE(review): the bodies of these branches appear truncated here
        if exp_list[node].fail_msg:
        if src_path in exp_list[node].payload:
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
      raise errors.OpPrereqError("No export found for relative path %s" %
                                 src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    # NOTE(review): the "return export_info" statement appears truncated here
  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    the export declares them.

    @param einfo: the export information (a SerializableConfigParser)

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
        if self.op.disk_template not in constants.DISK_TEMPLATES:
          raise errors.OpPrereqError("Disk template specified in configuration"
                                     " file is not one of the allowed values:"
                                     " ".join(constants.DISK_TEMPLATES),
      # NOTE(review): an "else:" arm appears truncated here
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",

    if not self.op.disks:
      # NOTE(review): the "disks = []" initialization appears truncated here
      # TODO: import the disk iv_name too
      for idx in range(constants.MAX_DISKS):
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
      self.op.disks = disks
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",

    if not self.op.nics:
      # NOTE(review): nic-dict accumulation lines appear truncated below
      for idx in range(constants.MAX_NICS):
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
        # Compatibility for the old "memory" be param
        if name == constants.BE_MEMORY:
          if constants.BE_MAXMEM not in self.op.beparams:
            self.op.beparams[constants.BE_MAXMEM] = value
          if constants.BE_MINMEM not in self.op.beparams:
            self.op.beparams[constants.BE_MINMEM] = value
      # NOTE(review): an "else:" arm appears truncated here
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value
  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    Drops from the opcode any hv/be/nic/os parameter whose value equals
    the cluster default, so only explicit overrides are stored.

    @param cluster: the cluster config object

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          # NOTE(review): the deletion statement appears truncated here
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]
  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    Sets self.instance_file_storage_dir (None for non-file-based disk
    templates).

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      # NOTE(review): the "joinargs = []" initialization appears truncated
      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
        # NOTE(review): an "else:" arm appears truncated here
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined",
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
  def CheckPrereq(self): # pylint: disable=R0914
    """Check prerequisites.

    Validates cluster state, hypervisor/backend/OS parameters, computes
    NICs and disks, runs the iallocator if requested, and performs all
    node-side checks (free disk, memory, OS, adoption data).

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)
      self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
      # NOTE(review): an "else:" arm appears truncated here
      self._old_instance_name = None

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if (self.op.hypervisor is None or
        self.op.hypervisor == constants.VALUE_AUTO):
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 (self.op.hypervisor, ",".join(enabled_hvs)),

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    self.be_full = _ComputeFullBeParams(self.op, cluster)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = _ComputeNics(self.op, cluster, self.hostname1.ip, self.cfg,

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = _ComputeDisks(self.op, default_vg)

    if self.op.mode == constants.INSTANCE_IMPORT:
      # NOTE(review): the "disk_images = []" initialization appears truncated
      for idx in range(len(self.disks)):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
          # NOTE(review): an "else:" arm appears truncated here
          disk_images.append(False)

      self.src_images = disk_images

      if self.op.instance_name == self._old_instance_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run
    if self.op.iallocator is not None:
      self._RunAllocator()

    # Release all unneeded node locks
    _ReleaseLocks(self, locking.LEVEL_NODE,
                  keep=filter(None, [self.op.pnode, self.op.snode,
                                     self.op.src_node]))
    _ReleaseLocks(self, locking.LEVEL_NODE_RES,
                  keep=filter(None, [self.op.pnode, self.op.snode,
                                     self.op.src_node]))

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    # NOTE(review): the "if pnode.offline:" / "if pnode.drained:" guards
    # appear truncated before these two raises -- verify upstream
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

      snode = self.cfg.GetNodeInfo(self.op.snode)
      if pnode.group != snode.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"

    nodenames = [pnode.name] + self.secondaries

    # Verify instance specs
    spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
    # NOTE(review): the "ispec = {" opening of this dict appears truncated
      constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
      constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
      constants.ISPEC_DISK_COUNT: len(self.disks),
      constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
      constants.ISPEC_NIC_COUNT: len(self.nics),
      constants.ISPEC_SPINDLE_USE: spindle_use,

    group_info = self.cfg.GetNodeGroup(pnode.group)
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
    res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
    if not self.op.ignore_ipolicy and res:
      msg = ("Instance allocation to group %s (%s) violates policy: %s" %
             (pnode.group, group_info.name, utils.CommaJoin(res)))
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

    if not self.adopt_disks:
      if self.op.disk_template == constants.DT_RBD:
        # _CheckRADOSFreeSpace() is just a placeholder.
        # Any function that checks prerequisites can be placed here.
        # Check if there is enough space on the RADOS cluster.
        _CheckRADOSFreeSpace()
        # Check lv size requirements, if not adopting
        req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
        _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
          # NOTE(review): the opening "try:" of this block appears truncated
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      # NOTE(review): the "if delta:" guard appears truncated here
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      # NOTE(review): the "if online_lvs:" guard appears truncated here
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      # NOTE(review): the "if baddisks:" guard appears truncated here
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      # NOTE(review): the "if delta:" guard appears truncated here
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    #TODO(dynmem): use MINMEM for checking
    # NOTE(review): the "if self.op.start:" guard appears truncated here
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MAXMEM],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)
10170 def Exec(self, feedback_fn):
10171 """Create and add the instance to the cluster.
10174 instance = self.op.instance_name
10175 pnode_name = self.pnode.name
10177 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10178 self.owned_locks(locking.LEVEL_NODE)), \
10179 "Node locks differ from node resource locks"
10181 ht_kind = self.op.hypervisor
10182 if ht_kind in constants.HTS_REQ_PORT:
10183 network_port = self.cfg.AllocatePort()
10185 network_port = None
10187 # This is ugly but we got a chicken-egg problem here
10188 # We can only take the group disk parameters, as the instance
10189 # has no disks yet (we are generating them right here).
10190 node = self.cfg.GetNodeInfo(pnode_name)
10191 nodegroup = self.cfg.GetNodeGroup(node.group)
10192 disks = _GenerateDiskTemplate(self,
10193 self.op.disk_template,
10194 instance, pnode_name,
10197 self.instance_file_storage_dir,
10198 self.op.file_driver,
10201 self.cfg.GetGroupDiskParams(nodegroup))
10203 iobj = objects.Instance(name=instance, os=self.op.os_type,
10204 primary_node=pnode_name,
10205 nics=self.nics, disks=disks,
10206 disk_template=self.op.disk_template,
10207 admin_state=constants.ADMINST_DOWN,
10208 network_port=network_port,
10209 beparams=self.op.beparams,
10210 hvparams=self.op.hvparams,
10211 hypervisor=self.op.hypervisor,
10212 osparams=self.op.osparams,
10216 for tag in self.op.tags:
10219 if self.adopt_disks:
10220 if self.op.disk_template == constants.DT_PLAIN:
10221 # rename LVs to the newly-generated names; we need to construct
10222 # 'fake' LV disks with the old data, plus the new unique_id
10223 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10225 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10226 rename_to.append(t_dsk.logical_id)
10227 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10228 self.cfg.SetDiskID(t_dsk, pnode_name)
10229 result = self.rpc.call_blockdev_rename(pnode_name,
10230 zip(tmp_disks, rename_to))
10231 result.Raise("Failed to rename adoped LVs")
10233 feedback_fn("* creating instance disks...")
10235 _CreateDisks(self, iobj)
10236 except errors.OpExecError:
10237 self.LogWarning("Device creation failed, reverting...")
10239 _RemoveDisks(self, iobj)
10241 self.cfg.ReleaseDRBDMinors(instance)
10244 feedback_fn("adding instance %s to cluster config" % instance)
10246 self.cfg.AddInstance(iobj, self.proc.GetECId())
10248 # Declare that we don't want to remove the instance lock anymore, as we've
10249 # added the instance to the config
10250 del self.remove_locks[locking.LEVEL_INSTANCE]
10252 if self.op.mode == constants.INSTANCE_IMPORT:
10253 # Release unused nodes
10254 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10256 # Release all nodes
10257 _ReleaseLocks(self, locking.LEVEL_NODE)
10260 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10261 feedback_fn("* wiping instance disks...")
10263 _WipeDisks(self, iobj)
10264 except errors.OpExecError, err:
10265 logging.exception("Wiping disks failed")
10266 self.LogWarning("Wiping instance disks failed (%s)", err)
10270 # Something is already wrong with the disks, don't do anything else
10272 elif self.op.wait_for_sync:
10273 disk_abort = not _WaitForSync(self, iobj)
10274 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10275 # make sure the disks are not degraded (still sync-ing is ok)
10276 feedback_fn("* checking mirrors status")
10277 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10282 _RemoveDisks(self, iobj)
10283 self.cfg.RemoveInstance(iobj.name)
10284 # Make sure the instance lock gets removed
10285 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10286 raise errors.OpExecError("There are some degraded disks for"
10289 # Release all node resource locks
10290 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10292 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10293 # we need to set the disks ID to the primary node, since the
10294 # preceding code might or might have not done it, depending on
10295 # disk template and other options
10296 for disk in iobj.disks:
10297 self.cfg.SetDiskID(disk, pnode_name)
10298 if self.op.mode == constants.INSTANCE_CREATE:
10299 if not self.op.no_install:
10300 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10301 not self.op.wait_for_sync)
10303 feedback_fn("* pausing disk sync to install instance OS")
10304 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10307 for idx, success in enumerate(result.payload):
10309 logging.warn("pause-sync of instance %s for disk %d failed",
10312 feedback_fn("* running the instance OS create scripts...")
10313 # FIXME: pass debug option from opcode to backend
10315 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10316 self.op.debug_level)
10318 feedback_fn("* resuming disk sync")
10319 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10322 for idx, success in enumerate(result.payload):
10324 logging.warn("resume-sync of instance %s for disk %d failed",
10327 os_add_result.Raise("Could not add os for instance %s"
10328 " on node %s" % (instance, pnode_name))
10331 if self.op.mode == constants.INSTANCE_IMPORT:
10332 feedback_fn("* running the instance OS import scripts...")
10336 for idx, image in enumerate(self.src_images):
10340 # FIXME: pass debug option from opcode to backend
10341 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10342 constants.IEIO_FILE, (image, ),
10343 constants.IEIO_SCRIPT,
10344 (iobj.disks[idx], idx),
10346 transfers.append(dt)
10349 masterd.instance.TransferInstanceData(self, feedback_fn,
10350 self.op.src_node, pnode_name,
10351 self.pnode.secondary_ip,
10353 if not compat.all(import_result):
10354 self.LogWarning("Some disks for instance %s on node %s were not"
10355 " imported successfully" % (instance, pnode_name))
10357 rename_from = self._old_instance_name
10359 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10360 feedback_fn("* preparing remote import...")
10361 # The source cluster will stop the instance before attempting to make
10362 # a connection. In some cases stopping an instance can take a long
10363 # time, hence the shutdown timeout is added to the connection
10365 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10366 self.op.source_shutdown_timeout)
10367 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10369 assert iobj.primary_node == self.pnode.name
10371 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10372 self.source_x509_ca,
10373 self._cds, timeouts)
10374 if not compat.all(disk_results):
10375 # TODO: Should the instance still be started, even if some disks
10376 # failed to import (valid for local imports, too)?
10377 self.LogWarning("Some disks for instance %s on node %s were not"
10378 " imported successfully" % (instance, pnode_name))
10380 rename_from = self.source_instance_name
10383 # also checked in the prereq part
10384 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10387 # Run rename script on newly imported instance
10388 assert iobj.name == instance
10389 feedback_fn("Running rename script for %s" % instance)
10390 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10392 self.op.debug_level)
10393 if result.fail_msg:
10394 self.LogWarning("Failed to run rename script for %s on node"
10395 " %s: %s" % (instance, pnode_name, result.fail_msg))
10397 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10400 iobj.admin_state = constants.ADMINST_UP
10401 self.cfg.Update(iobj, feedback_fn)
10402 logging.info("Starting instance %s on node %s", instance, pnode_name)
10403 feedback_fn("* starting instance...")
10404 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10406 result.Raise("Could not start instance")
10408 return list(iobj.all_nodes)
class LUInstanceMultiAlloc(NoHooksLU):
  """Allocates multiple instances at the same time.

  """
  def CheckArguments(self):
    """Check arguments.

    Validates that either all per-instance opcodes carry explicit nodes or
    none do, that an iallocator is available when nodes are not given, and
    that instance names are unique within the request.

    """
    # Per-instance iallocator settings are forbidden; only the LU-level
    # self.op.iallocator may choose the allocator for the whole request
    for inst in self.op.instances:
      if inst.iallocator is not None:
        raise errors.OpPrereqError("iallocator are not allowed to be set on"
                                   " instance objects", errors.ECODE_INVAL)
      # Track whether each instance carries an explicit node assignment
      nodes.append(bool(inst.pnode))
      if inst.disk_template in constants.DTS_INT_MIRROR:
        # Mirrored disk templates additionally need a secondary node
        nodes.append(bool(inst.snode))

    has_nodes = compat.any(nodes)
    # all(...) XOR any(...) is true exactly when the flags are mixed, i.e.
    # some instances name nodes while others expect the allocator to choose
    if compat.all(nodes) ^ has_nodes:
      raise errors.OpPrereqError("There are instance objects providing"
                                 " pnode/snode while others do not",
                                 errors.ECODE_INVAL)

    if self.op.iallocator is None:
      # Fall back to the cluster-wide default allocator, if one is set
      default_iallocator = self.cfg.GetDefaultIAllocator()
      if default_iallocator and has_nodes:
        self.op.iallocator = default_iallocator
        raise errors.OpPrereqError("No iallocator or nodes on the instances"
                                   " given and no cluster-wide default"
                                   " iallocator found; please specify either"
                                   " an iallocator or nodes on the instances"
                                   " or set a cluster-wide default iallocator",
                                   errors.ECODE_INVAL)

    # Duplicate instance names cannot be allocated in one request
    dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
      raise errors.OpPrereqError("There are duplicate instance names: %s" %
                                 utils.CommaJoin(dups), errors.ECODE_INVAL)
  def ExpandNames(self):
    """Calculate the locks.

    All locks are shared; with an iallocator all nodes are locked, otherwise
    only the nodes named by the individual instance opcodes.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.iallocator:
      # The allocator may place instances anywhere, so lock everything
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
      # Explicit node assignments: expand and collect the named nodes only
      for inst in self.op.instances:
        inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
        nodeslist.append(inst.pnode)
        if inst.snode is not None:
          inst.snode = _ExpandNodeName(self.cfg, inst.snode)
          nodeslist.append(inst.snode)

      self.needed_locks[locking.LEVEL_NODE] = nodeslist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidential modification)
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
10477 def CheckPrereq(self):
10478 """Check prerequisite.
10481 cluster = self.cfg.GetClusterInfo()
10482 default_vg = self.cfg.GetVGName()
10483 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
10484 _ComputeNics(op, cluster, None,
10485 self.cfg, self.proc),
10486 _ComputeFullBeParams(op, cluster))
10487 for op in self.op.instances]
10488 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
10489 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10491 ial.Run(self.op.iallocator)
10493 if not ial.success:
10494 raise errors.OpPrereqError("Can't compute nodes using"
10495 " iallocator '%s': %s" %
10496 (self.op.iallocator, ial.info),
10497 errors.ECODE_NORES)
10499 self.ia_result = ial.result
10501 if self.op.dry_run:
10502 self.dry_run_rsult = objects.FillDict(self._ConstructPartialResult(), {
10503 constants.JOB_IDS_KEY: [],
  def _ConstructPartialResult(self):
    """Constructs the partial result.

    Splits the iallocator outcome into the names that can be allocated and
    those that failed, keyed as expected by L{opcodes.OpInstanceMultiAlloc}.

    """
    (allocatable, failed) = self.ia_result
      # allocatable entries are (name, nodes) pairs; only the names are
      # reported back
      opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
        map(compat.fst, allocatable),
      opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
  def Exec(self, feedback_fn):
    """Executes the opcode.

    Patches the chosen nodes into each per-instance opcode and submits them
    as individual jobs.

    """
    # Map instance name -> its creation opcode for easy lookup
    op2inst = dict((op.instance_name, op) for op in self.op.instances)
    (allocatable, failed) = self.ia_result

    for (name, nodes) in allocatable:
      op = op2inst.pop(name)
        # Two nodes returned: primary and secondary (mirrored template)
        (op.pnode, op.snode) = nodes
        # Single node returned: primary only
        (op.pnode,) = nodes

    # Every opcode not allocated must appear in the failed list
    missing = set(op2inst.keys()) - set(failed)
    assert not missing, \
      "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)

    return ResultWithJobs(jobs, **self._ConstructPartialResult())
def _CheckRADOSFreeSpace():
  """Compute disk size requirements inside the RADOS cluster.

  """
  # For the RADOS cluster we assume there is always enough space.
class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  def ExpandNames(self):
    # Read-only operation: all locks are shared
    self.share_locks = _ShareAll()
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance.

    """
    instance = self.instance
    node = instance.primary_node

    # Ask the primary node which instances it is actually running
    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      # Translate the admin state into a user-visible instance state for
      # the error message
      if instance.admin_state == constants.ADMINST_UP:
        state = constants.INSTST_ERRORDOWN
      elif instance.admin_state == constants.ADMINST_DOWN:
        state = constants.INSTST_ADMINDOWN
        state = constants.INSTST_ADMINOFFLINE
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  # Fill the hypervisor and backend parameters from the cluster defaults
  # explicitly, instead of editing the instance and then saving the
  # defaults in the instance itself
  filled_hvparams = cluster.FillHV(instance)
  filled_beparams = cluster.FillBE(instance)

  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  console = hyper.GetInstanceConsole(instance, filled_hvparams,
                                     filled_beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    # Argument validation is shared with the tasklet
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODE_RES not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      # No explicit secondary: node locks are computed in DeclareLocks
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []

    self.needed_locks[locking.LEVEL_NODE_RES] = []

    # The actual work is delegated to the TLReplaceDisks tasklet
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release,
                                   self.op.ignore_ipolicy)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock all groups used by instance optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = \
           for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Resource locks mirror the node locks computed above
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
    env.update(_BuildInstanceHookEnvByObject(self, instance))

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
      self.cfg.GetMasterNode(),
      instance.primary_node,
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Node groups are only locked when the iallocator is used
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

    return LogicalUnit.CheckPrereq(self)
class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release, ignore_ipolicy):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters (taken verbatim from the opcode)
    self.instance_name = instance_name
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release
    self.ignore_ipolicy = ignore_ipolicy
    # NOTE(review): self.mode and self.disks are read elsewhere in this
    # class but not assigned here — confirm those assignments were not lost

    # Runtime data, filled in by CheckPrereq/_CheckPrereq2
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None
10774 def CheckArguments(mode, remote_node, ialloc):
10775 """Helper function for users of this class.
10778 # check for valid parameter combination
10779 if mode == constants.REPLACE_DISK_CHG:
10780 if remote_node is None and ialloc is None:
10781 raise errors.OpPrereqError("When changing the secondary either an"
10782 " iallocator script must be used or the"
10783 " new node given", errors.ECODE_INVAL)
10785 if remote_node is not None and ialloc is not None:
10786 raise errors.OpPrereqError("Give either the iallocator or the new"
10787 " secondary, not both", errors.ECODE_INVAL)
10789 elif remote_node is not None or ialloc is not None:
10790 # Not replacing the secondary
10791 raise errors.OpPrereqError("The iallocator and new node options can"
10792 " only be used when changing the"
10793 " secondary node", errors.ECODE_INVAL)
10796 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10797 """Compute a new secondary node using an IAllocator.
10800 req = iallocator.IAReqRelocate(name=instance_name,
10801 relocate_from=list(relocate_from))
10802 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
10804 ial.Run(iallocator_name)
10806 if not ial.success:
10807 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10808 " %s" % (iallocator_name, ial.info),
10809 errors.ECODE_NORES)
10811 remote_node_name = ial.result[0]
10813 lu.LogInfo("Selected new secondary for instance '%s': %s",
10814 instance_name, remote_node_name)
10816 return remote_node_name
  def _FindFaultyDisks(self, node_name):
    """Wrapper for L{_FindFaultyInstanceDisks}.

    """
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    # Every disk must be findable on every node of the instance
    for idx, dev in enumerate(instance.disks):
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = _BlockdevFind(self, node, dev, instance)

        # An RPC error or a missing payload means the disk is not activated
        elif result.fail_msg or not result.payload:
10848 def CheckPrereq(self):
10849 """Check prerequisites.
10851 This checks that the instance is in the cluster.
10854 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10855 assert instance is not None, \
10856 "Cannot retrieve locked instance %s" % self.instance_name
10858 if instance.disk_template != constants.DT_DRBD8:
10859 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10860 " instances", errors.ECODE_INVAL)
10862 if len(instance.secondary_nodes) != 1:
10863 raise errors.OpPrereqError("The instance has a strange layout,"
10864 " expected one secondary but found %d" %
10865 len(instance.secondary_nodes),
10866 errors.ECODE_FAULT)
10868 if not self.delay_iallocator:
10869 self._CheckPrereq2()
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    # Determine the new secondary: given explicitly, or chosen by the
    # iallocator script
    if self.iallocator_name is None:
      remote_node = self.remote_node
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
        "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    # Explicit disk lists are only valid for same-node replacements
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      # Automatic repair can only handle faults on a single node
      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %

    # If not specified all disks should be replaced
      self.disks = range(len(self.instance.disks))

    # TODO: This is ugly, but right now we can't distinguish between internal
    # submitted opcode and external one. We should fix that.
    if self.remote_node_info:
      # We change the node, lets verify it still meets instance policy
      new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
      cluster = self.cfg.GetClusterInfo()
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
      _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
                              ignore=self.ignore_ipolicy)

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # All nodes involved in this replacement, with None entries filtered out
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                              if node_name is not None)

    # Release unneeded node and node resource locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    # Prerequisite checks postponed from CheckPrereq (see _CheckPrereq2)
    if self.delay_iallocator:
      self._CheckPrereq2()

    # Verify owned locks before starting operation
    owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
    assert set(owned_nodes) == set(self.node_secondary_ip), \
      ("Incorrect node locks, owning %s, expected %s" %
       (owned_nodes, self.node_secondary_ip.keys()))
    assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
            self.lu.owned_locks(locking.LEVEL_NODE_RES))

    owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
    assert list(owned_instances) == [self.instance_name], \
      "Instance '%s' not locked" % self.instance_name

    assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
      "Should not own any node group lock at this point"

      feedback_fn("No disks need replacement")

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)

    # Activate the instance disks if we're replacing them on a down instance
      _StartInstanceDisks(self.lu, self.instance, True)

      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)

      # Deactivate the instance disks if we're replacing them on a
        _SafeShutdownInstanceDisks(self.lu, self.instance)

        assert not self.lu.owned_locks(locking.LEVEL_NODE)

      # Verify owned locks
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
      nodes = frozenset(self.node_secondary_ip)
      # With early release all locks are gone; otherwise only locks on the
      # touched nodes may remain
      assert ((self.early_release and not owned_nodes) or
              (not self.early_release and not (set(owned_nodes) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_nodes, nodes))
  def _CheckVolumeGroup(self, nodes):
    """Verify that the configured volume group exists on all given nodes.

    """
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
      raise errors.OpExecError("Can't list volume groups on the nodes")

      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
  def _CheckDisksExistence(self, nodes):
    """Verify that the disks selected for replacement exist on the nodes.

    """
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      # Only the disks selected for replacement are checked
      if idx not in self.disks:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = _BlockdevFind(self, node, dev, self.instance)

        msg = result.fail_msg
        if msg or not result.payload:
          # An empty payload with no error message still means "not found"
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    """Abort if any selected disk is degraded on the given node.

    """
    for idx, dev in enumerate(self.instance.disks):
      # Only the disks selected for replacement are checked
      if idx not in self.disks:

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %

      if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
                                   on_primary, ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))
  def _CreateNewStorage(self, node_name):
    """Create new storage on the primary or secondary node.

    This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
    for idx, dev in enumerate(disks):
      # Only the disks selected for replacement get new storage
      if idx not in self.disks:

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      # Fresh unique LV names for the replacement data/meta volumes
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      (data_disk, meta_disk) = dev.children
      # New data LV: same volume group and size as the existing data child
      vg_data = data_disk.logical_id[0]
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_data, names[0]),
                             params=data_disk.params)
      # New metadata LV: fixed DRBD metadata size
      vg_meta = meta_disk.logical_id[0]
      lv_meta = objects.Disk(dev_type=constants.LD_LV,
                             size=constants.DRBD_META_SIZE,
                             logical_id=(vg_meta, names[1]),
                             params=meta_disk.params)

      new_lvs = [lv_data, lv_meta]
      # Copy the old children so later removal targets the originals
      old_lvs = [child.Copy() for child in dev.children]
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
                             _GetInstanceInfoText(self.instance), False)
  def _CheckDevices(self, node_name, iv_names):
    """Verify the replaced DRBD devices exist and are not degraded.

    """
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = _BlockdevFind(self, node_name, dev, self.instance)

      msg = result.fail_msg
      if msg or not result.payload:
          # An empty payload with no error message still means "not found"
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
  def _RemoveOldStorage(self, node_name, iv_names):
    """Remove the (renamed) old logical volumes after a replace.

    """
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

        self.cfg.SetDiskID(lv, node_name)

        # Removal failures are not fatal: warn and let the admin clean up
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaces.<time_t>)

    Failures are not very well handled.

    """
    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Intermediate steps of in memory modifications
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # We need to modify old_lvs so that removal later removes the
      # right LVs, not the newly added ones; note that old_lvs is a
      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node,
                                                  (dev, self.instance), new_lvs)
      msg = result.fail_msg
        # Attach failed: roll back by removing the freshly created LVs
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

    # Step numbering continues from here (early/late storage removal)
    cstep = itertools.count(5)

    if self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
      # TODO: Check if releasing locks early still makes sense
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
      # Release all resource locks except those used by the instance
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
                    keep=self.node_secondary_ip.keys())

    # Release all node locks while waiting for sync
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)

    # TODO: Can the instance lock be downgraded here? Take the optional disk
    # shutdown in the caller into consideration.

    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
# NOTE(review): this is a numbered listing with gaps in the embedded line
# numbers (e.g. 11343, 11358-11361 missing), so some statements of this
# method are elided — among them the assignment of steps_total used below.
# Code is left byte-identical; only review comments are added.
11341 def _ExecDrbd8Secondary(self, feedback_fn):
11342 """Replace the secondary node for DRBD 8.
11344 The algorithm for replace is quite complicated:
11345 - for all disks of the instance:
11346 - create new LVs on the new node with same names
11347 - shutdown the drbd device on the old secondary
11348 - disconnect the drbd network on the primary
11349 - create the drbd device on the new secondary
11350 - network attach the drbd on the primary, using an artifice:
11351 the drbd code for Attach() will connect to the network if it
11352 finds a device which is connected to the good local disks but
11353 not network enabled
11354 - wait for sync across all devices
11355 - remove all disks from the old secondary
11357 Failures are not very well handled.
11362 pnode = self.instance.primary_node
11364 # Step: check device activation
11365 self.lu.LogStep(1, steps_total, "Check device existence")
11366 self._CheckDisksExistence([self.instance.primary_node])
11367 self._CheckVolumeGroup([self.instance.primary_node])
11369 # Step: check other node consistency
11370 self.lu.LogStep(2, steps_total, "Check peer consistency")
11371 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11373 # Step: create new storage
11374 self.lu.LogStep(3, steps_total, "Allocate new storage")
11375 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11376 for idx, dev in enumerate(disks):
11377 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11378 (self.new_node, idx))
11379 # we pass force_create=True to force LVM creation
11380 for new_lv in dev.children:
11381 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11382 True, _GetInstanceInfoText(self.instance), False)
11384 # Step 4: dbrd minors and drbd setups changes
11385 # after this, we must manually remove the drbd minors on both the
11386 # error and the success paths
11387 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11388 minors = self.cfg.AllocateDRBDMinor([self.new_node
11389 for dev in self.instance.disks],
11390 self.instance.name)
11391 logging.debug("Allocated minors %r", minors)
# NOTE(review): iv_names is filled below but its initialization (around
# elided line 11393, presumably "iv_names = {}") is not visible here.
11394 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11395 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11396 (self.new_node, idx))
11397 # create new devices on new_node; note that we create two IDs:
11398 # one without port, so the drbd will be activated without
11399 # networking information on the new node at this stage, and one
11400 # with network, for the latter activation in step 4
11401 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11402 if self.instance.primary_node == o_node1:
# NOTE(review): the branch bodies selecting p_minor (elided lines
# 11403-11407) are missing; p_minor is presumably o_minor1 or o_minor2
# depending on which side is the primary — confirm against the full file.
11405 assert self.instance.primary_node == o_node2, "Three-node instance?"
11408 new_alone_id = (self.instance.primary_node, self.new_node, None,
11409 p_minor, new_minor, o_secret)
11410 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11411 p_minor, new_minor, o_secret)
11413 iv_names[idx] = (dev, dev.children, new_net_id)
11414 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11416 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11417 logical_id=new_alone_id,
11418 children=dev.children,
11421 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
# NOTE(review): the "try:" matching the except below is elided (around
# line 11423); on failure the allocated DRBD minors are released.
11424 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11426 _GetInstanceInfoText(self.instance), False)
11427 except errors.GenericError:
11428 self.cfg.ReleaseDRBDMinors(self.instance.name)
11431 # We have new devices, shutdown the drbd on the old secondary
11432 for idx, dev in enumerate(self.instance.disks):
11433 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11434 self.cfg.SetDiskID(dev, self.target_node)
11435 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11436 (dev, self.instance)).fail_msg
# NOTE(review): an "if msg:" guard (elided line 11437) presumably
# precedes this warning — shutdown failure is non-fatal here.
11438 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11439 "node: %s" % (idx, msg),
11440 hint=("Please cleanup this device manually as"
11441 " soon as possible"))
11443 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11444 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11445 self.instance.disks)[pnode]
11447 msg = result.fail_msg
# NOTE(review): an "if msg:" guard (elided line 11448) presumably wraps
# the failure path below; on failure the minors are released and we abort.
11449 # detaches didn't succeed (unlikely)
11450 self.cfg.ReleaseDRBDMinors(self.instance.name)
11451 raise errors.OpExecError("Can't detach the disks from the network on"
11452 " old node: %s" % (msg,))
11454 # if we managed to detach at least one, we update all the disks of
11455 # the instance to point to the new secondary
11456 self.lu.LogInfo("Updating instance configuration")
11457 for dev, _, new_logical_id in iv_names.itervalues():
11458 dev.logical_id = new_logical_id
11459 self.cfg.SetDiskID(dev, self.instance.primary_node)
11461 self.cfg.Update(self.instance, feedback_fn)
11463 # Release all node locks (the configuration has been updated)
11464 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11466 # and now perform the drbd attach
11467 self.lu.LogInfo("Attaching primary drbds to new secondary"
11468 " (standalone => connected)")
11469 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11471 self.node_secondary_ip,
11472 (self.instance.disks, self.instance),
11473 self.instance.name,
11475 for to_node, to_result in result.items():
11476 msg = to_result.fail_msg
# NOTE(review): attach failures are only warned about (elided "if msg:"
# guard around line 11477); the admin is pointed at gnt-instance info.
11478 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11480 hint=("please do a gnt-instance info to see the"
11481 " status of disks"))
11483 cstep = itertools.count(5)
11485 if self.early_release:
11486 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11487 self._RemoveOldStorage(self.target_node, iv_names)
11488 # TODO: Check if releasing locks early still makes sense
11489 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11491 # Release all resource locks except those used by the instance
11492 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11493 keep=self.node_secondary_ip.keys())
11495 # TODO: Can the instance lock be downgraded here? Take the optional disk
11496 # shutdown in the caller into consideration.
11499 # This can fail as the old devices are degraded and _WaitForSync
11500 # does a combined result over all disks, so we don't check its return value
11501 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11502 _WaitForSync(self.lu, self.instance)
11504 # Check all devices manually
11505 self._CheckDevices(self.instance.primary_node, iv_names)
11507 # Step: remove old storage
11508 if not self.early_release:
11509 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11510 self._RemoveOldStorage(self.target_node, iv_names)
# Logical unit running constants.SO_FIX_CONSISTENCY on one node's storage.
# NOTE(review): numbered listing with elided lines; code left byte-identical.
11513 class LURepairNodeStorage(NoHooksLU):
11514 """Repairs the volume group on a node.
# Validate that the requested storage type supports the "fix consistency"
# operation before doing anything else.
11519 def CheckArguments(self):
11520 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11522 storage_type = self.op.storage_type
11524 if (constants.SO_FIX_CONSISTENCY not in
11525 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11526 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11527 " repaired" % storage_type,
11528 errors.ECODE_INVAL)
# Only the target node needs to be locked.
11530 def ExpandNames(self):
11531 self.needed_locks = {
11532 locking.LEVEL_NODE: [self.op.node_name],
11535 def _CheckFaultyDisks(self, instance, node_name):
11536 """Ensure faulty disks abort the opcode or at least warn."""
# NOTE(review): the "try:" matching the except below is elided (line
# 11537); with ignore_consistency set, the prereq error is downgraded
# to a warning instead of aborting.
11538 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11540 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11541 " node '%s'" % (instance.name, node_name),
11542 errors.ECODE_STATE)
11543 except errors.OpPrereqError, err:
11544 if self.op.ignore_consistency:
11545 self.proc.LogWarning(str(err.args[0]))
11549 def CheckPrereq(self):
11550 """Check prerequisites.
11553 # Check whether any instance on this node has faulty disks
11554 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11555 if inst.admin_state != constants.ADMINST_UP:
# NOTE(review): the loop body for non-UP instances (elided line 11556,
# presumably "continue") is missing from this listing.
11557 check_nodes = set(inst.all_nodes)
11558 check_nodes.discard(self.op.node_name)
11559 for inst_node_name in check_nodes:
11560 self._CheckFaultyDisks(inst, inst_node_name)
# Run the repair on the node via RPC and raise on failure.
11562 def Exec(self, feedback_fn):
11563 feedback_fn("Repairing storage unit '%s' on %s ..." %
11564 (self.op.name, self.op.node_name))
11566 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11567 result = self.rpc.call_storage_execute(self.op.node_name,
11568 self.op.storage_type, st_args,
11570 constants.SO_FIX_CONSISTENCY)
11571 result.Raise("Failed to repair storage unit '%s' on %s" %
11572 (self.op.name, self.op.node_name))
# Logical unit evacuating all (primary/secondary/both) instances off a node,
# either via an iallocator or to an explicitly given remote node.
# NOTE(review): numbered listing with elided lines (gaps in embedded
# numbers); code left byte-identical, only review comments added.
11575 class LUNodeEvacuate(NoHooksLU):
11576 """Evacuates instances off a list of nodes.
# Maps opcode evacuation modes to the corresponding iallocator modes.
11581 _MODE2IALLOCATOR = {
11582 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11583 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11584 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11586 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11587 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11588 constants.IALLOCATOR_NEVAC_MODES)
11590 def CheckArguments(self):
11591 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11593 def ExpandNames(self):
11594 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11596 if self.op.remote_node is not None:
11597 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11598 assert self.op.remote_node
11600 if self.op.remote_node == self.op.node_name:
11601 raise errors.OpPrereqError("Can not use evacuated node as a new"
11602 " secondary node", errors.ECODE_INVAL)
# With an explicit remote node only secondary evacuation is possible.
11604 if self.op.mode != constants.NODE_EVAC_SEC:
11605 raise errors.OpPrereqError("Without the use of an iallocator only"
11606 " secondary instances can be evacuated",
11607 errors.ECODE_INVAL)
11610 self.share_locks = _ShareAll()
11611 self.needed_locks = {
11612 locking.LEVEL_INSTANCE: [],
11613 locking.LEVEL_NODEGROUP: [],
11614 locking.LEVEL_NODE: [],
11617 # Determine nodes (via group) optimistically, needs verification once locks
11618 # have been acquired
11619 self.lock_nodes = self._DetermineNodes()
11621 def _DetermineNodes(self):
11622 """Gets the list of nodes to operate on.
11625 if self.op.remote_node is None:
11626 # Iallocator will choose any node(s) in the same group
11627 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
# NOTE(review): the "else:" for the explicit remote-node case (elided
# line 11628) is missing from this listing.
11629 group_nodes = frozenset([self.op.remote_node])
11631 # Determine nodes to be locked
11632 return set([self.op.node_name]) | group_nodes
11634 def _DetermineInstances(self):
11635 """Builds list of instances to operate on.
11638 assert self.op.mode in constants.NODE_EVAC_MODES
11640 if self.op.mode == constants.NODE_EVAC_PRI:
11641 # Primary instances only
11642 inst_fn = _GetNodePrimaryInstances
11643 assert self.op.remote_node is None, \
11644 "Evacuating primary instances requires iallocator"
11645 elif self.op.mode == constants.NODE_EVAC_SEC:
11646 # Secondary instances only
11647 inst_fn = _GetNodeSecondaryInstances
# NOTE(review): an "else:" (elided around line 11648-11649) introduces
# the NODE_EVAC_ALL branch below.
11650 assert self.op.mode == constants.NODE_EVAC_ALL
11651 inst_fn = _GetNodeInstances
11652 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
# NOTE(review): the condition guarding this raise (elided line 11653,
# presumably checking self.op.remote_node) is missing from this listing.
11654 raise errors.OpPrereqError("Due to an issue with the iallocator"
11655 " interface it is not possible to evacuate"
11656 " all instances at once; specify explicitly"
11657 " whether to evacuate primary or secondary"
11659 errors.ECODE_INVAL)
11661 return inst_fn(self.cfg, self.op.node_name)
11663 def DeclareLocks(self, level):
11664 if level == locking.LEVEL_INSTANCE:
11665 # Lock instances optimistically, needs verification once node and group
11666 # locks have been acquired
11667 self.needed_locks[locking.LEVEL_INSTANCE] = \
11668 set(i.name for i in self._DetermineInstances())
11670 elif level == locking.LEVEL_NODEGROUP:
11671 # Lock node groups for all potential target nodes optimistically, needs
11672 # verification once nodes have been acquired
11673 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11674 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11676 elif level == locking.LEVEL_NODE:
11677 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
# Re-verify the optimistic node/group/instance locking done above.
11679 def CheckPrereq(self):
11681 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11682 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11683 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11685 need_nodes = self._DetermineNodes()
11687 if not owned_nodes.issuperset(need_nodes):
11688 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11689 " locks were acquired, current nodes are"
11690 " are '%s', used to be '%s'; retry the"
11692 (self.op.node_name,
11693 utils.CommaJoin(need_nodes),
11694 utils.CommaJoin(owned_nodes)),
11695 errors.ECODE_STATE)
11697 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11698 if owned_groups != wanted_groups:
11699 raise errors.OpExecError("Node groups changed since locks were acquired,"
11700 " current groups are '%s', used to be '%s';"
11701 " retry the operation" %
11702 (utils.CommaJoin(wanted_groups),
11703 utils.CommaJoin(owned_groups)))
11705 # Determine affected instances
11706 self.instances = self._DetermineInstances()
11707 self.instance_names = [i.name for i in self.instances]
11709 if set(self.instance_names) != owned_instances:
11710 raise errors.OpExecError("Instances on node '%s' changed since locks"
11711 " were acquired, current instances are '%s',"
11712 " used to be '%s'; retry the operation" %
11713 (self.op.node_name,
11714 utils.CommaJoin(self.instance_names),
11715 utils.CommaJoin(owned_instances)))
11717 if self.instance_names:
11718 self.LogInfo("Evacuating instances from node '%s': %s",
11720 utils.CommaJoin(utils.NiceSort(self.instance_names)))
# NOTE(review): the "else:" for the empty case (elided line 11721) is
# missing from this listing.
11722 self.LogInfo("No instances to evacuate from node '%s'",
# A given remote node must not be the primary of any affected instance.
11725 if self.op.remote_node is not None:
11726 for i in self.instances:
11727 if i.primary_node == self.op.remote_node:
11728 raise errors.OpPrereqError("Node %s is the primary node of"
11729 " instance %s, cannot use it as"
11731 (self.op.remote_node, i.name),
11732 errors.ECODE_INVAL)
11734 def Exec(self, feedback_fn):
11735 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11737 if not self.instance_names:
11738 # No instances to evacuate
# NOTE(review): the assignment "jobs = []" (elided around line 11739)
# for the empty case is missing from this listing.
11741 elif self.op.iallocator is not None:
11742 # TODO: Implement relocation to other group
11743 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
11744 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
11745 instances=list(self.instance_names))
11746 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11748 ial.Run(self.op.iallocator)
11750 if not ial.success:
11751 raise errors.OpPrereqError("Can't compute node evacuation using"
11752 " iallocator '%s': %s" %
11753 (self.op.iallocator, ial.info),
11754 errors.ECODE_NORES)
11756 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11758 elif self.op.remote_node is not None:
11759 assert self.op.mode == constants.NODE_EVAC_SEC
# NOTE(review): the "jobs = " list-comprehension header (elided line
# 11760) is missing; each instance gets a one-opcode replace-disks job.
11761 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11762 remote_node=self.op.remote_node,
11764 mode=constants.REPLACE_DISK_CHG,
11765 early_release=self.op.early_release)]
11766 for instance_name in self.instance_names
# NOTE(review): a final "else:" (elided around line 11768-11769)
# precedes the ProgrammerError below.
11770 raise errors.ProgrammerError("No iallocator or remote node")
11772 return ResultWithJobs(jobs)
# Best-effort setter: opcodes without an early_release slot are skipped via
# EAFP; replace-disks opcodes are asserted to always have the attribute.
# NOTE(review): the "try:" (elided line 11779) and the trailing "return op"
# (elided, around line 11784) are missing from this numbered listing.
11775 def _SetOpEarlyRelease(early_release, op):
11776 """Sets C{early_release} flag on opcodes if available.
11780 op.early_release = early_release
11781 except AttributeError:
11782 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
# Formats an evacuation destination for display: the node list when
# use_nodes is set, otherwise the group (that branch is elided here —
# lines 11789-11794 of the original are missing from this listing).
11787 def _NodeEvacDest(use_nodes, group, nodes):
11788 """Returns group or nodes depending on caller's choice.
11792 return utils.CommaJoin(nodes)
# NOTE(review): numbered listing with elided guard lines ("if failed:" at
# 11815, "if moved:" around 11821, the closing of the return expression at
# 11829-11830); code left byte-identical.
11797 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11798 """Unpacks the result of change-group and node-evacuate iallocator requests.
11800 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11801 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11803 @type lu: L{LogicalUnit}
11804 @param lu: Logical unit instance
11805 @type alloc_result: tuple/list
11806 @param alloc_result: Result from iallocator
11807 @type early_release: bool
11808 @param early_release: Whether to release locks early if possible
11809 @type use_nodes: bool
11810 @param use_nodes: Whether to display node names instead of groups
11813 (moved, failed, jobs) = alloc_result
# Failures abort the whole operation after logging a warning.
11816 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11817 for (name, reason) in failed)
11818 lu.LogWarning("Unable to evacuate instances %s", failreason)
11819 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11822 lu.LogInfo("Instances to be moved: %s",
11823 utils.CommaJoin("%s (to %s)" %
11824 (name, _NodeEvacDest(use_nodes, group, nodes))
11825 for (name, group, nodes) in moved))
# Deserialize each job's opcodes and propagate the early_release flag.
11827 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11828 map(opcodes.OpCode.LoadOpCode, ops))
# Logical unit growing one disk of an instance, either by a delta or to an
# absolute size, with a dry-run pass before the real grow.
# NOTE(review): numbered listing with elided lines (gaps in embedded
# numbers); code left byte-identical, only review comments added.
11832 class LUInstanceGrowDisk(LogicalUnit):
11833 """Grow a disk of an instance.
11836 HPATH = "disk-grow"
11837 HTYPE = constants.HTYPE_INSTANCE
11840 def ExpandNames(self):
11841 self._ExpandAndLockInstance()
11842 self.needed_locks[locking.LEVEL_NODE] = []
11843 self.needed_locks[locking.LEVEL_NODE_RES] = []
11844 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11845 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11847 def DeclareLocks(self, level):
11848 if level == locking.LEVEL_NODE:
11849 self._LockInstancesNodes()
11850 elif level == locking.LEVEL_NODE_RES:
# Copy the already-declared node locks for the resource level.
11852 self.needed_locks[locking.LEVEL_NODE_RES] = \
11853 self.needed_locks[locking.LEVEL_NODE][:]
11855 def BuildHooksEnv(self):
11856 """Build hooks env.
11858 This runs on the master, the primary and all the secondaries.
# NOTE(review): the "env = {" opening (elided around line 11861) and the
# closing/return (elided after 11866-11867) are missing from this listing.
11862 "DISK": self.op.disk,
11863 "AMOUNT": self.op.amount,
11864 "ABSOLUTE": self.op.absolute,
11866 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11869 def BuildHooksNodes(self):
11870 """Build hooks nodes.
11873 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11876 def CheckPrereq(self):
11877 """Check prerequisites.
11879 This checks that the instance is in the cluster.
11882 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11883 assert instance is not None, \
11884 "Cannot retrieve locked instance %s" % self.op.instance_name
11885 nodenames = list(instance.all_nodes)
11886 for node in nodenames:
11887 _CheckNodeOnline(self, node)
11889 self.instance = instance
11891 if instance.disk_template not in constants.DTS_GROWABLE:
11892 raise errors.OpPrereqError("Instance's disk layout does not support"
11893 " growing", errors.ECODE_INVAL)
11895 self.disk = instance.FindDisk(self.op.disk)
11897 if self.op.absolute:
11898 self.target = self.op.amount
11899 self.delta = self.target - self.disk.size
# NOTE(review): the guard for this raise (elided line 11900, presumably
# "if self.delta < 0:") is missing from this listing.
11901 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11902 "current disk size (%s)" %
11903 (utils.FormatUnit(self.target, "h"),
11904 utils.FormatUnit(self.disk.size, "h")),
11905 errors.ECODE_STATE)
# NOTE(review): the "else:" for the relative-growth case (elided line
# 11906) is missing from this listing.
11907 self.delta = self.op.amount
11908 self.target = self.disk.size + self.delta
11910 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11911 utils.FormatUnit(self.delta, "h"),
11912 errors.ECODE_INVAL)
11914 if instance.disk_template not in (constants.DT_FILE,
11915 constants.DT_SHARED_FILE,
11917 # TODO: check the free disk space for file, when that feature will be
11919 _CheckNodesFreeDiskPerVG(self, nodenames,
11920 self.disk.ComputeGrowth(self.delta))
11922 def Exec(self, feedback_fn):
11923 """Execute disk grow.
11926 instance = self.instance
# NOTE(review): the local "disk = self.disk" assignment (elided around
# line 11927) is missing; "disk" is used throughout below.
11929 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11930 assert (self.owned_locks(locking.LEVEL_NODE) ==
11931 self.owned_locks(locking.LEVEL_NODE_RES))
11933 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
# NOTE(review): the "if not disks_ok:" guard (elided line 11934) is
# missing from this listing.
11935 raise errors.OpExecError("Cannot activate block device to grow")
11937 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11938 (self.op.disk, instance.name,
11939 utils.FormatUnit(self.delta, "h"),
11940 utils.FormatUnit(self.target, "h")))
11942 # First run all grow ops in dry-run mode
11943 for node in instance.all_nodes:
11944 self.cfg.SetDiskID(disk, node)
11945 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11947 result.Raise("Grow request failed to node %s" % node)
11949 # We know that (as far as we can test) operations across different
11950 # nodes will succeed, time to run it for real on the backing storage
11951 for node in instance.all_nodes:
11952 self.cfg.SetDiskID(disk, node)
11953 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11955 result.Raise("Grow request failed to node %s" % node)
11957 # And now execute it for logical storage, on the primary node
11958 node = instance.primary_node
11959 self.cfg.SetDiskID(disk, node)
11960 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11962 result.Raise("Grow request failed to node %s" % node)
11964 disk.RecordGrow(self.delta)
11965 self.cfg.Update(instance, feedback_fn)
11967 # Changes have been recorded, release node lock
11968 _ReleaseLocks(self, locking.LEVEL_NODE)
11970 # Downgrade lock while waiting for sync
11971 self.glm.downgrade(locking.LEVEL_INSTANCE)
11973 if self.op.wait_for_sync:
11974 disk_abort = not _WaitForSync(self, instance, disks=[disk])
# NOTE(review): the "if disk_abort:" guard (elided line 11975) is
# missing from this listing.
11976 self.proc.LogWarning("Disk sync-ing has not returned a good"
11977 " status; please check the instance")
11978 if instance.admin_state != constants.ADMINST_UP:
11979 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11980 elif instance.admin_state != constants.ADMINST_UP:
11981 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11982 " not supposed to be running because no wait for"
11983 " sync mode was requested")
11985 assert self.owned_locks(locking.LEVEL_NODE_RES)
11986 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
# Logical unit returning detailed (optionally live) data about instances.
# NOTE(review): numbered listing with elided lines (gaps in embedded
# numbers); the tail of Exec (closing brace / return, original lines
# 12199-12201) is also elided. Code left byte-identical.
11989 class LUInstanceQueryData(NoHooksLU):
11990 """Query runtime instance data.
11995 def ExpandNames(self):
11996 self.needed_locks = {}
11998 # Use locking if requested or when non-static information is wanted
11999 if not (self.op.static or self.op.use_locking):
12000 self.LogWarning("Non-static data requested, locks need to be acquired")
12001 self.op.use_locking = True
12003 if self.op.instances or not self.op.use_locking:
12004 # Expand instance names right here
12005 self.wanted_names = _GetWantedInstances(self, self.op.instances)
# NOTE(review): the "else:" (elided line 12006) is missing here.
12007 # Will use acquired locks
12008 self.wanted_names = None
12010 if self.op.use_locking:
12011 self.share_locks = _ShareAll()
12013 if self.wanted_names is None:
12014 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
# NOTE(review): the "else:" (elided line 12015) is missing here.
12016 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12018 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12019 self.needed_locks[locking.LEVEL_NODE] = []
12020 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12022 def DeclareLocks(self, level):
12023 if self.op.use_locking:
12024 if level == locking.LEVEL_NODEGROUP:
12025 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12027 # Lock all groups used by instances optimistically; this requires going
12028 # via the node before it's locked, requiring verification later on
12029 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12030 frozenset(group_uuid
12031 for instance_name in owned_instances
12033 self.cfg.GetInstanceNodeGroups(instance_name))
12035 elif level == locking.LEVEL_NODE:
12036 self._LockInstancesNodes()
12038 def CheckPrereq(self):
12039 """Check prerequisites.
12041 This only checks the optional instance list against the existing names.
12044 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12045 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12046 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12048 if self.wanted_names is None:
12049 assert self.op.use_locking, "Locking was not used"
12050 self.wanted_names = owned_instances
12052 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12054 if self.op.use_locking:
12055 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
# NOTE(review): the "else:" (elided around line 12056-12057) preceding
# this assert is missing from this listing.
12058 assert not (owned_instances or owned_groups or owned_nodes)
12060 self.wanted_instances = instances.values()
12062 def _ComputeBlockdevStatus(self, node, instance, dev):
12063 """Returns the status of a block device
# Static queries (or a missing node) short-circuit; the elided branch
# body (around line 12067) presumably returns None — confirm in full file.
12066 if self.op.static or not node:
12069 self.cfg.SetDiskID(dev, node)
12071 result = self.rpc.call_blockdev_find(node, dev)
# NOTE(review): offline-node handling (elided lines 12072-12074) is
# missing from this listing.
12075 result.Raise("Can't compute disk status for %s" % instance.name)
12077 status = result.payload
# NOTE(review): a "if status is None: return None"-style guard (elided
# lines 12078-12080) presumably precedes this — confirm in full file.
12081 return (status.dev_path, status.major, status.minor,
12082 status.sync_percent, status.estimated_time,
12083 status.is_degraded, status.ldisk_status)
# Public wrapper: annotates the disk with its parameters, then delegates.
12085 def _ComputeDiskStatus(self, instance, snode, dev):
12086 """Compute block device status.
12089 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12091 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12093 def _ComputeDiskStatusInner(self, instance, snode, dev):
12094 """Compute block device status.
12096 @attention: The device has to be annotated already.
12099 if dev.dev_type in constants.LDS_DRBD:
12100 # we change the snode then (otherwise we use the one passed in)
12101 if dev.logical_id[0] == instance.primary_node:
12102 snode = dev.logical_id[1]
# NOTE(review): the "else:" (elided line 12103) is missing here.
12104 snode = dev.logical_id[0]
12106 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12108 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
# Recurse into child devices; several call arguments and the dict
# opening (elided lines 12112-12117) are missing from this listing.
12111 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12118 "iv_name": dev.iv_name,
12119 "dev_type": dev.dev_type,
12120 "logical_id": dev.logical_id,
12121 "physical_id": dev.physical_id,
12122 "pstatus": dev_pstatus,
12123 "sstatus": dev_sstatus,
12124 "children": dev_children,
12129 def Exec(self, feedback_fn):
12130 """Gather and return data"""
# NOTE(review): the "result = {}" initialization (elided around line
# 12131) is missing; result is filled per instance below.
12133 cluster = self.cfg.GetClusterInfo()
12135 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12136 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12138 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12139 for node in nodes.values()))
12141 group2name_fn = lambda uuid: groups[uuid].name
12143 for instance in self.wanted_instances:
12144 pnode = nodes[instance.primary_node]
12146 if self.op.static or pnode.offline:
12147 remote_state = None
# NOTE(review): the "if pnode.offline:" guard for this warning (elided
# line 12148) is missing from this listing.
12149 self.LogWarning("Primary node %s is marked offline, returning static"
12150 " information only for instance %s" %
12151 (pnode.name, instance.name))
# Live query path: ask the primary node's hypervisor about the instance.
12153 remote_info = self.rpc.call_instance_info(instance.primary_node,
12155 instance.hypervisor)
12156 remote_info.Raise("Error checking node %s" % instance.primary_node)
12157 remote_info = remote_info.payload
12158 if remote_info and "state" in remote_info:
12159 remote_state = "up"
# NOTE(review): the "else:" branch introduction (elided line 12160)
# is missing here.
12161 if instance.admin_state == constants.ADMINST_UP:
12162 remote_state = "down"
12164 remote_state = instance.admin_state
12166 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12169 snodes_group_uuids = [nodes[snode_name].group
12170 for snode_name in instance.secondary_nodes]
12172 result[instance.name] = {
12173 "name": instance.name,
12174 "config_state": instance.admin_state,
12175 "run_state": remote_state,
12176 "pnode": instance.primary_node,
12177 "pnode_group_uuid": pnode.group,
12178 "pnode_group_name": group2name_fn(pnode.group),
12179 "snodes": instance.secondary_nodes,
12180 "snodes_group_uuids": snodes_group_uuids,
12181 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12183 # this happens to be the same format used for hooks
12184 "nics": _NICListToTuple(self, instance.nics),
12185 "disk_template": instance.disk_template,
12187 "hypervisor": instance.hypervisor,
12188 "network_port": instance.network_port,
12189 "hv_instance": instance.hvparams,
12190 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12191 "be_instance": instance.beparams,
12192 "be_actual": cluster.FillBE(instance),
12193 "os_instance": instance.osparams,
12194 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12195 "serial_no": instance.serial_no,
12196 "mtime": instance.mtime,
12197 "ctime": instance.ctime,
12198 "uuid": instance.uuid,
12204 def PrepareContainerMods(mods, private_fn):
12205 """Prepares a list of container modifications by adding a private data field.
12207 @type mods: list of tuples; (operation, index, parameters)
12208 @param mods: List of modifications
12209 @type private_fn: callable or None
12210 @param private_fn: Callable for constructing a private data field for a
12215 if private_fn is None:
# NOTE(review): the branch bodies defining "fn" (elided lines
# 12216-12219, presumably a no-op lambda vs. private_fn) are missing
# from this numbered listing; fn() supplies each entry's private field.
12220 return [(op, idx, params, fn()) for (op, idx, params) in mods]
12223 #: Type description for changes as returned by L{ApplyContainerMods}'s
# Each change entry is a (name, value) pair; the second item's type and the
# closing parentheses (elided lines 12228-12229) are missing from this
# numbered listing.
12225 _TApplyContModsCbChanges = \
12226 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12227 ht.TNonEmptyString,
# NOTE(review): numbered listing with elided lines (gaps in embedded
# numbers), e.g. the "if idx == -1:" branch header (12261), the "try:"
# around item retrieval (12294) and several assignments. Code left
# byte-identical; only review comments added.
12232 def ApplyContainerMods(kind, container, chgdesc, mods,
12233 create_fn, modify_fn, remove_fn):
12234 """Applies descriptions in C{mods} to C{container}.
12237 @param kind: One-word item description
12238 @type container: list
12239 @param container: Container to modify
12240 @type chgdesc: None or list
12241 @param chgdesc: List of applied changes
12243 @param mods: Modifications as returned by L{PrepareContainerMods}
12244 @type create_fn: callable
12245 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12246 receives absolute item index, parameters and private data object as added
12247 by L{PrepareContainerMods}, returns tuple containing new item and changes
12249 @type modify_fn: callable
12250 @param modify_fn: Callback for modifying an existing item
12251 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12252 and private data object as added by L{PrepareContainerMods}, returns
12254 @type remove_fn: callable
12255 @param remove_fn: Callback on removing item; receives absolute item index,
12256 item and private data object as added by L{PrepareContainerMods}
12259 for (op, idx, params, private) in mods:
# Index -1 addresses the last element; other negative indices are invalid.
12262 absidx = len(container) - 1
12264 raise IndexError("Not accepting negative indices other than -1")
12265 elif idx > len(container):
12266 raise IndexError("Got %s index %s, but there are only %s" %
12267 (kind, idx, len(container)))
12273 if op == constants.DDM_ADD:
12274 # Calculate where item will be added
12276 addidx = len(container)
12280 if create_fn is None:
12283 (item, changes) = create_fn(addidx, params, private)
# Append at the end for idx == -1, otherwise insert at the given index.
12286 container.append(item)
12289 assert idx <= len(container)
12290 # list.insert does so before the specified index
12291 container.insert(idx, item)
12293 # Retrieve existing item
12295 item = container[absidx]
# NOTE(review): the "except IndexError:" matching an elided "try:"
# (around lines 12294/12296) is missing from this listing.
12297 raise IndexError("Invalid %s index %s" % (kind, idx))
12299 if op == constants.DDM_REMOVE:
12302 if remove_fn is not None:
12303 remove_fn(absidx, item, private)
12305 changes = [("%s/%s" % (kind, absidx), "remove")]
12307 assert container[absidx] == item
12308 del container[absidx]
12309 elif op == constants.DDM_MODIFY:
12310 if modify_fn is not None:
12311 changes = modify_fn(absidx, item, params, private)
# NOTE(review): the "else:" preceding this ProgrammerError (elided line
# 12312) is missing from this listing.
12313 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12315 assert _TApplyContModsCbChanges(changes)
12317 if not (chgdesc is None or changes is None):
12318 chgdesc.extend(changes)
12321 def _UpdateIvNames(base_index, disks):
12322 """Updates the C{iv_name} attribute of disks.
12324 @type disks: list of L{objects.Disk}
12327 for (idx, disk) in enumerate(disks):
12328 disk.iv_name = "disk/%s" % (base_index + idx, )
class _InstNicModPrivate:
  """Data structure for network interface modifications.

  Used by L{LUInstanceSetParams} to carry per-NIC state between the
  prereq phase (L{LUInstanceSetParams._PrepareNicModification}, which
  assigns C{params} and C{filled}) and the apply phase.

  """
  def __init__(self):
    # NOTE(review): the attribute initialization is elided in this
    # excerpt; C{params} and C{filled} are set later by
    # _PrepareNicModification — confirm against the full source.
class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  Handles disk/NIC add/remove/modify, disk template conversion,
  hypervisor/backend/OS parameter updates, runtime memory ballooning
  and the offline/online administrative state (see the methods below).

  """
  # Hooks path and object type used for the instance-modify hooks
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
def _UpgradeDiskNicMods(kind, mods, verify_fn):
  # Upgrade legacy two-element (op, params) disk/NIC modification lists
  # to the three-element (op, index, params) form, then verify.
  assert ht.TList(mods)
  assert not mods or len(mods[0]) in (2, 3)
  if mods and len(mods[0]) == 2:
    # Old-style request: adds/removes go to index -1 (append / last item)
    for op, params in mods:
      if op in (constants.DDM_ADD, constants.DDM_REMOVE):
        result.append((op, -1, params))
        # NOTE(review): the guard that limits the request to a single
        # add/remove is elided in this excerpt; the error below is
        # raised only when a second one is seen — confirm upstream.
        raise errors.OpPrereqError("Only one %s add or remove operation is"
                                   " supported at a time" % kind,
                                   errors.ECODE_INVAL)
      # non-add/remove entries become explicit modifications
      result.append((constants.DDM_MODIFY, op, params))
  assert verify_fn(result)
def _CheckMods(kind, mods, key_types, item_fn):
  """Ensures requested disk/NIC modifications are valid.

  @param kind: "disk" or "NIC", used only in error messages
  @param mods: (op, index, params) tuples as produced by
    L{_UpgradeDiskNicMods}
  @param key_types: allowed parameter types for C{ForceDictType}
  @param item_fn: per-item verification callback for add/modify

  """
  for (op, _, params) in mods:
    assert ht.TDict(params)
    utils.ForceDictType(params, key_types)

    if op == constants.DDM_REMOVE:
      # NOTE(review): an "if params:" guard is elided in this excerpt —
      # the error fires only when settings were actually passed.
      raise errors.OpPrereqError("No settings should be passed when"
                                 " removing a %s" % kind,
                                 errors.ECODE_INVAL)
    elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
      item_fn(op, params)
      # NOTE(review): an "else:" for unknown operations is elided here
      raise errors.ProgrammerError("Unhandled operation '%s'" % op)
def _VerifyDiskModification(op, params):
  """Verifies a disk modification.

  For additions: defaults/validates the access mode and requires a
  valid integer size.  For modifications: rejects size changes
  (grow-disk must be used instead).

  """
  if op == constants.DDM_ADD:
    mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
    if mode not in constants.DISK_ACCESS_SET:
      raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                 errors.ECODE_INVAL)

    size = params.get(constants.IDISK_SIZE, None)
    # NOTE(review): the "if size is None:" guard is elided in this
    # excerpt; the error below applies to a missing size.
      raise errors.OpPrereqError("Required disk parameter '%s' missing" %
                                 constants.IDISK_SIZE, errors.ECODE_INVAL)

    # NOTE(review): the "try: size = int(size)" conversion is elided here
    except (TypeError, ValueError), err:
      raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
                                 errors.ECODE_INVAL)

    # store the normalized integer size back into the request
    params[constants.IDISK_SIZE] = size

  elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
    raise errors.OpPrereqError("Disk size change not possible, use"
                               " grow-disk", errors.ECODE_INVAL)
def _VerifyNicModification(op, params):
  """Verifies a network interface modification.

  Normalizes the IP ("none" -> C{None}), resolves the legacy "bridge"
  vs. "link" parameters, and validates/normalizes the MAC address.

  """
  if op in (constants.DDM_ADD, constants.DDM_MODIFY):
    ip = params.get(constants.INIC_IP, None)
    # NOTE(review): the initial "if ip is None:" branch is elided in
    # this excerpt — confirm against the full source.
    elif ip.lower() == constants.VALUE_NONE:
      params[constants.INIC_IP] = None
    elif not netutils.IPAddress.IsValid(ip):
      raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                 errors.ECODE_INVAL)

    # "bridge" is the legacy spelling of the link parameter; both at
    # once is ambiguous and rejected
    bridge = params.get("bridge", None)
    link = params.get(constants.INIC_LINK, None)
    if bridge and link:
      raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                 " at the same time", errors.ECODE_INVAL)
    elif bridge and bridge.lower() == constants.VALUE_NONE:
      params["bridge"] = None
    elif link and link.lower() == constants.VALUE_NONE:
      params[constants.INIC_LINK] = None

    if op == constants.DDM_ADD:
      # newly-added NICs default to an auto-generated MAC
      macaddr = params.get(constants.INIC_MAC, None)
      if macaddr is None:
        params[constants.INIC_MAC] = constants.VALUE_AUTO

    if constants.INIC_MAC in params:
      macaddr = params[constants.INIC_MAC]
      if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        macaddr = utils.NormalizeAndValidateMac(macaddr)

      if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
        raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                   " modifying an existing NIC",
                                   errors.ECODE_INVAL)
def CheckArguments(self):
  """Sanity-check the opcode before any locks are taken.

  Ensures at least one change was requested, upgrades legacy disk/NIC
  modification lists to the indexed format and validates them, and
  rejects unsupported combinations of disk changes.

  """
  op = self.op
  if not any([op.nics, op.disks, op.disk_template,
              op.hvparams, op.beparams, op.os_name,
              op.offline is not None, op.runtime_mem]):
    raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

  if op.hvparams:
    _CheckGlobalHvParams(op.hvparams)

  # Upgrade legacy (op, params) modification lists in place
  op.disks = self._UpgradeDiskNicMods(
    "disk", op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
  op.nics = self._UpgradeDiskNicMods(
    "NIC", op.nics, opcodes.OpInstanceSetParams.TestNicModifications)

  # Check disk modifications
  self._CheckMods("disk", op.disks, constants.IDISK_PARAMS_TYPES,
                  self._VerifyDiskModification)

  if op.disks and op.disk_template is not None:
    raise errors.OpPrereqError("Disk template conversion and other disk"
                               " changes not supported at the same time",
                               errors.ECODE_INVAL)

  if (op.disk_template and
      op.disk_template in constants.DTS_INT_MIRROR and
      op.remote_node is None):
    raise errors.OpPrereqError("Changing the disk template to a mirrored"
                               " one requires specifying a secondary node",
                               errors.ECODE_INVAL)

  # Check NIC modifications
  self._CheckMods("NIC", op.nics, constants.INIC_PARAMS_TYPES,
                  self._VerifyNicModification)
def ExpandNames(self):
  """Acquire the instance lock and prepare exclusive node locks."""
  self._ExpandAndLockInstance()
  # Node locks must be exclusive: as of Ganeti 2.6 a disk template
  # conversion modifies the node objects on disk, so shared mode is
  # not sufficient.
  for level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES):
    self.needed_locks[level] = []
  self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
def DeclareLocks(self, level):
  """Fill in node-level locks once the instance's nodes are known."""
  # TODO: Acquire group lock in shared mode (disk parameters)
  needed = self.needed_locks
  if level == locking.LEVEL_NODE:
    self._LockInstancesNodes()
    remote = self.op.remote_node
    if self.op.disk_template and remote:
      # The new secondary for a template conversion must be locked too
      remote = _ExpandNodeName(self.cfg, remote)
      self.op.remote_node = remote
      needed[locking.LEVEL_NODE].append(remote)
  elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
    # Resource locks mirror the node locks computed above
    needed[locking.LEVEL_NODE_RES] = list(needed[locking.LEVEL_NODE])
def BuildHooksEnv(self):
  """Build hooks env.

  This runs on the master, primary and secondaries.

  """
  # NOTE(review): the "args = {}" initialization is elided in this
  # excerpt; args collects the overridden instance parameters.
  if constants.BE_MINMEM in self.be_new:
    args["minmem"] = self.be_new[constants.BE_MINMEM]
  if constants.BE_MAXMEM in self.be_new:
    args["maxmem"] = self.be_new[constants.BE_MAXMEM]
  if constants.BE_VCPUS in self.be_new:
    args["vcpus"] = self.be_new[constants.BE_VCPUS]
  # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
  # information at all.

  if self._new_nics is not None:
    # export the post-modification NIC list (ip, mac, mode, link tuples)
    for nic in self._new_nics:
      nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
      mode = nicparams[constants.NIC_MODE]
      link = nicparams[constants.NIC_LINK]
      nics.append((nic.ip, nic.mac, mode, link))

    args["nics"] = nics

  env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
  if self.op.disk_template:
    env["NEW_DISK_TEMPLATE"] = self.op.disk_template
  if self.op.runtime_mem:
    env["RUNTIME_MEMORY"] = self.op.runtime_mem
  # NOTE(review): the "return env" line is elided in this excerpt.
def BuildHooksNodes(self):
  """Build hooks nodes.

  Hooks run on the master plus all of the instance's nodes.

  """
  nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
  # NOTE(review): the trailing "return (nl, nl)" is elided in this
  # excerpt — confirm against the full source.
def _PrepareNicModification(self, params, private, old_ip, old_params,
  # (signature continuation elided in this excerpt; also takes the
  # cluster object and the primary node name)
  # Merge the requested NIC parameters over the old ones, validate the
  # result and stash it on the private data object for the apply phase.
  update_params_dict = dict([(key, params[key])
                             for key in constants.NICS_PARAMETERS

  # legacy 'bridge' parameter maps onto the link parameter
  if "bridge" in params:
    update_params_dict[constants.NIC_LINK] = params["bridge"]

  new_params = _GetUpdatedParams(old_params, update_params_dict)
  utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)

  new_filled_params = cluster.SimpleFillNIC(new_params)
  objects.NIC.CheckParameterSyntax(new_filled_params)

  new_mode = new_filled_params[constants.NIC_MODE]
  if new_mode == constants.NIC_MODE_BRIDGED:
    # bridged NICs: verify the bridge exists on the primary node
    bridge = new_filled_params[constants.NIC_LINK]
    msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
    # NOTE(review): the "if msg:" guard and the force/no-force split are
    # elided in this excerpt; with --force the problem is only warned
    # about, otherwise it is fatal.
      msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
        self.warn.append(msg)
        raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)

  elif new_mode == constants.NIC_MODE_ROUTED:
    # routed NICs must keep a usable IP address
    ip = params.get(constants.INIC_IP, old_ip)
    # NOTE(review): the "if ip is None:" guard is elided here
      raise errors.OpPrereqError("Cannot set the NIC IP address to None"
                                 " on a routed NIC", errors.ECODE_INVAL)

  if constants.INIC_MAC in params:
    mac = params[constants.INIC_MAC]
    # NOTE(review): the "if mac is None:" guard is elided here
      raise errors.OpPrereqError("Cannot unset the NIC MAC address",
                                 errors.ECODE_INVAL)
    elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
      # otherwise generate the MAC address
      params[constants.INIC_MAC] = \
        self.cfg.GenerateMAC(self.proc.GetECId())
      # or validate/reserve the current one
        self.cfg.ReserveMAC(mac, self.proc.GetECId())
      except errors.ReservationError:
        raise errors.OpPrereqError("MAC address '%s' already in use"
                                   " in cluster" % mac,
                                   errors.ECODE_NOTUNIQUE)

  # results consumed later by _CreateNewNic/_ApplyNicMods
  private.params = new_params
  private.filled = new_filled_params
def CheckPrereq(self):
  """Check prerequisites.

  This only checks the instance list against the existing names.

  """
  # checking the new params on the primary/secondary nodes

  instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
  cluster = self.cluster = self.cfg.GetClusterInfo()
  assert self.instance is not None, \
    "Cannot retrieve locked instance %s" % self.op.instance_name
  pnode = instance.primary_node
  nodelist = list(instance.all_nodes)
  pnode_info = self.cfg.GetNodeInfo(pnode)
  self.diskparams = self.cfg.GetInstanceDiskParams(instance)

  # Prepare disk/NIC modifications
  self.diskmod = PrepareContainerMods(self.op.disks, None)
  self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)

  # OS change: unless forced, verify the target OS exists on the
  # primary node before accepting the new OS name
  if self.op.os_name and not self.op.force:
    _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                    self.op.force_variant)
    instance_os = self.op.os_name
  # NOTE(review): the "else:" for the unchanged-OS case is elided here
    instance_os = instance.os

  assert not (self.op.disk_template and self.op.disks), \
    "Can't modify disk template and apply disk changes at the same time"

  if self.op.disk_template:
    if instance.disk_template == self.op.disk_template:
      raise errors.OpPrereqError("Instance already has disk template %s" %
                                 instance.disk_template, errors.ECODE_INVAL)

    if (instance.disk_template,
        self.op.disk_template) not in self._DISK_CONVERSIONS:
      raise errors.OpPrereqError("Unsupported disk template conversion from"
                                 " %s to %s" % (instance.disk_template,
                                                self.op.disk_template),
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN,
                        msg="cannot change disk template")
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.remote_node == pnode:
        raise errors.OpPrereqError("Given new secondary node %s is the same"
                                   " as the primary node of the instance" %
                                   self.op.remote_node, errors.ECODE_STATE)
      _CheckNodeOnline(self, self.op.remote_node)
      _CheckNodeNotDrained(self, self.op.remote_node)
      # FIXME: here we assume that the old instance type is DT_PLAIN
      assert instance.disk_template == constants.DT_PLAIN
      disks = [{constants.IDISK_SIZE: d.size,
                constants.IDISK_VG: d.logical_id[0]}
               for d in instance.disks]
      required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
      _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

      snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
      snode_group = self.cfg.GetNodeGroup(snode_info.group)
      # NOTE(review): the continuation of this call (the group argument)
      # is elided in this excerpt
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
      _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
                              ignore=self.op.ignore_ipolicy)
      if pnode_info.group != snode_info.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"

  # hvparams processing
  if self.op.hvparams:
    hv_type = instance.hypervisor
    i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
    utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
    hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

    # local check
    hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
    _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
    self.hv_proposed = self.hv_new = hv_new # the new actual values
    self.hv_inst = i_hvdict # the new dict (without defaults)
  # NOTE(review): the "else:" (no hvparams change) branch header and the
  # call continuation are elided in this excerpt
    self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
    self.hv_new = self.hv_inst = {}

  # beparams processing
  if self.op.beparams:
    # NOTE(review): the use_default argument continuation is elided here
    i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
    objects.UpgradeBeParams(i_bedict)
    utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
    be_new = cluster.SimpleFillBE(i_bedict)
    self.be_proposed = self.be_new = be_new # the new actual values
    self.be_inst = i_bedict # the new dict (without defaults)
  # NOTE(review): the "else:" branch header is elided here
    self.be_new = self.be_inst = {}
    self.be_proposed = cluster.SimpleFillBE(instance.beparams)
  be_old = cluster.FillBE(instance)

  # CPU param validation -- checking every time a parameter is
  # changed to cover all cases where either CPU mask or vcpus have
  if (constants.BE_VCPUS in self.be_proposed and
      constants.HV_CPU_MASK in self.hv_proposed):
    # NOTE(review): the "cpu_list = \" assignment line is elided here
    utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
    # Verify mask is consistent with number of vCPUs. Can skip this
    # test if only 1 entry in the CPU mask, which means same mask
    # is applied to all vCPUs.
    if (len(cpu_list) > 1 and
        len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
      raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                 (self.be_proposed[constants.BE_VCPUS],
                                  self.hv_proposed[constants.HV_CPU_MASK]),
                                 errors.ECODE_INVAL)

    # Only perform this test if a new CPU mask is given
    if constants.HV_CPU_MASK in self.hv_new:
      # Calculate the largest CPU number requested
      max_requested_cpu = max(map(max, cpu_list))
      # Check that all of the instance's nodes have enough physical CPUs to
      # satisfy the requested CPU mask
      _CheckNodesPhysicalCPUs(self, instance.all_nodes,
                              max_requested_cpu + 1, instance.hypervisor)

  # osparams processing
  if self.op.osparams:
    i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
    _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
    self.os_inst = i_osdict # the new dict (without defaults)

  # memory checks: only done when raising MAXMEM without --force
  #TODO(dynmem): do the appropriate check involving MINMEM
  if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
      be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
    mem_check_list = [pnode]
    if be_new[constants.BE_AUTO_BALANCE]:
      # either we changed auto_balance to yes or it was from before
      mem_check_list.extend(instance.secondary_nodes)
    instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                instance.hypervisor)
    nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                       [instance.hypervisor])
    pninfo = nodeinfo[pnode]
    msg = pninfo.fail_msg
    # NOTE(review): the "if msg:" guard is elided in this excerpt
      # Assume the primary node is unreachable and go ahead
      self.warn.append("Can't get info from primary node %s: %s" %
    # NOTE(review): the "else:" branch header is elided here
      (_, _, (pnhvinfo, )) = pninfo.payload
      if not isinstance(pnhvinfo.get("memory_free", None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                         instance_info.fail_msg)
        if instance_info.payload:
          current_mem = int(instance_info.payload["memory"])
          # Assume instance not running
          # (there is a slight race condition here, but it's not very
          # probable, and we have no other way to check)
          # TODO: Describe race condition

        #TODO(dynmem): do the appropriate check involving MINMEM
        miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
                    pnhvinfo["memory_free"])
        # NOTE(review): the "if miss_mem > 0:" guard is elided here
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" %
                                     miss_mem, errors.ECODE_NORES)

    if be_new[constants.BE_AUTO_BALANCE]:
      # every secondary must also hold the new maximum memory
      for node, nres in nodeinfo.items():
        if node not in instance.secondary_nodes:
          # NOTE(review): a "continue" statement is elided here
        nres.Raise("Can't get info from secondary node %s" % node,
                   prereq=True, ecode=errors.ECODE_STATE)
        (_, _, (nhvinfo, )) = nres.payload
        if not isinstance(nhvinfo.get("memory_free", None), int):
          raise errors.OpPrereqError("Secondary node %s didn't return free"
                                     " memory information" % node,
                                     errors.ECODE_STATE)
        #TODO(dynmem): do the appropriate check involving MINMEM
        elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from failover to its secondary node"
                                     " %s, due to not enough memory" % node,
                                     errors.ECODE_STATE)

  if self.op.runtime_mem:
    # ballooning requires a running instance
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    if not remote_info.payload: # not running already
      raise errors.OpPrereqError("Instance %s is not running" %
                                 instance.name, errors.ECODE_STATE)

    current_memory = remote_info.payload["memory"]
    if (not self.op.force and
        (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
         self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
      raise errors.OpPrereqError("Instance %s must have memory between %d"
                                 " and %d MB of memory unless --force is"
                                 self.be_proposed[constants.BE_MINMEM],
                                 self.be_proposed[constants.BE_MAXMEM]),
                                 errors.ECODE_INVAL)

    if self.op.runtime_mem > current_memory:
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "ballooning memory for instance %s" %
                           self.op.memory - current_memory,
                           instance.hypervisor)

  if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
    raise errors.OpPrereqError("Disk operations not supported for"
                               " diskless instances", errors.ECODE_INVAL)

  # closures passed to ApplyContainerMods for dry-run NIC verification
  def _PrepareNicCreate(_, params, private):
    self._PrepareNicModification(params, private, None, {}, cluster, pnode)
    return (None, None)

  def _PrepareNicMod(_, nic, params, private):
    self._PrepareNicModification(params, private, nic.ip,
                                 nic.nicparams, cluster, pnode)

  # Verify NIC changes (operating on copy)
  nics = instance.nics[:]
  ApplyContainerMods("NIC", nics, None, self.nicmod,
                     _PrepareNicCreate, _PrepareNicMod, None)
  if len(nics) > constants.MAX_NICS:
    raise errors.OpPrereqError("Instance has too many network interfaces"
                               " (%d), cannot add more" % constants.MAX_NICS,
                               errors.ECODE_STATE)

  # Verify disk changes (operating on a copy)
  disks = instance.disks[:]
  ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
  if len(disks) > constants.MAX_DISKS:
    raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
                               " more" % constants.MAX_DISKS,
                               errors.ECODE_STATE)

  if self.op.offline is not None:
    if self.op.offline:
      msg = "can't change to offline"
    # NOTE(review): the "else:" branch header is elided here
      msg = "can't change to online"
    _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)

  # Pre-compute NIC changes (necessary to use result in hooks)
  self._nic_chgdesc = []
  # NOTE(review): the "if self.nicmod:" guard is elided in this excerpt
    # Operate on copies as this is still in prereq
    nics = [nic.Copy() for nic in instance.nics]
    ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
                       self._CreateNewNic, self._ApplyNicMods, None)
    self._new_nics = nics
  # NOTE(review): the "else:" branch header is elided here
    self._new_nics = None
def _ConvertPlainToDrbd(self, feedback_fn):
  """Converts an instance from plain to drbd.

  """
  feedback_fn("Converting template to drbd")
  instance = self.instance
  pnode = instance.primary_node
  snode = self.op.remote_node

  assert instance.disk_template == constants.DT_PLAIN

  # create a fake disk info for _GenerateDiskTemplate
  disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                constants.IDISK_VG: d.logical_id[0]}
               for d in instance.disks]
  new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                    instance.name, pnode, [snode],
                                    disk_info, None, None, 0, feedback_fn,
  # NOTE(review): continuation argument (self.diskparams) elided here
  anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
  info = _GetInstanceInfoText(instance)
  feedback_fn("Creating additional volumes...")
  # first, create the missing data and meta devices
  for disk in anno_disks:
    # unfortunately this is... not too nice
    _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
    for child in disk.children:
      _CreateSingleBlockDev(self, snode, instance, child, info, True)
  # at this stage, all new LVs have been created, we can rename the
  # old ones
  feedback_fn("Renaming original volumes...")
  rename_list = [(o, n.children[0].logical_id)
                 for (o, n) in zip(instance.disks, new_disks)]
  result = self.rpc.call_blockdev_rename(pnode, rename_list)
  result.Raise("Failed to rename original LVs")

  feedback_fn("Initializing DRBD devices...")
  # all child devices are in place, we can now create the DRBD devices
  for disk in anno_disks:
    for node in [pnode, snode]:
      # only the primary gets a forced creation
      f_create = node == pnode
      _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

  # at this point, the instance has been modified
  instance.disk_template = constants.DT_DRBD8
  instance.disks = new_disks
  self.cfg.Update(instance, feedback_fn)

  # Release node locks while waiting for sync
  _ReleaseLocks(self, locking.LEVEL_NODE)

  # disks are created, waiting for sync
  disk_abort = not _WaitForSync(self, instance,
                                oneshot=not self.op.wait_for_sync)
  # NOTE(review): the "if disk_abort:" guard is elided in this excerpt
    raise errors.OpExecError("There are some degraded disks for"
                             " this instance, please cleanup manually")

  # Node resource locks will be released by caller
def _ConvertDrbdToPlain(self, feedback_fn):
  """Converts an instance from drbd to plain.

  """
  instance = self.instance

  assert len(instance.secondary_nodes) == 1
  assert instance.disk_template == constants.DT_DRBD8

  pnode = instance.primary_node
  snode = instance.secondary_nodes[0]
  feedback_fn("Converting template to plain")

  # keep the data LVs (children[0]); drop the DRBD layer and metadata
  old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
  new_disks = [d.children[0] for d in instance.disks]

  # copy over size and mode
  for parent, child in zip(old_disks, new_disks):
    child.size = parent.size
    child.mode = parent.mode

  # this is a DRBD disk, return its port to the pool
  # NOTE: this must be done right before the call to cfg.Update!
  for disk in old_disks:
    tcp_port = disk.logical_id[2]
    self.cfg.AddTcpUdpPort(tcp_port)

  # update instance structure
  instance.disks = new_disks
  instance.disk_template = constants.DT_PLAIN
  self.cfg.Update(instance, feedback_fn)

  # Release locks in case removing disks takes a while
  _ReleaseLocks(self, locking.LEVEL_NODE)

  feedback_fn("Removing volumes on the secondary node...")
  for disk in old_disks:
    self.cfg.SetDiskID(disk, snode)
    msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
    # NOTE(review): the "if msg:" guard is elided in this excerpt
      self.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", disk.iv_name, snode, msg)

  feedback_fn("Removing unneeded volumes on the primary node...")
  for idx, disk in enumerate(old_disks):
    meta = disk.children[1]
    self.cfg.SetDiskID(meta, pnode)
    msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
    # NOTE(review): the "if msg:" guard is elided in this excerpt
      self.LogWarning("Could not remove metadata for disk %d on node %s,"
                      " continuing anyway: %s", idx, pnode, msg)
def _CreateNewDisk(self, idx, params, _):
  """Creates a new disk.

  Callback for L{ApplyContainerMods}; returns the new disk object and
  the change descriptions.

  """
  instance = self.instance

  # add a new disk
  if instance.disk_template in constants.DTS_FILEBASED:
    # file-based disks live next to the instance's first disk
    (file_driver, file_path) = instance.disks[0].logical_id
    file_path = os.path.dirname(file_path)
  # NOTE(review): the "else:" branch header is elided in this excerpt
    file_driver = file_path = None

  # NOTE(review): the "disk = \" assignment line is elided here
    _GenerateDiskTemplate(self, instance.disk_template, instance.name,
                          instance.primary_node, instance.secondary_nodes,
                          [params], file_path, file_driver, idx,
                          self.Log, self.diskparams)[0]

  info = _GetInstanceInfoText(instance)

  logging.info("Creating volume %s for instance %s",
               disk.iv_name, instance.name)
  # Note: this needs to be kept in sync with _CreateDisks
  for node in instance.all_nodes:
    f_create = (node == instance.primary_node)
    # NOTE(review): the "try:" line is elided in this excerpt
      _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
    except errors.OpExecError, err:
      self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
                      disk.iv_name, disk, node, err)

  # NOTE(review): the "return (disk, [" line is elided here
    ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
def _ModifyDisk(idx, disk, params, _):
  """Modifies a disk.

  Only the access mode can be changed; returns the change description.

  """
  disk.mode = params[constants.IDISK_MODE]
  # NOTE(review): the surrounding "return [" / "]" lines are elided in
  # this excerpt.
    ("disk.mode/%d" % idx, disk.mode),
def _RemoveDisk(self, idx, root, _):
  """Removes a disk.

  Removes the block devices on every node of the disk's node tree and
  returns a DRBD disk's TCP port to the pool.

  """
  (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
  for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
    self.cfg.SetDiskID(disk, node)
    msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
    # NOTE(review): the "if msg:" guard is elided in this excerpt
      self.LogWarning("Could not remove disk/%d on node '%s': %s,"
                      " continuing anyway", idx, node, msg)

  # if this is a DRBD disk, return its port to the pool
  if root.dev_type in constants.LDS_DRBD:
    self.cfg.AddTcpUdpPort(root.logical_id[2])
def _CreateNewNic(idx, params, private):
  """Creates data structure for a new network interface.

  Callback for L{ApplyContainerMods}; the parameters have already been
  validated/filled by L{_PrepareNicModification} (via C{private}).

  """
  mac = params[constants.INIC_MAC]
  ip = params.get(constants.INIC_IP, None)
  nicparams = private.params

  return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
    # NOTE(review): the '("nic.%d" % idx,' line is elided in this excerpt
    "add:mac=%s,ip=%s,mode=%s,link=%s" %
    (mac, ip, private.filled[constants.NIC_MODE],
     private.filled[constants.NIC_LINK])),
def _ApplyNicMods(idx, nic, params, private):
  """Modifies a network interface.

  Applies MAC/IP and nicparams changes to the NIC object and collects
  change descriptions for the job log.

  """
  for key in [constants.INIC_MAC, constants.INIC_IP]:
    # NOTE(review): the "if key in params:" guard is elided here
      changes.append(("nic.%s/%d" % (key, idx), params[key]))
      setattr(nic, key, params[key])

  # NOTE(review): the guard for updated nicparams is elided here
    nic.nicparams = private.params

    for (key, val) in params.items():
      changes.append(("nic.%s/%d" % (key, idx), val))
  # NOTE(review): the trailing "return changes" is elided in this excerpt
def Exec(self, feedback_fn):
  """Modifies an instance.

  All parameters take effect only at the next restart of the instance.

  """
  # Process here the warnings from CheckPrereq, as we don't have a
  # feedback_fn there.
  # TODO: Replace with self.LogWarning
  for warn in self.warn:
    feedback_fn("WARNING: %s" % warn)

  assert ((self.op.disk_template is None) ^
          bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
    "Not owning any node resource locks"

  # NOTE(review): the "result = []" initialization is elided in this
  # excerpt; result accumulates (name, value) change descriptions.
  instance = self.instance

  # runtime memory ballooning
  if self.op.runtime_mem:
    # NOTE(review): a continuation argument line is elided here
    rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                   self.op.runtime_mem)
    rpcres.Raise("Cannot modify instance runtime memory")
    result.append(("runtime_memory", self.op.runtime_mem))

  # Apply disk changes
  ApplyContainerMods("disk", instance.disks, result, self.diskmod,
                     self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
  _UpdateIvNames(0, instance.disks)

  if self.op.disk_template:
    # lock sanity checks before the conversion
      check_nodes = set(instance.all_nodes)
      if self.op.remote_node:
        check_nodes.add(self.op.remote_node)
      for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
        owned = self.owned_locks(level)
        assert not (check_nodes - owned), \
          ("Not owning the correct locks, owning %r, expected at least %r" %
           (owned, check_nodes))

    r_shut = _ShutdownInstanceDisks(self, instance)
    # NOTE(review): the "if not r_shut:" guard is elided in this excerpt
      raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                               " proceed with disk template conversion")
    mode = (instance.disk_template, self.op.disk_template)
    # NOTE(review): the "try:" / "except:" frame around the conversion is
    # elided; on failure the reserved DRBD minors are released.
      self._DISK_CONVERSIONS[mode](self, feedback_fn)
      self.cfg.ReleaseDRBDMinors(instance.name)
    result.append(("disk_template", self.op.disk_template))

    assert instance.disk_template == self.op.disk_template, \
      ("Expected disk template '%s', found '%s'" %
       (self.op.disk_template, instance.disk_template))

  # Release node and resource locks if there are any (they might already have
  # been released during disk conversion)
  _ReleaseLocks(self, locking.LEVEL_NODE)
  _ReleaseLocks(self, locking.LEVEL_NODE_RES)

  # Apply NIC changes
  if self._new_nics is not None:
    instance.nics = self._new_nics
    result.extend(self._nic_chgdesc)

  # hvparams changes
  if self.op.hvparams:
    instance.hvparams = self.hv_inst
    for key, val in self.op.hvparams.iteritems():
      result.append(("hv/%s" % key, val))

  # beparams changes
  if self.op.beparams:
    instance.beparams = self.be_inst
    for key, val in self.op.beparams.iteritems():
      result.append(("be/%s" % key, val))

  # OS change
  if self.op.os_name:
    instance.os = self.op.os_name

  # osparams changes
  if self.op.osparams:
    instance.osparams = self.os_inst
    for key, val in self.op.osparams.iteritems():
      result.append(("os/%s" % key, val))

  if self.op.offline is None:
    # Ignore (no offline change requested)
  elif self.op.offline:
    # Mark instance as offline
    self.cfg.MarkInstanceOffline(instance.name)
    result.append(("admin_state", constants.ADMINST_OFFLINE))
  # NOTE(review): the "else:" branch header is elided in this excerpt
    # Mark instance as online, but stopped
    self.cfg.MarkInstanceDown(instance.name)
    result.append(("admin_state", constants.ADMINST_DOWN))

  self.cfg.Update(instance, feedback_fn)

  assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
              self.owned_locks(locking.LEVEL_NODE)), \
    "All node locks should have been released by now"
  # NOTE(review): the trailing "return result" is elided in this excerpt
# Dispatch table mapping (current template, requested template) to the
# conversion method; consulted by CheckPrereq and Exec above.
_DISK_CONVERSIONS = {
  (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
  (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
class LUInstanceChangeGroup(LogicalUnit):
  """Moves an instance to other node group(s), presumably via the
  iallocator — TODO confirm against the full class body.

  """
  # Hooks path and object type used for the instance-change-group hooks
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
def ExpandNames(self):
  """Set up shared locks and resolve the requested target groups."""
  self.share_locks = _ShareAll()
  # group and node locks are declared in DeclareLocks
  self.needed_locks = {
    locking.LEVEL_NODEGROUP: [],
    locking.LEVEL_NODE: [],

  self._ExpandAndLockInstance()

  if self.op.target_groups:
    self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                self.op.target_groups)
  # NOTE(review): the "else:" branch header is elided in this excerpt
    self.req_target_uuids = None

  self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
def DeclareLocks(self, level):
  """Declare group and node locks once the instance lock is held."""
  if level == locking.LEVEL_NODEGROUP:
    assert not self.needed_locks[locking.LEVEL_NODEGROUP]

    if self.req_target_uuids:
      lock_groups = set(self.req_target_uuids)

      # Lock all groups used by instance optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
      lock_groups.update(instance_groups)
    # NOTE(review): the "else:" branch header is elided in this excerpt
      # No target groups, need to lock all of them
      lock_groups = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

  elif level == locking.LEVEL_NODE:
    if self.req_target_uuids:
      # Lock all nodes used by instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in all potential target groups
      lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                     self.cfg.GetInstanceNodeGroups(self.op.instance_name))
      member_nodes = [node_name
                      for group in lock_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
    # NOTE(review): the "else:" branch header is elided in this excerpt
      # Lock all nodes as all groups are potential targets
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13278 def CheckPrereq(self):
13279 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13280 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13281 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13283 assert (self.req_target_uuids is None or
13284 owned_groups.issuperset(self.req_target_uuids))
13285 assert owned_instances == set([self.op.instance_name])
13287 # Get instance information
13288 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13290 # Check if node groups for locked instance are still correct
13291 assert owned_nodes.issuperset(self.instance.all_nodes), \
13292 ("Instance %s's nodes changed while we kept the lock" %
13293 self.op.instance_name)
13295 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13298 if self.req_target_uuids:
13299 # User requested specific target groups
13300 self.target_uuids = frozenset(self.req_target_uuids)
13302 # All groups except those used by the instance are potential targets
13303 self.target_uuids = owned_groups - inst_groups
13305 conflicting_groups = self.target_uuids & inst_groups
13306 if conflicting_groups:
13307 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13308 " used by the instance '%s'" %
13309 (utils.CommaJoin(conflicting_groups),
13310 self.op.instance_name),
13311 errors.ECODE_INVAL)
13313 if not self.target_uuids:
13314 raise errors.OpPrereqError("There are no possible target groups",
13315 errors.ECODE_INVAL)
13317 def BuildHooksEnv(self):
13318 """Build hooks env.
13321 assert self.target_uuids
13324 "TARGET_GROUPS": " ".join(self.target_uuids),
13327 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13331 def BuildHooksNodes(self):
13332 """Build hooks nodes.
13335 mn = self.cfg.GetMasterNode()
13336 return ([mn], [mn])
13338 def Exec(self, feedback_fn):
13339 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13341 assert instances == [self.op.instance_name], "Instance not locked"
13343 req = iallocator.IAReqGroupChange(instances=instances,
13344 target_groups=list(self.target_uuids))
13345 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
13347 ial.Run(self.op.iallocator)
13349 if not ial.success:
13350 raise errors.OpPrereqError("Can't compute solution for changing group of"
13351 " instance '%s' using iallocator '%s': %s" %
13352 (self.op.instance_name, self.op.iallocator,
13353 ial.info), errors.ECODE_NORES)
13355 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13357 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13358 " instance '%s'", len(jobs), self.op.instance_name)
13360 return ResultWithJobs(jobs)
# LU listing instance exports per node (no hooks). Delegates the actual
# query to _ExportQuery and reshapes the rows into the old-style
# node -> exports mapping; a None export name marks a node as failed (False).
13363 class LUBackupQuery(NoHooksLU):
13364 """Query the exports list
13369 def CheckArguments(self):
13370 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13371 ["node", "export"], self.op.use_locking)
13373 def ExpandNames(self):
13374 self.expq.ExpandNames(self)
13376 def DeclareLocks(self, level):
13377 self.expq.DeclareLocks(self, level)
13379 def Exec(self, feedback_fn):
13382 for (node, expname) in self.expq.OldStyleQuery(self):
13383 if expname is None:
13384 result[node] = False
13386 result.setdefault(node, []).append(expname)
13391 class _ExportQuery(_QueryBase):
13392 FIELDS = query.EXPORT_FIELDS
13394 #: The node name is not a unique key for this query
13395 SORT_FIELD = "node"
13397 def ExpandNames(self, lu):
13398 lu.needed_locks = {}
13400 # The following variables interact with _QueryBase._GetNames
13402 self.wanted = _GetWantedNodes(lu, self.names)
13404 self.wanted = locking.ALL_SET
13406 self.do_locking = self.use_locking
13408 if self.do_locking:
13409 lu.share_locks = _ShareAll()
13410 lu.needed_locks = {
13411 locking.LEVEL_NODE: self.wanted,
13414 def DeclareLocks(self, lu, level):
13417 def _GetQueryData(self, lu):
13418 """Computes the list of nodes and their attributes.
13421 # Locking is not used
# Only the cluster-level lock may be held here; any other owned lock (or a
# locking request) violates the lockless-query contract.
13423 assert not (compat.any(lu.glm.is_owned(level)
13424 for level in locking.LEVELS
13425 if level != locking.LEVEL_CLUSTER) or
13426 self.do_locking or self.use_locking)
13428 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
# Ask every selected node for its exports over RPC; a (node, None) row
# marks a node whose reply was unusable.
# NOTE(review): the fail_msg guard around these branches appears truncated
# in this excerpt — verify against the full file.
13432 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13434 result.append((node, None))
13436 result.extend((node, expname) for expname in nres.payload)
# LU run before an export: for remote exports it creates an X509
# key/certificate on the primary node and returns the handshake data,
# HMAC-signed key name and signed CA the destination cluster needs.
13441 class LUBackupPrepare(NoHooksLU):
13442 """Prepares an instance for an export and returns useful information.
13447 def ExpandNames(self):
13448 self._ExpandAndLockInstance()
13450 def CheckPrereq(self):
13451 """Check prerequisites.
13454 instance_name = self.op.instance_name
13456 self.instance = self.cfg.GetInstanceInfo(instance_name)
13457 assert self.instance is not None, \
13458 "Cannot retrieve locked instance %s" % self.op.instance_name
13459 _CheckNodeOnline(self, self.instance.primary_node)
# The cluster domain secret authenticates the remote-export handshake and
# signs the key name / CA returned by Exec.
13461 self._cds = _GetClusterDomainSecret()
13463 def Exec(self, feedback_fn):
13464 """Prepares an instance for an export.
13467 instance = self.instance
13469 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13470 salt = utils.GenerateSecret(8)
13472 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13473 result = self.rpc.call_x509_cert_create(instance.primary_node,
13474 constants.RIE_CERT_VALIDITY)
13475 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13477 (name, cert_pem) = result.payload
13479 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13483 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13484 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13486 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13492 class LUBackupExport(LogicalUnit):
13493 """Export an instance to an image in the cluster.
13496 HPATH = "instance-export"
13497 HTYPE = constants.HTYPE_INSTANCE
# Remote exports require both an X509 key name and a destination CA; local
# exports need neither.
13500 def CheckArguments(self):
13501 """Check the arguments.
13504 self.x509_key_name = self.op.x509_key_name
13505 self.dest_x509_ca_pem = self.op.destination_x509_ca
13507 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13508 if not self.x509_key_name:
13509 raise errors.OpPrereqError("Missing X509 key name for encryption",
13510 errors.ECODE_INVAL)
13512 if not self.dest_x509_ca_pem:
13513 raise errors.OpPrereqError("Missing destination X509 CA",
13514 errors.ECODE_INVAL)
13516 def ExpandNames(self):
13517 self._ExpandAndLockInstance()
13519 # Lock all nodes for local exports
13520 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13521 # FIXME: lock only instance primary and destination node
13523 # Sad but true, for now we have do lock all nodes, as we don't know where
13524 # the previous export might be, and in this LU we search for it and
13525 # remove it from its current node. In the future we could fix this by:
13526 # - making a tasklet to search (share-lock all), then create the
13527 # new one, then one to remove, after
13528 # - removing the removal operation altogether
13529 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13531 def DeclareLocks(self, level):
13532 """Last minute lock declaration."""
13533 # All nodes are locked anyway, so nothing to do here.
13535 def BuildHooksEnv(self):
13536 """Build hooks env.
13538 This will run on the master, primary node and target node.
13542 "EXPORT_MODE": self.op.mode,
13543 "EXPORT_NODE": self.op.target_node,
13544 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13545 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13546 # TODO: Generic function for boolean env variables
13547 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13550 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13554 def BuildHooksNodes(self):
13555 """Build hooks nodes.
13558 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13560 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13561 nl.append(self.op.target_node)
13565 def CheckPrereq(self):
13566 """Check prerequisites.
13568 This checks that the instance and node names are valid.
13571 instance_name = self.op.instance_name
13573 self.instance = self.cfg.GetInstanceInfo(instance_name)
13574 assert self.instance is not None, \
13575 "Cannot retrieve locked instance %s" % self.op.instance_name
13576 _CheckNodeOnline(self, self.instance.primary_node)
# A running instance can only be removed after export if it is also shut
# down as part of this operation.
13578 if (self.op.remove_instance and
13579 self.instance.admin_state == constants.ADMINST_UP and
13580 not self.op.shutdown):
13581 raise errors.OpPrereqError("Can not remove instance without shutting it"
13582 " down before", errors.ECODE_STATE)
# Local export: target_node is a node name; verify it is online and not
# drained.
13584 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13585 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13586 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13587 assert self.dst_node is not None
13589 _CheckNodeOnline(self, self.dst_node.name)
13590 _CheckNodeNotDrained(self, self.dst_node.name)
13593 self.dest_disk_info = None
13594 self.dest_x509_ca = None
# Remote export: target_node carries per-disk destination info; verify the
# HMAC-signed key name, the signed destination CA and each disk's
# (host, port, magic) tuple against the cluster domain secret.
13596 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13597 self.dst_node = None
13599 if len(self.op.target_node) != len(self.instance.disks):
13600 raise errors.OpPrereqError(("Received destination information for %s"
13601 " disks, but instance %s has %s disks") %
13602 (len(self.op.target_node), instance_name,
13603 len(self.instance.disks)),
13604 errors.ECODE_INVAL)
13606 cds = _GetClusterDomainSecret()
13608 # Check X509 key name
13610 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13611 except (TypeError, ValueError), err:
13612 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
13613 errors.ECODE_INVAL)
13615 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13616 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13617 errors.ECODE_INVAL)
13619 # Load and verify CA
13621 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13622 except OpenSSL.crypto.Error, err:
13623 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13624 (err, ), errors.ECODE_INVAL)
13626 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13627 if errcode is not None:
13628 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13629 (msg, ), errors.ECODE_INVAL)
13631 self.dest_x509_ca = cert
13633 # Verify target information
13635 for idx, disk_data in enumerate(self.op.target_node):
13637 (host, port, magic) = \
13638 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13639 except errors.GenericError, err:
13640 raise errors.OpPrereqError("Target info for disk %s: %s" %
13641 (idx, err), errors.ECODE_INVAL)
13643 disk_info.append((host, port, magic))
13645 assert len(disk_info) == len(self.op.target_node)
13646 self.dest_disk_info = disk_info
13649 raise errors.ProgrammerError("Unhandled export mode %r" %
13652 # instance disk type verification
13653 # TODO: Implement export support for file-based disks
13654 for disk in self.instance.disks:
13655 if disk.dev_type == constants.LD_FILE:
13656 raise errors.OpPrereqError("Export not supported for instances with"
13657 " file-based disks", errors.ECODE_INVAL)
# Best-effort removal of stale exports of this instance from every node
# except the one just exported to; RPC failures only produce warnings.
13659 def _CleanupExports(self, feedback_fn):
13660 """Removes exports of current instance from all other nodes.
13662 If an instance in a cluster with nodes A..D was exported to node C, its
13663 exports will be removed from the nodes A, B and D.
13666 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13668 nodelist = self.cfg.GetNodeList()
13669 nodelist.remove(self.dst_node.name)
13671 # on one-node clusters nodelist will be empty after the removal
13672 # if we proceed the backup would be removed because OpBackupQuery
13673 # substitutes an empty list with the full cluster node list.
13674 iname = self.instance.name
13676 feedback_fn("Removing old exports for instance %s" % iname)
13677 exportlist = self.rpc.call_export_list(nodelist)
13678 for node in exportlist:
13679 if exportlist[node].fail_msg:
13681 if iname in exportlist[node].payload:
13682 msg = self.rpc.call_export_remove(node, iname).fail_msg
13684 self.LogWarning("Could not remove older export for instance %s"
13685 " on node %s: %s", iname, node, msg)
13687 def Exec(self, feedback_fn):
13688 """Export an instance to an image in the cluster.
13691 assert self.op.mode in constants.EXPORT_MODES
13693 instance = self.instance
13694 src_node = instance.primary_node
# Optionally shut the instance down first (disks stay configured so they
# can be snapshotted).
13696 if self.op.shutdown:
13697 # shutdown the instance, but not the disks
13698 feedback_fn("Shutting down instance %s" % instance.name)
13699 result = self.rpc.call_instance_shutdown(src_node, instance,
13700 self.op.shutdown_timeout)
13701 # TODO: Maybe ignore failures if ignore_remove_failures is set
13702 result.Raise("Could not shutdown instance %s on"
13703 " node %s" % (instance.name, src_node))
13705 # set the disks ID correctly since call_instance_start needs the
13706 # correct drbd minor to create the symlinks
13707 for disk in instance.disks:
13708 self.cfg.SetDiskID(disk, src_node)
13710 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13713 # Activate the instance disks if we'exporting a stopped instance
13714 feedback_fn("Activating disks for %s" % instance.name)
13715 _StartInstanceDisks(self, instance, None)
13718 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13721 helper.CreateSnapshots()
# Restart the instance as soon as the snapshots exist, unless it is about
# to be removed anyway.
13723 if (self.op.shutdown and
13724 instance.admin_state == constants.ADMINST_UP and
13725 not self.op.remove_instance):
13726 assert not activate_disks
13727 feedback_fn("Starting instance %s" % instance.name)
13728 result = self.rpc.call_instance_start(src_node,
13729 (instance, None, None), False)
13730 msg = result.fail_msg
13732 feedback_fn("Failed to start instance: %s" % msg)
13733 _ShutdownInstanceDisks(self, instance)
13734 raise errors.OpExecError("Could not start instance: %s" % msg)
# Perform the actual transfer: copy to a node (local) or stream to the
# destination cluster over the verified X509 channel (remote).
13736 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13737 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13738 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13739 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13740 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13742 (key_name, _, _) = self.x509_key_name
13745 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13748 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13749 key_name, dest_ca_pem,
13754 # Check for backwards compatibility
13755 assert len(dresults) == len(instance.disks)
13756 assert compat.all(isinstance(i, bool) for i in dresults), \
13757 "Not all results are boolean: %r" % dresults
13761 feedback_fn("Deactivating disks for %s" % instance.name)
13762 _ShutdownInstanceDisks(self, instance)
# Collect per-disk and finalization failures into one error message.
13764 if not (compat.all(dresults) and fin_resu):
13767 failures.append("export finalization")
13768 if not compat.all(dresults):
13769 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13771 failures.append("disk export: disk(s) %s" % fdsk)
13773 raise errors.OpExecError("Export failed, errors in %s" %
13774 utils.CommaJoin(failures))
13776 # At this point, the export was successful, we can cleanup/finish
13778 # Remove instance if requested
13779 if self.op.remove_instance:
13780 feedback_fn("Removing instance %s" % instance.name)
13781 _RemoveInstance(self, feedback_fn, instance,
13782 self.op.ignore_remove_failures)
13784 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13785 self._CleanupExports(feedback_fn)
13787 return fin_resu, dresults
# LU removing all exports of a (possibly already deleted) instance from
# every node in the cluster; failures are logged, not fatal.
13790 class LUBackupRemove(NoHooksLU):
13791 """Remove exports related to the named instance.
13796 def ExpandNames(self):
13797 self.needed_locks = {}
13798 # We need all nodes to be locked in order for RemoveExport to work, but we
13799 # don't need to lock the instance itself, as nothing will happen to it (and
13800 # we can remove exports also for a removed instance)
13801 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13803 def Exec(self, feedback_fn):
13804 """Remove any export.
13807 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13808 # If the instance was not found we'll try with the name that was passed in.
13809 # This will only work if it was an FQDN, though.
13811 if not instance_name:
13813 instance_name = self.op.instance_name
# Query every locked node for its exports and remove matching ones,
# best-effort: query/remove failures only produce warnings or error logs.
13815 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13816 exportlist = self.rpc.call_export_list(locked_nodes)
13818 for node in exportlist:
13819 msg = exportlist[node].fail_msg
13821 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13823 if instance_name in exportlist[node].payload:
13825 result = self.rpc.call_export_remove(node, instance_name)
13826 msg = result.fail_msg
13828 logging.error("Could not remove export for instance %s"
13829 " on node %s: %s", instance_name, node, msg)
13831 if fqdn_warn and not found:
13832 feedback_fn("Export not found. If trying to remove an export belonging"
13833 " to a deleted instance please use its Fully Qualified"
# LU creating a new, empty node group with optional ndparams, hv/disk state,
# disk parameters and instance policy.
13837 class LUGroupAdd(LogicalUnit):
13838 """Logical unit for creating node groups.
13841 HPATH = "group-add"
13842 HTYPE = constants.HTYPE_GROUP
13845 def ExpandNames(self):
13846 # We need the new group's UUID here so that we can create and acquire the
13847 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13848 # that it should not check whether the UUID exists in the configuration.
13849 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13850 self.needed_locks = {}
13851 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13853 def CheckPrereq(self):
13854 """Check prerequisites.
13856 This checks that the given group name is not an existing node group
13861 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13862 except errors.OpPrereqError:
13865 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13866 " node group (UUID: %s)" %
13867 (self.op.group_name, existing_uuid),
13868 errors.ECODE_EXISTS)
# Type-check each optional parameter set before it is stored on the group.
13870 if self.op.ndparams:
13871 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13873 if self.op.hv_state:
13874 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13876 self.new_hv_state = None
13878 if self.op.disk_state:
13879 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13881 self.new_disk_state = None
# Disk parameters are validated per disk template, then checked against the
# known option defaults.
13883 if self.op.diskparams:
13884 for templ in constants.DISK_TEMPLATES:
13885 if templ in self.op.diskparams:
13886 utils.ForceDictType(self.op.diskparams[templ],
13887 constants.DISK_DT_TYPES)
13888 self.new_diskparams = self.op.diskparams
13890 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13891 except errors.OpPrereqError, err:
13892 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
13893 errors.ECODE_INVAL)
13895 self.new_diskparams = {}
13897 if self.op.ipolicy:
13898 cluster = self.cfg.GetClusterInfo()
13899 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13901 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13902 except errors.ConfigurationError, err:
13903 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13904 errors.ECODE_INVAL)
13906 def BuildHooksEnv(self):
13907 """Build hooks env.
13911 "GROUP_NAME": self.op.group_name,
13914 def BuildHooksNodes(self):
13915 """Build hooks nodes.
13918 mn = self.cfg.GetMasterNode()
13919 return ([mn], [mn])
13921 def Exec(self, feedback_fn):
13922 """Add the node group to the cluster.
13925 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13926 uuid=self.group_uuid,
13927 alloc_policy=self.op.alloc_policy,
13928 ndparams=self.op.ndparams,
13929 diskparams=self.new_diskparams,
13930 ipolicy=self.op.ipolicy,
13931 hv_state_static=self.new_hv_state,
13932 disk_state_static=self.new_disk_state)
13934 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13935 del self.remove_locks[locking.LEVEL_NODEGROUP]
13938 class LUGroupAssignNodes(NoHooksLU):
13939 """Logical unit for assigning nodes to groups.
# Resolve the destination group and node names; the source groups of the
# nodes are discovered later in DeclareLocks.
13944 def ExpandNames(self):
13945 # These raise errors.OpPrereqError on their own:
13946 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13947 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13949 # We want to lock all the affected nodes and groups. We have readily
13950 # available the list of nodes, and the *destination* group. To gather the
13951 # list of "source" groups, we need to fetch node information later on.
13952 self.needed_locks = {
13953 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13954 locking.LEVEL_NODE: self.op.nodes,
13957 def DeclareLocks(self, level):
13958 if level == locking.LEVEL_NODEGROUP:
13959 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13961 # Try to get all affected nodes' groups without having the group or node
13962 # lock yet. Needs verification later in the code flow.
13963 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13965 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
# Re-check the optimistically computed group set and warn/abort on
# instances that this reassignment would split across groups.
13967 def CheckPrereq(self):
13968 """Check prerequisites.
13971 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13972 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13973 frozenset(self.op.nodes))
13975 expected_locks = (set([self.group_uuid]) |
13976 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13977 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13978 if actual_locks != expected_locks:
13979 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13980 " current groups are '%s', used to be '%s'" %
13981 (utils.CommaJoin(expected_locks),
13982 utils.CommaJoin(actual_locks)))
13984 self.node_data = self.cfg.GetAllNodesInfo()
13985 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13986 instance_data = self.cfg.GetAllInstancesInfo()
13988 if self.group is None:
13989 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13990 (self.op.group_name, self.group_uuid))
13992 (new_splits, previous_splits) = \
13993 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13994 for node in self.op.nodes],
13995 self.node_data, instance_data)
13998 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
14000 if not self.op.force:
14001 raise errors.OpExecError("The following instances get split by this"
14002 " change and --force was not given: %s" %
14005 self.LogWarning("This operation will split the following instances: %s",
14008 if previous_splits:
14009 self.LogWarning("In addition, these already-split instances continue"
14010 " to be split across groups: %s",
14011 utils.CommaJoin(utils.NiceSort(previous_splits)))
14013 def Exec(self, feedback_fn):
14014 """Assign nodes to a new group.
14017 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
14019 self.cfg.AssignGroupNodes(mods)
# Pure helper: simulates the assignment over the config data and reports
# (newly split instances, instances that were and remain split).
14022 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
14023 """Check for split instances after a node assignment.
14025 This method considers a series of node assignments as an atomic operation,
14026 and returns information about split instances after applying the set of
14029 In particular, it returns information about newly split instances, and
14030 instances that were already split, and remain so after the change.
14032 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
14035 @type changes: list of (node_name, new_group_uuid) pairs.
14036 @param changes: list of node assignments to consider.
14037 @param node_data: a dict with data for all nodes
14038 @param instance_data: a dict with all instances to consider
14039 @rtype: a two-tuple
14040 @return: a list of instances that were previously okay and result split as a
14041 consequence of this change, and a list of instances that were previously
14042 split and this change does not fix.
14045 changed_nodes = dict((node, group) for node, group in changes
14046 if node_data[node].group != group)
14048 all_split_instances = set()
14049 previously_split_instances = set()
14051 def InstanceNodes(instance):
14052 return [instance.primary_node] + list(instance.secondary_nodes)
14054 for inst in instance_data.values():
14055 if inst.disk_template not in constants.DTS_INT_MIRROR:
14058 instance_nodes = InstanceNodes(inst)
14060 if len(set(node_data[node].group for node in instance_nodes)) > 1:
14061 previously_split_instances.add(inst.name)
14063 if len(set(changed_nodes.get(node, node_data[node].group)
14064 for node in instance_nodes)) > 1:
14065 all_split_instances.add(inst.name)
14067 return (list(all_split_instances - previously_split_instances),
14068 list(previously_split_instances & all_split_instances))
14071 class _GroupQuery(_QueryBase):
14072 FIELDS = query.GROUP_FIELDS
14074 def ExpandNames(self, lu):
14075 lu.needed_locks = {}
14077 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
14078 self._cluster = lu.cfg.GetClusterInfo()
14079 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
14082 self.wanted = [name_to_uuid[name]
14083 for name in utils.NiceSort(name_to_uuid.keys())]
14085 # Accept names to be either names or UUIDs.
14088 all_uuid = frozenset(self._all_groups.keys())
14090 for name in self.names:
14091 if name in all_uuid:
14092 self.wanted.append(name)
14093 elif name in name_to_uuid:
14094 self.wanted.append(name_to_uuid[name])
14096 missing.append(name)
14099 raise errors.OpPrereqError("Some groups do not exist: %s" %
14100 utils.CommaJoin(missing),
14101 errors.ECODE_NOENT)
14103 def DeclareLocks(self, lu, level):
14106 def _GetQueryData(self, lu):
14107 """Computes the list of node groups and their attributes.
# The group->nodes and group->instances maps are built only when the
# corresponding fields (GQ_NODE / GQ_INST) were requested; instances are
# attributed to a group via their primary node.
14110 do_nodes = query.GQ_NODE in self.requested_data
14111 do_instances = query.GQ_INST in self.requested_data
14113 group_to_nodes = None
14114 group_to_instances = None
14116 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
14117 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
14118 # latter GetAllInstancesInfo() is not enough, for we have to go through
14119 # instance->node. Hence, we will need to process nodes even if we only need
14120 # instance information.
14121 if do_nodes or do_instances:
14122 all_nodes = lu.cfg.GetAllNodesInfo()
14123 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
14126 for node in all_nodes.values():
14127 if node.group in group_to_nodes:
14128 group_to_nodes[node.group].append(node.name)
14129 node_to_group[node.name] = node.group
14132 all_instances = lu.cfg.GetAllInstancesInfo()
14133 group_to_instances = dict((uuid, []) for uuid in self.wanted)
14135 for instance in all_instances.values():
14136 node = instance.primary_node
14137 if node in node_to_group:
14138 group_to_instances[node_to_group[node]].append(instance.name)
14141 # Do not pass on node information if it was not requested.
14142 group_to_nodes = None
14144 return query.GroupQueryData(self._cluster,
14145 [self._all_groups[uuid]
14146 for uuid in self.wanted],
14147 group_to_nodes, group_to_instances,
14148 query.GQ_DISKPARAMS in self.requested_data)
# Thin LU wrapper delegating node-group queries (no hooks, no locking) to
# _GroupQuery and returning old-style results.
14151 class LUGroupQuery(NoHooksLU):
14152 """Logical unit for querying node groups.
14157 def CheckArguments(self):
14158 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14159 self.op.output_fields, False)
14161 def ExpandNames(self):
14162 self.gq.ExpandNames(self)
14164 def DeclareLocks(self, level):
14165 self.gq.DeclareLocks(self, level)
14167 def Exec(self, feedback_fn):
14168 return self.gq.OldStyleQuery(self)
14171 class LUGroupSetParams(LogicalUnit):
14172 """Modifies the parameters of a node group.
14175 HPATH = "group-modify"
14176 HTYPE = constants.HTYPE_GROUP
# At least one of the modifiable parameter sets must be given, otherwise
# the operation is rejected outright.
14179 def CheckArguments(self):
14182 self.op.diskparams,
14183 self.op.alloc_policy,
14185 self.op.disk_state,
14189 if all_changes.count(None) == len(all_changes):
14190 raise errors.OpPrereqError("Please pass at least one modification",
14191 errors.ECODE_INVAL)
14193 def ExpandNames(self):
14194 # This raises errors.OpPrereqError on its own:
14195 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14197 self.needed_locks = {
14198 locking.LEVEL_INSTANCE: [],
14199 locking.LEVEL_NODEGROUP: [self.group_uuid],
14202 self.share_locks[locking.LEVEL_INSTANCE] = 1
14204 def DeclareLocks(self, level):
14205 if level == locking.LEVEL_INSTANCE:
14206 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14208 # Lock instances optimistically, needs verification once group lock has
14210 self.needed_locks[locking.LEVEL_INSTANCE] = \
14211 self.cfg.GetNodeGroupInstances(self.group_uuid)
# Merge new disk parameters over the old ones and type-check the result.
14214 def _UpdateAndVerifyDiskParams(old, new):
14215 """Updates and verifies disk parameters.
14218 new_params = _GetUpdatedParams(old, new)
14219 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14222 def CheckPrereq(self):
14223 """Check prerequisites.
14226 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14228 # Check if locked instances are still correct
14229 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14231 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14232 cluster = self.cfg.GetClusterInfo()
14234 if self.group is None:
14235 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14236 (self.op.group_name, self.group_uuid))
# Validate each requested parameter set by merging it over the group's
# current values; results are stored on self for use in Exec.
14238 if self.op.ndparams:
14239 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14240 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14241 self.new_ndparams = new_ndparams
14243 if self.op.diskparams:
14244 diskparams = self.group.diskparams
14245 uavdp = self._UpdateAndVerifyDiskParams
14246 # For each disktemplate subdict update and verify the values
14247 new_diskparams = dict((dt,
14248 uavdp(diskparams.get(dt, {}),
14249 self.op.diskparams[dt]))
14250 for dt in constants.DISK_TEMPLATES
14251 if dt in self.op.diskparams)
14252 # As we've all subdicts of diskparams ready, lets merge the actual
14253 # dict with all updated subdicts
14254 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14256 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14257 except errors.OpPrereqError, err:
14258 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14259 errors.ECODE_INVAL)
14261 if self.op.hv_state:
14262 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14263 self.group.hv_state_static)
14265 if self.op.disk_state:
14266 self.new_disk_state = \
14267 _MergeAndVerifyDiskState(self.op.disk_state,
14268 self.group.disk_state_static)
# An ipolicy change only warns about (does not reject) instances in the
# group that would violate the new policy.
14270 if self.op.ipolicy:
14271 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14275 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14276 inst_filter = lambda inst: inst.name in owned_instances
14277 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14278 gmi = ganeti.masterd.instance
14280 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14282 new_ipolicy, instances)
14285 self.LogWarning("After the ipolicy change the following instances"
14286 " violate them: %s",
14287 utils.CommaJoin(violations))
14289 def BuildHooksEnv(self):
14290 """Build hooks env.
14294 "GROUP_NAME": self.op.group_name,
14295 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14298 def BuildHooksNodes(self):
14299 """Build hooks nodes.
14302 mn = self.cfg.GetMasterNode()
14303 return ([mn], [mn])
# Apply the pre-validated values to the group object and persist it.
14305 def Exec(self, feedback_fn):
14306 """Modifies the node group.
14311 if self.op.ndparams:
14312 self.group.ndparams = self.new_ndparams
14313 result.append(("ndparams", str(self.group.ndparams)))
14315 if self.op.diskparams:
14316 self.group.diskparams = self.new_diskparams
14317 result.append(("diskparams", str(self.group.diskparams)))
14319 if self.op.alloc_policy:
14320 self.group.alloc_policy = self.op.alloc_policy
14322 if self.op.hv_state:
14323 self.group.hv_state_static = self.new_hv_state
14325 if self.op.disk_state:
14326 self.group.disk_state_static = self.new_disk_state
14328 if self.op.ipolicy:
14329 self.group.ipolicy = self.new_ipolicy
14331 self.cfg.Update(self.group, feedback_fn)
class LUGroupRemove(LogicalUnit):
  """Removes an (empty) node group from the cluster.

  """
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that is
    empty (i.e., contains no nodes), and that is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
                                 " removed" % self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    # The group lock is no longer needed once the group is gone
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
class LUGroupRename(LogicalUnit):
  """Renames an existing node group.

  """
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      # Lookup failing means the new name is free
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on the group's members too, but never list the master
    # twice
    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(master, None)

    run_nodes = [master]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name
class LUGroupEvacuate(LogicalUnit):
  """Evacuates all instances off a node group, using an iallocator.

  """
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              owned_groups, owned_nodes, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    req = iallocator.IAReqGroupChange(instances=instances,
                                      target_groups=self.target_uuids)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)
class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}

    # Resolve the target object and decide which (single) lock to take
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      lock_level = locking.LEVEL_NODE
      lock_name = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      lock_level = locking.LEVEL_INSTANCE
      lock_name = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
      lock_level = locking.LEVEL_NODEGROUP
      lock_name = self.group_uuid
    else:
      # Cluster-level tags need no per-object lock
      lock_level = None
      lock_name = None

    if lock_level and getattr(self.op, "use_locking", True):
      self.needed_locks[lock_level] = lock_name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())
14687 class LUTagsSearch(NoHooksLU):
14688 """Searches the tags for a given pattern.
14693 def ExpandNames(self):
14694 self.needed_locks = {}
14696 def CheckPrereq(self):
14697 """Check prerequisites.
14699 This checks the pattern passed for validity by compiling it.
14703 self.re = re.compile(self.op.pattern)
14704 except re.error, err:
14705 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14706 (self.op.pattern, err), errors.ECODE_INVAL)
14708 def Exec(self, feedback_fn):
14709 """Returns the tag list.
14713 tgts = [("/cluster", cfg.GetClusterInfo())]
14714 ilist = cfg.GetAllInstancesInfo().values()
14715 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14716 nlist = cfg.GetAllNodesInfo().values()
14717 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14718 tgts.extend(("/nodegroup/%s" % n.name, n)
14719 for n in cfg.GetAllNodeGroupsInfo().values())
14721 for path, target in tgts:
14722 for tag in target.GetTags():
14723 if self.re.search(tag):
14724 results.append((path, tag))
14728 class LUTagsSet(TagsLU):
14729 """Sets a tag on a given object.
14734 def CheckPrereq(self):
14735 """Check prerequisites.
14737 This checks the type and length of the tag name and value.
14740 TagsLU.CheckPrereq(self)
14741 for tag in self.op.tags:
14742 objects.TaggableObject.ValidateTag(tag)
14744 def Exec(self, feedback_fn):
14749 for tag in self.op.tags:
14750 self.target.AddTag(tag)
14751 except errors.TagError, err:
14752 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14753 self.cfg.Update(self.target, feedback_fn)
class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # Every requested tag must currently be present on the target
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
14838 class LUTestJqueue(NoHooksLU):
14839 """Utility LU to test some aspects of the job queue.
14844 # Must be lower than default timeout for WaitForJobChange to see whether it
14845 # notices changed jobs
14846 _CLIENT_CONNECT_TIMEOUT = 20.0
14847 _CLIENT_CONFIRM_TIMEOUT = 60.0
14850 def _NotifyUsingSocket(cls, cb, errcls):
14851 """Opens a Unix socket and waits for another program to connect.
14854 @param cb: Callback to send socket name to client
14855 @type errcls: class
14856 @param errcls: Exception class to use for errors
14859 # Using a temporary directory as there's no easy way to create temporary
14860 # sockets without writing a custom loop around tempfile.mktemp and
14862 tmpdir = tempfile.mkdtemp()
14864 tmpsock = utils.PathJoin(tmpdir, "sock")
14866 logging.debug("Creating temporary socket at %s", tmpsock)
14867 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14872 # Send details to client
14875 # Wait for client to connect before continuing
14876 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14878 (conn, _) = sock.accept()
14879 except socket.error, err:
14880 raise errcls("Client didn't connect in time (%s)" % err)
14884 # Remove as soon as client is connected
14885 shutil.rmtree(tmpdir)
14887 # Wait for client to close
14890 # pylint: disable=E1101
14891 # Instance of '_socketobject' has no ... member
14892 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14894 except socket.error, err:
14895 raise errcls("Client failed to confirm notification (%s)" % err)
14899 def _SendNotification(self, test, arg, sockname):
14900 """Sends a notification to the client.
14903 @param test: Test name
14904 @param arg: Test argument (depends on test)
14905 @type sockname: string
14906 @param sockname: Socket path
14909 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14911 def _Notify(self, prereq, test, arg):
14912 """Notifies the client of a test.
14915 @param prereq: Whether this is a prereq-phase test
14917 @param test: Test name
14918 @param arg: Test argument (depends on test)
14922 errcls = errors.OpPrereqError
14924 errcls = errors.OpExecError
14926 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14930 def CheckArguments(self):
14931 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14932 self.expandnames_calls = 0
14934 def ExpandNames(self):
14935 checkargs_calls = getattr(self, "checkargs_calls", 0)
14936 if checkargs_calls < 1:
14937 raise errors.ProgrammerError("CheckArguments was not called")
14939 self.expandnames_calls += 1
14941 if self.op.notify_waitlock:
14942 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14944 self.LogInfo("Expanding names")
14946 # Get lock on master node (just to get a lock, not for a particular reason)
14947 self.needed_locks = {
14948 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14951 def Exec(self, feedback_fn):
14952 if self.expandnames_calls < 1:
14953 raise errors.ProgrammerError("ExpandNames was not called")
14955 if self.op.notify_exec:
14956 self._Notify(False, constants.JQT_EXEC, None)
14958 self.LogInfo("Executing")
14960 if self.op.log_messages:
14961 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14962 for idx, msg in enumerate(self.op.log_messages):
14963 self.LogInfo("Sending log message %s", idx + 1)
14964 feedback_fn(constants.JQT_MSGPREFIX + msg)
14965 # Report how many test messages have been sent
14966 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14969 raise errors.OpExecError("Opcode failure was requested")
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the director and mode test.

    """
    if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
                        constants.IALLOCATOR_MODE_MULTI_ALLOC):
      # Allocation requests need a full (would-be) instance specification
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    # Build the iallocator request matching the requested test mode
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      req = iallocator.IAReqInstanceAlloc(name=self.op.name,
                                          memory=self.op.memory,
                                          disks=self.op.disks,
                                          disk_template=self.op.disk_template,
                                          os=self.op.os,
                                          tags=self.op.tags,
                                          nics=self.op.nics,
                                          vcpus=self.op.vcpus,
                                          spindle_use=self.op.spindle_use,
                                          hypervisor=self.op.hypervisor)
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      req = iallocator.IAReqRelocate(name=self.op.name,
                                     relocate_from=list(self.relocate_from))
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      req = iallocator.IAReqGroupChange(instances=self.op.instances,
                                        target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      req = iallocator.IAReqNodeEvac(instances=self.op.instances,
                                     evac_mode=self.op.evac_mode)
    elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
      disk_template = self.op.disk_template
      insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
                                             memory=self.op.memory,
                                             disks=self.op.disks,
                                             disk_template=disk_template,
                                             os=self.op.os,
                                             tags=self.op.tags,
                                             nics=self.op.nics,
                                             vcpus=self.op.vcpus,
                                             spindle_use=self.op.spindle_use,
                                             hypervisor=self.op.hypervisor)
               for idx in range(self.op.count)]
      req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
    else:
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    ial = iallocator.IAllocator(self.cfg, self.rpc, req)
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      # "in" direction only renders the request text
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

# Every resource queryable via an opcode must have an implementation above
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}
  @raise errors.OpPrereqError: if the query type is unknown

  """
  # EAFP: a plain dict lookup, translating KeyError into the standard
  # prerequisite error for callers
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)