# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module

import re
import logging
import copy
import itertools

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import rpc
from ganeti import runtime
from ganeti import pathutils
from ganeti.masterd import iallocator

import ganeti.masterd.instance # pylint: disable=W0611


INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))
80 """Data container for LU results with jobs.
82 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
83 by L{mcpu._ProcessResult}. The latter will then submit the jobs
84 contained in the C{jobs} attribute and include the job IDs in the opcode
88 def __init__(self, jobs, **kwargs):
89 """Initializes this class.
91 Additional return values can be specified as keyword arguments.
93 @type jobs: list of lists of L{opcode.OpCode}
94 @param jobs: A list of lists of opcode objects
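
    A minimal illustration (hypothetical opcode and keyword argument, shown
    only as a sketch) of how an LU's Exec method might use this container::

      return ResultWithJobs([[opcodes.OpTestDelay(duration=10)]],
                            warning="example extra return value")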

    """
    self.jobs = jobs
    self.other = kwargs


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods need no longer worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level, omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.
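
    For example, to acquire the node locks in shared mode (illustrative
    snippet only)::

      self.share_locks[locking.LEVEL_NODE] = 1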

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    #
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. "No nodes" should be indicated as
      an empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
      L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None

    @return: the new Exec result, based on the previous result

    """
    # API must be kept, thus we ignore the unused argument and the
    # could-be-a-function warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
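
  # Illustrative only: a hypothetical instance LU would typically combine the
  # helper above with _LockInstancesNodes (below) roughly like this::
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()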

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
442 """Tasklet base class.
444 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
445 they can mix legacy code with tasklets. Locking needs to be done in the LU,
446 tasklets know nothing about locks.
448 Subclasses must follow these rules:
449 - Implement CheckPrereq
453 def __init__(self, lu):
460 def CheckPrereq(self):
461 """Check prerequisites for this tasklets.
463 This method should check whether the prerequisites for the execution of
464 this tasklet are fulfilled. It can do internode communication, but it
465 should be idempotent - no cluster or system changes are allowed.
467 The method should raise errors.OpPrereqError in case something is not
468 fulfilled. Its return value is ignored.
470 This method should also update all parameters to their canonical form if it
471 hasn't been done before.
476 def Exec(self, feedback_fn):
477 """Execute the tasklet.
479 This method should implement the actual work. It should raise
480 errors.OpExecError for failures that are somewhat dealt with in code, or
484 raise NotImplementedError
488 """Base for query utility classes.
491 #: Attribute holding field definitions
497 def __init__(self, qfilter, fields, use_locking):
498 """Initializes this class.
501 self.use_locking = use_locking
503 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
504 namefield=self.SORT_FIELD)
505 self.requested_data = self.query.RequestedData()
506 self.names = self.query.RequestedNames()
508 # Sort only if no names were requested
509 self.sort_by_name = not self.names
511 self.do_locking = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)
580 """Returns a dict declaring all lock levels shared.
583 return dict.fromkeys(locking.LEVELS, 1)
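
# Illustrative usage (the pattern appears in LUClusterVerifyConfig below): an
# LU that only reads the configuration can acquire everything in shared mode::
#
#   self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
#   self.share_locks = _ShareAll()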


def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of its children!)
  @param cfg: The config object
  @return: The annotated disk copies
  @see L{rpc.AnnotateDiskParams}

  """
  return rpc.AnnotateDiskParams(instance.disk_template, devs,
                                cfg.GetInstanceDiskParams(instance))


def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
                              cur_group_uuid):
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  """
  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
                             primary_only=False):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  @type primary_only: boolean
  @param primary_only: Whether to check node groups for only the primary node

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _CopyLockList(names):
  """Makes a copy of a list of lock names.

  Handles L{locking.ALL_SET} correctly.

  """
  if names == locking.ALL_SET:
    return locking.ALL_SET
  else:
    return names[:]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
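
# Illustrative semantics (hypothetical values): resetting one key to its
# default while adding another::
#
#   _GetUpdatedParams({"a": 1, "b": 2}, {"a": constants.VALUE_DEFAULT, "c": 3})
#   --> {"b": 2, "c": 3}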


def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  """
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
                                 errors.ECODE_INVAL)
    if key in constants.IPOLICY_ISPECS:
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_none=use_none,
                                       use_default=use_default)
    else:
      if (not value or value == [constants.VALUE_DEFAULT] or
          value == constants.VALUE_DEFAULT):
        if group_policy:
          del ipolicy[key]
        else:
          raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster" % key,
                                     errors.ECODE_INVAL)
      else:
        if key in constants.IPOLICY_PARAMETERS:
          # FIXME: we assume all such values are float
          try:
            ipolicy[key] = float(value)
          except (TypeError, ValueError), err:
            raise errors.OpPrereqError("Invalid value for attribute"
                                       " '%s': '%s', error: %s" %
                                       (key, value, err), errors.ECODE_INVAL)
        else:
          # FIXME: we assume all others are lists; this should be redone
          # in a nicer way
          ipolicy[key] = list(value)
  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
  except errors.ConfigurationError, err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                               errors.ECODE_INVAL)
  return ipolicy


def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @return: A new dict with updated and verified values

  """
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret


def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit owning the locks
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
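
# Illustrative call (hypothetical LU): keep only the locks on an instance's
# own nodes and release all other node locks::
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.instance.all_nodes)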


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as
      value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
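
# Illustrative result shape (hypothetical node and LV names):
#
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com", ...}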


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except Exception, err: # pylint: disable=W0703
    lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = static
  f.Extend(dynamic)
  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global parameters.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    if not lu.cfg.GetNodeInfo(pnode).offline:
      ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
      ins_l.Raise("Can't contact node %s for instance information" % pnode,
                  prereq=True, ecode=errors.ECODE_ENVIRON)
      if instance.name in ins_l.payload:
        raise errors.OpPrereqError("Instance %s is running, %s" %
                                   (instance.name, msg), errors.ECODE_STATE)
    else:
      lu.LogWarning("Primary node offline, ignoring check that instance"
                    " is down")


def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
      not just 'disk')
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or an element not meeting the criteria

  """
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    if qualifier:
      fqn = "%s/%s" % (name, qualifier)
    else:
      fqn = name
    return ("%s value %s is not in range [%s, %s]" %
            (fqn, value, min_v, max_v))
  return None
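
# Illustrative check (hypothetical ipolicy): with a memory range of
# [128, 4096] in the policy, a value of 8192 yields an error string, while
# 1024 (or None/"auto") yields None.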


def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]

  return filter(None,
                (_compute_fn(name, qualifier, ipolicy, value)
                 for (name, qualifier, value) in test_settings))


def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyInstanceSpecViolation(
    ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meet the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)


def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violate the new ipolicy but
      did not before

  """
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)

  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
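
# Illustrative behaviour (hypothetical names): _ExpandNodeName(cfg, "node1")
# returns the fully-qualified name known to the configuration, e.g.
# "node1.example.com", while an unknown name raises errors.OpPrereqError.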


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OS_TYPE": os_type,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }
  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if tags is None:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
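
# Illustrative arithmetic (hypothetical numbers): with candidate_pool_size=10,
# four current candidates and five desired, mc_should = min(5 + 1, 10) = 6 and
# 4 < 6, so the newly added node should promote itself.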


def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances that violate the given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
                                                                instance))
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  # Collect the indices of the disks the node reports as faulty
  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
  then the LU's opcode's iallocator slot is filled with the cluster-wide
  default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  ialloc = getattr(lu.op, iallocator_slot, None)

  if node is not None and ialloc is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif ((node is None and ialloc is None) or
        ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)


def _GetDefaultIAllocator(cfg, ialloc):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type ialloc: string or None
  @param ialloc: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not ialloc:
    # Use default iallocator
    ialloc = cfg.GetDefaultIAllocator()

  if not ialloc:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return ialloc


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_STATE)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
      apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to
    # a warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors: # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if cond:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
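
  # Illustrative call (hypothetical check), in the style used by the verify
  # LUs below::
  #
  #   self._ErrorIf(test, constants.CV_ENODEHV, node,
  #                 "hypervisor verify failure: '%s'", hv_result)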


class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend(
      [opcodes.OpClusterVerifyGroup(group_name=group,
                                    ignore_errors=self.op.ignore_errors,
                                    depends=depends_fn())]
      for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
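
# Illustrative job layout produced above for a two-group cluster (hypothetical
# group names; the offsets are relative job dependencies)::
#
#   [[OpClusterVerifyConfig(...)],
#    [OpClusterVerifyGroup(group_name="default", depends=[(-1, [])])],
#    [OpClusterVerifyGroup(group_name="other", depends=[(-2, [])])]]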


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = False

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
    self.share_locks = _ShareAll()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Retrieve all information
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1970 def Exec(self, feedback_fn):
1971 """Verify integrity of cluster, performing various test on nodes.
1975 self._feedback_fn = feedback_fn
1977 feedback_fn("* Verifying cluster config")
1979 for msg in self.cfg.VerifyConfig():
1980 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1982 feedback_fn("* Verifying cluster certificate files")
1984 for cert_filename in pathutils.ALL_CERT_FILES:
1985 (errcode, msg) = _VerifyCertificate(cert_filename)
1986 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1988 feedback_fn("* Verifying hypervisor parameters")
1990 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1991 self.all_inst_info.values()))
1993 feedback_fn("* Verifying all nodes belong to an existing group")
1995 # We do this verification here because, should this bogus circumstance
1996 # occur, it would never be caught by VerifyGroup, which only acts on
1997 # nodes/instances reachable from existing node groups.
1999 dangling_nodes = set(node.name for node in self.all_node_info.values()
2000 if node.group not in self.all_group_info)
2002 dangling_instances = {}
2003 no_node_instances = []
2005 for inst in self.all_inst_info.values():
2006 if inst.primary_node in dangling_nodes:
2007 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2008 elif inst.primary_node not in self.all_node_info:
2009 no_node_instances.append(inst.name)
2011 pretty_dangling = [
2012 "%s (%s)" %
2013 (node.name,
2014 utils.CommaJoin(dangling_instances.get(node.name,
2015 ["no instances"])))
2016 for node in dangling_nodes]
2018 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2019 None,
2020 "the following nodes (and their instances) belong to a non"
2021 " existing group: %s", utils.CommaJoin(pretty_dangling))
2023 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2024 None,
2025 "the following instances have a non-existing primary-node:"
2026 " %s", utils.CommaJoin(no_node_instances))
2031 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2032 """Verifies the status of a node group.
2035 HPATH = "cluster-verify"
2036 HTYPE = constants.HTYPE_CLUSTER
2039 _HOOKS_INDENT_RE = re.compile("^", re.M)
2041 class NodeImage(object):
2042 """A class representing the logical and physical status of a node.
2045 @ivar name: the node name to which this object refers
2046 @ivar volumes: a structure as returned from
2047 L{ganeti.backend.GetVolumeList} (runtime)
2048 @ivar instances: a list of running instances (runtime)
2049 @ivar pinst: list of configured primary instances (config)
2050 @ivar sinst: list of configured secondary instances (config)
2051 @ivar sbp: dictionary of {primary-node: list of instances} for all
2052 instances for which this node is secondary (config)
2053 @ivar mfree: free memory, as reported by hypervisor (runtime)
2054 @ivar dfree: free disk, as reported by the node (runtime)
2055 @ivar offline: the offline status (config)
2056 @type rpc_fail: boolean
2057 @ivar rpc_fail: whether the RPC verify call failed (overall,
2058 not whether the individual keys were correct) (runtime)
2059 @type lvm_fail: boolean
2060 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2061 @type hyp_fail: boolean
2062 @ivar hyp_fail: whether the RPC call didn't return the instance list
2063 @type ghost: boolean
2064 @ivar ghost: whether this is a known node or not (config)
2065 @type os_fail: boolean
2066 @ivar os_fail: whether the RPC call didn't return valid OS data
2068 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2069 @type vm_capable: boolean
2070 @ivar vm_capable: whether the node can host instances
2073 def __init__(self, offline=False, name=None, vm_capable=True):
2074 self.name = name
2075 self.volumes = {}
2076 self.instances = []
2077 self.pinst = []
2078 self.sinst = []
2079 self.sbp = {}
2080 self.mfree = 0
2081 self.dfree = 0
2082 self.offline = offline
2083 self.vm_capable = vm_capable
2084 self.rpc_fail = False
2085 self.lvm_fail = False
2086 self.hyp_fail = False
2087 self.ghost = False
2088 self.os_fail = False
2089 self.oslist = {}
2091 def ExpandNames(self):
2092 # This raises errors.OpPrereqError on its own:
2093 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2095 # Get instances in node group; this is unsafe and needs verification later
2096 inst_names = \
2097 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2099 self.needed_locks = {
2100 locking.LEVEL_INSTANCE: inst_names,
2101 locking.LEVEL_NODEGROUP: [self.group_uuid],
2102 locking.LEVEL_NODE: [],
2103 }
2105 self.share_locks = _ShareAll()
2107 def DeclareLocks(self, level):
2108 if level == locking.LEVEL_NODE:
2109 # Get members of node group; this is unsafe and needs verification later
2110 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2112 all_inst_info = self.cfg.GetAllInstancesInfo()
2114 # In Exec(), we warn about mirrored instances that have primary and
2115 # secondary living in separate node groups. To fully verify that
2116 # volumes for these instances are healthy, we will need to do an
2117 # extra call to their secondaries. We ensure here those nodes will
2118 # be locked.
2119 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2120 # Important: access only the instances whose lock is owned
2121 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2122 nodes.update(all_inst_info[inst].secondary_nodes)
2124 self.needed_locks[locking.LEVEL_NODE] = nodes
2126 def CheckPrereq(self):
2127 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2128 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2130 group_nodes = set(self.group_info.members)
2131 group_instances = \
2132 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2134 unlocked_nodes = \
2135 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2137 unlocked_instances = \
2138 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2141 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2142 utils.CommaJoin(unlocked_nodes),
2145 if unlocked_instances:
2146 raise errors.OpPrereqError("Missing lock for instances: %s" %
2147 utils.CommaJoin(unlocked_instances),
2148 errors.ECODE_STATE)
2150 self.all_node_info = self.cfg.GetAllNodesInfo()
2151 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2153 self.my_node_names = utils.NiceSort(group_nodes)
2154 self.my_inst_names = utils.NiceSort(group_instances)
2156 self.my_node_info = dict((name, self.all_node_info[name])
2157 for name in self.my_node_names)
2159 self.my_inst_info = dict((name, self.all_inst_info[name])
2160 for name in self.my_inst_names)
2162 # We detect here the nodes that will need the extra RPC calls for verifying
2163 # split LV volumes; they should be locked.
2164 extra_lv_nodes = set()
2166 for inst in self.my_inst_info.values():
2167 if inst.disk_template in constants.DTS_INT_MIRROR:
2168 for nname in inst.all_nodes:
2169 if self.all_node_info[nname].group != self.group_uuid:
2170 extra_lv_nodes.add(nname)
2172 unlocked_lv_nodes = \
2173 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2175 if unlocked_lv_nodes:
2176 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2177 utils.CommaJoin(unlocked_lv_nodes),
2178 errors.ECODE_STATE)
2179 self.extra_lv_nodes = list(extra_lv_nodes)
2181 def _VerifyNode(self, ninfo, nresult):
2182 """Perform some basic validation on data returned from a node.
2184 - check the result data structure is well formed and has all the
2185 mandatory fields
2186 - check ganeti version
2188 @type ninfo: L{objects.Node}
2189 @param ninfo: the node to check
2190 @param nresult: the results from the node
2192 @return: whether overall this call was successful (and we can expect
2193 reasonable values in the response)
2195 """
2196 node = ninfo.name
2197 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2199 # main result, nresult should be a non-empty dict
2200 test = not nresult or not isinstance(nresult, dict)
2201 _ErrorIf(test, constants.CV_ENODERPC, node,
2202 "unable to verify node: no data returned")
2206 # compares ganeti version
2207 local_version = constants.PROTOCOL_VERSION
2208 remote_version = nresult.get("version", None)
2209 test = not (remote_version and
2210 isinstance(remote_version, (list, tuple)) and
2211 len(remote_version) == 2)
2212 _ErrorIf(test, constants.CV_ENODERPC, node,
2213 "connection to node returned invalid data")
2217 test = local_version != remote_version[0]
2218 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2219 "incompatible protocol versions: master %s,"
2220 " node %s", local_version, remote_version[0])
2224 # node seems compatible, we can actually try to look into its results
2226 # full package version
2227 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2228 constants.CV_ENODEVERSION, node,
2229 "software version mismatch: master %s, node %s",
2230 constants.RELEASE_VERSION, remote_version[1],
2231 code=self.ETYPE_WARNING)
2233 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2234 if ninfo.vm_capable and isinstance(hyp_result, dict):
2235 for hv_name, hv_result in hyp_result.iteritems():
2236 test = hv_result is not None
2237 _ErrorIf(test, constants.CV_ENODEHV, node,
2238 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2240 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2241 if ninfo.vm_capable and isinstance(hvp_result, list):
2242 for item, hv_name, hv_result in hvp_result:
2243 _ErrorIf(True, constants.CV_ENODEHV, node,
2244 "hypervisor %s parameter verify failure (source %s): %s",
2245 hv_name, item, hv_result)
2247 test = nresult.get(constants.NV_NODESETUP,
2248 ["Missing NODESETUP results"])
2249 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2250 "; ".join(test))
2252 return True
2254 def _VerifyNodeTime(self, ninfo, nresult,
2255 nvinfo_starttime, nvinfo_endtime):
2256 """Check the node time.
2258 @type ninfo: L{objects.Node}
2259 @param ninfo: the node to check
2260 @param nresult: the remote results for the node
2261 @param nvinfo_starttime: the start time of the RPC call
2262 @param nvinfo_endtime: the end time of the RPC call
2264 """
2265 node = ninfo.name
2266 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2268 ntime = nresult.get(constants.NV_TIME, None)
2269 try:
2270 ntime_merged = utils.MergeTime(ntime)
2271 except (ValueError, TypeError):
2272 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2273 return
2275 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2276 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2277 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2278 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2279 else:
2280 ntime_diff = None
2282 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2283 "Node time diverges by at least %s from master node time",
2284 ntime_diff)
2286 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2287 """Check the node LVM results.
2289 @type ninfo: L{objects.Node}
2290 @param ninfo: the node to check
2291 @param nresult: the remote results for the node
2292 @param vg_name: the configured VG name
2294 """
2295 if vg_name is None:
2296 return
2298 node = ninfo.name
2299 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2301 # checks vg existence and size > 20G
2302 vglist = nresult.get(constants.NV_VGLIST, None)
2303 test = not vglist
2304 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2305 if not test:
2306 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2307 constants.MIN_VG_SIZE)
2308 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2311 pvlist = nresult.get(constants.NV_PVLIST, None)
2312 test = pvlist is None
2313 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2314 if not test:
2315 # check that ':' is not present in PV names, since it's a
2316 # special character for lvcreate (denotes the range of PEs to
2317 # allocate on)
2318 for _, pvname, owner_vg in pvlist:
2319 test = ":" in pvname
2320 _ErrorIf(test, constants.CV_ENODELVM, node,
2321 "Invalid character ':' in PV '%s' of VG '%s'",
2324 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2325 """Check the node bridges.
2327 @type ninfo: L{objects.Node}
2328 @param ninfo: the node to check
2329 @param nresult: the remote results for the node
2330 @param bridges: the expected list of bridges
2332 """
2333 if not bridges:
2334 return
2336 node = ninfo.name
2337 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2339 missing = nresult.get(constants.NV_BRIDGES, None)
2340 test = not isinstance(missing, list)
2341 _ErrorIf(test, constants.CV_ENODENET, node,
2342 "did not return valid bridge information")
2343 if not test:
2344 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2345 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2347 def _VerifyNodeUserScripts(self, ninfo, nresult):
2348 """Check the results of user scripts presence and executability on the node
2350 @type ninfo: L{objects.Node}
2351 @param ninfo: the node to check
2352 @param nresult: the remote results for the node
2354 """
2356 node = ninfo.name
2357 test = not constants.NV_USERSCRIPTS in nresult
2358 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2359 "did not return user scripts information")
2361 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2362 if broken_scripts:
2363 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2364 "user scripts not present or not executable: %s" %
2365 utils.CommaJoin(sorted(broken_scripts)))
2367 def _VerifyNodeNetwork(self, ninfo, nresult):
2368 """Check the node network connectivity results.
2370 @type ninfo: L{objects.Node}
2371 @param ninfo: the node to check
2372 @param nresult: the remote results for the node
2374 """
2375 node = ninfo.name
2376 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2378 test = constants.NV_NODELIST not in nresult
2379 _ErrorIf(test, constants.CV_ENODESSH, node,
2380 "node hasn't returned node ssh connectivity data")
2381 if not test:
2382 if nresult[constants.NV_NODELIST]:
2383 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2384 _ErrorIf(True, constants.CV_ENODESSH, node,
2385 "ssh communication with node '%s': %s", a_node, a_msg)
2387 test = constants.NV_NODENETTEST not in nresult
2388 _ErrorIf(test, constants.CV_ENODENET, node,
2389 "node hasn't returned node tcp connectivity data")
2390 if not test:
2391 if nresult[constants.NV_NODENETTEST]:
2392 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2393 for anode in nlist:
2394 _ErrorIf(True, constants.CV_ENODENET, node,
2395 "tcp communication with node '%s': %s",
2396 anode, nresult[constants.NV_NODENETTEST][anode])
2398 test = constants.NV_MASTERIP not in nresult
2399 _ErrorIf(test, constants.CV_ENODENET, node,
2400 "node hasn't returned node master IP reachability data")
2401 if not test:
2402 if not nresult[constants.NV_MASTERIP]:
2403 if node == self.master_node:
2404 msg = "the master node cannot reach the master IP (not configured?)"
2406 msg = "cannot reach the master IP"
2407 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2409 def _VerifyInstance(self, instance, instanceconfig, node_image,
2410 diskstatus):
2411 """Verify an instance.
2413 This function checks to see if the required block devices are
2414 available on the instance's node.
2417 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2418 node_current = instanceconfig.primary_node
2420 node_vol_should = {}
2421 instanceconfig.MapLVsByNode(node_vol_should)
2423 cluster = self.cfg.GetClusterInfo()
2424 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2425 self.group_info)
2426 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2427 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2429 for node in node_vol_should:
2430 n_img = node_image[node]
2431 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2432 # ignore missing volumes on offline or broken nodes
2433 continue
2434 for volume in node_vol_should[node]:
2435 test = volume not in n_img.volumes
2436 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2437 "volume %s missing on node %s", volume, node)
2439 if instanceconfig.admin_state == constants.ADMINST_UP:
2440 pri_img = node_image[node_current]
2441 test = instance not in pri_img.instances and not pri_img.offline
2442 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2443 "instance not running on its primary node %s",
2446 diskdata = [(nname, success, status, idx)
2447 for (nname, disks) in diskstatus.items()
2448 for idx, (success, status) in enumerate(disks)]
2450 for nname, success, bdev_status, idx in diskdata:
2451 # the 'ghost node' construction in Exec() ensures that we have a
2453 snode = node_image[nname]
2454 bad_snode = snode.ghost or snode.offline
2455 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2456 not success and not bad_snode,
2457 constants.CV_EINSTANCEFAULTYDISK, instance,
2458 "couldn't retrieve status for disk/%s on %s: %s",
2459 idx, nname, bdev_status)
2460 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2461 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2462 constants.CV_EINSTANCEFAULTYDISK, instance,
2463 "disk/%s on %s is faulty", idx, nname)
2465 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2466 """Verify if there are any unknown volumes in the cluster.
2468 The .os, .swap and backup volumes are ignored. All other volumes are
2469 reported as unknown.
2471 @type reserved: L{ganeti.utils.FieldSet}
2472 @param reserved: a FieldSet of reserved volume names
2475 for node, n_img in node_image.items():
2476 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2477 self.all_node_info[node].group != self.group_uuid):
2478 # skip non-healthy nodes
2479 continue
2480 for volume in n_img.volumes:
2481 test = ((node not in node_vol_should or
2482 volume not in node_vol_should[node]) and
2483 not reserved.Matches(volume))
2484 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2485 "volume %s is unknown", volume)
2487 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2488 """Verify N+1 Memory Resilience.
2490 Check that if one single node dies we can still start all the
2491 instances it was primary for.
2494 cluster_info = self.cfg.GetClusterInfo()
2495 for node, n_img in node_image.items():
2496 # This code checks that every node which is now listed as
2497 # secondary has enough memory to host all instances it is
2498 # supposed to should a single other node in the cluster fail.
2499 # FIXME: not ready for failover to an arbitrary node
2500 # FIXME: does not support file-backed instances
2501 # WARNING: we currently take into account down instances as well
2502 # as up ones, considering that even if they're down someone
2503 # might want to start them even in the event of a node failure.
2504 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2505 # we're skipping nodes marked offline and nodes in other groups from
2506 # the N+1 warning, since most likely we don't have good memory
2507 # information from them; we already list instances living on such
2508 # nodes, and that's enough warning
2509 continue
2510 #TODO(dynmem): also consider ballooning out other instances
2511 for prinode, instances in n_img.sbp.items():
2512 needed_mem = 0
2513 for instance in instances:
2514 bep = cluster_info.FillBE(instance_cfg[instance])
2515 if bep[constants.BE_AUTO_BALANCE]:
2516 needed_mem += bep[constants.BE_MINMEM]
2517 test = n_img.mfree < needed_mem
2518 self._ErrorIf(test, constants.CV_ENODEN1, node,
2519 "not enough memory to accomodate instance failovers"
2520 " should node %s fail (%dMiB needed, %dMiB available)",
2521 prinode, needed_mem, n_img.mfree)
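# Comment-only arithmetic sketch of the N+1 check above, with assumed
# numbers: if this node is secondary for instances i1 (BE_MINMEM 1024) and
# i2 (BE_MINMEM 2048), both auto-balanced and both primary on node P, then
# needed_mem for prinode P is 3072 MiB; mfree below that raises CV_ENODEN1.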
2523 @classmethod
2524 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2525 (files_all, files_opt, files_mc, files_vm)):
2526 """Verifies file checksums collected from all nodes.
2528 @param errorif: Callback for reporting errors
2529 @param nodeinfo: List of L{objects.Node} objects
2530 @param master_node: Name of master node
2531 @param all_nvinfo: RPC results
2534 # Define functions determining which nodes to consider for a file
2535 files2nodefn = [
2536 (files_all, None),
2537 (files_mc, lambda node: (node.master_candidate or
2538 node.name == master_node)),
2539 (files_vm, lambda node: node.vm_capable),
2540 ]
2542 # Build mapping from filename to list of nodes which should have the file
2543 nodefiles = {}
2544 for (files, fn) in files2nodefn:
2545 if fn is None:
2546 filenodes = nodeinfo
2547 else:
2548 filenodes = filter(fn, nodeinfo)
2549 nodefiles.update((filename,
2550 frozenset(map(operator.attrgetter("name"), filenodes)))
2551 for filename in files)
2553 assert set(nodefiles) == (files_all | files_mc | files_vm)
2555 fileinfo = dict((filename, {}) for filename in nodefiles)
2556 ignore_nodes = set()
2558 for node in nodeinfo:
2559 if node.offline:
2560 ignore_nodes.add(node.name)
2561 continue
2563 nresult = all_nvinfo[node.name]
2565 if nresult.fail_msg or not nresult.payload:
2566 node_files = None
2567 else:
2568 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2570 test = not (node_files and isinstance(node_files, dict))
2571 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2572 "Node did not return file checksum data")
2574 ignore_nodes.add(node.name)
2577 # Build per-checksum mapping from filename to nodes having it
2578 for (filename, checksum) in node_files.items():
2579 assert filename in nodefiles
2580 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2582 for (filename, checksums) in fileinfo.items():
2583 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2585 # Nodes having the file
2586 with_file = frozenset(node_name
2587 for nodes in fileinfo[filename].values()
2588 for node_name in nodes) - ignore_nodes
2590 expected_nodes = nodefiles[filename] - ignore_nodes
2592 # Nodes missing file
2593 missing_file = expected_nodes - with_file
2595 if filename in files_opt:
2597 errorif(missing_file and missing_file != expected_nodes,
2598 constants.CV_ECLUSTERFILECHECK, None,
2599 "File %s is optional, but it must exist on all or no"
2600 " nodes (not found on %s)",
2601 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2602 else:
2603 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2604 "File %s is missing from node(s) %s", filename,
2605 utils.CommaJoin(utils.NiceSort(missing_file)))
2607 # Warn if a node has a file it shouldn't
2608 unexpected = with_file - expected_nodes
2609 errorif(unexpected,
2610 constants.CV_ECLUSTERFILECHECK, None,
2611 "File %s should not exist on node(s) %s",
2612 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2614 # See if there are multiple versions of the file
2615 test = len(checksums) > 1
2616 if test:
2617 variants = ["variant %s on %s" %
2618 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2619 for (idx, (checksum, nodes)) in
2620 enumerate(sorted(checksums.items()))]
2621 else:
2622 variants = []
2624 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2625 "File %s found with %s different checksums (%s)",
2626 filename, len(checksums), "; ".join(variants))
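# Shape of the fileinfo mapping verified above (comment only, values
# invented):
#   fileinfo = {
#     "/var/lib/ganeti/config.data": {"<checksum1>": set(["node1", "node2"]),
#                                     "<checksum2>": set(["node3"])},
#   }
# more than one checksum key for a filename means the nodes disagree, which
# is reported via CV_ECLUSTERFILECHECK with one "variant" per checksum.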
2628 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2629 drbd_map):
2630 """Verifies the node DRBD status.
2632 @type ninfo: L{objects.Node}
2633 @param ninfo: the node to check
2634 @param nresult: the remote results for the node
2635 @param instanceinfo: the dict of instances
2636 @param drbd_helper: the configured DRBD usermode helper
2637 @param drbd_map: the DRBD map as returned by
2638 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2640 """
2641 node = ninfo.name
2642 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2644 if drbd_helper:
2645 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2646 test = (helper_result is None)
2647 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2648 "no drbd usermode helper returned")
2649 if helper_result:
2650 status, payload = helper_result
2651 test = not status
2652 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2653 "drbd usermode helper check unsuccessful: %s", payload)
2654 test = status and (payload != drbd_helper)
2655 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2656 "wrong drbd usermode helper: %s", payload)
2658 # compute the DRBD minors
2659 node_drbd = {}
2660 for minor, instance in drbd_map[node].items():
2661 test = instance not in instanceinfo
2662 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2663 "ghost instance '%s' in temporary DRBD map", instance)
2664 # ghost instance should not be running, but otherwise we
2665 # don't give double warnings (both ghost instance and
2666 # unallocated minor in use)
2667 if test:
2668 node_drbd[minor] = (instance, False)
2669 else:
2670 instance = instanceinfo[instance]
2671 node_drbd[minor] = (instance.name,
2672 instance.admin_state == constants.ADMINST_UP)
2674 # and now check them
2675 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2676 test = not isinstance(used_minors, (tuple, list))
2677 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2678 "cannot parse drbd status file: %s", str(used_minors))
2679 if test:
2680 # we cannot check drbd status
2681 return
2683 for minor, (iname, must_exist) in node_drbd.items():
2684 test = minor not in used_minors and must_exist
2685 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2686 "drbd minor %d of instance %s is not active", minor, iname)
2687 for minor in used_minors:
2688 test = minor not in node_drbd
2689 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2690 "unallocated drbd minor %d is in use", minor)
2692 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2693 """Builds the node OS structures.
2695 @type ninfo: L{objects.Node}
2696 @param ninfo: the node to check
2697 @param nresult: the remote results for the node
2698 @param nimg: the node image object
2700 """
2701 node = ninfo.name
2702 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2704 remote_os = nresult.get(constants.NV_OSLIST, None)
2705 test = (not isinstance(remote_os, list) or
2706 not compat.all(isinstance(v, list) and len(v) == 7
2707 for v in remote_os))
2709 _ErrorIf(test, constants.CV_ENODEOS, node,
2710 "node hasn't returned valid OS data")
2719 for (name, os_path, status, diagnose,
2720 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2722 if name not in os_dict:
2723 os_dict[name] = []
2725 # parameters is a list of lists instead of list of tuples due to
2726 # JSON lacking a real tuple type, fix it:
2727 parameters = [tuple(v) for v in parameters]
2728 os_dict[name].append((os_path, status, diagnose,
2729 set(variants), set(parameters), set(api_ver)))
2731 nimg.oslist = os_dict
2733 def _VerifyNodeOS(self, ninfo, nimg, base):
2734 """Verifies the node OS list.
2736 @type ninfo: L{objects.Node}
2737 @param ninfo: the node to check
2738 @param nimg: the node image object
2739 @param base: the 'template' node we match against (e.g. from the master)
2741 """
2742 node = ninfo.name
2743 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2745 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2747 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2748 for os_name, os_data in nimg.oslist.items():
2749 assert os_data, "Empty OS status for OS %s?!" % os_name
2750 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2751 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2752 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2753 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2754 "OS '%s' has multiple entries (first one shadows the rest): %s",
2755 os_name, utils.CommaJoin([v[0] for v in os_data]))
2756 # comparisons with the 'base' image
2757 test = os_name not in base.oslist
2758 _ErrorIf(test, constants.CV_ENODEOS, node,
2759 "Extra OS %s not present on reference node (%s)",
2763 assert base.oslist[os_name], "Base node has empty OS status?"
2764 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2765 if not b_status:
2766 # base OS is invalid, skipping
2767 continue
2768 for kind, a, b in [("API version", f_api, b_api),
2769 ("variants list", f_var, b_var),
2770 ("parameters", beautify_params(f_param),
2771 beautify_params(b_param))]:
2772 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2773 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2774 kind, os_name, base.name,
2775 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2777 # check any missing OSes
2778 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2779 _ErrorIf(missing, constants.CV_ENODEOS, node,
2780 "OSes present on reference node %s but missing on this node: %s",
2781 base.name, utils.CommaJoin(missing))
2783 def _VerifyOob(self, ninfo, nresult):
2784 """Verifies out of band functionality of a node.
2786 @type ninfo: L{objects.Node}
2787 @param ninfo: the node to check
2788 @param nresult: the remote results for the node
2790 """
2791 node = ninfo.name
2792 # We just have to verify the paths on master and/or master candidates
2793 # as the oob helper is invoked on the master
2794 if ((ninfo.master_candidate or ninfo.master_capable) and
2795 constants.NV_OOB_PATHS in nresult):
2796 for path_result in nresult[constants.NV_OOB_PATHS]:
2797 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2799 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2800 """Verifies and updates the node volume data.
2802 This function will update a L{NodeImage}'s internal structures
2803 with data from the remote call.
2805 @type ninfo: L{objects.Node}
2806 @param ninfo: the node to check
2807 @param nresult: the remote results for the node
2808 @param nimg: the node image object
2809 @param vg_name: the configured VG name
2811 """
2812 node = ninfo.name
2813 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2815 nimg.lvm_fail = True
2816 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2817 if vg_name is None:
2818 pass
2819 elif isinstance(lvdata, basestring):
2820 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2821 utils.SafeEncode(lvdata))
2822 elif not isinstance(lvdata, dict):
2823 _ErrorIf(True, constants.CV_ENODELVM, node,
2824 "rpc call to node failed (lvlist)")
2825 else:
2826 nimg.volumes = lvdata
2827 nimg.lvm_fail = False
2829 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2830 """Verifies and updates the node instance list.
2832 If the listing was successful, then updates this node's instance
2833 list. Otherwise, it marks the RPC call as failed for the instance
2836 @type ninfo: L{objects.Node}
2837 @param ninfo: the node to check
2838 @param nresult: the remote results for the node
2839 @param nimg: the node image object
2842 idata = nresult.get(constants.NV_INSTANCELIST, None)
2843 test = not isinstance(idata, list)
2844 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2845 "rpc call to node failed (instancelist): %s",
2846 utils.SafeEncode(str(idata)))
2847 if test:
2848 nimg.hyp_fail = True
2849 else:
2850 nimg.instances = idata
2852 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2853 """Verifies and computes a node information map
2855 @type ninfo: L{objects.Node}
2856 @param ninfo: the node to check
2857 @param nresult: the remote results for the node
2858 @param nimg: the node image object
2859 @param vg_name: the configured VG name
2861 """
2862 node = ninfo.name
2863 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2865 # try to read free memory (from the hypervisor)
2866 hv_info = nresult.get(constants.NV_HVINFO, None)
2867 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2868 _ErrorIf(test, constants.CV_ENODEHV, node,
2869 "rpc call to node failed (hvinfo)")
2870 if not test:
2871 try:
2872 nimg.mfree = int(hv_info["memory_free"])
2873 except (ValueError, TypeError):
2874 _ErrorIf(True, constants.CV_ENODERPC, node,
2875 "node returned invalid nodeinfo, check hypervisor")
2877 # FIXME: devise a free space model for file based instances as well
2878 if vg_name is not None:
2879 test = (constants.NV_VGLIST not in nresult or
2880 vg_name not in nresult[constants.NV_VGLIST])
2881 _ErrorIf(test, constants.CV_ENODELVM, node,
2882 "node didn't return data for the volume group '%s'"
2883 " - it is either missing or broken", vg_name)
2884 if not test:
2885 try:
2886 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2887 except (ValueError, TypeError):
2888 _ErrorIf(True, constants.CV_ENODERPC, node,
2889 "node returned invalid LVM info, check LVM status")
2891 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2892 """Gets per-disk status information for all instances.
2894 @type nodelist: list of strings
2895 @param nodelist: Node names
2896 @type node_image: dict of (name, L{objects.Node})
2897 @param node_image: Node objects
2898 @type instanceinfo: dict of (name, L{objects.Instance})
2899 @param instanceinfo: Instance objects
2900 @rtype: {instance: {node: [(success, payload)]}}
2901 @return: a dictionary of per-instance dictionaries with nodes as
2902 keys and disk information as values; the disk information is a
2903 list of tuples (success, payload)
2906 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2908 node_disks = {}
2909 node_disks_devonly = {}
2910 diskless_instances = set()
2911 diskless = constants.DT_DISKLESS
2913 for nname in nodelist:
2914 node_instances = list(itertools.chain(node_image[nname].pinst,
2915 node_image[nname].sinst))
2916 diskless_instances.update(inst for inst in node_instances
2917 if instanceinfo[inst].disk_template == diskless)
2918 disks = [(inst, disk)
2919 for inst in node_instances
2920 for disk in instanceinfo[inst].disks]
2922 if not disks:
2923 # No need to collect data
2924 continue
2926 node_disks[nname] = disks
2928 # _AnnotateDiskParams makes already copies of the disks
2929 devonly = []
2930 for (inst, dev) in disks:
2931 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2932 self.cfg.SetDiskID(anno_disk, nname)
2933 devonly.append(anno_disk)
2935 node_disks_devonly[nname] = devonly
2937 assert len(node_disks) == len(node_disks_devonly)
2939 # Collect data from all nodes with disks
2940 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2941 node_disks_devonly)
2943 assert len(result) == len(node_disks)
2945 instdisk = {}
2947 for (nname, nres) in result.items():
2948 disks = node_disks[nname]
2950 if nres.offline:
2951 # No data from this node
2952 data = len(disks) * [(False, "node offline")]
2953 else:
2954 msg = nres.fail_msg
2955 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2956 "while getting disk information: %s", msg)
2957 if msg:
2958 # No data from this node
2959 data = len(disks) * [(False, msg)]
2960 else:
2961 data = []
2962 for idx, i in enumerate(nres.payload):
2963 if isinstance(i, (tuple, list)) and len(i) == 2:
2964 data.append(i)
2965 else:
2966 logging.warning("Invalid result from node %s, entry %d: %s",
2967 nname, idx, i)
2968 data.append((False, "Invalid result from the remote node"))
2970 for ((inst, _), status) in zip(disks, data):
2971 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2973 # Add empty entries for diskless instances.
2974 for inst in diskless_instances:
2975 assert inst not in instdisk
2976 instdisk[inst] = {}
2978 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2979 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2980 compat.all(isinstance(s, (tuple, list)) and
2981 len(s) == 2 for s in statuses)
2982 for inst, nnames in instdisk.items()
2983 for nname, statuses in nnames.items())
2984 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
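# Shape of the instdisk mapping guaranteed by the asserts above (comment
# only, values invented):
#   instdisk = {
#     "instance1": {"node1": [(True, status0), (False, "some error")]},
#   }
# one (success, payload) pair per disk index, keyed by instance and node.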
2988 @staticmethod
2989 def _SshNodeSelector(group_uuid, all_nodes):
2990 """Create endless iterators for all potential SSH check hosts.
2993 nodes = [node for node in all_nodes
2994 if (node.group != group_uuid and
2995 not node.offline)]
2996 keyfunc = operator.attrgetter("group")
2998 return map(itertools.cycle,
2999 [sorted(map(operator.attrgetter("name"), names))
3000 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3001 keyfunc)])
3003 @classmethod
3004 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3005 """Choose which nodes should talk to which other nodes.
3007 We will make nodes contact all nodes in their group, and one node from
3010 @warning: This algorithm has a known issue if one node group is much
3011 smaller than others (e.g. just one node). In such a case all other
3012 nodes will talk to the single node.
3015 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3016 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3018 return (online_nodes,
3019 dict((name, sorted([i.next() for i in sel]))
3020 for name in online_nodes))
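# Comment-only walk-through of the selection above, with invented nodes:
# given two foreign groups cycled as [["n1", "n2"], ["m1"]], the first
# local node draws ("n1", "m1") and the second ("n2", "m1"), so every
# local node talks to one member of each foreign group, and small groups
# are simply contacted more often (the known issue in the warning above).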
3022 def BuildHooksEnv(self):
3023 """Build hooks env.
3025 Cluster-Verify hooks just ran in the post phase and their failure makes
3026 the output be logged in the verify output and the verification to fail.
3028 """
3029 env = {
3030 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3031 }
3033 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3034 for node in self.my_node_info.values())
3036 return env
3038 def BuildHooksNodes(self):
3039 """Build hooks nodes.
3042 return ([], self.my_node_names)
3044 def Exec(self, feedback_fn):
3045 """Verify integrity of the node group, performing various test on nodes.
3048 # This method has too many local variables. pylint: disable=R0914
3049 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3051 if not self.my_node_names:
3053 feedback_fn("* Empty node group, skipping verification")
3057 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3058 verbose = self.op.verbose
3059 self._feedback_fn = feedback_fn
3061 vg_name = self.cfg.GetVGName()
3062 drbd_helper = self.cfg.GetDRBDHelper()
3063 cluster = self.cfg.GetClusterInfo()
3064 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3065 hypervisors = cluster.enabled_hypervisors
3066 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3068 i_non_redundant = [] # Non redundant instances
3069 i_non_a_balanced = [] # Non auto-balanced instances
3070 i_offline = 0 # Count of offline instances
3071 n_offline = 0 # Count of offline nodes
3072 n_drained = 0 # Count of nodes being drained
3073 node_vol_should = {}
3075 # FIXME: verify OS list
3077 # File verification
3078 filemap = _ComputeAncillaryFiles(cluster, False)
3080 # do local checksums
3081 master_node = self.master_node = self.cfg.GetMasterNode()
3082 master_ip = self.cfg.GetMasterIP()
3084 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3086 user_scripts = []
3087 if self.cfg.GetUseExternalMipScript():
3088 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3090 node_verify_param = {
3091 constants.NV_FILELIST:
3092 utils.UniqueSequence(filename
3093 for files in filemap
3094 for filename in files),
3095 constants.NV_NODELIST:
3096 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3097 self.all_node_info.values()),
3098 constants.NV_HYPERVISOR: hypervisors,
3099 constants.NV_HVPARAMS:
3100 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3101 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3102 for node in node_data_list
3103 if not node.offline],
3104 constants.NV_INSTANCELIST: hypervisors,
3105 constants.NV_VERSION: None,
3106 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3107 constants.NV_NODESETUP: None,
3108 constants.NV_TIME: None,
3109 constants.NV_MASTERIP: (master_node, master_ip),
3110 constants.NV_OSLIST: None,
3111 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3112 constants.NV_USERSCRIPTS: user_scripts,
3113 }
3115 if vg_name is not None:
3116 node_verify_param[constants.NV_VGLIST] = None
3117 node_verify_param[constants.NV_LVLIST] = vg_name
3118 node_verify_param[constants.NV_PVLIST] = [vg_name]
3120 if drbd_helper:
3121 node_verify_param[constants.NV_DRBDLIST] = None
3122 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3125 # FIXME: this needs to be changed per node-group, not cluster-wide
3126 bridges = set()
3127 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3128 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3129 bridges.add(default_nicpp[constants.NIC_LINK])
3130 for instance in self.my_inst_info.values():
3131 for nic in instance.nics:
3132 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3133 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3134 bridges.add(full_nic[constants.NIC_LINK])
3136 if bridges:
3137 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3139 # Build our expected cluster state
3140 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3141 name=node.name,
3142 vm_capable=node.vm_capable))
3143 for node in node_data_list)
3146 oob_paths = []
3147 for node in self.all_node_info.values():
3148 path = _SupportsOob(self.cfg, node)
3149 if path and path not in oob_paths:
3150 oob_paths.append(path)
3152 if oob_paths:
3153 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3155 for instance in self.my_inst_names:
3156 inst_config = self.my_inst_info[instance]
3157 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3158 i_offline += 1
3160 for nname in inst_config.all_nodes:
3161 if nname not in node_image:
3162 gnode = self.NodeImage(name=nname)
3163 gnode.ghost = (nname not in self.all_node_info)
3164 node_image[nname] = gnode
3166 inst_config.MapLVsByNode(node_vol_should)
3168 pnode = inst_config.primary_node
3169 node_image[pnode].pinst.append(instance)
3171 for snode in inst_config.secondary_nodes:
3172 nimg = node_image[snode]
3173 nimg.sinst.append(instance)
3174 if pnode not in nimg.sbp:
3175 nimg.sbp[pnode] = []
3176 nimg.sbp[pnode].append(instance)
3178 # At this point, we have the in-memory data structures complete,
3179 # except for the runtime information, which we'll gather next
3181 # Due to the way our RPC system works, exact response times cannot be
3182 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3183 # time before and after executing the request, we can at least have a time
3184 # window.
3185 nvinfo_starttime = time.time()
3186 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3187 node_verify_param,
3188 self.cfg.GetClusterName())
3189 nvinfo_endtime = time.time()
3191 if self.extra_lv_nodes and vg_name is not None:
3192 extra_lv_nvinfo = \
3193 self.rpc.call_node_verify(self.extra_lv_nodes,
3194 {constants.NV_LVLIST: vg_name},
3195 self.cfg.GetClusterName())
3196 else:
3197 extra_lv_nvinfo = {}
3199 all_drbd_map = self.cfg.ComputeDRBDMap()
3201 feedback_fn("* Gathering disk information (%s nodes)" %
3202 len(self.my_node_names))
3203 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3204 self.my_inst_info)
3206 feedback_fn("* Verifying configuration file consistency")
3208 # If not all nodes are being checked, we need to make sure the master node
3209 # and a non-checked vm_capable node are in the list.
3210 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3211 if absent_nodes:
3212 vf_nvinfo = all_nvinfo.copy()
3213 vf_node_info = list(self.my_node_info.values())
3214 additional_nodes = []
3215 if master_node not in self.my_node_info:
3216 additional_nodes.append(master_node)
3217 vf_node_info.append(self.all_node_info[master_node])
3218 # Add the first vm_capable node we find which is not included,
3219 # excluding the master node (which we already have)
3220 for node in absent_nodes:
3221 nodeinfo = self.all_node_info[node]
3222 if (nodeinfo.vm_capable and not nodeinfo.offline and
3223 node != master_node):
3224 additional_nodes.append(node)
3225 vf_node_info.append(self.all_node_info[node])
3227 key = constants.NV_FILELIST
3228 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3229 {key: node_verify_param[key]},
3230 self.cfg.GetClusterName()))
3231 else:
3232 vf_nvinfo = all_nvinfo
3233 vf_node_info = self.my_node_info.values()
3235 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3237 feedback_fn("* Verifying node status")
3241 for node_i in node_data_list:
3243 nimg = node_image[node]
3247 feedback_fn("* Skipping offline node %s" % (node,))
3251 if node == master_node:
3253 elif node_i.master_candidate:
3254 ntype = "master candidate"
3255 elif node_i.drained:
3261 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3263 msg = all_nvinfo[node].fail_msg
3264 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3267 nimg.rpc_fail = True
3270 nresult = all_nvinfo[node].payload
3272 nimg.call_ok = self._VerifyNode(node_i, nresult)
3273 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3274 self._VerifyNodeNetwork(node_i, nresult)
3275 self._VerifyNodeUserScripts(node_i, nresult)
3276 self._VerifyOob(node_i, nresult)
3278 if nimg.vm_capable:
3279 self._VerifyNodeLVM(node_i, nresult, vg_name)
3280 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3281 all_drbd_map)
3283 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3284 self._UpdateNodeInstances(node_i, nresult, nimg)
3285 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3286 self._UpdateNodeOS(node_i, nresult, nimg)
3288 if not nimg.os_fail:
3289 if refos_img is None:
3290 refos_img = nimg
3291 self._VerifyNodeOS(node_i, nimg, refos_img)
3292 self._VerifyNodeBridges(node_i, nresult, bridges)
3294 # Check whether all running instances are primary for the node. (This
3295 # can no longer be done from _VerifyInstance below, since some of the
3296 # wrong instances could be from other node groups.)
3297 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3299 for inst in non_primary_inst:
3300 test = inst in self.all_inst_info
3301 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3302 "instance should not run on node %s", node_i.name)
3303 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3304 "node is running unknown instance %s", inst)
3306 for node, result in extra_lv_nvinfo.items():
3307 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3308 node_image[node], vg_name)
3310 feedback_fn("* Verifying instance status")
3311 for instance in self.my_inst_names:
3313 feedback_fn("* Verifying instance %s" % instance)
3314 inst_config = self.my_inst_info[instance]
3315 self._VerifyInstance(instance, inst_config, node_image,
3316 instdisk[instance])
3317 inst_nodes_offline = []
3319 pnode = inst_config.primary_node
3320 pnode_img = node_image[pnode]
3321 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3322 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3323 " primary node failed", instance)
3325 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3326 pnode_img.offline,
3327 constants.CV_EINSTANCEBADNODE, instance,
3328 "instance is marked as running and lives on offline node %s",
3329 inst_config.primary_node)
3331 # If the instance is non-redundant we cannot survive losing its primary
3332 # node, so we are not N+1 compliant. On the other hand we have no disk
3333 # templates with more than one secondary so that situation is not well
3334 # supported either.
3335 # FIXME: does not support file-backed instances
3336 if not inst_config.secondary_nodes:
3337 i_non_redundant.append(instance)
3339 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3340 constants.CV_EINSTANCELAYOUT,
3341 instance, "instance has multiple secondary nodes: %s",
3342 utils.CommaJoin(inst_config.secondary_nodes),
3343 code=self.ETYPE_WARNING)
3345 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3346 pnode = inst_config.primary_node
3347 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3348 instance_groups = {}
3350 for node in instance_nodes:
3351 instance_groups.setdefault(self.all_node_info[node].group,
3352 []).append(node)
3354 pretty_list = [
3355 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3356 # Sort so that we always list the primary node first.
3357 for group, nodes in sorted(instance_groups.items(),
3358 key=lambda (_, nodes): pnode in nodes,
3359 reverse=True)]
3361 self._ErrorIf(len(instance_groups) > 1,
3362 constants.CV_EINSTANCESPLITGROUPS,
3363 instance, "instance has primary and secondary nodes in"
3364 " different groups: %s", utils.CommaJoin(pretty_list),
3365 code=self.ETYPE_WARNING)
3367 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3368 i_non_a_balanced.append(instance)
3370 for snode in inst_config.secondary_nodes:
3371 s_img = node_image[snode]
3372 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3373 snode, "instance %s, connection to secondary node failed",
3377 inst_nodes_offline.append(snode)
3379 # warn that the instance lives on offline nodes
3380 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3381 "instance has offline secondary node(s) %s",
3382 utils.CommaJoin(inst_nodes_offline))
3383 # ... or ghost/non-vm_capable nodes
3384 for node in inst_config.all_nodes:
3385 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3386 instance, "instance lives on ghost node %s", node)
3387 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3388 instance, "instance lives on non-vm_capable node %s", node)
3390 feedback_fn("* Verifying orphan volumes")
3391 reserved = utils.FieldSet(*cluster.reserved_lvs)
3393 # We will get spurious "unknown volume" warnings if any node of this group
3394 # is secondary for an instance whose primary is in another group. To avoid
3395 # them, we find these instances and add their volumes to node_vol_should.
3396 for inst in self.all_inst_info.values():
3397 for secondary in inst.secondary_nodes:
3398 if (secondary in self.my_node_info
3399 and inst.name not in self.my_inst_info):
3400 inst.MapLVsByNode(node_vol_should)
3403 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3405 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3406 feedback_fn("* Verifying N+1 Memory redundancy")
3407 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3409 feedback_fn("* Other Notes")
3411 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3412 % len(i_non_redundant))
3414 if i_non_a_balanced:
3415 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3416 % len(i_non_a_balanced))
3419 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3422 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3425 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3429 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3430 """Analyze the post-hooks' result
3432 This method analyses the hook result, handles it, and sends some
3433 nicely-formatted feedback back to the user.
3435 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3436 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3437 @param hooks_results: the results of the multi-node hooks rpc call
3438 @param feedback_fn: function used to send feedback back to the caller
3439 @param lu_result: previous Exec result
3440 @return: the new Exec result, based on the previous result
3444 # We only really run POST phase hooks, only for non-empty groups,
3445 # and are only interested in their results
3446 if not self.my_node_names:
3447 # empty node group
3448 pass
3449 elif phase == constants.HOOKS_PHASE_POST:
3450 # Used to change hooks' output to proper indentation
3451 feedback_fn("* Hooks Results")
3452 assert hooks_results, "invalid result from hooks"
3454 for node_name in hooks_results:
3455 res = hooks_results[node_name]
3456 msg = res.fail_msg
3457 test = msg and not res.offline
3458 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3459 "Communication failure in hooks execution: %s", msg)
3460 if res.offline or msg:
3461 # No need to investigate payload if node is offline or gave
3462 # an error
3463 continue
3464 for script, hkr, output in res.payload:
3465 test = hkr == constants.HKR_FAIL
3466 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3467 "Script %s failed, output:", script)
3468 if test:
3469 output = self._HOOKS_INDENT_RE.sub(" ", output)
3470 feedback_fn("%s" % output)
3471 lu_result = False
3473 return lu_result
3476 class LUClusterVerifyDisks(NoHooksLU):
3477 """Verifies the cluster disks status.
3482 def ExpandNames(self):
3483 self.share_locks = _ShareAll()
3484 self.needed_locks = {
3485 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3486 }
3488 def Exec(self, feedback_fn):
3489 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3491 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3492 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3493 for group in group_names])
3496 class LUGroupVerifyDisks(NoHooksLU):
3497 """Verifies the status of all disks in a node group.
3502 def ExpandNames(self):
3503 # Raises errors.OpPrereqError on its own if group can't be found
3504 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3506 self.share_locks = _ShareAll()
3507 self.needed_locks = {
3508 locking.LEVEL_INSTANCE: [],
3509 locking.LEVEL_NODEGROUP: [],
3510 locking.LEVEL_NODE: [],
3513 def DeclareLocks(self, level):
3514 if level == locking.LEVEL_INSTANCE:
3515 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3517 # Lock instances optimistically, needs verification once node and group
3518 # locks have been acquired
3519 self.needed_locks[locking.LEVEL_INSTANCE] = \
3520 self.cfg.GetNodeGroupInstances(self.group_uuid)
3522 elif level == locking.LEVEL_NODEGROUP:
3523 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3525 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3526 set([self.group_uuid] +
3527 # Lock all groups used by instances optimistically; this requires
3528 # going via the node before it's locked, requiring verification
3529 # later on
3530 [group_uuid
3531 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3532 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3534 elif level == locking.LEVEL_NODE:
3535 # This will only lock the nodes in the group to be verified which contain
3536 # actual instances
3537 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3538 self._LockInstancesNodes()
3540 # Lock all nodes in group to be verified
3541 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3542 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3543 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3545 def CheckPrereq(self):
3546 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3547 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3548 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3550 assert self.group_uuid in owned_groups
3552 # Check if locked instances are still correct
3553 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3555 # Get instance information
3556 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3558 # Check if node groups for locked instances are still correct
3559 _CheckInstancesNodeGroups(self.cfg, self.instances,
3560 owned_groups, owned_nodes, self.group_uuid)
3562 def Exec(self, feedback_fn):
3563 """Verify integrity of cluster disks.
3565 @rtype: tuple of three items
3566 @return: a tuple of (dict of node-to-node_error, list of instances
3567 which need activate-disks, dict of instance: (node, volume) for
3568 missing volumes
3570 """
3571 res_nodes = {}
3572 res_instances = set()
3573 res_missing = {}
3575 nv_dict = _MapInstanceDisksToNodes(
3576 [inst for inst in self.instances.values()
3577 if inst.admin_state == constants.ADMINST_UP])
3580 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3581 set(self.cfg.GetVmCapableNodeList()))
3583 node_lvs = self.rpc.call_lv_list(nodes, [])
3585 for (node, node_res) in node_lvs.items():
3586 if node_res.offline:
3587 continue
3589 msg = node_res.fail_msg
3590 if msg:
3591 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3592 res_nodes[node] = msg
3593 continue
3595 for lv_name, (_, _, lv_online) in node_res.payload.items():
3596 inst = nv_dict.pop((node, lv_name), None)
3597 if not (lv_online or inst is None):
3598 res_instances.add(inst)
3600 # any leftover items in nv_dict are missing LVs, let's arrange the data
3601 # better
3602 for key, inst in nv_dict.iteritems():
3603 res_missing.setdefault(inst, []).append(list(key))
3605 return (res_nodes, list(res_instances), res_missing)
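# Shape of the triple returned by LUGroupVerifyDisks.Exec above (comment
# only, values invented):
#   ({"node1": "error contacting node"},          # per-node errors
#    ["instance2"],                               # need activate-disks
#    {"instance3": [["node1", "xenvg/disk0"]]})   # missing (node, volume)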
3608 class LUClusterRepairDiskSizes(NoHooksLU):
3609 """Verifies the cluster disks sizes.
3614 def ExpandNames(self):
3615 if self.op.instances:
3616 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3617 self.needed_locks = {
3618 locking.LEVEL_NODE_RES: [],
3619 locking.LEVEL_INSTANCE: self.wanted_names,
3620 }
3621 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3622 else:
3623 self.wanted_names = None
3624 self.needed_locks = {
3625 locking.LEVEL_NODE_RES: locking.ALL_SET,
3626 locking.LEVEL_INSTANCE: locking.ALL_SET,
3627 }
3628 self.share_locks = {
3629 locking.LEVEL_NODE_RES: 1,
3630 locking.LEVEL_INSTANCE: 0,
3631 }
3633 def DeclareLocks(self, level):
3634 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3635 self._LockInstancesNodes(primary_only=True, level=level)
3637 def CheckPrereq(self):
3638 """Check prerequisites.
3640 This only checks the optional instance list against the existing names.
3643 if self.wanted_names is None:
3644 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3646 self.wanted_instances = \
3647 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3649 def _EnsureChildSizes(self, disk):
3650 """Ensure children of the disk have the needed disk size.
3652 This is valid mainly for DRBD8 and fixes an issue where the
3653 children have smaller disk size.
3655 @param disk: an L{ganeti.objects.Disk} object
3658 if disk.dev_type == constants.LD_DRBD8:
3659 assert disk.children, "Empty children for DRBD8?"
3660 fchild = disk.children[0]
3661 mismatch = fchild.size < disk.size
3663 self.LogInfo("Child disk has size %d, parent %d, fixing",
3664 fchild.size, disk.size)
3665 fchild.size = disk.size
3667 # and we recurse on this child only, not on the metadev
3668 return self._EnsureChildSizes(fchild) or mismatch
3669 else:
3670 return False
3672 def Exec(self, feedback_fn):
3673 """Verify the size of cluster disks.
3676 # TODO: check child disks too
3677 # TODO: check differences in size between primary/secondary nodes
3678 per_node_disks = {}
3679 for instance in self.wanted_instances:
3680 pnode = instance.primary_node
3681 if pnode not in per_node_disks:
3682 per_node_disks[pnode] = []
3683 for idx, disk in enumerate(instance.disks):
3684 per_node_disks[pnode].append((instance, idx, disk))
3686 assert not (frozenset(per_node_disks.keys()) -
3687 self.owned_locks(locking.LEVEL_NODE_RES)), \
3688 "Not owning correct locks"
3689 assert not self.owned_locks(locking.LEVEL_NODE)
3691 changed = []
3692 for node, dskl in per_node_disks.items():
3693 newl = [v[2].Copy() for v in dskl]
3694 for dsk in newl:
3695 self.cfg.SetDiskID(dsk, node)
3696 result = self.rpc.call_blockdev_getsize(node, newl)
3698 self.LogWarning("Failure in blockdev_getsize call to node"
3699 " %s, ignoring", node)
3701 if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
3707 for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
3712 if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
3717 if size != disk.size:
3718 self.LogInfo("Disk %d of instance %s has mismatched size,"
3719 " correcting: recorded %d, actual %d", idx,
3720 instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
3723 changed.append((instance.name, idx, size))
3724 if self._EnsureChildSizes(disk):
3725 self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))

    return changed
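# A minimal stand-alone sketch of the resize propagation performed by
# _EnsureChildSizes above (assumption: simplified stand-in objects rather
# than ganeti.objects.Disk). A DRBD-style parent pushes its size down to
# its first (data) child, recursing only on that child, and reports
# whether anything had to be fixed.
class _ExampleDisk(object):
  def __init__(self, size, children=None):
    self.size = size
    self.children = children or []

def _ExampleEnsureChildSizes(disk):
  """Returns True if any child size was corrected."""
  if not disk.children:
    return False
  fchild = disk.children[0]
  mismatch = fchild.size < disk.size
  if mismatch:
    fchild.size = disk.size
  return _ExampleEnsureChildSizes(fchild) or mismatch

# e.g. _ExampleEnsureChildSizes(_ExampleDisk(1024, [_ExampleDisk(1000)]))
# returns True and grows the child to 1024.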
3730 class LUClusterRename(LogicalUnit):
3731 """Rename the cluster.
3734 HPATH = "cluster-rename"
3735 HTYPE = constants.HTYPE_CLUSTER
3737 def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
3746 def BuildHooksNodes(self):
3747 """Build hooks nodes.
3750 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3752 def CheckPrereq(self):
3753 """Verify that the passed name is a valid one.
3756 hostname = netutils.GetHostname(name=self.op.name,
3757 family=self.cfg.GetPrimaryIPFamily())
3759 new_name = hostname.name
3760 self.ip = new_ip = hostname.ip
3761 old_name = self.cfg.GetClusterName()
3762 old_ip = self.cfg.GetMasterIP()
3763 if new_name == old_name and new_ip == old_ip:
3764 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3765 " cluster has changed",
3767 if new_ip != old_ip:
3768 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3769 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3770 " reachable on the network" %
3771 new_ip, errors.ECODE_NOTUNIQUE)
3773 self.op.name = new_name
3775 def Exec(self, feedback_fn):
3776 """Rename the cluster.
    clustername = self.op.name
    new_ip = self.ip
3782 # shutdown the master IP
3783 master_params = self.cfg.GetMasterNetworkParameters()
3784 ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not disable the master role")
    try:
      cluster = self.cfg.GetClusterInfo()
3791 cluster.cluster_name = clustername
3792 cluster.master_ip = new_ip
3793 self.cfg.Update(cluster, feedback_fn)
3795 # update the known hosts file
3796 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
3797 node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master_params.name)
      except ValueError:
        pass
3802 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
    finally:
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername
3815 def _ValidateNetmask(cfg, netmask):
3816 """Checks if a netmask is valid.
3818 @type cfg: L{config.ConfigWriter}
3819 @param cfg: The cluster configuration
3821 @param netmask: the netmask to be verified
3822 @raise errors.OpPrereqError: if the validation fails
3825 ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3828 except errors.ProgrammerError:
3829 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3830 ip_family, errors.ECODE_INVAL)
3831 if not ipcls.ValidateNetmask(netmask):
3832 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3833 (netmask), errors.ECODE_INVAL)
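# A hedged stand-alone equivalent of the check above (assumption: the
# netmask is handled as a CIDR prefix length, which is what
# netutils.IPAddress.ValidateNetmask expects; the bounds below mirror the
# IPv4/IPv6 address widths and are not taken from the upstream code):
def _ExampleValidateCidrPrefix(prefix, ipv6=False):
  """Returns True if prefix is a plausible CIDR prefix length."""
  max_len = 128 if ipv6 else 32
  return isinstance(prefix, int) and 0 < prefix <= max_len

# e.g. _ExampleValidateCidrPrefix(24) -> True,
#      _ExampleValidateCidrPrefix(33) -> False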
3836 class LUClusterSetParams(LogicalUnit):
3837 """Change the parameters of the cluster.
3840 HPATH = "cluster-modify"
3841 HTYPE = constants.HTYPE_CLUSTER
3844 def CheckArguments(self):
3848 if self.op.uid_pool:
3849 uidpool.CheckUidPool(self.op.uid_pool)
3851 if self.op.add_uids:
3852 uidpool.CheckUidPool(self.op.add_uids)
3854 if self.op.remove_uids:
3855 uidpool.CheckUidPool(self.op.remove_uids)
3857 if self.op.master_netmask is not None:
3858 _ValidateNetmask(self.cfg, self.op.master_netmask)
3860 if self.op.diskparams:
3861 for dt_params in self.op.diskparams.values():
3862 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
      try:
        utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
      except errors.OpPrereqError, err:
        raise errors.OpPrereqError("While verifying diskparams options: %s" %
                                   err, errors.ECODE_INVAL)
3869 def ExpandNames(self):
3870 # FIXME: in the future maybe other cluster params won't require checking on
3871 # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }
    self.share_locks = {
      locking.LEVEL_NODE: 1,
      locking.LEVEL_INSTANCE: 1,
      locking.LEVEL_NODEGROUP: 1,
      }
3883 def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
3892 def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])
3899 def CheckPrereq(self):
3900 """Check prerequisites.
    This checks that the given parameters don't conflict and
    that the given volume group is valid.
3906 if self.op.vg_name is not None and not self.op.vg_name:
3907 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3908 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3909 " instances exist", errors.ECODE_INVAL)
3911 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3912 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3913 raise errors.OpPrereqError("Cannot disable drbd helper while"
3914 " drbd-based instances exist",
3917 node_list = self.owned_locks(locking.LEVEL_NODE)
    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
3922 for node in node_list:
3923 msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3936 if self.op.drbd_helper:
3937 # checks given drbd helper on all nodes
3938 helpers = self.rpc.call_drbd_helper(node_list)
3939 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
3946 " '%s': %s" % (node, msg),
3947 errors.ECODE_ENVIRON)
3948 node_helper = helpers[node].payload
3949 if node_helper != self.op.drbd_helper:
3950 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3951 (node, node_helper), errors.ECODE_ENVIRON)
3953 self.cluster = cluster = self.cfg.GetClusterInfo()
3954 # validate params changes
3955 if self.op.beparams:
3956 objects.UpgradeBeParams(self.op.beparams)
3957 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3958 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3960 if self.op.ndparams:
3961 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3962 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3964 # TODO: we need a more general way to handle resetting
3965 # cluster-level parameters to default values
3966 if self.new_ndparams["oob_program"] == "":
3967 self.new_ndparams["oob_program"] = \
3968 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3970 if self.op.hv_state:
3971 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3972 self.cluster.hv_state_static)
3973 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3974 for hv, values in new_hv_state.items())
3976 if self.op.disk_state:
3977 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3978 self.cluster.disk_state_static)
3979 self.new_disk_state = \
3980 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3981 for name, values in svalues.items()))
3982 for storage, svalues in new_disk_state.items())
    if self.op.ipolicy:
      self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
                                            group_policy=False)

      all_instances = self.cfg.GetAllInstancesInfo().values()
      violations = set()
3990 for group in self.cfg.GetAllNodeGroupsInfo().values():
3991 instances = frozenset([inst for inst in all_instances
3992 if compat.any(node in group.members
3993 for node in inst.all_nodes)])
3994 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3995 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
3996 new = _ComputeNewInstanceViolations(ipol,
3997 new_ipolicy, instances)
3999 violations.update(new)
      if violations:
        self.LogWarning("After the ipolicy change the following instances"
                        " violate it: %s",
                        utils.CommaJoin(utils.NiceSort(violations)))
4006 if self.op.nicparams:
4007 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4008 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4009 objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
4013 for instance in self.cfg.GetAllInstancesInfo().values():
4014 for nic_idx, nic in enumerate(instance.nics):
4015 params_copy = copy.deepcopy(nic.nicparams)
4016 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4018 # check parameter syntax
        try:
          objects.NIC.CheckParameterSyntax(params_filled)
4021 except errors.ConfigurationError, err:
4022 nic_errors.append("Instance %s, nic/%d: %s" %
4023 (instance.name, nic_idx, err))
4025 # if we're moving instances to routed, check that they have an ip
4026 target_mode = params_filled[constants.NIC_MODE]
4027 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4028 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4029 " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4032 "\n".join(nic_errors), errors.ECODE_INVAL)
4034 # hypervisor list/parameters
4035 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4036 if self.op.hvparams:
4037 for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)
4043 # disk template parameters
4044 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4045 if self.op.diskparams:
      for dt_name, dt_params in self.op.diskparams.items():
        if dt_name not in self.new_diskparams:
          self.new_diskparams[dt_name] = dt_params
        else:
          self.new_diskparams[dt_name].update(dt_params)
4052 # os hypervisor parameters
4053 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
4056 if os_name not in self.new_os_hvp:
4057 self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
4066 self.new_osp = objects.FillDict(cluster.osparams, {})
4067 if self.op.osparams:
4068 for os_name, osp in self.op.osparams.items():
4069 if os_name not in self.new_osp:
4070 self.new_osp[os_name] = {}
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)
4075 if not self.new_osp[os_name]:
4076 # we removed all parameters
4077 del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
4080 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4081 os_name, self.new_osp[os_name])
4083 # changes to the hypervisor list
4084 if self.op.enabled_hypervisors is not None:
4085 self.hv_list = self.op.enabled_hypervisors
4086 for hv in self.hv_list:
4087 # if the hypervisor doesn't already exist in the cluster
4088 # hvparams, we initialize it to empty, and then (in both
4089 # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4095 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors
4099 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4100 # either the enabled list has changed, or the parameters have, validate
4101 for hv_name, hv_params in self.new_hvparams.items():
4102 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4103 (self.op.enabled_hypervisors and
4104 hv_name in self.op.enabled_hypervisors)):
4105 # either this is a new hypervisor, or its parameters have changed
4106 hv_class = hypervisor.GetHypervisor(hv_name)
4107 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4108 hv_class.CheckParameterSyntax(hv_params)
4109 _CheckHVParams(self, node_list, hv_name, hv_params)
    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
4113 # defaults have already been checked in the above code-block
4114 for os_name, os_hvp in self.new_os_hvp.items():
4115 for hv_name, hv_params in os_hvp.items():
4116 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4117 # we need to fill in the new os_hvp on top of the actual hv_p
4118 cluster_defaults = self.new_hvparams.get(hv_name, {})
4119 new_osp = objects.FillDict(cluster_defaults, hv_params)
4120 hv_class = hypervisor.GetHypervisor(hv_name)
4121 hv_class.CheckParameterSyntax(new_osp)
4122 _CheckHVParams(self, node_list, hv_name, new_osp)
4124 if self.op.default_iallocator:
4125 alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
4128 if alloc_script is None:
4129 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)
4133 def Exec(self, feedback_fn):
4134 """Change the parameters of the cluster.
4137 if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
4141 if new_volume != self.cfg.GetVGName():
4142 self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
4146 if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
4150 if new_helper != self.cfg.GetDRBDHelper():
4151 self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
4155 if self.op.hvparams:
4156 self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
4159 if self.op.enabled_hypervisors is not None:
4160 self.cluster.hvparams = self.new_hvparams
4161 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4162 if self.op.beparams:
4163 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4164 if self.op.nicparams:
4165 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.ipolicy:
      self.cluster.ipolicy = self.new_ipolicy
4168 if self.op.osparams:
4169 self.cluster.osparams = self.new_osp
4170 if self.op.ndparams:
4171 self.cluster.ndparams = self.new_ndparams
4172 if self.op.diskparams:
4173 self.cluster.diskparams = self.new_diskparams
4174 if self.op.hv_state:
4175 self.cluster.hv_state_static = self.new_hv_state
4176 if self.op.disk_state:
4177 self.cluster.disk_state_static = self.new_disk_state
4179 if self.op.candidate_pool_size is not None:
4180 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4181 # we need to update the pool size here, otherwise the save will fail
4182 _AdjustCandidatePool(self, [])
4184 if self.op.maintain_node_health is not None:
4185 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4186 feedback_fn("Note: CONFD was disabled at build time, node health"
4187 " maintenance is not useful (still enabling it)")
4188 self.cluster.maintain_node_health = self.op.maintain_node_health
4190 if self.op.prealloc_wipe_disks is not None:
4191 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4193 if self.op.add_uids is not None:
4194 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4196 if self.op.remove_uids is not None:
4197 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4199 if self.op.uid_pool is not None:
4200 self.cluster.uid_pool = self.op.uid_pool
4202 if self.op.default_iallocator is not None:
4203 self.cluster.default_iallocator = self.op.default_iallocator
4205 if self.op.reserved_lvs is not None:
4206 self.cluster.reserved_lvs = self.op.reserved_lvs
4208 if self.op.use_external_mip_script is not None:
4209 self.cluster.use_external_mip_script = self.op.use_external_mip_script
    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
4228 if self.op.hidden_os:
4229 helper_os("hidden_os", self.op.hidden_os, "hidden")
4231 if self.op.blacklisted_os:
4232 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4234 if self.op.master_netdev:
4235 master_params = self.cfg.GetMasterNetworkParameters()
4236 ems = self.cfg.GetUseExternalMipScript()
4237 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4238 self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params, ems)
      result.Raise("Could not disable the master ip")
4242 feedback_fn("Changing master_netdev from %s to %s" %
4243 (master_params.netdev, self.op.master_netdev))
4244 self.cluster.master_netdev = self.op.master_netdev
4246 if self.op.master_netmask:
4247 master_params = self.cfg.GetMasterNetworkParameters()
4248 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4249 result = self.rpc.call_node_change_master_netmask(master_params.name,
4250 master_params.netmask,
4251 self.op.master_netmask,
                                                        master_params.ip,
                                                        master_params.netdev)
      if result.fail_msg:
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
        feedback_fn(msg)

      self.cluster.master_netmask = self.op.master_netmask
4260 self.cfg.Update(self.cluster, feedback_fn)
4262 if self.op.master_netdev:
4263 master_params = self.cfg.GetMasterNetworkParameters()
4264 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4265 self.op.master_netdev)
4266 ems = self.cfg.GetUseExternalMipScript()
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)
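# The parameter merging in CheckPrereq above repeatedly applies the same
# update-or-insert pattern: opcode overrides are folded into a copy of the
# cluster defaults without dropping keys the caller did not mention. A
# dict-only sketch of that pattern (the parameter names and values below
# are invented for the example):
def _ExampleMergeParams(defaults, overrides):
  """Returns defaults with per-group overrides applied."""
  merged = dict((name, dict(vals)) for (name, vals) in defaults.items())
  for name, vals in overrides.items():
    if name not in merged:
      merged[name] = dict(vals)
    else:
      merged[name].update(vals)
  return merged

# e.g. _ExampleMergeParams({"drbd": {"resync-rate": 1024, "barriers": "n"}},
#                          {"drbd": {"resync-rate": 2048}})
# keeps "barriers" and only overrides "resync-rate".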
4275 def _UploadHelper(lu, nodes, fname):
4276 """Helper for uploading a file and showing warnings.
4279 if os.path.exists(fname):
4280 result = lu.rpc.call_upload_file(nodes, fname)
4281 for to_node, to_result in result.items():
4282 msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)
4289 def _ComputeAncillaryFiles(cluster, redist):
4290 """Compute files external to Ganeti which need to be consistent.
4292 @type redist: boolean
4293 @param redist: Whether to include files which need to be redistributed
4296 # Compute files for all nodes
  files_all = set([
    pathutils.SSH_KNOWN_HOSTS_FILE,
4299 pathutils.CONFD_HMAC_KEY,
4300 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4301 pathutils.SPICE_CERT_FILE,
4302 pathutils.SPICE_CACERT_FILE,
    pathutils.RAPI_USERS_FILE,
    ])
  if redist:
    # we need to ship at least the RAPI certificate
    files_all.add(pathutils.RAPI_CERT_FILE)
  else:
    files_all.update(pathutils.ALL_CERT_FILES)
4311 files_all.update(ssconf.SimpleStore().GetFileList())
4313 if cluster.modify_etc_hosts:
4314 files_all.add(pathutils.ETC_HOSTS)
4316 if cluster.use_external_mip_script:
4317 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4319 # Files which are optional, these must:
4320 # - be present in one other category as well
4321 # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    pathutils.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()

  if not redist:
    files_mc.add(pathutils.CLUSTER_CONF_FILE)
  # File storage
  if (not redist and
      (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4335 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4336 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4338 # Files which should only be on VM-capable nodes
  files_vm = set(
    filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(
    filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4349 # Filenames in each category must be unique
4350 all_files_set = files_all | files_mc | files_vm
4351 assert (len(all_files_set) ==
4352 sum(map(len, [files_all, files_mc, files_vm]))), \
4353 "Found file listed in more than one file list"
4355 # Optional files must be present in one other category
4356 assert all_files_set.issuperset(files_opt), \
4357 "Optional file not in a different required list"
4359 # This one file should never ever be re-distributed via RPC
4360 assert not (redist and
4361 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4363 return (files_all, files_opt, files_mc, files_vm)
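# The assertions above enforce two invariants: a file may appear in only
# one required category, and every optional file must also belong to one
# of those categories. A stand-alone sketch of the same checks over plain
# sets (the file names are placeholders, not the real pathutils constants):
def _ExampleCheckFileCategories():
  files_all = frozenset(["/etc/ssh/ssh_known_hosts", "/etc/hosts"])
  files_mc = frozenset(["/var/lib/ganeti/config.data"])
  files_vm = frozenset(["/etc/xen/xend-config.sxp"])
  files_opt = frozenset(["/etc/hosts"])

  everything = files_all | files_mc | files_vm
  # No file may be listed in more than one category
  assert len(everything) == len(files_all) + len(files_mc) + len(files_vm)
  # Optional files must be present in some other category as well
  assert everything.issuperset(files_opt)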
4366 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4367 """Distribute additional files which are part of the cluster configuration.
4369 ConfigWriter takes care of distributing the config and ssconf files, but
4370 there are more files which should be distributed to all nodes. This function
4371 makes sure those are copied.
4373 @param lu: calling logical unit
4374 @param additional_nodes: list of nodes not in the config to distribute to
4375 @type additional_vm: boolean
4376 @param additional_vm: whether the additional nodes are vm-capable or not
4379 # Gather target nodes
4380 cluster = lu.cfg.GetClusterInfo()
4381 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4383 online_nodes = lu.cfg.GetOnlineNodeList()
4384 online_set = frozenset(online_nodes)
4385 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4387 if additional_nodes is not None:
4388 online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)
4392 # Never distribute to master node
4393 for nodelist in [online_nodes, vm_nodes]:
4394 if master_info.name in nodelist:
4395 nodelist.remove(master_info.name)
4398 (files_all, _, files_mc, files_vm) = \
4399 _ComputeAncillaryFiles(cluster, True)
4401 # Never re-distribute configuration file from here
4402 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4403 pathutils.CLUSTER_CONF_FILE in files_vm)
4404 assert not files_mc, "Master candidates not handled in this function"
  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)
4417 class LUClusterRedistConf(NoHooksLU):
4418 """Force the redistribution of cluster configuration.
4420 This is a very simple LU.
4425 def ExpandNames(self):
4426 self.needed_locks = {
4427 locking.LEVEL_NODE: locking.ALL_SET,
4429 self.share_locks[locking.LEVEL_NODE] = 1
4431 def Exec(self, feedback_fn):
4432 """Redistribute the configuration.
4435 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4436 _RedistributeAncillaryFiles(self)
4439 class LUClusterActivateMasterIp(NoHooksLU):
4440 """Activate the master IP on the master node.
4443 def Exec(self, feedback_fn):
4444 """Activate the master IP.
4447 master_params = self.cfg.GetMasterNetworkParameters()
4448 ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                   master_params, ems)
    result.Raise("Could not activate the master IP")
4454 class LUClusterDeactivateMasterIp(NoHooksLU):
4455 """Deactivate the master IP on the master node.
4458 def Exec(self, feedback_fn):
4459 """Deactivate the master IP.
4462 master_params = self.cfg.GetMasterNetworkParameters()
4463 ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not deactivate the master IP")
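# Both LUs above follow the same shape: read the master network parameters,
# honour the external master-IP script setting, and delegate to a single
# node RPC whose failure is fatal. A condensed sketch of that shared flow
# (assumption: the boolean "activate" switch is invented here; upstream
# keeps the two LUs separate):
def _ExampleToggleMasterIp(lu, activate):
  """Activates or deactivates the master IP via a node RPC."""
  master_params = lu.cfg.GetMasterNetworkParameters()
  ems = lu.cfg.GetUseExternalMipScript()
  if activate:
    result = lu.rpc.call_node_activate_master_ip(master_params.name,
                                                 master_params, ems)
    result.Raise("Could not activate the master IP")
  else:
    result = lu.rpc.call_node_deactivate_master_ip(master_params.name,
                                                   master_params, ems)
    result.Raise("Could not deactivate the master IP")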
4469 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4470 """Sleep and poll for an instance's disk to sync.
  if not instance.disks or (disks is not None and not disks):
    return True
4476 disks = _ExpandCheckDisks(instance, disks)
4479 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4481 node = instance.primary_node
  for dev in disks:
    lu.cfg.SetDiskID(dev, node)
4486 # TODO: Convert to utils.Retry
  retries = 0
  degr_retries = 10  # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
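# The loop above polls mirror status until every device reports clean,
# tolerating up to ten consecutive RPC failures and a short burst of
# "done but degraded" readings before trusting the result. A distilled
# retry skeleton under the same assumptions (poll() and its status object
# are hypothetical stand-ins for the blockdev RPC):
def _ExampleSyncWaitLoop(poll, max_degr_retries=10):
  """Returns True once polling reports a clean, stable sync state."""
  degr_left = max_degr_retries
  while True:
    status = poll()  # assumed to expose .done, .degraded and .eta
    if status.done and status.degraded and degr_left > 0:
      # done-but-degraded may be transient; retry briefly before believing it
      degr_left -= 1
      time.sleep(1)
      continue
    if status.done:
      return not status.degraded
    time.sleep(min(60, status.eta))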
4544 def _BlockdevFind(lu, node, dev, instance):
4545 """Wrapper around call_blockdev_find to annotate diskparams.
4547 @param lu: A reference to the lu object
4548 @param node: The node to call out
4549 @param dev: The device to find
4550 @param instance: The instance object the device belongs to
4551 @returns The result of the rpc call
4554 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4555 return lu.rpc.call_blockdev_find(node, disk)
4558 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4559 """Wrapper around L{_CheckDiskConsistencyInner}.
4562 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4563 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4567 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4569 """Check that mirrors are not degraded.
4571 @attention: The device has to be annotated already.
4573 The ldisk parameter, if True, will change the test from the
4574 is_degraded attribute (which represents overall non-ok status for
4575 the device(s)) to the ldisk (representing the local storage status).
  lu.cfg.SetDiskID(dev, node)

  result = True
4582 if on_primary or dev.AssembleOnSecondary():
4583 rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded
  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistencyInner(lu, instance, child,
                                                     node, on_primary)

  return result
4605 class LUOobCommand(NoHooksLU):
4606 """Logical unit for OOB handling.
4610 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4612 def ExpandNames(self):
4613 """Gather locks we need.
4616 if self.op.node_names:
4617 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4618 lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET
4622 self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }
4626 def CheckPrereq(self):
4627 """Check prerequisites.
4630 - the node exists in the configuration
4633 Any errors are signaled by raising errors.OpPrereqError.
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()
4639 assert self.op.power_delay >= 0.0
4641 if self.op.node_names:
4642 if (self.op.command in self._SKIP_MASTER and
4643 self.master_node in self.op.node_names):
4644 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4645 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4647 if master_oob_handler:
4648 additional_text = ("run '%s %s %s' if you want to operate on the"
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"
4655 raise errors.OpPrereqError(("Operating on the master node %s is not"
4656 " allowed for %s; %s") %
4657 (self.master_node, self.op.command,
4658 additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
4661 if self.op.command in self._SKIP_MASTER:
4662 self.op.node_names.remove(self.master_node)
4664 if self.op.command in self._SKIP_MASTER:
4665 assert self.master_node not in self.op.node_names
4667 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)
4674 if (not self.op.ignore_status and
4675 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4676 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4677 " not marked offline") % node_name,
4680 def Exec(self, feedback_fn):
4681 """Execute OOB and return result if we expect any.
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does"
                               " not match actual power state (%s)"),
                              node.powered, node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

      if (self.op.command == constants.OOB_POWER_ON and
          idx < len(self.nodes) - 1):
        time.sleep(self.op.power_delay)

    return ret
4748 def _CheckPayload(self, result):
4749 """Checks if the payload is valid.
4751 @param result: RPC result
4752 @raises errors.OpExecError: If payload is not valid
    errs = []
    if self.op.command == constants.OOB_HEALTH:
4757 if not isinstance(result.payload, list):
4758 errs.append("command 'health' is expected to return a list but got %s" %
4759 type(result.payload))
      else:
        for item, status in result.payload:
4762 if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))
4766 if self.op.command == constants.OOB_POWER_STATUS:
4767 if not isinstance(result.payload, dict):
4768 errs.append("power-status is expected to return a dict but got %s" %
4769 type(result.payload))
4771 if self.op.command in [
4772 constants.OOB_POWER_ON,
4773 constants.OOB_POWER_OFF,
4774 constants.OOB_POWER_CYCLE,
4776 if result.payload is not None:
4777 errs.append("%s is expected to not return payload but got '%s'" %
4778 (self.op.command, result.payload))
    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s"
                               % utils.CommaJoin(errs))
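# Stand-alone sketch of the per-command payload contract enforced above
# (assumption: plain string literals stand in for the constants module;
# upstream uses constants.OOB_* and constants.OOB_STATUSES). Health returns
# a list of (item, status) pairs, power-status a dict, and the remaining
# power commands no payload at all:
def _ExampleCheckOobPayload(command, payload):
  """Returns a list of validation errors for an OOB result payload."""
  errs = []
  if command == "health" and not isinstance(payload, list):
    errs.append("command 'health' is expected to return a list")
  elif command == "power-status" and not isinstance(payload, dict):
    errs.append("power-status is expected to return a dict")
  elif (command in ("power-on", "power-off", "power-cycle") and
        payload is not None):
    errs.append("%s is expected to not return payload" % command)
  return errs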
4785 class _OsQuery(_QueryBase):
4786 FIELDS = query.OS_FIELDS
4788 def ExpandNames(self, lu):
4789 # Lock all nodes in shared mode
4790 # Temporary removal of locks, should be reverted later
4791 # TODO: reintroduce locks when they are lighter-weight
4792 lu.needed_locks = {}
4793 #self.share_locks[locking.LEVEL_NODE] = 1
4794 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4796 # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET
4802 self.do_locking = self.use_locking
  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
4808 def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary
4811 @param rlist: a map with node names as keys and OS objects as values
4814 @return: a dictionary with osnames as keys and as value another
4815 map, with nodes as keys and tuples of (path, status, diagnose,
4816 variants, parameters, api_versions) as values, eg::
4818 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4819 (/srv/..., False, "invalid api")],
4820 "node2": [(/srv/..., True, "", [], [])]}
4825 # we build here the list of nodes that didn't fail the RPC (at RPC
4826 # level), so that nodes with a non-responding node daemon don't
4827 # make all OSes invalid
4828 good_nodes = [node_name for node_name in rlist
4829 if not rlist[node_name].fail_msg]
4830 for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
4833 for (name, path, status, diagnose, variants,
4834 params, api_versions) in nr.payload:
4835 if name not in all_os:
4836 # build a list of nodes for this os containing empty lists
4837 # for each node in node_list
        all_os[name] = {}
        for nname in good_nodes:
4840 all_os[name][nname] = []
4841 # convert params from [name, help] to (name, help)
4842 params = [tuple(v) for v in params]
4843 all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))

    return all_os
4847 def _GetQueryData(self, lu):
4848 """Computes the list of nodes and their attributes.
4851 # Locking is not used
4852 assert not (compat.any(lu.glm.is_owned(level)
4853 for level in locking.LEVELS
4854 if level != locking.LEVEL_CLUSTER) or
4855 self.do_locking or self.use_locking)
4857 valid_nodes = [node.name
4858 for node in lu.cfg.GetAllNodesInfo().values()
4859 if not node.offline and node.vm_capable]
4860 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4861 cluster = lu.cfg.GetClusterInfo()
    data = {}

    for (os_name, os_data) in pol.items():
4866 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4867 hidden=(os_name in cluster.hidden_os),
4868 blacklisted=(os_name in cluster.blacklisted_os))
      variants = set()
      parameters = set()
      api_versions = set()
4874 for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)
4891 info.variants = list(variants)
4892 info.parameters = list(parameters)
4893 info.api_versions = list(api_versions)
4895 data[os_name] = info
4897 # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]
4902 class LUOsDiagnose(NoHooksLU):
4903 """Logical unit for OS diagnose/query.
  @staticmethod
  def _BuildFilter(fields, names):
4910 """Builds a filter for querying OSes.
4913 name_filter = qlang.MakeSimpleFilter("name", names)
4915 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4916 # respective field is not requested
4917 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4918 for fname in ["hidden", "blacklisted"]
4919 if fname not in fields]
4920 if "valid" not in fields:
4921 status_filter.append([qlang.OP_TRUE, "valid"])
    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None
4928 if name_filter and status_filter:
4929 return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter
4935 def CheckArguments(self):
4936 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4937 self.op.output_fields, False)
4939 def ExpandNames(self):
4940 self.oq.ExpandNames(self)
4942 def Exec(self, feedback_fn):
4943 return self.oq.OldStyleQuery(self)
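# The filter produced by _BuildFilter above is a nested qlang expression.
# A hedged illustration of the legacy behaviour for the default field set
# (runnable sketch; "myos" would be a made-up OS name, and the helper
# itself is not part of upstream Ganeti):
def _ExampleLegacyOsFilter(name):
  """Builds a filter hiding hidden/blacklisted/invalid OSes by default."""
  name_filter = qlang.MakeSimpleFilter("name", [name])
  status_filter = [qlang.OP_AND,
                   [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
                   [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
                   [qlang.OP_TRUE, "valid"]]
  return [qlang.OP_AND, name_filter, status_filter]

# e.g. _ExampleLegacyOsFilter("myos") nests the name filter and the three
# status conditions under a single top-level OP_AND.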
4946 class LUNodeRemove(LogicalUnit):
4947 """Logical unit for removing a node.
4950 HPATH = "node-remove"
4951 HTYPE = constants.HTYPE_NODE
4953 def BuildHooksEnv(self):
4958 "OP_TARGET": self.op.node_name,
4959 "NODE_NAME": self.op.node_name,
4962 def BuildHooksNodes(self):
4963 """Build hooks nodes.
4965 This doesn't run on the target node in the pre phase as a failed
4966 node would then be impossible to remove.
4969 all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      pass
4974 return (all_nodes, all_nodes)
4976 def CheckPrereq(self):
4977 """Check prerequisites.
4980 - the node exists in the configuration
4981 - it does not have primary or secondary instances
4982 - it's not the master
4984 Any errors are signaled by raising errors.OpPrereqError.
4987 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4988 node = self.cfg.GetNodeInfo(self.op.node_name)
4989 assert node is not None
4991 masternode = self.cfg.GetMasterNode()
4992 if node.name == masternode:
4993 raise errors.OpPrereqError("Node is the master node, failover to another"
4994 " node is required", errors.ECODE_INVAL)
4996 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4997 if node.name in instance.all_nodes:
4998 raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node
5004 def Exec(self, feedback_fn):
5005 """Removes the node from the cluster.
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)
5012 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"
5017 # Promote nodes to master candidate as needed
5018 _AdjustCandidatePool(self, exceptions=[node.name])
5019 self.context.RemoveNode(node.name)
5021 # Run post hooks on the node before it's removed
5022 _RunPostHook(self, node.name)
5024 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5025 msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)
5030 # Remove node from our /etc/hosts
5031 if self.cfg.GetClusterInfo().modify_etc_hosts:
5032 master_node = self.cfg.GetMasterNode()
5033 result = self.rpc.call_etc_hosts_modify(master_node,
                                               constants.ETC_HOSTS_REMOVE,
                                               node.name, None)
5036 result.Raise("Can't update hosts file with new host data")
5037 _RedistributeAncillaryFiles(self)
5040 class _NodeQuery(_QueryBase):
5041 FIELDS = query.NODE_FIELDS
5043 def ExpandNames(self, lu):
5044 lu.needed_locks = {}
5045 lu.share_locks = _ShareAll()
    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET
5052 self.do_locking = (self.use_locking and
5053 query.NQ_LIVE in self.requested_data)
    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
  def DeclareLocks(self, lu, level):
    pass
5062 def _GetQueryData(self, lu):
5063 """Computes the list of nodes and their attributes.
5066 all_info = lu.cfg.GetAllNodesInfo()
5068 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5070 # Gather data as requested
5071 if query.NQ_LIVE in self.requested_data:
5072 # filter out non-vm_capable nodes
5073 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5075 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5076 [lu.cfg.GetHypervisorType()])
5077 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5078 for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None
5083 if query.NQ_INST in self.requested_data:
5084 node_to_primary = dict([(name, set()) for name in nodenames])
5085 node_to_secondary = dict([(name, set()) for name in nodenames])
5087 inst_data = lu.cfg.GetAllInstancesInfo()
5089 for inst in inst_data.values():
5090 if inst.primary_node in node_to_primary:
5091 node_to_primary[inst.primary_node].add(inst.name)
5092 for secnode in inst.secondary_nodes:
5093 if secnode in node_to_secondary:
5094 node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
5097 node_to_secondary = None
5099 if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None
5105 if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}
5110 return query.NodeQueryData([all_info[name] for name in nodenames],
5111 live_data, lu.cfg.GetMasterNode(),
5112 node_to_primary, node_to_secondary, groups,
5113 oob_support, lu.cfg.GetClusterInfo())
5116 class LUNodeQuery(NoHooksLU):
5117 """Logical unit for querying nodes.
5120 # pylint: disable=W0142
5123 def CheckArguments(self):
5124 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5125 self.op.output_fields, self.op.use_locking)
5127 def ExpandNames(self):
5128 self.nq.ExpandNames(self)
5130 def DeclareLocks(self, level):
5131 self.nq.DeclareLocks(self, level)
5133 def Exec(self, feedback_fn):
5134 return self.nq.OldStyleQuery(self)
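# _NodeQuery._GetQueryData above derives two reverse mappings from the
# instance list: node name -> set of instances with that primary node, and
# node name -> set of instances using it as a secondary. A stand-alone
# sketch of that aggregation (the instance objects only need .name,
# .primary_node and .secondary_nodes; everything here is illustrative):
def _ExampleNodeToInstances(instances, nodenames):
  """Returns (node_to_primary, node_to_secondary) name mappings."""
  node_to_primary = dict((name, set()) for name in nodenames)
  node_to_secondary = dict((name, set()) for name in nodenames)
  for inst in instances:
    if inst.primary_node in node_to_primary:
      node_to_primary[inst.primary_node].add(inst.name)
    for secnode in inst.secondary_nodes:
      if secnode in node_to_secondary:
        node_to_secondary[secnode].add(inst.name)
  return (node_to_primary, node_to_secondary)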
5137 class LUNodeQueryvols(NoHooksLU):
5138 """Logical unit for getting volumes on node(s).
5142 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5143 _FIELDS_STATIC = utils.FieldSet("node")
5145 def CheckArguments(self):
5146 _CheckOutputFields(static=self._FIELDS_STATIC,
5147 dynamic=self._FIELDS_DYNAMIC,
5148 selected=self.op.output_fields)
5150 def ExpandNames(self):
5151 self.share_locks = _ShareAll()
5152 self.needed_locks = {}
5154 if not self.op.nodes:
5155 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
5160 def Exec(self, feedback_fn):
5161 """Computes the list of nodes and their attributes.
5164 nodenames = self.owned_locks(locking.LEVEL_NODE)
5165 volumes = self.rpc.call_node_volumes(nodenames)
5167 ilist = self.cfg.GetAllInstancesInfo()
5168 vol2inst = _MapInstanceDisksToNodes(ilist.values())
    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue
5180 node_vols = sorted(nresult.payload,
5181 key=operator.itemgetter("dev"))
      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
5200 node_output.append(str(val))
        output.append(node_output)

    return output
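# The "instance" column above is resolved through vol2inst, a mapping from
# (node, "vg/lv_name") pairs to the owning instance, with "-" for volumes
# no instance owns. A dict-only sketch of that lookup (names invented):
def _ExampleVolumeOwner(vol2inst, node, vol):
  """Returns the instance owning an LV, or "-" if unowned."""
  return vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")

# e.g. _ExampleVolumeOwner({("node1", "xenvg/disk0"): "inst1"},
#                          "node1", {"vg": "xenvg", "name": "disk0"})
#      -> "inst1"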
5207 class LUNodeQueryStorage(NoHooksLU):
5208 """Logical unit for getting information on storage units on node(s).
5211 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5214 def CheckArguments(self):
5215 _CheckOutputFields(static=self._FIELDS_STATIC,
5216 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5217 selected=self.op.output_fields)
5219 def ExpandNames(self):
5220 self.share_locks = _ShareAll()
5221 self.needed_locks = {}
    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5229 def Exec(self, feedback_fn):
5230 """Computes the list of nodes and their attributes.
5233 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5235 # Always get name to sort by
5236 if constants.SF_NAME in self.op.output_fields:
5237 fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields
5241 # Never ask for node or type as it's only known to the LU
5242 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5243 while extra in fields:
5244 fields.remove(extra)
5246 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5247 name_idx = field_idx[constants.SF_NAME]
5249 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5250 data = self.rpc.call_storage_list(self.nodes,
5251 self.op.storage_type, st_args,
5252 self.op.name, fields)
    result = []
    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue
5266 rows = dict([(row[name_idx], row) for row in nresult.payload])
5268 for name in utils.NiceSort(rows.keys()):
        row = rows[name]
        out = []
        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)
          out.append(val)
        result.append(out)

    return result
5290 class _InstanceQuery(_QueryBase):
5291 FIELDS = query.INSTANCE_FIELDS
5293 def ExpandNames(self, lu):
5294 lu.needed_locks = {}
5295 lu.share_locks = _ShareAll()
    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET
5302 self.do_locking = (self.use_locking and
5303 query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5310 self.do_grouplocks = (self.do_locking and
5311 query.IQ_NODES in self.requested_data)
5313 def DeclareLocks(self, lu, level):
5315 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5316 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5318 # Lock all groups used by instances optimistically; this requires going
5319 # via the node before it's locked, requiring verification later on
5320 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5322 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5323 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5324 elif level == locking.LEVEL_NODE:
5325 lu._LockInstancesNodes() # pylint: disable=W0212
  @staticmethod
  def _CheckGroupLocks(lu):
5329 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5330 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5332 # Check if node groups for locked instances are still correct
5333 for instance_name in owned_instances:
5334 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5336 def _GetQueryData(self, lu):
5337 """Computes the list of instances and their attributes.
5340 if self.do_grouplocks:
5341 self._CheckGroupLocks(lu)
5343 cluster = lu.cfg.GetClusterInfo()
5344 all_info = lu.cfg.GetAllInstancesInfo()
5346 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5348 instance_list = [all_info[name] for name in instance_names]
5349 nodes = frozenset(itertools.chain(*(inst.all_nodes
5350 for inst in instance_list)))
5351 hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()
5356 # Gather data as requested
5357 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
5368 elif result.payload:
5369 for inst in result.payload:
5370 if inst in all_info:
5371 if all_info[inst].primary_node == name:
5372 live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
5376 # orphan instance; we don't list it here as we don't
5377 # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = None
5384 if query.IQ_DISKUSAGE in self.requested_data:
5385 gmi = ganeti.masterd.instance
5386 disk_usage = dict((inst.name,
5387 gmi.ComputeDiskSize(inst.disk_template,
5388 [{constants.IDISK_SIZE: disk.size}
5389 for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None
5394 if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
5397 if inst.name in live_data:
5398 # Instance is running
5399 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None
5406 if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
5409 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5410 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None
5417 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5418 disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)
5423 class LUQuery(NoHooksLU):
5424 """Query for resources/items of a certain kind.
5427 # pylint: disable=W0142
5430 def CheckArguments(self):
5431 qcls = _GetQueryImplementation(self.op.what)
5433 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5435 def ExpandNames(self):
5436 self.impl.ExpandNames(self)
5438 def DeclareLocks(self, level):
5439 self.impl.DeclareLocks(self, level)
5441 def Exec(self, feedback_fn):
5442 return self.impl.NewStyleQuery(self)
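# LUQuery only wires up locking and delegates the real work to a query
# implementation chosen from self.op.what. A minimal sketch of such a
# dispatch (the table and helper below are invented for illustration;
# upstream resolves this via _GetQueryImplementation and constants.QR_*):
_EXAMPLE_QUERY_IMPLS = {
  "os": _OsQuery,
  "node": _NodeQuery,
  "instance": _InstanceQuery,
  }

def _ExampleGetQueryImpl(what):
  """Returns the query class handling a resource kind."""
  try:
    return _EXAMPLE_QUERY_IMPLS[what]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % what,
                               errors.ECODE_INVAL)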
5445 class LUQueryFields(NoHooksLU):
5446 """Query for resources/items of a certain kind.
5449 # pylint: disable=W0142
5452 def CheckArguments(self):
5453 self.qcls = _GetQueryImplementation(self.op.what)
5455 def ExpandNames(self):
5456 self.needed_locks = {}
5458 def Exec(self, feedback_fn):
5459 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5462 class LUNodeModifyStorage(NoHooksLU):
5463 """Logical unit for modifying a storage volume on a node.
5468 def CheckArguments(self):
5469 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5471 storage_type = self.op.storage_type
    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)
    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)
5487 def ExpandNames(self):
5488 self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }
5492 def Exec(self, feedback_fn):
5493 """Computes the list of nodes and their attributes.
5496 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5497 result = self.rpc.call_storage_modify(self.op.node_name,
5498 self.op.storage_type, st_args,
5499 self.op.name, self.op.changes)
5500 result.Raise("Failed to modify storage unit '%s' on %s" %
5501 (self.op.name, self.op.node_name))
5504 class LUNodeAdd(LogicalUnit):
5505 """Logical unit for adding node to the cluster.
5509 HTYPE = constants.HTYPE_NODE
5510 _NFLAGS = ["master_capable", "vm_capable"]
5512 def CheckArguments(self):
5513 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5514 # validate/normalize the node name
5515 self.hostname = netutils.GetHostname(name=self.op.node_name,
5516 family=self.primary_ip_family)
5517 self.op.node_name = self.hostname.name
5519 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)
5523 if self.op.readd and self.op.group:
5524 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5525 " being readded", errors.ECODE_INVAL)
5527 def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
5534 "OP_TARGET": self.op.node_name,
5535 "NODE_NAME": self.op.node_name,
5536 "NODE_PIP": self.op.primary_ip,
5537 "NODE_SIP": self.op.secondary_ip,
5538 "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
5542 def BuildHooksNodes(self):
5543 """Build hooks nodes.
5546 # Exclude added node
5547 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5548 post_nodes = pre_nodes + [self.op.node_name, ]
5550 return (pre_nodes, post_nodes)
5552 def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster
5560 Any errors are signaled by raising errors.OpPrereqError.
    """
    cfg = self.cfg
    hostname = self.hostname
5565 node = hostname.name
5566 primary_ip = self.op.primary_ip = hostname.ip
5567 if self.op.secondary_ip is None:
5568 if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a"
                                   " valid IPv4 address must be given as"
                                   " secondary", errors.ECODE_INVAL)
5572 self.op.secondary_ip = primary_ip
5574 secondary_ip = self.op.secondary_ip
5575 if not netutils.IP4Address.IsValid(secondary_ip):
5576 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5577 " address" % secondary_ip, errors.ECODE_INVAL)
5579 node_list = cfg.GetNodeList()
5580 if not self.op.readd and node in node_list:
5581 raise errors.OpPrereqError("Node %s is already in the configuration" %
5582 node, errors.ECODE_EXISTS)
5583 elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)
5587 self.changed_primary_ip = False
5589 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5590 if self.op.readd and node == existing_node_name:
5591 if existing_node.secondary_ip != secondary_ip:
5592 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5593 " address configuration as before",
5595 if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue
5600 if (existing_node.primary_ip == primary_ip or
5601 existing_node.secondary_ip == primary_ip or
5602 existing_node.primary_ip == secondary_ip or
5603 existing_node.secondary_ip == secondary_ip):
5604 raise errors.OpPrereqError("New node ip address(es) conflict with"
5605 " existing node %s" % existing_node.name,
5606 errors.ECODE_NOTUNIQUE)
5608 # After this 'if' block, None is no longer a valid value for the
5609 # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
5612 assert old_node is not None, "Can't retrieve locked node %s" % node
5613 for attr in self._NFLAGS:
5614 if getattr(self.op, attr) is None:
5615 setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
5618 if getattr(self.op, attr) is None:
5619 setattr(self.op, attr, True)
5621 if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
5624 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5625 " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)
5629 # check that the type of the node (single versus dual homed) is the
5630 # same as for the master
5631 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5632 master_singlehomed = myself.secondary_ip == myself.primary_ip
5633 newbie_singlehomed = secondary_ip == primary_ip
5634 if master_singlehomed != newbie_singlehomed:
5635 if master_singlehomed:
5636 raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)
5644 # checks reachability
5645 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5646 raise errors.OpPrereqError("Node not reachable by ping",
5647 errors.ECODE_ENVIRON)
5649 if not newbie_singlehomed:
5650 # check reachability from my secondary ip to newbie's secondary ip
5651 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5652 source=myself.secondary_ip):
5653 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5654 " based ping to node daemon port",
5655 errors.ECODE_ENVIRON)
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False
5668 self.new_node = old_node
5670 node_group = cfg.LookupNodeGroup(self.op.group)
5671 self.new_node = objects.Node(name=node,
5672 primary_ip=primary_ip,
5673 secondary_ip=secondary_ip,
5674 master_candidate=self.master_candidate,
5675 offline=False, drained=False,
5676 group=node_group, ndparams={})
5678 if self.op.ndparams:
5679 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5681 if self.op.hv_state:
5682 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5684 if self.op.disk_state:
5685 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5687 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5688 # it a property on the base class.
5689 result = rpc.DnsOnlyRunner().call_version([node])[node]
5690 result.Raise("Can't get version information from node %s" % node)
5691 if constants.PROTOCOL_VERSION == result.payload:
5692 logging.info("Communication to node %s fine, sw version %s match",
5693 node, result.payload)
5694 else:
5695 raise errors.OpPrereqError("Version mismatch master version %s,"
5696 " node version %s" %
5697 (constants.PROTOCOL_VERSION, result.payload),
5698 errors.ECODE_ENVIRON)
5700 def Exec(self, feedback_fn):
5701 """Adds the new node to the cluster.
5703 """
5704 new_node = self.new_node
5705 node = new_node.name
5707 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5708 "Not owning BGL"
5710 # We are adding a new node, so we assume it's powered
5711 new_node.powered = True
5713 # for re-adds, reset the offline/drained/master-candidate flags;
5714 # we need to reset here, otherwise offline would prevent RPC calls
5715 # later in the procedure; this also means that if the re-add
5716 # fails, we are left with a non-offlined, broken node
5717 if self.op.readd:
5718 new_node.drained = new_node.offline = False # pylint: disable=W0201
5719 self.LogInfo("Readding a node, the offline/drained flags were reset")
5720 # if we demote the node, we do cleanup later in the procedure
5721 new_node.master_candidate = self.master_candidate
5722 if self.changed_primary_ip:
5723 new_node.primary_ip = self.op.primary_ip
5725 # copy the master/vm_capable flags
5726 for attr in self._NFLAGS:
5727 setattr(new_node, attr, getattr(self.op, attr))
5729 # notify the user about any possible mc promotion
5730 if new_node.master_candidate:
5731 self.LogInfo("Node will be a master candidate")
5733 if self.op.ndparams:
5734 new_node.ndparams = self.op.ndparams
5736 new_node.ndparams = {}
5738 if self.op.hv_state:
5739 new_node.hv_state_static = self.new_hv_state
5741 if self.op.disk_state:
5742 new_node.disk_state_static = self.new_disk_state
5744 # Add node to our /etc/hosts, and add key to known_hosts
5745 if self.cfg.GetClusterInfo().modify_etc_hosts:
5746 master_node = self.cfg.GetMasterNode()
5747 result = self.rpc.call_etc_hosts_modify(master_node,
5748 constants.ETC_HOSTS_ADD,
5749 self.hostname.name,
5750 self.hostname.ip)
5751 result.Raise("Can't update hosts file with new host data")
5753 if new_node.secondary_ip != new_node.primary_ip:
5754 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5755 False)
5757 node_verify_list = [self.cfg.GetMasterNode()]
5758 node_verify_param = {
5759 constants.NV_NODELIST: ([node], {}),
5760 # TODO: do a node-net-test as well?
5761 }
5763 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5764 self.cfg.GetClusterName())
5765 for verifier in node_verify_list:
5766 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5767 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5768 if nl_payload:
5769 for failed in nl_payload:
5770 feedback_fn("ssh/hostname verification failed"
5771 " (checking from %s): %s" %
5772 (verifier, nl_payload[failed]))
5773 raise errors.OpExecError("ssh/hostname verification failed")
5775 if self.op.readd:
5776 _RedistributeAncillaryFiles(self)
5777 self.context.ReaddNode(new_node)
5778 # make sure we redistribute the config
5779 self.cfg.Update(new_node, feedback_fn)
5780 # and make sure the new node will not have old files around
5781 if not new_node.master_candidate:
5782 result = self.rpc.call_node_demote_from_mc(new_node.name)
5783 msg = result.fail_msg
5784 if msg:
5785 self.LogWarning("Node failed to demote itself from master"
5786 " candidate status: %s" % msg)
5787 else:
5788 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5789 additional_vm=self.op.vm_capable)
5790 self.context.AddNode(new_node, self.proc.GetECId())
5793 class LUNodeSetParams(LogicalUnit):
5794 """Modifies the parameters of a node.
5796 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5797 to the node role (as _ROLE_*)
5798 @cvar _R2F: a dictionary from node role to tuples of flags
5799 @cvar _FLAGS: a list of attribute names corresponding to the flags
5800 """
5802 HPATH = "node-modify"
5803 HTYPE = constants.HTYPE_NODE
5805 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5806 _F2R = {
5807 (True, False, False): _ROLE_CANDIDATE,
5808 (False, True, False): _ROLE_DRAINED,
5809 (False, False, True): _ROLE_OFFLINE,
5810 (False, False, False): _ROLE_REGULAR,
5811 }
5812 _R2F = dict((v, k) for k, v in _F2R.items())
5813 _FLAGS = ["master_candidate", "drained", "offline"]
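# Illustrative sketch (not part of the original module): _F2R maps the
# (master_candidate, drained, offline) flag tuple to a role, and _R2F is
# the inverse mapping back to a flag tuple, e.g.:
#
#   >>> LUNodeSetParams._F2R[(True, False, False)] == \
#   ...     LUNodeSetParams._ROLE_CANDIDATE
#   True
#   >>> LUNodeSetParams._R2F[LUNodeSetParams._ROLE_DRAINED]
#   (False, True, False)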
5815 def CheckArguments(self):
5816 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5817 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5818 self.op.master_capable, self.op.vm_capable,
5819 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5820 self.op.disk_state]
5821 if all_mods.count(None) == len(all_mods):
5822 raise errors.OpPrereqError("Please pass at least one modification",
5823 errors.ECODE_INVAL)
5824 if all_mods.count(True) > 1:
5825 raise errors.OpPrereqError("Can't set the node into more than one"
5826 " state at the same time",
5827 errors.ECODE_INVAL)
5829 # Boolean value that tells us whether we might be demoting from MC
5830 self.might_demote = (self.op.master_candidate is False or
5831 self.op.offline is True or
5832 self.op.drained is True or
5833 self.op.master_capable is False)
5835 if self.op.secondary_ip:
5836 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5837 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5838 " address" % self.op.secondary_ip,
5839 errors.ECODE_INVAL)
5841 self.lock_all = self.op.auto_promote and self.might_demote
5842 self.lock_instances = self.op.secondary_ip is not None
5844 def _InstanceFilter(self, instance):
5845 """Filter for getting affected instances.
5847 """
5848 return (instance.disk_template in constants.DTS_INT_MIRROR and
5849 self.op.node_name in instance.all_nodes)
5851 def ExpandNames(self):
5852 if self.lock_all:
5853 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5854 else:
5855 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5857 # Since modifying a node can have severe effects on currently running
5858 # operations the resource lock is at least acquired in shared mode
5859 self.needed_locks[locking.LEVEL_NODE_RES] = \
5860 self.needed_locks[locking.LEVEL_NODE]
5862 # Get node resource and instance locks in shared mode; they are not used
5863 # for anything but read-only access
5864 self.share_locks[locking.LEVEL_NODE_RES] = 1
5865 self.share_locks[locking.LEVEL_INSTANCE] = 1
5867 if self.lock_instances:
5868 self.needed_locks[locking.LEVEL_INSTANCE] = \
5869 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5871 def BuildHooksEnv(self):
5872 """Build hooks env.
5874 This runs on the master node.
5876 """
5877 return {
5878 "OP_TARGET": self.op.node_name,
5879 "MASTER_CANDIDATE": str(self.op.master_candidate),
5880 "OFFLINE": str(self.op.offline),
5881 "DRAINED": str(self.op.drained),
5882 "MASTER_CAPABLE": str(self.op.master_capable),
5883 "VM_CAPABLE": str(self.op.vm_capable),
5884 }
5886 def BuildHooksNodes(self):
5887 """Build hooks nodes.
5889 """
5890 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5891 return (nl, nl)
5893 def CheckPrereq(self):
5894 """Check prerequisites.
5896 This only checks the instance list against the existing names.
5898 """
5899 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5901 if self.lock_instances:
5902 affected_instances = \
5903 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5905 # Verify instance locks
5906 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5907 wanted_instances = frozenset(affected_instances.keys())
5908 if wanted_instances - owned_instances:
5909 raise errors.OpPrereqError("Instances affected by changing node %s's"
5910 " secondary IP address have changed since"
5911 " locks were acquired, wanted '%s', have"
5912 " '%s'; retry the operation" %
5913 (node.name,
5914 utils.CommaJoin(wanted_instances),
5915 utils.CommaJoin(owned_instances)),
5916 errors.ECODE_STATE)
5917 else:
5918 affected_instances = None
5920 if (self.op.master_candidate is not None or
5921 self.op.drained is not None or
5922 self.op.offline is not None):
5923 # we can't change the master's node flags
5924 if self.op.node_name == self.cfg.GetMasterNode():
5925 raise errors.OpPrereqError("The master role can be changed"
5926 " only via master-failover",
5927 errors.ECODE_INVAL)
5929 if self.op.master_candidate and not node.master_capable:
5930 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5931 " it a master candidate" % node.name,
5932 errors.ECODE_STATE)
5934 if self.op.vm_capable is False:
5935 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5936 if ipri or isec:
5937 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5938 " the vm_capable flag" % node.name,
5939 errors.ECODE_STATE)
5941 if node.master_candidate and self.might_demote and not self.lock_all:
5942 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5943 # check if after removing the current node, we're missing master
5944 # candidates
5945 (mc_remaining, mc_should, _) = \
5946 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5947 if mc_remaining < mc_should:
5948 raise errors.OpPrereqError("Not enough master candidates, please"
5949 " pass auto promote option to allow"
5950 " promotion (--auto-promote or RAPI"
5951 " auto_promote=True)", errors.ECODE_STATE)
5953 self.old_flags = old_flags = (node.master_candidate,
5954 node.drained, node.offline)
5955 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5956 self.old_role = old_role = self._F2R[old_flags]
5958 # Check for ineffective changes
5959 for attr in self._FLAGS:
5960 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
5961 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5962 setattr(self.op, attr, None)
5964 # Past this point, any flag change to False means a transition
5965 # away from the respective state, as only real changes are kept
5967 # TODO: We might query the real power state if it supports OOB
5968 if _SupportsOob(self.cfg, node):
5969 if self.op.offline is False and not (node.powered or
5970 self.op.powered is True):
5971 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5972 " offline status can be reset") %
5973 self.op.node_name, errors.ECODE_STATE)
5974 elif self.op.powered is not None:
5975 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5976 " as it does not support out-of-band"
5977 " handling") % self.op.node_name,
5978 errors.ECODE_STATE)
5980 # If we're being deofflined/drained, we'll MC ourself if needed
5981 if (self.op.drained is False or self.op.offline is False or
5982 (self.op.master_capable and not node.master_capable)):
5983 if _DecideSelfPromotion(self):
5984 self.op.master_candidate = True
5985 self.LogInfo("Auto-promoting node to master candidate")
5987 # If we're no longer master capable, we'll demote ourselves from MC
5988 if self.op.master_capable is False and node.master_candidate:
5989 self.LogInfo("Demoting from master candidate")
5990 self.op.master_candidate = False
5992 # Compute new role
5993 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5994 if self.op.master_candidate:
5995 new_role = self._ROLE_CANDIDATE
5996 elif self.op.drained:
5997 new_role = self._ROLE_DRAINED
5998 elif self.op.offline:
5999 new_role = self._ROLE_OFFLINE
6000 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6001 # False is still in new flags, which means we're un-setting (the
6002 # current) flags
6003 new_role = self._ROLE_REGULAR
6004 else: # no new flags, nothing, keep old role
6005 new_role = old_role
6007 self.new_role = new_role
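# Illustrative sketch (not part of the original module): the role decision
# above, extracted as a standalone helper over hypothetical flag values:
#
#   >>> def _new_role(mc, drained, offline, old):
#   ...   if mc: return LUNodeSetParams._ROLE_CANDIDATE
#   ...   elif drained: return LUNodeSetParams._ROLE_DRAINED
#   ...   elif offline: return LUNodeSetParams._ROLE_OFFLINE
#   ...   elif False in (mc, drained, offline):
#   ...     return LUNodeSetParams._ROLE_REGULAR
#   ...   else: return old
#   >>> _new_role(None, True, None, LUNodeSetParams._ROLE_REGULAR) == \
#   ...     LUNodeSetParams._ROLE_DRAINED
#   True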
6009 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6010 # Trying to transition out of offline status
6011 result = self.rpc.call_version([node.name])[node.name]
6012 if result.fail_msg:
6013 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6014 " to report its version: %s" %
6015 (node.name, result.fail_msg),
6016 errors.ECODE_STATE)
6017 else:
6018 self.LogWarning("Transitioning node from offline to online state"
6019 " without using re-add. Please make sure the node"
6020 " is healthy!")
6022 # When changing the secondary ip, verify if this is a single-homed to
6023 # multi-homed transition or vice versa, and apply the relevant
6024 # cluster checks
6025 if self.op.secondary_ip:
6026 # Ok even without locking, because this can't be changed by any LU
6027 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6028 master_singlehomed = master.secondary_ip == master.primary_ip
6029 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6030 if self.op.force and node.name == master.name:
6031 self.LogWarning("Transitioning from single-homed to multi-homed"
6032 " cluster. All nodes will require a secondary ip.")
6033 else:
6034 raise errors.OpPrereqError("Changing the secondary ip on a"
6035 " single-homed cluster requires the"
6036 " --force option to be passed, and the"
6037 " target node to be the master",
6038 errors.ECODE_INVAL)
6039 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6040 if self.op.force and node.name == master.name:
6041 self.LogWarning("Transitioning from multi-homed to single-homed"
6042 " cluster. Secondary IPs will have to be removed.")
6043 else:
6044 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6045 " same as the primary IP on a multi-homed"
6046 " cluster, unless the --force option is"
6047 " passed, and the target node is the"
6048 " master", errors.ECODE_INVAL)
6050 assert not (frozenset(affected_instances) -
6051 self.owned_locks(locking.LEVEL_INSTANCE))
6053 if node.offline:
6054 if affected_instances:
6055 msg = ("Cannot change secondary IP address: offline node has"
6056 " instances (%s) configured to use it" %
6057 utils.CommaJoin(affected_instances.keys()))
6058 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6059 else:
6060 # On online nodes, check that no instances are running, and that
6061 # the node has the new ip and we can reach it.
6062 for instance in affected_instances.values():
6063 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6064 msg="cannot change secondary ip")
6066 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6067 if master.name != node.name:
6068 # check reachability from master secondary ip to new secondary ip
6069 if not netutils.TcpPing(self.op.secondary_ip,
6070 constants.DEFAULT_NODED_PORT,
6071 source=master.secondary_ip):
6072 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6073 " based ping to node daemon port",
6074 errors.ECODE_ENVIRON)
6076 if self.op.ndparams:
6077 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6078 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6079 self.new_ndparams = new_ndparams
6081 if self.op.hv_state:
6082 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6083 self.node.hv_state_static)
6085 if self.op.disk_state:
6086 self.new_disk_state = \
6087 _MergeAndVerifyDiskState(self.op.disk_state,
6088 self.node.disk_state_static)
6090 def Exec(self, feedback_fn):
6091 """Modifies a node.
6093 """
6094 node = self.node
6095 old_role = self.old_role
6096 new_role = self.new_role
6098 result = []
6100 if self.op.ndparams:
6101 node.ndparams = self.new_ndparams
6103 if self.op.powered is not None:
6104 node.powered = self.op.powered
6106 if self.op.hv_state:
6107 node.hv_state_static = self.new_hv_state
6109 if self.op.disk_state:
6110 node.disk_state_static = self.new_disk_state
6112 for attr in ["master_capable", "vm_capable"]:
6113 val = getattr(self.op, attr)
6114 if val is not None:
6115 setattr(node, attr, val)
6116 result.append((attr, str(val)))
6118 if new_role != old_role:
6119 # Tell the node to demote itself, if no longer MC and not offline
6120 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6121 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6122 if msg:
6123 self.LogWarning("Node failed to demote itself: %s", msg)
6125 new_flags = self._R2F[new_role]
6126 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6127 if of != nf:
6128 result.append((desc, str(nf)))
6129 (node.master_candidate, node.drained, node.offline) = new_flags
6131 # we locked all nodes, we adjust the CP before updating this node
6132 if self.lock_all:
6133 _AdjustCandidatePool(self, [node.name])
6135 if self.op.secondary_ip:
6136 node.secondary_ip = self.op.secondary_ip
6137 result.append(("secondary_ip", self.op.secondary_ip))
6139 # this will trigger configuration file update, if needed
6140 self.cfg.Update(node, feedback_fn)
6142 # this will trigger job queue propagation or cleanup if the mc
6143 # flag changed
6144 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6145 self.context.ReaddNode(node)
6147 return result
6150 class LUNodePowercycle(NoHooksLU):
6151 """Powercycles a node.
6153 """
6154 REQ_BGL = False
6156 def CheckArguments(self):
6157 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6158 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6159 raise errors.OpPrereqError("The node is the master and the force"
6160 " parameter was not set",
6161 errors.ECODE_INVAL)
6163 def ExpandNames(self):
6164 """Locking for PowercycleNode.
6166 This is a last-resort option and shouldn't block on other
6167 jobs. Therefore, we grab no locks.
6169 """
6170 self.needed_locks = {}
6172 def Exec(self, feedback_fn):
6173 """Reboots a node.
6175 """
6176 result = self.rpc.call_node_powercycle(self.op.node_name,
6177 self.cfg.GetHypervisorType())
6178 result.Raise("Failed to schedule the reboot")
6179 return result.payload
6182 class LUClusterQuery(NoHooksLU):
6183 """Query cluster configuration.
6185 """
6186 REQ_BGL = False
6188 def ExpandNames(self):
6189 self.needed_locks = {}
6191 def Exec(self, feedback_fn):
6192 """Return cluster config.
6194 """
6195 cluster = self.cfg.GetClusterInfo()
6197 os_hvp = {}
6198 # Filter just for enabled hypervisors
6199 for os_name, hv_dict in cluster.os_hvp.items():
6200 os_hvp[os_name] = {}
6201 for hv_name, hv_params in hv_dict.items():
6202 if hv_name in cluster.enabled_hypervisors:
6203 os_hvp[os_name][hv_name] = hv_params
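# Illustrative sketch (not part of the original module): given a
# hypothetical cluster.os_hvp such as
#   {"debian": {"kvm": {"acpi": True}, "fake": {}}}
# with only "kvm" in cluster.enabled_hypervisors, the loop above leaves
#   os_hvp == {"debian": {"kvm": {"acpi": True}}}
# i.e. per-OS hypervisor overrides are reported only for enabled
# hypervisors.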
6205 # Convert ip_family to ip_version
6206 primary_ip_version = constants.IP4_VERSION
6207 if cluster.primary_ip_family == netutils.IP6Address.family:
6208 primary_ip_version = constants.IP6_VERSION
6210 result = {
6211 "software_version": constants.RELEASE_VERSION,
6212 "protocol_version": constants.PROTOCOL_VERSION,
6213 "config_version": constants.CONFIG_VERSION,
6214 "os_api_version": max(constants.OS_API_VERSIONS),
6215 "export_version": constants.EXPORT_VERSION,
6216 "architecture": runtime.GetArchInfo(),
6217 "name": cluster.cluster_name,
6218 "master": cluster.master_node,
6219 "default_hypervisor": cluster.primary_hypervisor,
6220 "enabled_hypervisors": cluster.enabled_hypervisors,
6221 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6222 for hypervisor_name in cluster.enabled_hypervisors]),
6223 "os_hvp": os_hvp,
6224 "beparams": cluster.beparams,
6225 "osparams": cluster.osparams,
6226 "ipolicy": cluster.ipolicy,
6227 "nicparams": cluster.nicparams,
6228 "ndparams": cluster.ndparams,
6229 "diskparams": cluster.diskparams,
6230 "candidate_pool_size": cluster.candidate_pool_size,
6231 "master_netdev": cluster.master_netdev,
6232 "master_netmask": cluster.master_netmask,
6233 "use_external_mip_script": cluster.use_external_mip_script,
6234 "volume_group_name": cluster.volume_group_name,
6235 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6236 "file_storage_dir": cluster.file_storage_dir,
6237 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6238 "maintain_node_health": cluster.maintain_node_health,
6239 "ctime": cluster.ctime,
6240 "mtime": cluster.mtime,
6241 "uuid": cluster.uuid,
6242 "tags": list(cluster.GetTags()),
6243 "uid_pool": cluster.uid_pool,
6244 "default_iallocator": cluster.default_iallocator,
6245 "reserved_lvs": cluster.reserved_lvs,
6246 "primary_ip_version": primary_ip_version,
6247 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6248 "hidden_os": cluster.hidden_os,
6249 "blacklisted_os": cluster.blacklisted_os,
6250 }
6252 return result
6255 class LUClusterConfigQuery(NoHooksLU):
6256 """Return configuration values.
6258 """
6259 REQ_BGL = False
6261 def CheckArguments(self):
6262 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6264 def ExpandNames(self):
6265 self.cq.ExpandNames(self)
6267 def DeclareLocks(self, level):
6268 self.cq.DeclareLocks(self, level)
6270 def Exec(self, feedback_fn):
6271 result = self.cq.OldStyleQuery(self)
6273 assert len(result) == 1
6275 return result[0]
6278 class _ClusterQuery(_QueryBase):
6279 FIELDS = query.CLUSTER_FIELDS
6281 #: Do not sort (there is only one item)
6282 SORT_FIELD = None
6284 def ExpandNames(self, lu):
6285 lu.needed_locks = {}
6287 # The following variables interact with _QueryBase._GetNames
6288 self.wanted = locking.ALL_SET
6289 self.do_locking = self.use_locking
6291 if self.do_locking:
6292 raise errors.OpPrereqError("Cannot use locking for cluster queries",
6293 errors.ECODE_INVAL)
6295 def DeclareLocks(self, lu, level):
6296 pass
6298 def _GetQueryData(self, lu):
6299 """Computes the list of nodes and their attributes.
6301 """
6302 # Locking is not used
6303 assert not (compat.any(lu.glm.is_owned(level)
6304 for level in locking.LEVELS
6305 if level != locking.LEVEL_CLUSTER) or
6306 self.do_locking or self.use_locking)
6308 if query.CQ_CONFIG in self.requested_data:
6309 cluster = lu.cfg.GetClusterInfo()
6310 else:
6311 cluster = NotImplemented
6313 if query.CQ_QUEUE_DRAINED in self.requested_data:
6314 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6315 else:
6316 drain_flag = NotImplemented
6318 if query.CQ_WATCHER_PAUSE in self.requested_data:
6319 watcher_pause = utils.ReadWatcherPauseFile(pathutils.WATCHER_PAUSEFILE)
6320 else:
6321 watcher_pause = NotImplemented
6323 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6326 class LUInstanceActivateDisks(NoHooksLU):
6327 """Bring up an instance's disks.
6329 """
6330 REQ_BGL = False
6332 def ExpandNames(self):
6333 self._ExpandAndLockInstance()
6334 self.needed_locks[locking.LEVEL_NODE] = []
6335 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6337 def DeclareLocks(self, level):
6338 if level == locking.LEVEL_NODE:
6339 self._LockInstancesNodes()
6341 def CheckPrereq(self):
6342 """Check prerequisites.
6344 This checks that the instance is in the cluster.
6346 """
6347 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6348 assert self.instance is not None, \
6349 "Cannot retrieve locked instance %s" % self.op.instance_name
6350 _CheckNodeOnline(self, self.instance.primary_node)
6352 def Exec(self, feedback_fn):
6353 """Activate the disks.
6355 """
6356 disks_ok, disks_info = \
6357 _AssembleInstanceDisks(self, self.instance,
6358 ignore_size=self.op.ignore_size)
6359 if not disks_ok:
6360 raise errors.OpExecError("Cannot activate block devices")
6362 if self.op.wait_for_sync:
6363 if not _WaitForSync(self, self.instance):
6364 raise errors.OpExecError("Some disks of the instance are degraded!")
6366 return disks_info
6369 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6370 ignore_size=False):
6371 """Prepare the block devices for an instance.
6373 This sets up the block devices on all nodes.
6375 @type lu: L{LogicalUnit}
6376 @param lu: the logical unit on whose behalf we execute
6377 @type instance: L{objects.Instance}
6378 @param instance: the instance for whose disks we assemble
6379 @type disks: list of L{objects.Disk} or None
6380 @param disks: which disks to assemble (or all, if None)
6381 @type ignore_secondaries: boolean
6382 @param ignore_secondaries: if true, errors on secondary nodes
6383 won't result in an error return from the function
6384 @type ignore_size: boolean
6385 @param ignore_size: if true, the current known size of the disk
6386 will not be used during the disk activation, useful for cases
6387 when the size is wrong
6388 @return: False if the operation failed, otherwise a list of
6389 (host, instance_visible_name, node_visible_name)
6390 with the mapping from node devices to instance devices
6392 """
6393 device_info = []
6394 disks_ok = True
6395 iname = instance.name
6396 disks = _ExpandCheckDisks(instance, disks)
6398 # With the two passes mechanism we try to reduce the window of
6399 # opportunity for the race condition of switching DRBD to primary
6400 # before handshaking occured, but we do not eliminate it
6402 # The proper fix would be to wait (with some limits) until the
6403 # connection has been made and drbd transitions from WFConnection
6404 # into any other network-connected state (Connected, SyncTarget,
6405 # SyncSource, etc.)
6407 # 1st pass, assemble on all nodes in secondary mode
6408 for idx, inst_disk in enumerate(disks):
6409 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6410 if ignore_size:
6411 node_disk = node_disk.Copy()
6412 node_disk.UnsetSize()
6413 lu.cfg.SetDiskID(node_disk, node)
6414 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6415 False, idx)
6416 msg = result.fail_msg
6417 if msg:
6418 is_offline_secondary = (node in instance.secondary_nodes and
6419 result.offline)
6420 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6421 " (is_primary=False, pass=1): %s",
6422 inst_disk.iv_name, node, msg)
6423 if not (ignore_secondaries or is_offline_secondary):
6424 disks_ok = False
6426 # FIXME: race condition on drbd migration to primary
6428 # 2nd pass, do only the primary node
6429 for idx, inst_disk in enumerate(disks):
6430 dev_path = None
6432 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6433 if node != instance.primary_node:
6434 continue
6435 if ignore_size:
6436 node_disk = node_disk.Copy()
6437 node_disk.UnsetSize()
6438 lu.cfg.SetDiskID(node_disk, node)
6439 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6440 True, idx)
6441 msg = result.fail_msg
6442 if msg:
6443 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6444 " (is_primary=True, pass=2): %s",
6445 inst_disk.iv_name, node, msg)
6446 disks_ok = False
6447 else:
6448 dev_path = result.payload
6450 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6452 # leave the disks configured for the primary node
6453 # this is a workaround that would be fixed better by
6454 # improving the logical/physical id handling
6455 for disk in disks:
6456 lu.cfg.SetDiskID(disk, instance.primary_node)
6458 return disks_ok, device_info
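# Illustrative sketch (not part of the original module): a typical caller
# checks the boolean and then uses the (node, iv_name, device path)
# triples; the feedback_fn name here is hypothetical:
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance,
#                                                  ignore_size=True)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     feedback_fn("%s on %s is visible as %s" % (iv_name, node, dev_path))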
6461 def _StartInstanceDisks(lu, instance, force):
6462 """Start the disks of an instance.
6464 """
6465 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6466 ignore_secondaries=force)
6467 if not disks_ok:
6468 _ShutdownInstanceDisks(lu, instance)
6469 if force is not None and not force:
6470 lu.proc.LogWarning("", hint="If the message above refers to a"
6471 " secondary node,"
6472 " you can retry the operation using '--force'.")
6473 raise errors.OpExecError("Disk consistency error")
6476 class LUInstanceDeactivateDisks(NoHooksLU):
6477 """Shutdown an instance's disks.
6479 """
6480 REQ_BGL = False
6482 def ExpandNames(self):
6483 self._ExpandAndLockInstance()
6484 self.needed_locks[locking.LEVEL_NODE] = []
6485 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6487 def DeclareLocks(self, level):
6488 if level == locking.LEVEL_NODE:
6489 self._LockInstancesNodes()
6491 def CheckPrereq(self):
6492 """Check prerequisites.
6494 This checks that the instance is in the cluster.
6496 """
6497 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6498 assert self.instance is not None, \
6499 "Cannot retrieve locked instance %s" % self.op.instance_name
6501 def Exec(self, feedback_fn):
6502 """Deactivate the disks.
6504 """
6505 instance = self.instance
6506 if self.op.force:
6507 _ShutdownInstanceDisks(self, instance)
6508 else:
6509 _SafeShutdownInstanceDisks(self, instance)
6512 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6513 """Shutdown block devices of an instance.
6515 This function checks if an instance is running, before calling
6516 _ShutdownInstanceDisks.
6518 """
6519 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6520 _ShutdownInstanceDisks(lu, instance, disks=disks)
6523 def _ExpandCheckDisks(instance, disks):
6524 """Return the instance disks selected by the disks list
6526 @type disks: list of L{objects.Disk} or None
6527 @param disks: selected disks
6528 @rtype: list of L{objects.Disk}
6529 @return: selected instance disks to act on
6531 """
6532 if disks is None:
6533 return instance.disks
6534 else:
6535 if not set(disks).issubset(instance.disks):
6536 raise errors.ProgrammerError("Can only act on disks belonging to the"
6537 " target instance")
6538 return disks
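# Illustrative sketch (not part of the original module): callers pass
# either None (act on all disks) or a subset of instance.disks, e.g.:
#
#   all_disks = _ExpandCheckDisks(instance, None)
#   first_only = _ExpandCheckDisks(instance, instance.disks[:1])
#
# Passing disk objects that do not belong to the instance raises
# errors.ProgrammerError.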
6541 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6542 """Shutdown block devices of an instance.
6544 This does the shutdown on all nodes of the instance.
6546 If ignore_primary is false, errors on the primary node are reflected
6547 in the return value; errors from offline secondary nodes are ignored.
6549 """
6550 all_result = True
6551 disks = _ExpandCheckDisks(instance, disks)
6553 for disk in disks:
6554 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6555 lu.cfg.SetDiskID(top_disk, node)
6556 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6557 msg = result.fail_msg
6558 if msg:
6559 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6560 disk.iv_name, node, msg)
6561 if ((node == instance.primary_node and not ignore_primary) or
6562 (node != instance.primary_node and not result.offline)):
6563 all_result = False
6565 return all_result
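# Illustrative sketch (not part of the original module): typical use is to
# tear down the block devices and merely warn on failure, e.g.:
#
#   if not _ShutdownInstanceDisks(lu, instance):
#     lu.LogWarning("Some block devices of instance %s could not be"
#                   " shut down", instance.name)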
6567 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6568 """Checks if a node has enough free memory.
6570 This function checks if a given node has the needed amount of free
6571 memory. In case the node has less memory or we cannot get the
6572 information from the node, this function raises an OpPrereqError
6573 exception.
6575 @type lu: C{LogicalUnit}
6576 @param lu: a logical unit from which we get configuration data
6577 @type node: C{str}
6578 @param node: the node to check
6579 @type reason: C{str}
6580 @param reason: string to use in the error message
6581 @type requested: C{int}
6582 @param requested: the amount of memory in MiB to check for
6583 @type hypervisor_name: C{str}
6584 @param hypervisor_name: the hypervisor to ask for memory stats
6585 @rtype: C{int}
6586 @return: node current free memory
6587 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6588 we cannot check the node
6590 """
6591 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6592 nodeinfo[node].Raise("Can't get data from node %s" % node,
6593 prereq=True, ecode=errors.ECODE_ENVIRON)
6594 (_, _, (hv_info, )) = nodeinfo[node].payload
6596 free_mem = hv_info.get("memory_free", None)
6597 if not isinstance(free_mem, int):
6598 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6599 " was '%s'" % (node, free_mem),
6600 errors.ECODE_ENVIRON)
6601 if requested > free_mem:
6602 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6603 " needed %s MiB, available %s MiB" %
6604 (node, reason, requested, free_mem),
6605 errors.ECODE_NORES)
6606 return free_mem
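# Illustrative sketch (not part of the original module): a caller verifies
# memory before starting an instance and may use the returned value; the
# 1024 MiB figure is a hypothetical example:
#
#   free_mem = _CheckNodeFreeMemory(self, instance.primary_node,
#                                   "starting instance %s" % instance.name,
#                                   1024, instance.hypervisor)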
6609 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6610 """Checks if nodes have enough free disk space in the all VGs.
6612 This function checks if all given nodes have the needed amount of
6613 free disk. In case any node has less disk or we cannot get the
6614 information from the node, this function raises an OpPrereqError
6615 exception.
6617 @type lu: C{LogicalUnit}
6618 @param lu: a logical unit from which we get configuration data
6619 @type nodenames: C{list}
6620 @param nodenames: the list of node names to check
6621 @type req_sizes: C{dict}
6622 @param req_sizes: the hash of vg and corresponding amount of disk in
6623 MiB to check for
6624 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6625 or we cannot check the node
6627 """
6628 for vg, req_size in req_sizes.items():
6629 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
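# Illustrative sketch (not part of the original module): req_sizes maps a
# volume group name to the space needed in that VG; names and sizes below
# are hypothetical:
#
#   _CheckNodesFreeDiskPerVG(self, ["node1", "node2"],
#                            {"xenvg": 10240, "othervg": 2048})
#
# which checks for 10 GiB in "xenvg" and 2 GiB in "othervg" on both nodes.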
6632 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6633 """Checks if nodes have enough free disk space in the specified VG.
6635 This function checks if all given nodes have the needed amount of
6636 free disk. In case any node has less disk or we cannot get the
6637 information from the node, this function raises an OpPrereqError
6638 exception.
6640 @type lu: C{LogicalUnit}
6641 @param lu: a logical unit from which we get configuration data
6642 @type nodenames: C{list}
6643 @param nodenames: the list of node names to check
6644 @type vg: C{str}
6645 @param vg: the volume group to check
6646 @type requested: C{int}
6647 @param requested: the amount of disk in MiB to check for
6648 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6649 or we cannot check the node
6651 """
6652 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6653 for node in nodenames:
6654 info = nodeinfo[node]
6655 info.Raise("Cannot get current information from node %s" % node,
6656 prereq=True, ecode=errors.ECODE_ENVIRON)
6657 (_, (vg_info, ), _) = info.payload
6658 vg_free = vg_info.get("vg_free", None)
6659 if not isinstance(vg_free, int):
6660 raise errors.OpPrereqError("Can't compute free disk space on node"
6661 " %s for vg %s, result was '%s'" %
6662 (node, vg, vg_free), errors.ECODE_ENVIRON)
6663 if requested > vg_free:
6664 raise errors.OpPrereqError("Not enough disk space on target node %s"
6665 " vg %s: required %d MiB, available %d MiB" %
6666 (node, vg, requested, vg_free),
6667 errors.ECODE_NORES)
6670 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6671 """Checks if nodes have enough physical CPUs
6673 This function checks if all given nodes have the needed number of
6674 physical CPUs. In case any node has less CPUs or we cannot get the
6675 information from the node, this function raises an OpPrereqError
6676 exception.
6678 @type lu: C{LogicalUnit}
6679 @param lu: a logical unit from which we get configuration data
6680 @type nodenames: C{list}
6681 @param nodenames: the list of node names to check
6682 @type requested: C{int}
6683 @param requested: the minimum acceptable number of physical CPUs
6684 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6685 or we cannot check the node
6687 """
6688 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6689 for node in nodenames:
6690 info = nodeinfo[node]
6691 info.Raise("Cannot get current information from node %s" % node,
6692 prereq=True, ecode=errors.ECODE_ENVIRON)
6693 (_, _, (hv_info, )) = info.payload
6694 num_cpus = hv_info.get("cpu_total", None)
6695 if not isinstance(num_cpus, int):
6696 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6697 " on node %s, result was '%s'" %
6698 (node, num_cpus), errors.ECODE_ENVIRON)
6699 if requested > num_cpus:
6700 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6701 "required" % (node, num_cpus, requested),
6702 errors.ECODE_NORES)
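# Illustrative sketch (not part of the original module): e.g. require at
# least 4 physical CPUs on every candidate node (node names and the count
# are hypothetical):
#
#   _CheckNodesPhysicalCPUs(self, ["node1", "node2"], 4,
#                           instance.hypervisor)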
6705 class LUInstanceStartup(LogicalUnit):
6706 """Starts an instance.
6708 """
6709 HPATH = "instance-start"
6710 HTYPE = constants.HTYPE_INSTANCE
6711 REQ_BGL = False
6713 def CheckArguments(self):
6714 # extra beparams
6715 if self.op.beparams:
6716 # fill the beparams dict
6717 objects.UpgradeBeParams(self.op.beparams)
6718 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6720 def ExpandNames(self):
6721 self._ExpandAndLockInstance()
6722 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6724 def DeclareLocks(self, level):
6725 if level == locking.LEVEL_NODE_RES:
6726 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6728 def BuildHooksEnv(self):
6729 """Build hooks env.
6731 This runs on master, primary and secondary nodes of the instance.
6733 """
6734 env = {
6735 "FORCE": self.op.force,
6736 }
6738 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6740 return env
6742 def BuildHooksNodes(self):
6743 """Build hooks nodes.
6745 """
6746 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6747 return (nl, nl)
6749 def CheckPrereq(self):
6750 """Check prerequisites.
6752 This checks that the instance is in the cluster.
6754 """
6755 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6756 assert self.instance is not None, \
6757 "Cannot retrieve locked instance %s" % self.op.instance_name
6759 # extra hvparams
6760 if self.op.hvparams:
6761 # check hypervisor parameter syntax (locally)
6762 cluster = self.cfg.GetClusterInfo()
6763 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6764 filled_hvp = cluster.FillHV(instance)
6765 filled_hvp.update(self.op.hvparams)
6766 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6767 hv_type.CheckParameterSyntax(filled_hvp)
6768 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6770 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6772 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6774 if self.primary_offline and self.op.ignore_offline_nodes:
6775 self.proc.LogWarning("Ignoring offline primary node")
6777 if self.op.hvparams or self.op.beparams:
6778 self.proc.LogWarning("Overridden parameters are ignored")
6779 else:
6780 _CheckNodeOnline(self, instance.primary_node)
6782 bep = self.cfg.GetClusterInfo().FillBE(instance)
6783 bep.update(self.op.beparams)
6785 # check bridges existence
6786 _CheckInstanceBridgesExist(self, instance)
6788 remote_info = self.rpc.call_instance_info(instance.primary_node,
6789 instance.name,
6790 instance.hypervisor)
6791 remote_info.Raise("Error checking node %s" % instance.primary_node,
6792 prereq=True, ecode=errors.ECODE_ENVIRON)
6793 if not remote_info.payload: # not running already
6794 _CheckNodeFreeMemory(self, instance.primary_node,
6795 "starting instance %s" % instance.name,
6796 bep[constants.BE_MINMEM], instance.hypervisor)
6798 def Exec(self, feedback_fn):
6799 """Start the instance.
6801 """
6802 instance = self.instance
6803 force = self.op.force
6805 if not self.op.no_remember:
6806 self.cfg.MarkInstanceUp(instance.name)
6808 if self.primary_offline:
6809 assert self.op.ignore_offline_nodes
6810 self.proc.LogInfo("Primary node offline, marked instance as started")
6811 else:
6812 node_current = instance.primary_node
6814 _StartInstanceDisks(self, instance, force)
6816 result = \
6817 self.rpc.call_instance_start(node_current,
6818 (instance, self.op.hvparams,
6819 self.op.beparams),
6820 self.op.startup_paused)
6821 msg = result.fail_msg
6822 if msg:
6823 _ShutdownInstanceDisks(self, instance)
6824 raise errors.OpExecError("Could not start instance: %s" % msg)
6827 class LUInstanceReboot(LogicalUnit):
6828 """Reboot an instance.
6830 """
6831 HPATH = "instance-reboot"
6832 HTYPE = constants.HTYPE_INSTANCE
6833 REQ_BGL = False
6835 def ExpandNames(self):
6836 self._ExpandAndLockInstance()
6838 def BuildHooksEnv(self):
6839 """Build hooks env.
6841 This runs on master, primary and secondary nodes of the instance.
6843 """
6844 env = {
6845 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6846 "REBOOT_TYPE": self.op.reboot_type,
6847 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6848 }
6850 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6852 return env
6854 def BuildHooksNodes(self):
6855 """Build hooks nodes.
6857 """
6858 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6859 return (nl, nl)
6861 def CheckPrereq(self):
6862 """Check prerequisites.
6864 This checks that the instance is in the cluster.
6866 """
6867 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6868 assert self.instance is not None, \
6869 "Cannot retrieve locked instance %s" % self.op.instance_name
6870 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6871 _CheckNodeOnline(self, instance.primary_node)
6873 # check bridges existence
6874 _CheckInstanceBridgesExist(self, instance)
6876 def Exec(self, feedback_fn):
6877 """Reboot the instance.
6879 """
6880 instance = self.instance
6881 ignore_secondaries = self.op.ignore_secondaries
6882 reboot_type = self.op.reboot_type
6884 remote_info = self.rpc.call_instance_info(instance.primary_node,
6885 instance.name,
6886 instance.hypervisor)
6887 remote_info.Raise("Error checking node %s" % instance.primary_node)
6888 instance_running = bool(remote_info.payload)
6890 node_current = instance.primary_node
6892 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6893 constants.INSTANCE_REBOOT_HARD]:
6894 for disk in instance.disks:
6895 self.cfg.SetDiskID(disk, node_current)
6896 result = self.rpc.call_instance_reboot(node_current, instance,
6897 reboot_type,
6898 self.op.shutdown_timeout)
6899 result.Raise("Could not reboot instance")
6900 else:
6901 if instance_running:
6902 result = self.rpc.call_instance_shutdown(node_current, instance,
6903 self.op.shutdown_timeout)
6904 result.Raise("Could not shutdown instance for full reboot")
6905 _ShutdownInstanceDisks(self, instance)
6906 else:
6907 self.LogInfo("Instance %s was already stopped, starting now",
6908 instance.name)
6909 _StartInstanceDisks(self, instance, ignore_secondaries)
6910 result = self.rpc.call_instance_start(node_current,
6911 (instance, None, None), False)
6912 msg = result.fail_msg
6913 if msg:
6914 _ShutdownInstanceDisks(self, instance)
6915 raise errors.OpExecError("Could not start instance for"
6916 " full reboot: %s" % msg)
6918 self.cfg.MarkInstanceUp(instance.name)
6921 class LUInstanceShutdown(LogicalUnit):
6922 """Shutdown an instance.
6924 """
6925 HPATH = "instance-stop"
6926 HTYPE = constants.HTYPE_INSTANCE
6927 REQ_BGL = False
6929 def ExpandNames(self):
6930 self._ExpandAndLockInstance()
6932 def BuildHooksEnv(self):
6933 """Build hooks env.
6935 This runs on master, primary and secondary nodes of the instance.
6937 """
6938 env = _BuildInstanceHookEnvByObject(self, self.instance)
6939 env["TIMEOUT"] = self.op.timeout
6940 return env
6942 def BuildHooksNodes(self):
6943 """Build hooks nodes.
6945 """
6946 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6947 return (nl, nl)
6949 def CheckPrereq(self):
6950 """Check prerequisites.
6952 This checks that the instance is in the cluster.
6954 """
6955 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6956 assert self.instance is not None, \
6957 "Cannot retrieve locked instance %s" % self.op.instance_name
6959 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6961 self.primary_offline = \
6962 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6964 if self.primary_offline and self.op.ignore_offline_nodes:
6965 self.proc.LogWarning("Ignoring offline primary node")
6967 _CheckNodeOnline(self, self.instance.primary_node)
6969 def Exec(self, feedback_fn):
6970 """Shutdown the instance.
6972 """
6973 instance = self.instance
6974 node_current = instance.primary_node
6975 timeout = self.op.timeout
6977 if not self.op.no_remember:
6978 self.cfg.MarkInstanceDown(instance.name)
6980 if self.primary_offline:
6981 assert self.op.ignore_offline_nodes
6982 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6983 else:
6984 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6985 msg = result.fail_msg
6986 if msg:
6987 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6989 _ShutdownInstanceDisks(self, instance)
6992 class LUInstanceReinstall(LogicalUnit):
6993 """Reinstall an instance.
6995 """
6996 HPATH = "instance-reinstall"
6997 HTYPE = constants.HTYPE_INSTANCE
6998 REQ_BGL = False
7000 def ExpandNames(self):
7001 self._ExpandAndLockInstance()
7003 def BuildHooksEnv(self):
7004 """Build hooks env.
7006 This runs on master, primary and secondary nodes of the instance.
7008 """
7009 return _BuildInstanceHookEnvByObject(self, self.instance)
7011 def BuildHooksNodes(self):
7012 """Build hooks nodes.
7014 """
7015 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7016 return (nl, nl)
7018 def CheckPrereq(self):
7019 """Check prerequisites.
7021 This checks that the instance is in the cluster and is not running.
7023 """
7024 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7025 assert instance is not None, \
7026 "Cannot retrieve locked instance %s" % self.op.instance_name
7027 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7028 " offline, cannot reinstall")
7030 if instance.disk_template == constants.DT_DISKLESS:
7031 raise errors.OpPrereqError("Instance '%s' has no disks" %
7032 self.op.instance_name,
7033 errors.ECODE_INVAL)
7034 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7036 if self.op.os_type is not None:
7037 # OS verification
7038 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7039 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7040 instance_os = self.op.os_type
7041 else:
7042 instance_os = instance.os
7044 nodelist = list(instance.all_nodes)
7046 if self.op.osparams:
7047 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7048 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7049 self.os_inst = i_osdict # the new dict (without defaults)
7050 else:
7051 self.os_inst = {}
7053 self.instance = instance
7055 def Exec(self, feedback_fn):
7056 """Reinstall the instance.
7058 """
7059 inst = self.instance
7061 if self.op.os_type is not None:
7062 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7063 inst.os = self.op.os_type
7064 # Write to configuration
7065 self.cfg.Update(inst, feedback_fn)
7067 _StartInstanceDisks(self, inst, None)
7068 try:
7069 feedback_fn("Running the instance OS create scripts...")
7070 # FIXME: pass debug option from opcode to backend
7071 result = self.rpc.call_instance_os_add(inst.primary_node,
7072 (inst, self.os_inst), True,
7073 self.op.debug_level)
7074 result.Raise("Could not install OS for instance %s on node %s" %
7075 (inst.name, inst.primary_node))
7076 finally:
7077 _ShutdownInstanceDisks(self, inst)
7080 class LUInstanceRecreateDisks(LogicalUnit):
7081 """Recreate an instance's missing disks.
7083 """
7084 HPATH = "instance-recreate-disks"
7085 HTYPE = constants.HTYPE_INSTANCE
7086 REQ_BGL = False
7088 _MODIFYABLE = frozenset([
7089 constants.IDISK_SIZE,
7090 constants.IDISK_MODE,
7091 ])
7093 # New or changed disk parameters may have different semantics
7094 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7095 constants.IDISK_ADOPT,
7097 # TODO: Implement support changing VG while recreating
7098 constants.IDISK_VG,
7099 constants.IDISK_METAVG,
7100 ]))
7102 def _RunAllocator(self):
7103 """Run the allocator based on input opcode.
7105 """
7106 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7109 # The allocator should actually run in "relocate" mode, but current
7110 # allocators don't support relocating all the nodes of an instance at
7111 # the same time. As a workaround we use "allocate" mode, but this is
7112 # suboptimal for two reasons:
7113 # - The instance name passed to the allocator is present in the list of
7114 # existing instances, so there could be a conflict within the
7115 # internal structures of the allocator. This doesn't happen with the
7116 # current allocators, but it's a liability.
7117 # - The allocator counts the resources used by the instance twice: once
7118 # because the instance exists already, and once because it tries to
7119 # allocate a new instance.
7120 # The allocator could choose some of the nodes on which the instance is
7121 # running, but that's not a problem. If the instance nodes are broken,
7122 # they should be already be marked as drained or offline, and hence
7123 # skipped by the allocator. If instance disks have been lost for other
7124 # reasons, then recreating the disks on the same nodes should be fine.
7125 disk_template = self.instance.disk_template
7126 spindle_use = be_full[constants.BE_SPINDLE_USE]
7127 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7128 disk_template=disk_template,
7129 tags=list(self.instance.GetTags()),
7130 os=self.instance.os,
7131 nics=[{}],
7132 vcpus=be_full[constants.BE_VCPUS],
7133 memory=be_full[constants.BE_MAXMEM],
7134 spindle_use=spindle_use,
7135 disks=[{constants.IDISK_SIZE: d.size,
7136 constants.IDISK_MODE: d.mode}
7137 for d in self.instance.disks],
7138 hypervisor=self.instance.hypervisor)
7139 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7141 ial.Run(self.op.iallocator)
7143 assert req.RequiredNodes() == len(self.instance.all_nodes)
7145 if not ial.success:
7146 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7147 " %s" % (self.op.iallocator, ial.info),
7148 errors.ECODE_NORES)
7150 self.op.nodes = ial.result
7151 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7152 self.op.instance_name, self.op.iallocator,
7153 utils.CommaJoin(ial.result))
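# Illustrative sketch (not part of the original module): the request built
# above, spelled out with hypothetical literal values, looks like:
#
#   req = iallocator.IAReqInstanceAlloc(name="inst1.example.com",
#                                       disk_template=constants.DT_DRBD8,
#                                       tags=[], os="debian-image",
#                                       nics=[{}], vcpus=2, memory=1024,
#                                       spindle_use=1,
#                                       disks=[{constants.IDISK_SIZE: 10240,
#                                               constants.IDISK_MODE: "rw"}],
#                                       hypervisor="kvm")
#   ial = iallocator.IAllocator(self.cfg, self.rpc, req)
#   ial.Run("hail")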
7155 def CheckArguments(self):
7156 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7157 # Normalize and convert deprecated list of disk indices
7158 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7160 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7161 if duplicates:
7162 raise errors.OpPrereqError("Some disks have been specified more than"
7163 " once: %s" % utils.CommaJoin(duplicates),
7164 errors.ECODE_INVAL)
7166 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7167 # when neither iallocator nor nodes are specified
7168 if self.op.iallocator or self.op.nodes:
7169 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7171 for (idx, params) in self.op.disks:
7172 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7173 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7174 if unsupported:
7175 raise errors.OpPrereqError("Parameters for disk %s try to change"
7176 " unmodifiable parameter(s): %s" %
7177 (idx, utils.CommaJoin(unsupported)),
7178 errors.ECODE_INVAL)
7180 def ExpandNames(self):
7181 self._ExpandAndLockInstance()
7182 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7183 if self.op.nodes:
7184 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7185 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7186 else:
7187 self.needed_locks[locking.LEVEL_NODE] = []
7188 if self.op.iallocator:
7189 # iallocator will select a new node in the same group
7190 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7191 self.needed_locks[locking.LEVEL_NODE_RES] = []
7193 def DeclareLocks(self, level):
7194 if level == locking.LEVEL_NODEGROUP:
7195 assert self.op.iallocator is not None
7196 assert not self.op.nodes
7197 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7198 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7199 # Lock the primary group used by the instance optimistically; this
7200 # requires going via the node before it's locked, requiring
7201 # verification later on
7202 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7203 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7205 elif level == locking.LEVEL_NODE:
7206 # If an allocator is used, then we lock all the nodes in the current
7207 # instance group, as we don't know yet which ones will be selected;
7208 # if we replace the nodes without using an allocator, locks are
7209 # already declared in ExpandNames; otherwise, we need to lock all the
7210 # instance nodes for disk re-creation
7211 if self.op.iallocator:
7212 assert not self.op.nodes
7213 assert not self.needed_locks[locking.LEVEL_NODE]
7214 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7216 # Lock member nodes of the group of the primary node
7217 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7218 self.needed_locks[locking.LEVEL_NODE].extend(
7219 self.cfg.GetNodeGroup(group_uuid).members)
7220 elif not self.op.nodes:
7221 self._LockInstancesNodes(primary_only=False)
7222 elif level == locking.LEVEL_NODE_RES:
7223 # Copy node locks
7224 self.needed_locks[locking.LEVEL_NODE_RES] = \
7225 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7227 def BuildHooksEnv(self):
7228 """Build hooks env.
7230 This runs on master, primary and secondary nodes of the instance.
7232 """
7233 return _BuildInstanceHookEnvByObject(self, self.instance)
7235 def BuildHooksNodes(self):
7236 """Build hooks nodes.
7238 """
7239 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7240 return (nl, nl)
7242 def CheckPrereq(self):
7243 """Check prerequisites.
7245 This checks that the instance is in the cluster and is not running.
7247 """
7248 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7249 assert instance is not None, \
7250 "Cannot retrieve locked instance %s" % self.op.instance_name
7251 if self.op.nodes:
7252 if len(self.op.nodes) != len(instance.all_nodes):
7253 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7254 " %d replacement nodes were specified" %
7255 (instance.name, len(instance.all_nodes),
7256 len(self.op.nodes)),
7257 errors.ECODE_INVAL)
7258 assert instance.disk_template != constants.DT_DRBD8 or \
7259 len(self.op.nodes) == 2
7260 assert instance.disk_template != constants.DT_PLAIN or \
7261 len(self.op.nodes) == 1
7262 primary_node = self.op.nodes[0]
7263 else:
7264 primary_node = instance.primary_node
7265 if not self.op.iallocator:
7266 _CheckNodeOnline(self, primary_node)
7268 if instance.disk_template == constants.DT_DISKLESS:
7269 raise errors.OpPrereqError("Instance '%s' has no disks" %
7270 self.op.instance_name, errors.ECODE_INVAL)
7272 # Verify if node group locks are still correct
7273 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7274 if owned_groups:
7275 # Node group locks are acquired only for the primary node (and only
7276 # when the allocator is used)
7277 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7278 primary_only=True)
7280 # if we replace nodes *and* the old primary is offline, we don't
7281 # check the instance state
7282 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7283 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7284 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7285 msg="cannot recreate disks")
7287 if self.op.disks:
7288 self.disks = dict(self.op.disks)
7289 else:
7290 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7292 maxidx = max(self.disks.keys())
7293 if maxidx >= len(instance.disks):
7294 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7295 errors.ECODE_PARAMS)
7297 if ((self.op.nodes or self.op.iallocator) and
7298 sorted(self.disks.keys()) != range(len(instance.disks))):
7299 raise errors.OpPrereqError("Can't recreate disks partially and"
7300 " change the nodes at the same time",
7301 errors.ECODE_INVAL)
7303 self.instance = instance
7305 if self.op.iallocator:
7306 self._RunAllocator()
7307 # Release unneeded node and node resource locks
7308 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7309 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7311 def Exec(self, feedback_fn):
7312 """Recreate the disks.
7314 """
7315 instance = self.instance
7317 assert (self.owned_locks(locking.LEVEL_NODE) ==
7318 self.owned_locks(locking.LEVEL_NODE_RES))
7320 to_skip = []
7321 mods = [] # keeps track of needed changes
7323 for idx, disk in enumerate(instance.disks):
7324 try:
7325 changes = self.disks[idx]
7326 except KeyError:
7327 # Disk should not be recreated
7328 to_skip.append(idx)
7329 continue
7331 # update secondaries for disks, if needed
7332 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7333 # need to update the nodes and minors
7334 assert len(self.op.nodes) == 2
7335 assert len(disk.logical_id) == 6 # otherwise disk internals
7337 (_, _, old_port, _, _, old_secret) = disk.logical_id
7338 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7339 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7340 new_minors[0], new_minors[1], old_secret)
7341 assert len(disk.logical_id) == len(new_id)
7342 else:
7343 new_id = None
7345 mods.append((idx, new_id, changes))
7347 # now that we have passed all asserts above, we can apply the mods
7348 # in a single run (to avoid partial changes)
7349 for idx, new_id, changes in mods:
7350 disk = instance.disks[idx]
7351 if new_id is not None:
7352 assert disk.dev_type == constants.LD_DRBD8
7353 disk.logical_id = new_id
7354 if changes:
7355 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7356 mode=changes.get(constants.IDISK_MODE, None))
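# Illustrative sketch (not part of the original module): for a DRBD8 disk
# the 6-tuple logical_id is rebuilt as above, keeping the port and secret
# and swapping in the new nodes and freshly allocated minors; with
# hypothetical values:
#
#   old: ("node1", "node2", 11000, 0, 1, "secret")
#   new: ("node3", "node4", 11000, new_minors[0], new_minors[1], "secret")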
7358 # change primary node, if needed
7359 if self.op.nodes:
7360 instance.primary_node = self.op.nodes[0]
7361 self.LogWarning("Changing the instance's nodes, you will have to"
7362 " remove any disks left on the older nodes manually")
7364 if self.op.nodes:
7365 self.cfg.Update(instance, feedback_fn)
7367 # All touched nodes must be locked
7368 mylocks = self.owned_locks(locking.LEVEL_NODE)
7369 assert mylocks.issuperset(frozenset(instance.all_nodes))
7370 _CreateDisks(self, instance, to_skip=to_skip)
7373 class LUInstanceRename(LogicalUnit):
7374 """Rename an instance.
7376 """
7377 HPATH = "instance-rename"
7378 HTYPE = constants.HTYPE_INSTANCE
7380 def CheckArguments(self):
7381 """Check arguments.
7383 """
7384 if self.op.ip_check and not self.op.name_check:
7385 # TODO: make the ip check more flexible and not depend on the name check
7386 raise errors.OpPrereqError("IP address check requires a name check",
7387 errors.ECODE_INVAL)
7389 def BuildHooksEnv(self):
7390 """Build hooks env.
7392 This runs on master, primary and secondary nodes of the instance.
7394 """
7395 env = _BuildInstanceHookEnvByObject(self, self.instance)
7396 env["INSTANCE_NEW_NAME"] = self.op.new_name
7397 return env
7399 def BuildHooksNodes(self):
7400 """Build hooks nodes.
7402 """
7403 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7404 return (nl, nl)
7406 def CheckPrereq(self):
7407 """Check prerequisites.
7409 This checks that the instance is in the cluster and is not running.
7411 """
7412 self.op.instance_name = _ExpandInstanceName(self.cfg,
7413 self.op.instance_name)
7414 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7415 assert instance is not None
7416 _CheckNodeOnline(self, instance.primary_node)
7417 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7418 msg="cannot rename")
7419 self.instance = instance
7421 new_name = self.op.new_name
7422 if self.op.name_check:
7423 hostname = netutils.GetHostname(name=new_name)
7424 if hostname.name != new_name:
7425 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7426 hostname.name)
7427 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7428 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7429 " same as given hostname '%s'") %
7430 (hostname.name, self.op.new_name),
7431 errors.ECODE_INVAL)
7432 new_name = self.op.new_name = hostname.name
7433 if (self.op.ip_check and
7434 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7435 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7436 (hostname.ip, new_name),
7437 errors.ECODE_NOTUNIQUE)
7439 instance_list = self.cfg.GetInstanceList()
7440 if new_name in instance_list and new_name != instance.name:
7441 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7442 new_name, errors.ECODE_EXISTS)
7444 def Exec(self, feedback_fn):
7445 """Rename the instance.
7447 """
7448 inst = self.instance
7449 old_name = inst.name
7451 rename_file_storage = False
7452 if (inst.disk_template in constants.DTS_FILEBASED and
7453 self.op.new_name != inst.name):
7454 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7455 rename_file_storage = True
7457 self.cfg.RenameInstance(inst.name, self.op.new_name)
7458 # Change the instance lock. This is definitely safe while we hold the BGL.
7459 # Otherwise the new lock would have to be added in acquired mode.
7461 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7462 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7464 # re-read the instance from the configuration after rename
7465 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7467 if rename_file_storage:
7468 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7469 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7470 old_file_storage_dir,
7471 new_file_storage_dir)
7472 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7473 " (but the instance has been renamed in Ganeti)" %
7474 (inst.primary_node, old_file_storage_dir,
7475 new_file_storage_dir))
7477 _StartInstanceDisks(self, inst, None)
7478 try:
7479 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7480 old_name, self.op.debug_level)
7481 msg = result.fail_msg
7482 if msg:
7483 msg = ("Could not run OS rename script for instance %s on node %s"
7484 " (but the instance has been renamed in Ganeti): %s" %
7485 (inst.name, inst.primary_node, msg))
7486 self.proc.LogWarning(msg)
7487 finally:
7488 _ShutdownInstanceDisks(self, inst)
7490 return inst.name
7493 class LUInstanceRemove(LogicalUnit):
7494 """Remove an instance.
7497 HPATH = "instance-remove"
7498 HTYPE = constants.HTYPE_INSTANCE
7501 def ExpandNames(self):
7502 self._ExpandAndLockInstance()
7503 self.needed_locks[locking.LEVEL_NODE] = []
7504 self.needed_locks[locking.LEVEL_NODE_RES] = []
7505 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7507 def DeclareLocks(self, level):
7508 if level == locking.LEVEL_NODE:
7509 self._LockInstancesNodes()
7510 elif level == locking.LEVEL_NODE_RES:
7512 self.needed_locks[locking.LEVEL_NODE_RES] = \
7513 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7515 def BuildHooksEnv(self):
7518 This runs on master, primary and secondary nodes of the instance.
7521 env = _BuildInstanceHookEnvByObject(self, self.instance)
7522 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7525 def BuildHooksNodes(self):
7526 """Build hooks nodes.
7529 nl = [self.cfg.GetMasterNode()]
7530 nl_post = list(self.instance.all_nodes) + nl
7531 return (nl, nl_post)
7533 def CheckPrereq(self):
7534 """Check prerequisites.
7536 This checks that the instance is in the cluster.
7539 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7540 assert self.instance is not None, \
7541 "Cannot retrieve locked instance %s" % self.op.instance_name
7543 def Exec(self, feedback_fn):
7544 """Remove the instance.
7547 instance = self.instance
7548 logging.info("Shutting down instance %s on node %s",
7549 instance.name, instance.primary_node)
7551 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7552 self.op.shutdown_timeout)
7553 msg = result.fail_msg
7554 if msg:
7555 if self.op.ignore_failures:
7556 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7557 else:
7558 raise errors.OpExecError("Could not shutdown instance %s on"
7559 " node %s: %s" %
7560 (instance.name, instance.primary_node, msg))
7562 assert (self.owned_locks(locking.LEVEL_NODE) ==
7563 self.owned_locks(locking.LEVEL_NODE_RES))
7564 assert not (set(instance.all_nodes) -
7565 self.owned_locks(locking.LEVEL_NODE)), \
7566 "Not owning correct locks"
7568 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7571 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7572 """Utility function to remove an instance.
7575 logging.info("Removing block devices for instance %s", instance.name)
7577 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7578 if not ignore_failures:
7579 raise errors.OpExecError("Can't remove instance's disks")
7580 feedback_fn("Warning: can't remove instance's disks")
7582 logging.info("Removing instance %s out of cluster config", instance.name)
7584 lu.cfg.RemoveInstance(instance.name)
7586 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7587 "Instance lock removal conflict"
7589 # Remove lock for the instance
7590 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7593 class LUInstanceQuery(NoHooksLU):
7594 """Logical unit for querying instances.
7597 # pylint: disable=W0142
7600 def CheckArguments(self):
7601 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7602 self.op.output_fields, self.op.use_locking)
7604 def ExpandNames(self):
7605 self.iq.ExpandNames(self)
7607 def DeclareLocks(self, level):
7608 self.iq.DeclareLocks(self, level)
7610 def Exec(self, feedback_fn):
7611 return self.iq.OldStyleQuery(self)
7614 class LUInstanceFailover(LogicalUnit):
7615 """Failover an instance.
7618 HPATH = "instance-failover"
7619 HTYPE = constants.HTYPE_INSTANCE
7622 def CheckArguments(self):
7623 """Check the arguments.
7626 self.iallocator = getattr(self.op, "iallocator", None)
7627 self.target_node = getattr(self.op, "target_node", None)
7629 def ExpandNames(self):
7630 self._ExpandAndLockInstance()
7632 if self.op.target_node is not None:
7633 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7635 self.needed_locks[locking.LEVEL_NODE] = []
7636 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7638 self.needed_locks[locking.LEVEL_NODE_RES] = []
7639 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7641 ignore_consistency = self.op.ignore_consistency
7642 shutdown_timeout = self.op.shutdown_timeout
7643 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7644 cleanup=False,
7645 failover=True,
7646 ignore_consistency=ignore_consistency,
7647 shutdown_timeout=shutdown_timeout,
7648 ignore_ipolicy=self.op.ignore_ipolicy)
7649 self.tasklets = [self._migrater]
7651 def DeclareLocks(self, level):
7652 if level == locking.LEVEL_NODE:
7653 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7654 if instance.disk_template in constants.DTS_EXT_MIRROR:
7655 if self.op.target_node is None:
7656 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7657 else:
7658 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7659 self.op.target_node]
7660 del self.recalculate_locks[locking.LEVEL_NODE]
7661 else:
7662 self._LockInstancesNodes()
7663 elif level == locking.LEVEL_NODE_RES:
7665 self.needed_locks[locking.LEVEL_NODE_RES] = \
7666 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7668 def BuildHooksEnv(self):
7671 This runs on master, primary and secondary nodes of the instance.
7674 instance = self._migrater.instance
7675 source_node = instance.primary_node
7676 target_node = self.op.target_node
7677 env = {
7678 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7679 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7680 "OLD_PRIMARY": source_node,
7681 "NEW_PRIMARY": target_node,
7684 if instance.disk_template in constants.DTS_INT_MIRROR:
7685 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7686 env["NEW_SECONDARY"] = source_node
7688 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7690 env.update(_BuildInstanceHookEnvByObject(self, instance))
7694 def BuildHooksNodes(self):
7695 """Build hooks nodes.
7698 instance = self._migrater.instance
7699 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7700 return (nl, nl + [instance.primary_node])
7703 class LUInstanceMigrate(LogicalUnit):
7704 """Migrate an instance.
7706 This is migration without shutting down, compared to the failover,
7707 which is done with shutdown.
7710 HPATH = "instance-migrate"
7711 HTYPE = constants.HTYPE_INSTANCE
7714 def ExpandNames(self):
7715 self._ExpandAndLockInstance()
7717 if self.op.target_node is not None:
7718 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7720 self.needed_locks[locking.LEVEL_NODE] = []
7721 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7723 self.needed_locks[locking.LEVEL_NODE_RES] = []
7724 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7726 self._migrater = \
7727 TLMigrateInstance(self, self.op.instance_name,
7728 cleanup=self.op.cleanup,
7729 failover=False,
7730 fallback=self.op.allow_failover,
7731 allow_runtime_changes=self.op.allow_runtime_changes,
7732 ignore_ipolicy=self.op.ignore_ipolicy)
7733 self.tasklets = [self._migrater]
7735 def DeclareLocks(self, level):
7736 if level == locking.LEVEL_NODE:
7737 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7738 if instance.disk_template in constants.DTS_EXT_MIRROR:
7739 if self.op.target_node is None:
7740 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7741 else:
7742 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7743 self.op.target_node]
7744 del self.recalculate_locks[locking.LEVEL_NODE]
7745 else:
7746 self._LockInstancesNodes()
7747 elif level == locking.LEVEL_NODE_RES:
7749 self.needed_locks[locking.LEVEL_NODE_RES] = \
7750 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7752 def BuildHooksEnv(self):
7755 This runs on master, primary and secondary nodes of the instance.
7758 instance = self._migrater.instance
7759 source_node = instance.primary_node
7760 target_node = self.op.target_node
7761 env = _BuildInstanceHookEnvByObject(self, instance)
7762 env.update({
7763 "MIGRATE_LIVE": self._migrater.live,
7764 "MIGRATE_CLEANUP": self.op.cleanup,
7765 "OLD_PRIMARY": source_node,
7766 "NEW_PRIMARY": target_node,
7767 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7770 if instance.disk_template in constants.DTS_INT_MIRROR:
7771 env["OLD_SECONDARY"] = target_node
7772 env["NEW_SECONDARY"] = source_node
7774 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7778 def BuildHooksNodes(self):
7779 """Build hooks nodes.
7782 instance = self._migrater.instance
7783 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7784 return (nl, nl + [instance.primary_node])
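# Note: for DTS_INT_MIRROR (e.g. DRBD) instances the environment built above
# intentionally swaps the secondary roles; with hypothetical nodes node1
# (current primary) and node2 (migration target):
#   OLD_PRIMARY=node1  NEW_PRIMARY=node2
#   OLD_SECONDARY=node2  NEW_SECONDARY=node1
# since after a successful migration the former primary becomes the mirror's
# new secondary.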
7787 class LUInstanceMove(LogicalUnit):
7788 """Move an instance by data-copying.
7791 HPATH = "instance-move"
7792 HTYPE = constants.HTYPE_INSTANCE
7795 def ExpandNames(self):
7796 self._ExpandAndLockInstance()
7797 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7798 self.op.target_node = target_node
7799 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7800 self.needed_locks[locking.LEVEL_NODE_RES] = []
7801 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7803 def DeclareLocks(self, level):
7804 if level == locking.LEVEL_NODE:
7805 self._LockInstancesNodes(primary_only=True)
7806 elif level == locking.LEVEL_NODE_RES:
7808 self.needed_locks[locking.LEVEL_NODE_RES] = \
7809 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7811 def BuildHooksEnv(self):
7814 This runs on master, primary and secondary nodes of the instance.
7818 "TARGET_NODE": self.op.target_node,
7819 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7821 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7824 def BuildHooksNodes(self):
7825 """Build hooks nodes.
7828 nl = [
7829 self.cfg.GetMasterNode(),
7830 self.instance.primary_node,
7831 self.op.target_node,
7832 ]
7833 return (nl, nl)
7835 def CheckPrereq(self):
7836 """Check prerequisites.
7838 This checks that the instance is in the cluster.
7841 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7842 assert self.instance is not None, \
7843 "Cannot retrieve locked instance %s" % self.op.instance_name
7845 node = self.cfg.GetNodeInfo(self.op.target_node)
7846 assert node is not None, \
7847 "Cannot retrieve locked node %s" % self.op.target_node
7849 self.target_node = target_node = node.name
7851 if target_node == instance.primary_node:
7852 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7853 (instance.name, target_node),
7854 errors.ECODE_STATE)
7856 bep = self.cfg.GetClusterInfo().FillBE(instance)
7858 for idx, dsk in enumerate(instance.disks):
7859 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7860 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7861 " cannot copy" % idx, errors.ECODE_STATE)
7863 _CheckNodeOnline(self, target_node)
7864 _CheckNodeNotDrained(self, target_node)
7865 _CheckNodeVmCapable(self, target_node)
7866 cluster = self.cfg.GetClusterInfo()
7867 group_info = self.cfg.GetNodeGroup(node.group)
7868 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
7869 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7870 ignore=self.op.ignore_ipolicy)
7872 if instance.admin_state == constants.ADMINST_UP:
7873 # check memory requirements on the secondary node
7874 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7875 instance.name, bep[constants.BE_MAXMEM],
7876 instance.hypervisor)
7877 else:
7878 self.LogInfo("Not checking memory on the secondary node as"
7879 " instance will not be started")
7881 # check bridge existence
7882 _CheckInstanceBridgesExist(self, instance, node=target_node)
7884 def Exec(self, feedback_fn):
7885 """Move an instance.
7887 The move is done by shutting it down on its present node, copying
7888 the data over (slow) and starting it on the new node.
7891 instance = self.instance
7893 source_node = instance.primary_node
7894 target_node = self.target_node
7896 self.LogInfo("Shutting down instance %s on source node %s",
7897 instance.name, source_node)
7899 assert (self.owned_locks(locking.LEVEL_NODE) ==
7900 self.owned_locks(locking.LEVEL_NODE_RES))
7902 result = self.rpc.call_instance_shutdown(source_node, instance,
7903 self.op.shutdown_timeout)
7904 msg = result.fail_msg
7905 if msg:
7906 if self.op.ignore_consistency:
7907 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7908 " Proceeding anyway. Please make sure node"
7909 " %s is down. Error details: %s",
7910 instance.name, source_node, source_node, msg)
7912 raise errors.OpExecError("Could not shutdown instance %s on"
7914 (instance.name, source_node, msg))
7916 # create the target disks
7917 try:
7918 _CreateDisks(self, instance, target_node=target_node)
7919 except errors.OpExecError:
7920 self.LogWarning("Device creation failed, reverting...")
7921 try:
7922 _RemoveDisks(self, instance, target_node=target_node)
7923 finally:
7924 self.cfg.ReleaseDRBDMinors(instance.name)
7925 raise
7927 cluster_name = self.cfg.GetClusterInfo().cluster_name
7929 errs = []
7930 # activate, get path, copy the data over
7931 for idx, disk in enumerate(instance.disks):
7932 self.LogInfo("Copying data for disk %d", idx)
7933 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7934 instance.name, True, idx)
7935 if result.fail_msg:
7936 self.LogWarning("Can't assemble newly created disk %d: %s",
7937 idx, result.fail_msg)
7938 errs.append(result.fail_msg)
7939 continue
7940 dev_path = result.payload
7941 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7942 target_node, dev_path,
7943 cluster_name)
7944 if result.fail_msg:
7945 self.LogWarning("Can't copy data over for disk %d: %s",
7946 idx, result.fail_msg)
7947 errs.append(result.fail_msg)
7951 self.LogWarning("Some disks failed to copy, aborting")
7953 _RemoveDisks(self, instance, target_node=target_node)
7955 self.cfg.ReleaseDRBDMinors(instance.name)
7956 raise errors.OpExecError("Errors during disk copy: %s" %
7959 instance.primary_node = target_node
7960 self.cfg.Update(instance, feedback_fn)
7962 self.LogInfo("Removing the disks on the original node")
7963 _RemoveDisks(self, instance, target_node=source_node)
7965 # Only start the instance if it's marked as up
7966 if instance.admin_state == constants.ADMINST_UP:
7967 self.LogInfo("Starting instance %s on node %s",
7968 instance.name, target_node)
7970 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7971 ignore_secondaries=True)
7972 if not disks_ok:
7973 _ShutdownInstanceDisks(self, instance)
7974 raise errors.OpExecError("Can't activate the instance's disks")
7976 result = self.rpc.call_instance_start(target_node,
7977 (instance, None, None), False)
7978 msg = result.fail_msg
7979 if msg:
7980 _ShutdownInstanceDisks(self, instance)
7981 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7982 (instance.name, target_node, msg))
7985 class LUNodeMigrate(LogicalUnit):
7986 """Migrate all instances from a node.
7989 HPATH = "node-migrate"
7990 HTYPE = constants.HTYPE_NODE
7993 def CheckArguments(self):
7994 pass
7996 def ExpandNames(self):
7997 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7999 self.share_locks = _ShareAll()
8000 self.needed_locks = {
8001 locking.LEVEL_NODE: [self.op.node_name],
8002 }
8004 def BuildHooksEnv(self):
8007 This runs on the master, the primary and all the secondaries.
8011 "NODE_NAME": self.op.node_name,
8012 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8015 def BuildHooksNodes(self):
8016 """Build hooks nodes.
8019 nl = [self.cfg.GetMasterNode()]
8020 return (nl, nl)
8022 def CheckPrereq(self):
8023 pass
8025 def Exec(self, feedback_fn):
8026 # Prepare jobs for migration instances
8027 allow_runtime_changes = self.op.allow_runtime_changes
8028 jobs = [
8029 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8032 iallocator=self.op.iallocator,
8033 target_node=self.op.target_node,
8034 allow_runtime_changes=allow_runtime_changes,
8035 ignore_ipolicy=self.op.ignore_ipolicy)]
8036 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
8037 ]
8039 # TODO: Run iallocator in this opcode and pass correct placement options to
8040 # OpInstanceMigrate. Since other jobs can modify the cluster between
8041 # running the iallocator and the actual migration, a good consistency model
8042 # will have to be found.
8044 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8045 frozenset([self.op.node_name]))
8047 return ResultWithJobs(jobs)
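# The submitted value is a list of single-opcode jobs, one per primary
# instance of the node, schematically (instance names hypothetical):
#   jobs = [[OpInstanceMigrate(instance_name="inst1", ...)],
#           [OpInstanceMigrate(instance_name="inst2", ...)]]
# so each instance migrates in a job of its own and one failed migration
# does not abort the others.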
8050 class TLMigrateInstance(Tasklet):
8051 """Tasklet class for instance migration.
8054 @ivar live: whether the migration will be done live or non-live;
8055 this variable is initialized only after CheckPrereq has run
8056 @type cleanup: boolean
8057 @ivar cleanup: Whether we are cleaning up from a failed migration
8058 @type iallocator: string
8059 @ivar iallocator: The iallocator used to determine target_node
8060 @type target_node: string
8061 @ivar target_node: If given, the target_node to reallocate the instance to
8062 @type failover: boolean
8063 @ivar failover: Whether operation results in failover or migration
8064 @type fallback: boolean
8065 @ivar fallback: Whether fallback to failover is allowed if migration is not
8066 possible
8067 @type ignore_consistency: boolean
8068 @ivar ignore_consistency: Whether we should ignore consistency between source
8069 and target node
8070 @type shutdown_timeout: int
8071 @ivar shutdown_timeout: the timeout to use for the shutdown in case of failover
8072 @type ignore_ipolicy: bool
8073 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8078 _MIGRATION_POLL_INTERVAL = 1 # seconds
8079 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8081 def __init__(self, lu, instance_name, cleanup=False,
8082 failover=False, fallback=False,
8083 ignore_consistency=False,
8084 allow_runtime_changes=True,
8085 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
8086 ignore_ipolicy=False):
8087 """Initializes this class.
8090 Tasklet.__init__(self, lu)
8093 self.instance_name = instance_name
8094 self.cleanup = cleanup
8095 self.live = False # will be overridden later
8096 self.failover = failover
8097 self.fallback = fallback
8098 self.ignore_consistency = ignore_consistency
8099 self.shutdown_timeout = shutdown_timeout
8100 self.ignore_ipolicy = ignore_ipolicy
8101 self.allow_runtime_changes = allow_runtime_changes
8103 def CheckPrereq(self):
8104 """Check prerequisites.
8106 This checks that the instance is in the cluster.
8109 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8110 instance = self.cfg.GetInstanceInfo(instance_name)
8111 assert instance is not None
8112 self.instance = instance
8113 cluster = self.cfg.GetClusterInfo()
8115 if (not self.cleanup and
8116 not instance.admin_state == constants.ADMINST_UP and
8117 not self.failover and self.fallback):
8118 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8119 " switching to failover")
8120 self.failover = True
8122 if instance.disk_template not in constants.DTS_MIRRORED:
8123 if self.failover:
8124 text = "failovers"
8125 else:
8126 text = "migrations"
8127 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8128 " %s" % (instance.disk_template, text),
8129 errors.ECODE_STATE)
8131 if instance.disk_template in constants.DTS_EXT_MIRROR:
8132 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8134 if self.lu.op.iallocator:
8135 self._RunAllocator()
8136 else:
8137 # We set self.target_node as it is required by
8138 # BuildHooksEnv
8139 self.target_node = self.lu.op.target_node
8141 # Check that the target node is correct in terms of instance policy
8142 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8143 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8144 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8145 group_info)
8146 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8147 ignore=self.ignore_ipolicy)
8149 # self.target_node is already populated, either directly or by the
8150 # iallocator run
8151 target_node = self.target_node
8152 if self.target_node == instance.primary_node:
8153 raise errors.OpPrereqError("Cannot migrate instance %s"
8154 " to its primary (%s)" %
8155 (instance.name, instance.primary_node),
8156 errors.ECODE_STATE)
8158 if len(self.lu.tasklets) == 1:
8159 # It is safe to release locks only when we're the only tasklet
8161 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8162 keep=[instance.primary_node, self.target_node])
8164 else:
8165 secondary_nodes = instance.secondary_nodes
8166 if not secondary_nodes:
8167 raise errors.ConfigurationError("No secondary node but using"
8168 " %s disk template" %
8169 instance.disk_template)
8170 target_node = secondary_nodes[0]
8171 if self.lu.op.iallocator or (self.lu.op.target_node and
8172 self.lu.op.target_node != target_node):
8173 if self.failover:
8174 text = "failed over"
8175 else:
8176 text = "migrated"
8177 raise errors.OpPrereqError("Instances with disk template %s cannot"
8178 " be %s to arbitrary nodes"
8179 " (neither an iallocator nor a target"
8180 " node can be passed)" %
8181 (instance.disk_template, text),
8182 errors.ECODE_INVAL)
8183 nodeinfo = self.cfg.GetNodeInfo(target_node)
8184 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8185 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8186 group_info)
8187 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8188 ignore=self.ignore_ipolicy)
8190 i_be = cluster.FillBE(instance)
8192 # check memory requirements on the secondary node
8193 if (not self.cleanup and
8194 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8195 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8196 "migrating instance %s" %
8198 i_be[constants.BE_MINMEM],
8199 instance.hypervisor)
8201 self.lu.LogInfo("Not checking memory on the secondary node as"
8202 " instance will not be started")
8204 # check if failover must be forced instead of migration
8205 if (not self.cleanup and not self.failover and
8206 i_be[constants.BE_ALWAYS_FAILOVER]):
8207 self.lu.LogInfo("Instance configured to always failover; fallback"
8209 self.failover = True
8211 # check bridge existence
8212 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8214 if not self.cleanup:
8215 _CheckNodeNotDrained(self.lu, target_node)
8216 if not self.failover:
8217 result = self.rpc.call_instance_migratable(instance.primary_node,
8218 instance)
8219 if result.fail_msg and self.fallback:
8220 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8221 " failover")
8222 self.failover = True
8223 else:
8224 result.Raise("Can't migrate, please use failover",
8225 prereq=True, ecode=errors.ECODE_STATE)
8227 assert not (self.failover and self.cleanup)
8229 if not self.failover:
8230 if self.lu.op.live is not None and self.lu.op.mode is not None:
8231 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8232 " parameters are accepted",
8234 if self.lu.op.live is not None:
8235 if self.lu.op.live:
8236 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8237 else:
8238 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8239 # reset the 'live' parameter to None so that repeated
8240 # invocations of CheckPrereq do not raise an exception
8241 self.lu.op.live = None
8242 elif self.lu.op.mode is None:
8243 # read the default value from the hypervisor
8244 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8245 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8247 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8248 else:
8249 # Failover is never live
8250 self.live = False
8252 if not (self.failover or self.cleanup):
8253 remote_info = self.rpc.call_instance_info(instance.primary_node,
8254 instance.name,
8255 instance.hypervisor)
8256 remote_info.Raise("Error checking instance on node %s" %
8257 instance.primary_node)
8258 instance_running = bool(remote_info.payload)
8259 if instance_running:
8260 self.current_mem = int(remote_info.payload["memory"])
8262 def _RunAllocator(self):
8263 """Run the allocator based on input opcode.
8266 # FIXME: add a self.ignore_ipolicy option
8267 req = iallocator.IAReqRelocate(name=self.instance_name,
8268 relocate_from=[self.instance.primary_node])
8269 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8271 ial.Run(self.lu.op.iallocator)
8273 if not ial.success:
8274 raise errors.OpPrereqError("Can't compute nodes using"
8275 " iallocator '%s': %s" %
8276 (self.lu.op.iallocator, ial.info),
8277 errors.ECODE_NORES)
8278 self.target_node = ial.result[0]
8279 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8280 self.instance_name, self.lu.op.iallocator,
8281 utils.CommaJoin(ial.result))
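# Minimal sketch of the relocation request issued above (allocator name and
# node/instance names are hypothetical):
#   req = iallocator.IAReqRelocate(name="inst1.example.com",
#                                  relocate_from=["node1.example.com"])
#   ial = iallocator.IAllocator(cfg, rpc, req)
#   ial.Run("hail")
#   target = ial.result[0]
# On failure ial.success is False and ial.info holds the allocator's error.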
8283 def _WaitUntilSync(self):
8284 """Poll with custom rpc for disk sync.
8286 This uses our own step-based rpc call.
8289 self.feedback_fn("* wait until resync is done")
8293 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8295 (self.instance.disks,
8298 for node, nres in result.items():
8299 nres.Raise("Cannot resync disks on node %s" % node)
8300 node_done, node_percent = nres.payload
8301 all_done = all_done and node_done
8302 if node_percent is not None:
8303 min_percent = min(min_percent, node_percent)
8305 if min_percent < 100:
8306 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8309 def _EnsureSecondary(self, node):
8310 """Demote a node to secondary.
8313 self.feedback_fn("* switching node %s to secondary mode" % node)
8315 for dev in self.instance.disks:
8316 self.cfg.SetDiskID(dev, node)
8318 result = self.rpc.call_blockdev_close(node, self.instance.name,
8319 self.instance.disks)
8320 result.Raise("Cannot change disk to secondary on node %s" % node)
8322 def _GoStandalone(self):
8323 """Disconnect from the network.
8326 self.feedback_fn("* changing into standalone mode")
8327 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8328 self.instance.disks)
8329 for node, nres in result.items():
8330 nres.Raise("Cannot disconnect disks node %s" % node)
8332 def _GoReconnect(self, multimaster):
8333 """Reconnect to the network.
8339 msg = "single-master"
8340 self.feedback_fn("* changing disks into %s mode" % msg)
8341 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8342 (self.instance.disks, self.instance),
8343 self.instance.name, multimaster)
8344 for node, nres in result.items():
8345 nres.Raise("Cannot change disks config on node %s" % node)
8347 def _ExecCleanup(self):
8348 """Try to cleanup after a failed migration.
8350 The cleanup is done by:
8351 - check that the instance is running only on one node
8352 (and update the config if needed)
8353 - change disks on its secondary node to secondary
8354 - wait until disks are fully synchronized
8355 - disconnect from the network
8356 - change disks into single-master mode
8357 - wait again until disks are fully synchronized
8360 instance = self.instance
8361 target_node = self.target_node
8362 source_node = self.source_node
8364 # check running on only one node
8365 self.feedback_fn("* checking where the instance actually runs"
8366 " (if this hangs, the hypervisor might be in"
8368 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8369 for node, result in ins_l.items():
8370 result.Raise("Can't contact node %s" % node)
8372 runningon_source = instance.name in ins_l[source_node].payload
8373 runningon_target = instance.name in ins_l[target_node].payload
8375 if runningon_source and runningon_target:
8376 raise errors.OpExecError("Instance seems to be running on two nodes,"
8377 " or the hypervisor is confused; you will have"
8378 " to ensure manually that it runs only on one"
8379 " and restart this operation")
8381 if not (runningon_source or runningon_target):
8382 raise errors.OpExecError("Instance does not seem to be running at all;"
8383 " in this case it's safer to repair by"
8384 " running 'gnt-instance stop' to ensure disk"
8385 " shutdown, and then restarting it")
8387 if runningon_target:
8388 # the migration has actually succeeded, we need to update the config
8389 self.feedback_fn("* instance running on secondary node (%s),"
8390 " updating config" % target_node)
8391 instance.primary_node = target_node
8392 self.cfg.Update(instance, self.feedback_fn)
8393 demoted_node = source_node
8395 self.feedback_fn("* instance confirmed to be running on its"
8396 " primary node (%s)" % source_node)
8397 demoted_node = target_node
8399 if instance.disk_template in constants.DTS_INT_MIRROR:
8400 self._EnsureSecondary(demoted_node)
8401 try:
8402 self._WaitUntilSync()
8403 except errors.OpExecError:
8404 # we ignore errors here, since if the device is standalone, it
8405 # won't be able to sync
8406 pass
8407 self._GoStandalone()
8408 self._GoReconnect(False)
8409 self._WaitUntilSync()
8411 self.feedback_fn("* done")
8413 def _RevertDiskStatus(self):
8414 """Try to revert the disk status after a failed migration.
8417 target_node = self.target_node
8418 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8419 return
8421 try:
8422 self._EnsureSecondary(target_node)
8423 self._GoStandalone()
8424 self._GoReconnect(False)
8425 self._WaitUntilSync()
8426 except errors.OpExecError, err:
8427 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8428 " please try to recover the instance manually;"
8429 " error '%s'" % str(err))
8431 def _AbortMigration(self):
8432 """Call the hypervisor code to abort a started migration.
8435 instance = self.instance
8436 target_node = self.target_node
8437 source_node = self.source_node
8438 migration_info = self.migration_info
8440 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8441 instance,
8442 migration_info,
8443 False)
8444 abort_msg = abort_result.fail_msg
8445 if abort_msg:
8446 logging.error("Aborting migration failed on target node %s: %s",
8447 target_node, abort_msg)
8448 # Don't raise an exception here, as we still have to try to revert the
8449 # disk status, even if this step failed.
8451 abort_result = self.rpc.call_instance_finalize_migration_src(
8452 source_node, instance, False, self.live)
8453 abort_msg = abort_result.fail_msg
8454 if abort_msg:
8455 logging.error("Aborting migration failed on source node %s: %s",
8456 source_node, abort_msg)
8458 def _ExecMigration(self):
8459 """Migrate an instance.
8461 The migrate is done by:
8462 - change the disks into dual-master mode
8463 - wait until disks are fully synchronized again
8464 - migrate the instance
8465 - change disks on the new secondary node (the old primary) to secondary
8466 - wait until disks are fully synchronized
8467 - change disks into single-master mode
8470 instance = self.instance
8471 target_node = self.target_node
8472 source_node = self.source_node
8474 # Check for hypervisor version mismatch and warn the user.
8475 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8476 None, [self.instance.hypervisor])
8477 for ninfo in nodeinfo.values():
8478 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8480 (_, _, (src_info, )) = nodeinfo[source_node].payload
8481 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8483 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8484 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8485 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8486 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8487 if src_version != dst_version:
8488 self.feedback_fn("* warning: hypervisor version mismatch between"
8489 " source (%s) and target (%s) node" %
8490 (src_version, dst_version))
8492 self.feedback_fn("* checking disk consistency between source and target")
8493 for (idx, dev) in enumerate(instance.disks):
8494 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8495 raise errors.OpExecError("Disk %s is degraded or not fully"
8496 " synchronized on target node,"
8497 " aborting migration" % idx)
8499 if self.current_mem > self.tgt_free_mem:
8500 if not self.allow_runtime_changes:
8501 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8502 " free memory to fit instance %s on target"
8503 " node %s (have %dMB, need %dMB)" %
8504 (instance.name, target_node,
8505 self.tgt_free_mem, self.current_mem))
8506 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8507 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8508 instance,
8509 self.tgt_free_mem)
8510 rpcres.Raise("Cannot modify instance runtime memory")
8512 # First get the migration information from the remote node
8513 result = self.rpc.call_migration_info(source_node, instance)
8514 msg = result.fail_msg
8515 if msg:
8516 log_err = ("Failed fetching source migration information from %s: %s" %
8517 (source_node, msg))
8518 logging.error(log_err)
8519 raise errors.OpExecError(log_err)
8521 self.migration_info = migration_info = result.payload
8523 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8524 # Then switch the disks to master/master mode
8525 self._EnsureSecondary(target_node)
8526 self._GoStandalone()
8527 self._GoReconnect(True)
8528 self._WaitUntilSync()
8530 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8531 result = self.rpc.call_accept_instance(target_node,
8532 instance,
8533 migration_info,
8534 self.nodes_ip[target_node])
8536 msg = result.fail_msg
8537 if msg:
8538 logging.error("Instance pre-migration failed, trying to revert"
8539 " disk status: %s", msg)
8540 self.feedback_fn("Pre-migration failed, aborting")
8541 self._AbortMigration()
8542 self._RevertDiskStatus()
8543 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8544 (instance.name, msg))
8546 self.feedback_fn("* migrating instance to %s" % target_node)
8547 result = self.rpc.call_instance_migrate(source_node, instance,
8548 self.nodes_ip[target_node],
8549 self.live)
8550 msg = result.fail_msg
8551 if msg:
8552 logging.error("Instance migration failed, trying to revert"
8553 " disk status: %s", msg)
8554 self.feedback_fn("Migration failed, aborting")
8555 self._AbortMigration()
8556 self._RevertDiskStatus()
8557 raise errors.OpExecError("Could not migrate instance %s: %s" %
8558 (instance.name, msg))
8560 self.feedback_fn("* starting memory transfer")
8561 last_feedback = time.time()
8562 while True:
8563 result = self.rpc.call_instance_get_migration_status(source_node,
8564 instance)
8565 msg = result.fail_msg
8566 ms = result.payload # MigrationStatus instance
8567 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8568 logging.error("Instance migration failed, trying to revert"
8569 " disk status: %s", msg)
8570 self.feedback_fn("Migration failed, aborting")
8571 self._AbortMigration()
8572 self._RevertDiskStatus()
8573 raise errors.OpExecError("Could not migrate instance %s: %s" %
8574 (instance.name, msg))
8576 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8577 self.feedback_fn("* memory transfer complete")
8580 if (utils.TimeoutExpired(last_feedback,
8581 self._MIGRATION_FEEDBACK_INTERVAL) and
8582 ms.transferred_ram is not None):
8583 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8584 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8585 last_feedback = time.time()
8587 time.sleep(self._MIGRATION_POLL_INTERVAL)
8589 result = self.rpc.call_instance_finalize_migration_src(source_node,
8590 instance,
8591 True,
8592 self.live)
8593 msg = result.fail_msg
8594 if msg:
8595 logging.error("Instance migration succeeded, but finalization failed"
8596 " on the source node: %s", msg)
8597 raise errors.OpExecError("Could not finalize instance migration: %s" %
8600 instance.primary_node = target_node
8602 # distribute new instance config to the other nodes
8603 self.cfg.Update(instance, self.feedback_fn)
8605 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8606 instance,
8607 migration_info,
8608 True)
8609 msg = result.fail_msg
8610 if msg:
8611 logging.error("Instance migration succeeded, but finalization failed"
8612 " on the target node: %s", msg)
8613 raise errors.OpExecError("Could not finalize instance migration: %s" %
8616 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8617 self._EnsureSecondary(source_node)
8618 self._WaitUntilSync()
8619 self._GoStandalone()
8620 self._GoReconnect(False)
8621 self._WaitUntilSync()
8623 # If the instance's disk template is `rbd' and there was a successful
8624 # migration, unmap the device from the source node.
8625 if self.instance.disk_template == constants.DT_RBD:
8626 disks = _ExpandCheckDisks(instance, instance.disks)
8627 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8629 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8630 msg = result.fail_msg
8631 if msg:
8632 logging.error("Migration was successful, but couldn't unmap the"
8633 " block device %s on source node %s: %s",
8634 disk.iv_name, source_node, msg)
8635 logging.error("You need to unmap the device %s manually on %s",
8636 disk.iv_name, source_node)
8638 self.feedback_fn("* done")
8640 def _ExecFailover(self):
8641 """Failover an instance.
8643 The failover is done by shutting it down on its present node and
8644 starting it on the secondary.
8647 instance = self.instance
8648 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8650 source_node = instance.primary_node
8651 target_node = self.target_node
8653 if instance.admin_state == constants.ADMINST_UP:
8654 self.feedback_fn("* checking disk consistency between source and target")
8655 for (idx, dev) in enumerate(instance.disks):
8656 # for drbd, these are drbd over lvm
8657 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8658 False):
8659 if primary_node.offline:
8660 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8661 " target node %s" %
8662 (primary_node.name, idx, target_node))
8663 elif not self.ignore_consistency:
8664 raise errors.OpExecError("Disk %s is degraded on target node,"
8665 " aborting failover" % idx)
8667 self.feedback_fn("* not checking disk consistency as instance is not"
8670 self.feedback_fn("* shutting down instance on source node")
8671 logging.info("Shutting down instance %s on node %s",
8672 instance.name, source_node)
8674 result = self.rpc.call_instance_shutdown(source_node, instance,
8675 self.shutdown_timeout)
8676 msg = result.fail_msg
8677 if msg:
8678 if self.ignore_consistency or primary_node.offline:
8679 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8680 " proceeding anyway; please make sure node"
8681 " %s is down; error details: %s",
8682 instance.name, source_node, source_node, msg)
8684 raise errors.OpExecError("Could not shutdown instance %s on"
8686 (instance.name, source_node, msg))
8688 self.feedback_fn("* deactivating the instance's disks on source node")
8689 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8690 raise errors.OpExecError("Can't shut down the instance's disks")
8692 instance.primary_node = target_node
8693 # distribute new instance config to the other nodes
8694 self.cfg.Update(instance, self.feedback_fn)
8696 # Only start the instance if it's marked as up
8697 if instance.admin_state == constants.ADMINST_UP:
8698 self.feedback_fn("* activating the instance's disks on target node %s" %
8700 logging.info("Starting instance %s on node %s",
8701 instance.name, target_node)
8703 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8704 ignore_secondaries=True)
8705 if not disks_ok:
8706 _ShutdownInstanceDisks(self.lu, instance)
8707 raise errors.OpExecError("Can't activate the instance's disks")
8709 self.feedback_fn("* starting the instance on the target node %s" %
8711 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8713 msg = result.fail_msg
8715 _ShutdownInstanceDisks(self.lu, instance)
8716 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8717 (instance.name, target_node, msg))
8719 def Exec(self, feedback_fn):
8720 """Perform the migration.
8723 self.feedback_fn = feedback_fn
8724 self.source_node = self.instance.primary_node
8726 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8727 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8728 self.target_node = self.instance.secondary_nodes[0]
8729 # Otherwise self.target_node has been populated either
8730 # directly, or through an iallocator.
8732 self.all_nodes = [self.source_node, self.target_node]
8733 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8734 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8737 feedback_fn("Failover instance %s" % self.instance.name)
8738 self._ExecFailover()
8740 feedback_fn("Migrating instance %s" % self.instance.name)
8743 return self._ExecCleanup()
8745 return self._ExecMigration()
8748 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8749 force_open):
8750 """Wrapper around L{_CreateBlockDevInner}.
8752 This method annotates the root device first.
8755 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8756 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8757 force_open)
8760 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8761 info, force_open):
8762 """Create a tree of block devices on a given node.
8764 If this device type has to be created on secondaries, create it and
8767 If not, just recurse to children keeping the same 'force' value.
8769 @attention: The device has to be annotated already.
8771 @param lu: the lu on whose behalf we execute
8772 @param node: the node on which to create the device
8773 @type instance: L{objects.Instance}
8774 @param instance: the instance which owns the device
8775 @type device: L{objects.Disk}
8776 @param device: the device to create
8777 @type force_create: boolean
8778 @param force_create: whether to force creation of this device; this
8779 will be changed to True whenever we find a device whose
8780 CreateOnSecondary() method returns True
8781 @param info: the extra 'metadata' we should attach to the device
8782 (this will be represented as a LVM tag)
8783 @type force_open: boolean
8784 @param force_open: this parameter will be passed to the
8785 L{backend.BlockdevCreate} function where it specifies
8786 whether we run on primary or not, and it affects both
8787 the child assembly and the device's own Open() execution
8790 if device.CreateOnSecondary():
8791 force_create = True
8793 if device.children:
8794 for child in device.children:
8795 _CreateBlockDevInner(lu, node, instance, child, force_create,
8796 info, force_open)
8798 if not force_create:
8799 return
8801 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8804 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8805 """Create a single block device on a given node.
8807 This will not recurse over children of the device, so they must be
8810 @param lu: the lu on whose behalf we execute
8811 @param node: the node on which to create the device
8812 @type instance: L{objects.Instance}
8813 @param instance: the instance which owns the device
8814 @type device: L{objects.Disk}
8815 @param device: the device to create
8816 @param info: the extra 'metadata' we should attach to the device
8817 (this will be represented as a LVM tag)
8818 @type force_open: boolean
8819 @param force_open: this parameter will be passed to the
8820 L{backend.BlockdevCreate} function where it specifies
8821 whether we run on primary or not, and it affects both
8822 the child assembly and the device's own Open() execution
8825 lu.cfg.SetDiskID(device, node)
8826 result = lu.rpc.call_blockdev_create(node, device, device.size,
8827 instance.name, force_open, info)
8828 result.Raise("Can't create block device %s on"
8829 " node %s for instance %s" % (device, node, instance.name))
8830 if device.physical_id is None:
8831 device.physical_id = result.payload
8834 def _GenerateUniqueNames(lu, exts):
8835 """Generate a suitable LV name.
8837 This will generate a logical volume name for the given instance.
8840 results = []
8841 for val in exts:
8842 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8843 results.append("%s%s" % (new_id, val))
8844 return results
8847 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8848 iv_name, p_minor, s_minor):
8849 """Generate a drbd8 device complete with its children.
8852 assert len(vgnames) == len(names) == 2
8853 port = lu.cfg.AllocatePort()
8854 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8856 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8857 logical_id=(vgnames[0], names[0]),
8858 params={})
8859 dev_meta = objects.Disk(dev_type=constants.LD_LV,
8860 size=constants.DRBD_META_SIZE,
8861 logical_id=(vgnames[1], names[1]),
8862 params={})
8863 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8864 logical_id=(primary, secondary, port,
8865 p_minor, s_minor,
8866 shared_secret),
8867 children=[dev_data, dev_meta],
8868 iv_name=iv_name, params={})
8869 return drbd_dev
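# The resulting device tree is one DRBD device backed by two LVs (sizes in
# MiB, names illustrative):
#   LD_DRBD8  size=1024, iv_name="disk/0"
#     +- LD_LV data ("xenvg", "<uuid>.disk0_data"), size=1024
#     +- LD_LV meta ("xenvg", "<uuid>.disk0_meta"), size=DRBD_META_SIZE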
8872 _DISK_TEMPLATE_NAME_PREFIX = {
8873 constants.DT_PLAIN: "",
8874 constants.DT_RBD: ".rbd",
8878 _DISK_TEMPLATE_DEVICE_TYPE = {
8879 constants.DT_PLAIN: constants.LD_LV,
8880 constants.DT_FILE: constants.LD_FILE,
8881 constants.DT_SHARED_FILE: constants.LD_FILE,
8882 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8883 constants.DT_RBD: constants.LD_RBD,
8884 }
8887 def _GenerateDiskTemplate(
8888 lu, template_name, instance_name, primary_node, secondary_nodes,
8889 disk_info, file_storage_dir, file_driver, base_index,
8890 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8891 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8892 """Generate the entire disk layout for a given template type.
8895 #TODO: compute space requirements
8897 vgname = lu.cfg.GetVGName()
8898 disk_count = len(disk_info)
8899 disks = []
8901 if template_name == constants.DT_DISKLESS:
8902 pass
8903 elif template_name == constants.DT_DRBD8:
8904 if len(secondary_nodes) != 1:
8905 raise errors.ProgrammerError("Wrong template configuration")
8906 remote_node = secondary_nodes[0]
8907 minors = lu.cfg.AllocateDRBDMinor(
8908 [primary_node, remote_node] * len(disk_info), instance_name)
8910 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8911 full_disk_params)
8912 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8914 names = []
8915 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8916 for i in range(disk_count)]):
8917 names.append(lv_prefix + "_data")
8918 names.append(lv_prefix + "_meta")
8919 for idx, disk in enumerate(disk_info):
8920 disk_index = idx + base_index
8921 data_vg = disk.get(constants.IDISK_VG, vgname)
8922 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8923 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8924 disk[constants.IDISK_SIZE],
8926 names[idx * 2:idx * 2 + 2],
8927 "disk/%d" % disk_index,
8928 minors[idx * 2], minors[idx * 2 + 1])
8929 disk_dev.mode = disk[constants.IDISK_MODE]
8930 disks.append(disk_dev)
8933 raise errors.ProgrammerError("Wrong template configuration")
8935 if template_name == constants.DT_FILE:
8937 elif template_name == constants.DT_SHARED_FILE:
8938 _req_shr_file_storage()
8940 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8941 if name_prefix is None:
8942 names = None
8943 else:
8944 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8945 (name_prefix, base_index + i)
8946 for i in range(disk_count)])
8948 if template_name == constants.DT_PLAIN:
8949 def logical_id_fn(idx, _, disk):
8950 vg = disk.get(constants.IDISK_VG, vgname)
8951 return (vg, names[idx])
8952 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8953 logical_id_fn = \
8954 lambda _, disk_index, disk: (file_driver,
8955 "%s/disk%d" % (file_storage_dir,
8956 disk_index))
8957 elif template_name == constants.DT_BLOCK:
8958 logical_id_fn = \
8959 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8960 disk[constants.IDISK_ADOPT])
8961 elif template_name == constants.DT_RBD:
8962 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8964 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8966 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8968 for idx, disk in enumerate(disk_info):
8969 disk_index = idx + base_index
8970 size = disk[constants.IDISK_SIZE]
8971 feedback_fn("* disk %s, size %s" %
8972 (disk_index, utils.FormatUnit(size, "h")))
8973 disks.append(objects.Disk(dev_type=dev_type, size=size,
8974 logical_id=logical_id_fn(idx, disk_index, disk),
8975 iv_name="disk/%d" % disk_index,
8976 mode=disk[constants.IDISK_MODE],
8977 params={}))
8979 return disks
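# The logical_id shapes produced by logical_id_fn above, per template
# (values illustrative):
#   DT_PLAIN:                ("xenvg", "<uuid>.disk0")
#   DT_FILE/DT_SHARED_FILE:  (file_driver, "<file_storage_dir>/disk0")
#   DT_BLOCK:                (BLOCKDEV_DRIVER_MANUAL, "/dev/sdb")
#   DT_RBD:                  ("rbd", "<uuid>.rbd.disk0")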
8982 def _GetInstanceInfoText(instance):
8983 """Compute that text that should be added to the disk's metadata.
8986 return "originstname+%s" % instance.name
8989 def _CalcEta(time_taken, written, total_size):
8990 """Calculates the ETA based on size written and total size.
8992 @param time_taken: The time taken so far
8993 @param written: amount written so far
8994 @param total_size: The total size of data to be written
8995 @return: The remaining time in seconds
8998 avg_time = time_taken / float(written)
8999 return (total_size - written) * avg_time
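# Worked example of the linear estimate above: with time_taken=30.0 seconds,
# written=256 and total_size=1024 (any consistent unit), avg_time is
# 30.0 / 256 = 0.1171875 s/unit, so the ETA is
# (1024 - 256) * 0.1171875 = 90.0 seconds.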
9002 def _WipeDisks(lu, instance, disks=None):
9003 """Wipes instance disks.
9005 @type lu: L{LogicalUnit}
9006 @param lu: the logical unit on whose behalf we execute
9007 @type instance: L{objects.Instance}
9008 @param instance: the instance whose disks we should create
9009 @return: the success of the wipe
9012 node = instance.primary_node
9014 if disks is None:
9015 disks = [(idx, disk, 0)
9016 for (idx, disk) in enumerate(instance.disks)]
9018 for (_, device, _) in disks:
9019 lu.cfg.SetDiskID(device, node)
9021 logging.info("Pausing synchronization of disks of instance '%s'",
9023 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9024 (map(compat.snd, disks),
9027 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9029 for idx, success in enumerate(result.payload):
9030 if not success:
9031 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9032 " failed", idx, instance.name)
9034 try:
9035 for (idx, device, offset) in disks:
9036 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9037 # at most MAX_WIPE_CHUNK. Truncating to integer to avoid rounding errors.
9038 wipe_chunk_size = \
9039 int(min(constants.MAX_WIPE_CHUNK,
9040 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
9042 size = device.size
9043 last_output = 0
9044 start_time = time.time()
9046 if offset == 0:
9047 info_text = ""
9048 else:
9049 info_text = (" (from %s to %s)" %
9050 (utils.FormatUnit(offset, "h"),
9051 utils.FormatUnit(size, "h")))
9053 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9055 logging.info("Wiping disk %d for instance %s on node %s using"
9056 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9058 while offset < size:
9059 wipe_size = min(wipe_chunk_size, size - offset)
9061 logging.debug("Wiping disk %d, offset %s, chunk %s",
9062 idx, offset, wipe_size)
9064 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9065 wipe_size)
9066 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9067 (idx, offset, wipe_size))
9069 offset += wipe_size
9070 now = time.time()
9071 if now - last_output >= 60:
9072 eta = _CalcEta(now - start_time, offset, size)
9073 lu.LogInfo(" - done: %.1f%% ETA: %s",
9074 offset / float(size) * 100, utils.FormatSeconds(eta))
9075 last_output = now
9076 finally:
9077 logging.info("Resuming synchronization of disks for instance '%s'",
9078 instance.name)
9080 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9081 (map(compat.snd, disks),
9082 instance),
9083 False)
9085 if result.fail_msg:
9086 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9087 node, result.fail_msg)
9088 else:
9089 for idx, success in enumerate(result.payload):
9090 if not success:
9091 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9092 " failed", idx, instance.name)
9095 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9096 """Create all disks for an instance.
9098 This abstracts away some work from AddInstance.
9100 @type lu: L{LogicalUnit}
9101 @param lu: the logical unit on whose behalf we execute
9102 @type instance: L{objects.Instance}
9103 @param instance: the instance whose disks we should create
9105 @param to_skip: list of indices to skip
9106 @type target_node: string
9107 @param target_node: if passed, overrides the target node for creation
9109 @return: the success of the creation
9112 info = _GetInstanceInfoText(instance)
9113 if target_node is None:
9114 pnode = instance.primary_node
9115 all_nodes = instance.all_nodes
9116 else:
9117 pnode = target_node
9118 all_nodes = [pnode]
9120 if instance.disk_template in constants.DTS_FILEBASED:
9121 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9122 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9124 result.Raise("Failed to create directory '%s' on"
9125 " node %s" % (file_storage_dir, pnode))
9127 # Note: this needs to be kept in sync with adding of disks in
9128 # LUInstanceSetParams
9129 for idx, device in enumerate(instance.disks):
9130 if to_skip and idx in to_skip:
9131 continue
9132 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9134 for node in all_nodes:
9135 f_create = node == pnode
9136 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9139 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9140 """Remove all disks for an instance.
9142 This abstracts away some work from `AddInstance()` and
9143 `RemoveInstance()`. Note that in case some of the devices couldn't
9144 be removed, the removal will continue with the other ones (compare
9145 with `_CreateDisks()`).
9147 @type lu: L{LogicalUnit}
9148 @param lu: the logical unit on whose behalf we execute
9149 @type instance: L{objects.Instance}
9150 @param instance: the instance whose disks we should remove
9151 @type target_node: string
9152 @param target_node: used to override the node on which to remove the disks
9154 @return: the success of the removal
9157 logging.info("Removing block devices for instance %s", instance.name)
9160 ports_to_release = set()
9161 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9162 for (idx, device) in enumerate(anno_disks):
9164 edata = [(target_node, device)]
9166 edata = device.ComputeNodeTree(instance.primary_node)
9167 for node, disk in edata:
9168 lu.cfg.SetDiskID(disk, node)
9169 result = lu.rpc.call_blockdev_remove(node, disk)
9170 if result.fail_msg:
9171 lu.LogWarning("Could not remove disk %s on node %s,"
9172 " continuing anyway: %s", idx, node, result.fail_msg)
9173 if not (result.offline and node != instance.primary_node):
9174 all_result = False
9176 # if this is a DRBD disk, return its port to the pool
9177 if device.dev_type in constants.LDS_DRBD:
9178 ports_to_release.add(device.logical_id[2])
9180 if all_result or ignore_failures:
9181 for port in ports_to_release:
9182 lu.cfg.AddTcpUdpPort(port)
9184 if instance.disk_template == constants.DT_FILE:
9185 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9186 if target_node:
9187 tgt = target_node
9188 else:
9189 tgt = instance.primary_node
9190 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9191 if result.fail_msg:
9192 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9193 file_storage_dir, instance.primary_node, result.fail_msg)
9194 all_result = False
9196 return all_result
9199 def _ComputeDiskSizePerVG(disk_template, disks):
9200 """Compute disk size requirements in the volume group
9203 def _compute(disks, payload):
9204 """Universal algorithm.
9207 vgs = {}
9208 for disk in disks:
9209 vgs[disk[constants.IDISK_VG]] = \
9210 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9211 return vgs
9214 # Required free disk space as a function of disk and swap space
9215 req_size_dict = {
9216 constants.DT_DISKLESS: {},
9217 constants.DT_PLAIN: _compute(disks, 0),
9218 # 128 MB are added for drbd metadata for each disk
9219 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9220 constants.DT_FILE: {},
9221 constants.DT_SHARED_FILE: {},
9222 }
9224 if disk_template not in req_size_dict:
9225 raise errors.ProgrammerError("Disk template '%s' size requirement"
9226 " is unknown" % disk_template)
9228 return req_size_dict[disk_template]
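# Worked example, assuming two 1024 MiB disks in volume group "xenvg" and
# DRBD_META_SIZE = 128:
#   _ComputeDiskSizePerVG(constants.DT_DRBD8, disks)
#     -> {"xenvg": (1024 + 128) + (1024 + 128)} = {"xenvg": 2304}
# For DT_PLAIN the same disks yield {"xenvg": 2048}; file-based and diskless
# templates need no VG space at all ({}).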
9231 def _FilterVmNodes(lu, nodenames):
9232 """Filters out non-vm_capable nodes from a list.
9234 @type lu: L{LogicalUnit}
9235 @param lu: the logical unit for which we check
9236 @type nodenames: list
9237 @param nodenames: the list of nodes on which we should check
9239 @return: the list of vm-capable nodes
9242 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9243 return [name for name in nodenames if name not in vm_nodes]
9246 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9247 """Hypervisor parameter validation.
9249 This function abstract the hypervisor parameter validation to be
9250 used in both instance create and instance modify.
9252 @type lu: L{LogicalUnit}
9253 @param lu: the logical unit for which we check
9254 @type nodenames: list
9255 @param nodenames: the list of nodes on which we should check
9256 @type hvname: string
9257 @param hvname: the name of the hypervisor we should use
9258 @type hvparams: dict
9259 @param hvparams: the parameters which we need to check
9260 @raise errors.OpPrereqError: if the parameters are not valid
9263 nodenames = _FilterVmNodes(lu, nodenames)
9265 cluster = lu.cfg.GetClusterInfo()
9266 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9268 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9269 for node in nodenames:
9270 info = hvinfo[node]
9271 if info.offline:
9272 continue
9273 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9276 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9277 """OS parameters validation.
9279 @type lu: L{LogicalUnit}
9280 @param lu: the logical unit for which we check
9281 @type required: boolean
9282 @param required: whether the validation should fail if the OS is not found
9284 @type nodenames: list
9285 @param nodenames: the list of nodes on which we should check
9286 @type osname: string
9287 @param osname: the name of the OS we should use
9288 @type osparams: dict
9289 @param osparams: the parameters which we need to check
9290 @raise errors.OpPrereqError: if the parameters are not valid
9293 nodenames = _FilterVmNodes(lu, nodenames)
9294 result = lu.rpc.call_os_validate(nodenames, required, osname,
9295 [constants.OS_VALIDATE_PARAMETERS],
9297 for node, nres in result.items():
9298 # we don't check for offline cases since this should be run only
9299 # against the master node and/or an instance's nodes
9300 nres.Raise("OS Parameters validation failed on node %s" % node)
9301 if not nres.payload:
9302 lu.LogInfo("OS %s not found on node %s, validation skipped",
9306 def _CreateInstanceAllocRequest(op, disks, nics, beparams):
9307 """Wrapper around IAReqInstanceAlloc.
9309 @param op: The instance opcode
9310 @param disks: The computed disks
9311 @param nics: The computed nics
9312 @param beparams: The fully filled beparams
9314 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9317 spindle_use = beparams[constants.BE_SPINDLE_USE]
9318 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9319 disk_template=op.disk_template,
9322 vcpus=beparams[constants.BE_VCPUS],
9323 memory=beparams[constants.BE_MAXMEM],
9324 spindle_use=spindle_use,
9326 nics=[n.ToDict() for n in nics],
9327 hypervisor=op.hypervisor)
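# Sketch (mirrors LUInstanceCreate._RunAllocator below): the request built
# here is wrapped in an IAllocator object and run against the configured
# allocator script:
#
#   req = _CreateInstanceAllocRequest(self.op, self.disks, self.nics,
#                                     self.be_full)
#   ial = iallocator.IAllocator(self.cfg, self.rpc, req)
#   ial.Run(self.op.iallocator)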
9330 def _ComputeNics(op, cluster, default_ip, cfg, proc):
9331 """Computes the nics.
9333 @param op: The instance opcode
9334 @param cluster: Cluster configuration object
9335 @param default_ip: The default ip to assign
9336 @param cfg: An instance of the configuration object
9337 @param proc: The executor instance
9339 @returns: The built-up NICs
9343 for idx, nic in enumerate(op.nics):
9344 nic_mode_req = nic.get(constants.INIC_MODE, None)
9345 nic_mode = nic_mode_req
9346 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9347 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9349 # in routed mode, for the first nic, the default ip is 'auto'
9350 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9351 default_ip_mode = constants.VALUE_AUTO
9353 default_ip_mode = constants.VALUE_NONE
9355 # ip validity checks
9356 ip = nic.get(constants.INIC_IP, default_ip_mode)
9357 if ip is None or ip.lower() == constants.VALUE_NONE:
9359 elif ip.lower() == constants.VALUE_AUTO:
9360 if not op.name_check:
9361 raise errors.OpPrereqError("IP address set to auto but name checks"
9362 " have been skipped",
9366 if not netutils.IPAddress.IsValid(ip):
9367 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9371 # TODO: check the ip address for uniqueness
9372 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9373 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9376 # MAC address verification
9377 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9378 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9379 mac = utils.NormalizeAndValidateMac(mac)
9382 # TODO: We need to factor this out
9383 cfg.ReserveMAC(mac, proc.GetECId())
9384 except errors.ReservationError:
9385 raise errors.OpPrereqError("MAC address %s already in use"
9386 " in cluster" % mac,
9387 errors.ECODE_NOTUNIQUE)
9389 # Build nic parameters
9390 link = nic.get(constants.INIC_LINK, None)
9391 if link == constants.VALUE_AUTO:
9392 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9395 nicparams[constants.NIC_MODE] = nic_mode
9397 nicparams[constants.NIC_LINK] = link
9399 check_params = cluster.SimpleFillNIC(nicparams)
9400 objects.NIC.CheckParameterSyntax(check_params)
9401 nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
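# Illustrative sketch (values assumed): an opcode NIC specification such as
#
#   {constants.INIC_MAC: constants.VALUE_AUTO,
#    constants.INIC_IP: "198.51.100.10",
#    constants.INIC_MODE: constants.NIC_MODE_BRIDGED}
#
# passes the checks above and yields an objects.NIC with a freshly reserved
# MAC and nicparams whose VALUE_AUTO entries were resolved from
# cluster.nicparams[constants.PP_DEFAULT].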
9406 def _ComputeDisks(op, default_vg):
9407 """Computes the instance disks.
9409 @param op: The instance opcode
9410 @param default_vg: The default_vg to assume
9412 @return: The computed disks
9416 for disk in op.disks:
9417 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9418 if mode not in constants.DISK_ACCESS_SET:
9419 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9420 mode, errors.ECODE_INVAL)
9421 size = disk.get(constants.IDISK_SIZE, None)
9423 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9426 except (TypeError, ValueError):
9427 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9430 data_vg = disk.get(constants.IDISK_VG, default_vg)
9432 constants.IDISK_SIZE: size,
9433 constants.IDISK_MODE: mode,
9434 constants.IDISK_VG: data_vg,
9436 if constants.IDISK_METAVG in disk:
9437 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9438 if constants.IDISK_ADOPT in disk:
9439 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9440 disks.append(new_disk)
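# Illustrative sketch (values assumed): an opcode disk specification of
#
#   {constants.IDISK_SIZE: 10240, constants.IDISK_MODE: constants.DISK_RDWR}
#
# becomes {constants.IDISK_SIZE: 10240, constants.IDISK_MODE: "rw",
# constants.IDISK_VG: default_vg}, with the optional metavg/adopt entries
# copied through unchanged when present.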
9445 def _ComputeFullBeParams(op, cluster):
9446 """Computes the full beparams.
9448 @param op: The instance opcode
9449 @param cluster: The cluster config object
9451 @return: The fully filled beparams
9454 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9455 for param, value in op.beparams.iteritems():
9456 if value == constants.VALUE_AUTO:
9457 op.beparams[param] = default_beparams[param]
9458 objects.UpgradeBeParams(op.beparams)
9459 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
9460 return cluster.SimpleFillBE(op.beparams)
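# Sketch (values assumed): with op.beparams == {constants.BE_VCPUS:
# constants.VALUE_AUTO}, the VALUE_AUTO entry is first replaced by the
# cluster default, the dict is then upgraded from the legacy "memory"
# parameter to minmem/maxmem, type-checked, and finally merged over the
# cluster-wide defaults by SimpleFillBE.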
9463 class LUInstanceCreate(LogicalUnit):
9464 """Create an instance.
9467 HPATH = "instance-add"
9468 HTYPE = constants.HTYPE_INSTANCE
9471 def CheckArguments(self):
9475 # do not require name_check to ease forward/backward compatibility
9477 if self.op.no_install and self.op.start:
9478 self.LogInfo("No-installation mode selected, disabling startup")
9479 self.op.start = False
9480 # validate/normalize the instance name
9481 self.op.instance_name = \
9482 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9484 if self.op.ip_check and not self.op.name_check:
9485 # TODO: make the ip check more flexible and not depend on the name check
9486 raise errors.OpPrereqError("Cannot do IP address check without a name"
9487 " check", errors.ECODE_INVAL)
9489 # check nics' parameter names
9490 for nic in self.op.nics:
9491 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9493 # check disks: parameter names and consistent adopt/no-adopt strategy
9494 has_adopt = has_no_adopt = False
9495 for disk in self.op.disks:
9496 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9497 if constants.IDISK_ADOPT in disk:
9501 if has_adopt and has_no_adopt:
9502 raise errors.OpPrereqError("Either all disks are adopted or none is",
9505 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9506 raise errors.OpPrereqError("Disk adoption is not supported for the"
9507 " '%s' disk template" %
9508 self.op.disk_template,
9510 if self.op.iallocator is not None:
9511 raise errors.OpPrereqError("Disk adoption not allowed with an"
9512 " iallocator script", errors.ECODE_INVAL)
9513 if self.op.mode == constants.INSTANCE_IMPORT:
9514 raise errors.OpPrereqError("Disk adoption not allowed for"
9515 " instance import", errors.ECODE_INVAL)
9517 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9518 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9519 " but no 'adopt' parameter given" %
9520 self.op.disk_template,
9523 self.adopt_disks = has_adopt
9525 # instance name verification
9526 if self.op.name_check:
9527 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9528 self.op.instance_name = self.hostname1.name
9529 # used in CheckPrereq for ip ping check
9530 self.check_ip = self.hostname1.ip
9532 self.check_ip = None
9534 # file storage checks
9535 if (self.op.file_driver and
9536 self.op.file_driver not in constants.FILE_DRIVER):
9537 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9538 self.op.file_driver, errors.ECODE_INVAL)
9540 if self.op.disk_template == constants.DT_FILE:
9541 opcodes.RequireFileStorage()
9542 elif self.op.disk_template == constants.DT_SHARED_FILE:
9543 opcodes.RequireSharedFileStorage()
9545 ### Node/iallocator related checks
9546 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9548 if self.op.pnode is not None:
9549 if self.op.disk_template in constants.DTS_INT_MIRROR:
9550 if self.op.snode is None:
9551 raise errors.OpPrereqError("The networked disk templates need"
9552 " a mirror node", errors.ECODE_INVAL)
9554 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9556 self.op.snode = None
9558 self._cds = _GetClusterDomainSecret()
9560 if self.op.mode == constants.INSTANCE_IMPORT:
9561 # On import force_variant must be True, because if we forced it at
9562 # initial install, our only chance when importing it back is that it
9564 self.op.force_variant = True
9566 if self.op.no_install:
9567 self.LogInfo("No-installation mode has no effect during import")
9569 elif self.op.mode == constants.INSTANCE_CREATE:
9570 if self.op.os_type is None:
9571 raise errors.OpPrereqError("No guest OS specified",
9573 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9574 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9575 " installation" % self.op.os_type,
9577 if self.op.disk_template is None:
9578 raise errors.OpPrereqError("No disk template specified",
9581 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9582 # Check handshake to ensure both clusters have the same domain secret
9583 src_handshake = self.op.source_handshake
9584 if not src_handshake:
9585 raise errors.OpPrereqError("Missing source handshake",
9588 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9591 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9594 # Load and check source CA
9595 self.source_x509_ca_pem = self.op.source_x509_ca
9596 if not self.source_x509_ca_pem:
9597 raise errors.OpPrereqError("Missing source X509 CA",
9601 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9603 except OpenSSL.crypto.Error, err:
9604 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9605 (err, ), errors.ECODE_INVAL)
9607 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9608 if errcode is not None:
9609 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9612 self.source_x509_ca = cert
9614 src_instance_name = self.op.source_instance_name
9615 if not src_instance_name:
9616 raise errors.OpPrereqError("Missing source instance name",
9619 self.source_instance_name = \
9620 netutils.GetHostname(name=src_instance_name).name
9623 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9624 self.op.mode, errors.ECODE_INVAL)
9626 def ExpandNames(self):
9627 """ExpandNames for CreateInstance.
9629 Figure out the right locks for instance creation.
9632 self.needed_locks = {}
9634 instance_name = self.op.instance_name
9635 # this is just a preventive check, but someone might still add this
9636 # instance in the meantime, and creation will fail at lock-add time
9637 if instance_name in self.cfg.GetInstanceList():
9638 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9639 instance_name, errors.ECODE_EXISTS)
9641 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9643 if self.op.iallocator:
9644 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9645 # specifying a group on instance creation and then selecting nodes from
9647 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9648 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9650 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9651 nodelist = [self.op.pnode]
9652 if self.op.snode is not None:
9653 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9654 nodelist.append(self.op.snode)
9655 self.needed_locks[locking.LEVEL_NODE] = nodelist
9656 # Lock resources of instance's primary and secondary nodes (copy to
9657 # prevent accidental modification)
9658 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9660 # in case of import lock the source node too
9661 if self.op.mode == constants.INSTANCE_IMPORT:
9662 src_node = self.op.src_node
9663 src_path = self.op.src_path
9665 if src_path is None:
9666 self.op.src_path = src_path = self.op.instance_name
9668 if src_node is None:
9669 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9670 self.op.src_node = None
9671 if os.path.isabs(src_path):
9672 raise errors.OpPrereqError("Importing an instance from a path"
9673 " requires a source node option",
9676 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9677 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9678 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9679 if not os.path.isabs(src_path):
9680 self.op.src_path = src_path = \
9681 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
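  # Sketch of the resulting lock sets (illustrative): with an iallocator,
  # both LEVEL_NODE and LEVEL_NODE_RES are locking.ALL_SET so any node may
  # be chosen; with explicit nodes, only the primary/secondary (plus, for
  # imports, the source node) end up in the lists built above.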
9683 def _RunAllocator(self):
9684 """Run the allocator based on input opcode.
9687 req = _CreateInstanceAllocRequest(self.op, self.disks,
9688 self.nics, self.be_full)
9689 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9691 ial.Run(self.op.iallocator)
9694 raise errors.OpPrereqError("Can't compute nodes using"
9695 " iallocator '%s': %s" %
9696 (self.op.iallocator, ial.info),
9698 self.op.pnode = ial.result[0]
9699 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9700 self.op.instance_name, self.op.iallocator,
9701 utils.CommaJoin(ial.result))
9703 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9705 if req.RequiredNodes() == 2:
9706 self.op.snode = ial.result[1]
9708 def BuildHooksEnv(self):
9711 This runs on master, primary and secondary nodes of the instance.
9715 "ADD_MODE": self.op.mode,
9717 if self.op.mode == constants.INSTANCE_IMPORT:
9718 env["SRC_NODE"] = self.op.src_node
9719 env["SRC_PATH"] = self.op.src_path
9720 env["SRC_IMAGES"] = self.src_images
9722 env.update(_BuildInstanceHookEnv(
9723 name=self.op.instance_name,
9724 primary_node=self.op.pnode,
9725 secondary_nodes=self.secondaries,
9726 status=self.op.start,
9727 os_type=self.op.os_type,
9728 minmem=self.be_full[constants.BE_MINMEM],
9729 maxmem=self.be_full[constants.BE_MAXMEM],
9730 vcpus=self.be_full[constants.BE_VCPUS],
9731 nics=_NICListToTuple(self, self.nics),
9732 disk_template=self.op.disk_template,
9733 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9734 for d in self.disks],
9737 hypervisor_name=self.op.hypervisor,
9743 def BuildHooksNodes(self):
9744 """Build hooks nodes.
9747 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9750 def _ReadExportInfo(self):
9751 """Reads the export information from disk.
9753 It will override the opcode source node and path with the actual
9754 information, if these two were not specified before.
9756 @return: the export information
9759 assert self.op.mode == constants.INSTANCE_IMPORT
9761 src_node = self.op.src_node
9762 src_path = self.op.src_path
9764 if src_node is None:
9765 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9766 exp_list = self.rpc.call_export_list(locked_nodes)
9768 for node in exp_list:
9769 if exp_list[node].fail_msg:
9771 if src_path in exp_list[node].payload:
9773 self.op.src_node = src_node = node
9774 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
9778 raise errors.OpPrereqError("No export found for relative path %s" %
9779 src_path, errors.ECODE_INVAL)
9781 _CheckNodeOnline(self, src_node)
9782 result = self.rpc.call_export_info(src_node, src_path)
9783 result.Raise("No export or invalid export found in dir %s" % src_path)
9785 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9786 if not export_info.has_section(constants.INISECT_EXP):
9787 raise errors.ProgrammerError("Corrupted export config",
9788 errors.ECODE_ENVIRON)
9790 ei_version = export_info.get(constants.INISECT_EXP, "version")
9791 if (int(ei_version) != constants.EXPORT_VERSION):
9792 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9793 (ei_version, constants.EXPORT_VERSION),
9794 errors.ECODE_ENVIRON)
9797 def _ReadExportParams(self, einfo):
9798 """Use export parameters as defaults.
9800 In case the opcode doesn't specify (as in override) some instance
9801 parameters, then try to use them from the export information, if available.
9805 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9807 if self.op.disk_template is None:
9808 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9809 self.op.disk_template = einfo.get(constants.INISECT_INS,
9811 if self.op.disk_template not in constants.DISK_TEMPLATES:
9812 raise errors.OpPrereqError("Disk template specified in configuration"
9813 " file is not one of the allowed values:"
9815 " ".join(constants.DISK_TEMPLATES),
9818 raise errors.OpPrereqError("No disk template specified and the export"
9819 " is missing the disk_template information",
9822 if not self.op.disks:
9824 # TODO: import the disk iv_name too
9825 for idx in range(constants.MAX_DISKS):
9826 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9827 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9828 disks.append({constants.IDISK_SIZE: disk_sz})
9829 self.op.disks = disks
9830 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9831 raise errors.OpPrereqError("No disk info specified and the export"
9832 " is missing the disk information",
9835 if not self.op.nics:
9837 for idx in range(constants.MAX_NICS):
9838 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9840 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9841 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9848 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9849 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9851 if (self.op.hypervisor is None and
9852 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9853 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9855 if einfo.has_section(constants.INISECT_HYP):
9856 # use the export parameters but do not override the ones
9857 # specified by the user
9858 for name, value in einfo.items(constants.INISECT_HYP):
9859 if name not in self.op.hvparams:
9860 self.op.hvparams[name] = value
9862 if einfo.has_section(constants.INISECT_BEP):
9863 # use the parameters, without overriding
9864 for name, value in einfo.items(constants.INISECT_BEP):
9865 if name not in self.op.beparams:
9866 self.op.beparams[name] = value
9867 # Compatibility for the old "memory" be param
9868 if name == constants.BE_MEMORY:
9869 if constants.BE_MAXMEM not in self.op.beparams:
9870 self.op.beparams[constants.BE_MAXMEM] = value
9871 if constants.BE_MINMEM not in self.op.beparams:
9872 self.op.beparams[constants.BE_MINMEM] = value
9874 # try to read the parameters old style, from the main section
9875 for name in constants.BES_PARAMETERS:
9876 if (name not in self.op.beparams and
9877 einfo.has_option(constants.INISECT_INS, name)):
9878 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9880 if einfo.has_section(constants.INISECT_OSP):
9881 # use the parameters, without overriding
9882 for name, value in einfo.items(constants.INISECT_OSP):
9883 if name not in self.op.osparams:
9884 self.op.osparams[name] = value
9886 def _RevertToDefaults(self, cluster):
9887 """Revert the instance parameters to the default values.
9891 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9892 for name in self.op.hvparams.keys():
9893 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9894 del self.op.hvparams[name]
9896 be_defs = cluster.SimpleFillBE({})
9897 for name in self.op.beparams.keys():
9898 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9899 del self.op.beparams[name]
9901 nic_defs = cluster.SimpleFillNIC({})
9902 for nic in self.op.nics:
9903 for name in constants.NICS_PARAMETERS:
9904 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9907 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9908 for name in self.op.osparams.keys():
9909 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9910 del self.op.osparams[name]
9912 def _CalculateFileStorageDir(self):
9913 """Calculate final instance file storage dir.
9916 # file storage dir calculation/check
9917 self.instance_file_storage_dir = None
9918 if self.op.disk_template in constants.DTS_FILEBASED:
9919 # build the full file storage dir path
9922 if self.op.disk_template == constants.DT_SHARED_FILE:
9923 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9925 get_fsd_fn = self.cfg.GetFileStorageDir
9927 cfg_storagedir = get_fsd_fn()
9928 if not cfg_storagedir:
9929 raise errors.OpPrereqError("Cluster file storage dir not defined",
9931 joinargs.append(cfg_storagedir)
9933 if self.op.file_storage_dir is not None:
9934 joinargs.append(self.op.file_storage_dir)
9936 joinargs.append(self.op.instance_name)
9938 # pylint: disable=W0142
9939 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9941 def CheckPrereq(self): # pylint: disable=R0914
9942 """Check prerequisites.
9945 self._CalculateFileStorageDir()
9947 if self.op.mode == constants.INSTANCE_IMPORT:
9948 export_info = self._ReadExportInfo()
9949 self._ReadExportParams(export_info)
9950 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9952 self._old_instance_name = None
9954 if (not self.cfg.GetVGName() and
9955 self.op.disk_template not in constants.DTS_NOT_LVM):
9956 raise errors.OpPrereqError("Cluster does not support lvm-based"
9957 " instances", errors.ECODE_STATE)
9959 if (self.op.hypervisor is None or
9960 self.op.hypervisor == constants.VALUE_AUTO):
9961 self.op.hypervisor = self.cfg.GetHypervisorType()
9963 cluster = self.cfg.GetClusterInfo()
9964 enabled_hvs = cluster.enabled_hypervisors
9965 if self.op.hypervisor not in enabled_hvs:
9966 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9968 (self.op.hypervisor, ",".join(enabled_hvs)),
9971 # Check tag validity
9972 for tag in self.op.tags:
9973 objects.TaggableObject.ValidateTag(tag)
9975 # check hypervisor parameter syntax (locally)
9976 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9977 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9979 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9980 hv_type.CheckParameterSyntax(filled_hvp)
9981 self.hv_full = filled_hvp
9982 # check that we don't specify global parameters on an instance
9983 _CheckGlobalHvParams(self.op.hvparams)
9985 # fill and remember the beparams dict
9986 self.be_full = _ComputeFullBeParams(self.op, cluster)
9988 # build os parameters
9989 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9991 # now that hvp/bep are in final format, let's reset to defaults,
9993 if self.op.identify_defaults:
9994 self._RevertToDefaults(cluster)
9997 self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
10000 # disk checks/pre-build
10001 default_vg = self.cfg.GetVGName()
10002 self.disks = _ComputeDisks(self.op, default_vg)
10004 if self.op.mode == constants.INSTANCE_IMPORT:
10006 for idx in range(len(self.disks)):
10007 option = "disk%d_dump" % idx
10008 if export_info.has_option(constants.INISECT_INS, option):
10009 # FIXME: are the old os-es, disk sizes, etc. useful?
10010 export_name = export_info.get(constants.INISECT_INS, option)
10011 image = utils.PathJoin(self.op.src_path, export_name)
10012 disk_images.append(image)
10014 disk_images.append(False)
10016 self.src_images = disk_images
10018 if self.op.instance_name == self._old_instance_name:
10019 for idx, nic in enumerate(self.nics):
10020 if nic.mac == constants.VALUE_AUTO:
10021 nic_mac_ini = "nic%d_mac" % idx
10022 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10024 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10026 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10027 if self.op.ip_check:
10028 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10029 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10030 (self.check_ip, self.op.instance_name),
10031 errors.ECODE_NOTUNIQUE)
10033 #### mac address generation
10034 # By generating the MAC address here, both the allocator and the hooks get
10035 # the real final MAC address rather than the 'auto' or 'generate' value.
10036 # There is a race condition between the generation and the instance object
10037 # creation, which means that we know the MAC is valid now, but we're not
10038 # sure it will be when we actually add the instance. If things go bad,
10039 # adding the instance will abort because of a duplicate MAC, and the
10040 # creation job will fail.
10041 for nic in self.nics:
10042 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10043 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
10047 if self.op.iallocator is not None:
10048 self._RunAllocator()
10050 # Release all unneeded node locks
10051 _ReleaseLocks(self, locking.LEVEL_NODE,
10052 keep=filter(None, [self.op.pnode, self.op.snode,
10053 self.op.src_node]))
10054 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
10055 keep=filter(None, [self.op.pnode, self.op.snode,
10056 self.op.src_node]))
10058 #### node related checks
10060 # check primary node
10061 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10062 assert self.pnode is not None, \
10063 "Cannot retrieve locked node %s" % self.op.pnode
10065 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10066 pnode.name, errors.ECODE_STATE)
10068 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10069 pnode.name, errors.ECODE_STATE)
10070 if not pnode.vm_capable:
10071 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10072 " '%s'" % pnode.name, errors.ECODE_STATE)
10074 self.secondaries = []
10076 # mirror node verification
10077 if self.op.disk_template in constants.DTS_INT_MIRROR:
10078 if self.op.snode == pnode.name:
10079 raise errors.OpPrereqError("The secondary node cannot be the"
10080 " primary node", errors.ECODE_INVAL)
10081 _CheckNodeOnline(self, self.op.snode)
10082 _CheckNodeNotDrained(self, self.op.snode)
10083 _CheckNodeVmCapable(self, self.op.snode)
10084 self.secondaries.append(self.op.snode)
10086 snode = self.cfg.GetNodeInfo(self.op.snode)
10087 if pnode.group != snode.group:
10088 self.LogWarning("The primary and secondary nodes are in two"
10089 " different node groups; the disk parameters"
10090 " from the first disk's node group will be"
10093 nodenames = [pnode.name] + self.secondaries
10095 # Verify instance specs
10096 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10098 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10099 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10100 constants.ISPEC_DISK_COUNT: len(self.disks),
10101 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE] for disk in self.disks],
10102 constants.ISPEC_NIC_COUNT: len(self.nics),
10103 constants.ISPEC_SPINDLE_USE: spindle_use,
10106 group_info = self.cfg.GetNodeGroup(pnode.group)
10107 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10108 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10109 if not self.op.ignore_ipolicy and res:
10110 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10111 (pnode.group, group_info.name, utils.CommaJoin(res)))
10112 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10114 if not self.adopt_disks:
10115 if self.op.disk_template == constants.DT_RBD:
10116 # _CheckRADOSFreeSpace() is just a placeholder.
10117 # Any function that checks prerequisites can be placed here.
10118 # Check if there is enough space on the RADOS cluster.
10119 _CheckRADOSFreeSpace()
10121 # Check lv size requirements, if not adopting
10122 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10123 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10125 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10126 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10127 disk[constants.IDISK_ADOPT])
10128 for disk in self.disks])
10129 if len(all_lvs) != len(self.disks):
10130 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10131 errors.ECODE_INVAL)
10132 for lv_name in all_lvs:
10134 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
10135 # to ReserveLV use the same syntax
10136 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10137 except errors.ReservationError:
10138 raise errors.OpPrereqError("LV named %s used by another instance" %
10139 lv_name, errors.ECODE_NOTUNIQUE)
10141 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10142 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10144 node_lvs = self.rpc.call_lv_list([pnode.name],
10145 vg_names.payload.keys())[pnode.name]
10146 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10147 node_lvs = node_lvs.payload
10149 delta = all_lvs.difference(node_lvs.keys())
10151 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10152 utils.CommaJoin(delta),
10153 errors.ECODE_INVAL)
10154 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10156 raise errors.OpPrereqError("Online logical volumes found, cannot"
10157 " adopt: %s" % utils.CommaJoin(online_lvs),
10158 errors.ECODE_STATE)
10159 # update the size of disk based on what is found
10160 for dsk in self.disks:
10161 dsk[constants.IDISK_SIZE] = \
10162 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10163 dsk[constants.IDISK_ADOPT])][0]))
10165 elif self.op.disk_template == constants.DT_BLOCK:
10166 # Normalize and de-duplicate device paths
10167 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10168 for disk in self.disks])
10169 if len(all_disks) != len(self.disks):
10170 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10171 errors.ECODE_INVAL)
10172 baddisks = [d for d in all_disks
10173 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10175 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10176 " cannot be adopted" %
10177 (", ".join(baddisks),
10178 constants.ADOPTABLE_BLOCKDEV_ROOT),
10179 errors.ECODE_INVAL)
10181 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10182 list(all_disks))[pnode.name]
10183 node_disks.Raise("Cannot get block device information from node %s" %
10185 node_disks = node_disks.payload
10186 delta = all_disks.difference(node_disks.keys())
10188 raise errors.OpPrereqError("Missing block device(s): %s" %
10189 utils.CommaJoin(delta),
10190 errors.ECODE_INVAL)
10191 for dsk in self.disks:
10192 dsk[constants.IDISK_SIZE] = \
10193 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10195 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10197 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10198 # check OS parameters (remotely)
10199 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10201 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10203 # memory check on primary node
10204 #TODO(dynmem): use MINMEM for checking
10206 _CheckNodeFreeMemory(self, self.pnode.name,
10207 "creating instance %s" % self.op.instance_name,
10208 self.be_full[constants.BE_MAXMEM],
10209 self.op.hypervisor)
10211 self.dry_run_result = list(nodenames)
10213 def Exec(self, feedback_fn):
10214 """Create and add the instance to the cluster.
10217 instance = self.op.instance_name
10218 pnode_name = self.pnode.name
10220 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10221 self.owned_locks(locking.LEVEL_NODE)), \
10222 "Node locks differ from node resource locks"
10224 ht_kind = self.op.hypervisor
10225 if ht_kind in constants.HTS_REQ_PORT:
10226 network_port = self.cfg.AllocatePort()
10228 network_port = None
10230 # This is ugly, but we have a chicken-and-egg problem here:
10231 # We can only take the group disk parameters, as the instance
10232 # has no disks yet (we are generating them right here).
10233 node = self.cfg.GetNodeInfo(pnode_name)
10234 nodegroup = self.cfg.GetNodeGroup(node.group)
10235 disks = _GenerateDiskTemplate(self,
10236 self.op.disk_template,
10237 instance, pnode_name,
10240 self.instance_file_storage_dir,
10241 self.op.file_driver,
10244 self.cfg.GetGroupDiskParams(nodegroup))
10246 iobj = objects.Instance(name=instance, os=self.op.os_type,
10247 primary_node=pnode_name,
10248 nics=self.nics, disks=disks,
10249 disk_template=self.op.disk_template,
10250 admin_state=constants.ADMINST_DOWN,
10251 network_port=network_port,
10252 beparams=self.op.beparams,
10253 hvparams=self.op.hvparams,
10254 hypervisor=self.op.hypervisor,
10255 osparams=self.op.osparams,
10259 for tag in self.op.tags:
10262 if self.adopt_disks:
10263 if self.op.disk_template == constants.DT_PLAIN:
10264 # rename LVs to the newly-generated names; we need to construct
10265 # 'fake' LV disks with the old data, plus the new unique_id
10266 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10268 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10269 rename_to.append(t_dsk.logical_id)
10270 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10271 self.cfg.SetDiskID(t_dsk, pnode_name)
10272 result = self.rpc.call_blockdev_rename(pnode_name,
10273 zip(tmp_disks, rename_to))
10274 result.Raise("Failed to rename adoped LVs")
10276 feedback_fn("* creating instance disks...")
10278 _CreateDisks(self, iobj)
10279 except errors.OpExecError:
10280 self.LogWarning("Device creation failed, reverting...")
10282 _RemoveDisks(self, iobj)
10284 self.cfg.ReleaseDRBDMinors(instance)
10287 feedback_fn("adding instance %s to cluster config" % instance)
10289 self.cfg.AddInstance(iobj, self.proc.GetECId())
10291 # Declare that we don't want to remove the instance lock anymore, as we've
10292 # added the instance to the config
10293 del self.remove_locks[locking.LEVEL_INSTANCE]
10295 if self.op.mode == constants.INSTANCE_IMPORT:
10296 # Release unused nodes
10297 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10299 # Release all nodes
10300 _ReleaseLocks(self, locking.LEVEL_NODE)
10303 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10304 feedback_fn("* wiping instance disks...")
10306 _WipeDisks(self, iobj)
10307 except errors.OpExecError, err:
10308 logging.exception("Wiping disks failed")
10309 self.LogWarning("Wiping instance disks failed (%s)", err)
10313 # Something is already wrong with the disks, don't do anything else
10315 elif self.op.wait_for_sync:
10316 disk_abort = not _WaitForSync(self, iobj)
10317 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10318 # make sure the disks are not degraded (still sync-ing is ok)
10319 feedback_fn("* checking mirrors status")
10320 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10325 _RemoveDisks(self, iobj)
10326 self.cfg.RemoveInstance(iobj.name)
10327 # Make sure the instance lock gets removed
10328 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10329 raise errors.OpExecError("There are some degraded disks for"
10332 # Release all node resource locks
10333 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10335 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10336 # we need to set the disk IDs to the primary node, since the
10337 # preceding code might or might not have done it, depending on
10338 # disk template and other options
10339 for disk in iobj.disks:
10340 self.cfg.SetDiskID(disk, pnode_name)
10341 if self.op.mode == constants.INSTANCE_CREATE:
10342 if not self.op.no_install:
10343 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10344 not self.op.wait_for_sync)
10346 feedback_fn("* pausing disk sync to install instance OS")
10347 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10350 for idx, success in enumerate(result.payload):
10352 logging.warn("pause-sync of instance %s for disk %d failed",
10355 feedback_fn("* running the instance OS create scripts...")
10356 # FIXME: pass debug option from opcode to backend
10358 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10359 self.op.debug_level)
10361 feedback_fn("* resuming disk sync")
10362 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10365 for idx, success in enumerate(result.payload):
10367 logging.warn("resume-sync of instance %s for disk %d failed",
10370 os_add_result.Raise("Could not add os for instance %s"
10371 " on node %s" % (instance, pnode_name))
10374 if self.op.mode == constants.INSTANCE_IMPORT:
10375 feedback_fn("* running the instance OS import scripts...")
10379 for idx, image in enumerate(self.src_images):
10383 # FIXME: pass debug option from opcode to backend
10384 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10385 constants.IEIO_FILE, (image, ),
10386 constants.IEIO_SCRIPT,
10387 (iobj.disks[idx], idx),
10389 transfers.append(dt)
10392 masterd.instance.TransferInstanceData(self, feedback_fn,
10393 self.op.src_node, pnode_name,
10394 self.pnode.secondary_ip,
10396 if not compat.all(import_result):
10397 self.LogWarning("Some disks for instance %s on node %s were not"
10398 " imported successfully" % (instance, pnode_name))
10400 rename_from = self._old_instance_name
10402 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10403 feedback_fn("* preparing remote import...")
10404 # The source cluster will stop the instance before attempting to make
10405 # a connection. In some cases stopping an instance can take a long
10406 # time, hence the shutdown timeout is added to the connection timeout.
10408 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10409 self.op.source_shutdown_timeout)
10410 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10412 assert iobj.primary_node == self.pnode.name
10414 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10415 self.source_x509_ca,
10416 self._cds, timeouts)
10417 if not compat.all(disk_results):
10418 # TODO: Should the instance still be started, even if some disks
10419 # failed to import (valid for local imports, too)?
10420 self.LogWarning("Some disks for instance %s on node %s were not"
10421 " imported successfully" % (instance, pnode_name))
10423 rename_from = self.source_instance_name
10426 # also checked in the prereq part
10427 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10430 # Run rename script on newly imported instance
10431 assert iobj.name == instance
10432 feedback_fn("Running rename script for %s" % instance)
10433 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10435 self.op.debug_level)
10436 if result.fail_msg:
10437 self.LogWarning("Failed to run rename script for %s on node"
10438 " %s: %s" % (instance, pnode_name, result.fail_msg))
10440 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10443 iobj.admin_state = constants.ADMINST_UP
10444 self.cfg.Update(iobj, feedback_fn)
10445 logging.info("Starting instance %s on node %s", instance, pnode_name)
10446 feedback_fn("* starting instance...")
10447 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10449 result.Raise("Could not start instance")
10451 return list(iobj.all_nodes)
10454 class LUInstanceMultiAlloc(NoHooksLU):
10455 """Allocates multiple instances at the same time.
10460 def CheckArguments(self):
10461 """Check arguments.
10465 for inst in self.op.instances:
10466 if inst.iallocator is not None:
10467 raise errors.OpPrereqError("iallocator are not allowed to be set on"
10468 " instance objects", errors.ECODE_INVAL)
10469 nodes.append(bool(inst.pnode))
10470 if inst.disk_template in constants.DTS_INT_MIRROR:
10471 nodes.append(bool(inst.snode))
10473 has_nodes = compat.any(nodes)
10474 if compat.all(nodes) ^ has_nodes:
10475 raise errors.OpPrereqError("There are instance objects providing"
10476 " pnode/snode while others do not",
10477 errors.ECODE_INVAL)
10479 if self.op.iallocator is None:
10480 default_iallocator = self.cfg.GetDefaultIAllocator()
10481 if default_iallocator and has_nodes:
10482 self.op.iallocator = default_iallocator
10484 raise errors.OpPrereqError("No iallocator or nodes on the instances"
10485 " given and no cluster-wide default"
10486 " iallocator found; please specify either"
10487 " an iallocator or nodes on the instances"
10488 " or set a cluster-wide default iallocator",
10489 errors.ECODE_INVAL)
10491 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
10493 raise errors.OpPrereqError("There are duplicate instance names: %s" %
10494 utils.CommaJoin(dups), errors.ECODE_INVAL)
10496 def ExpandNames(self):
10497 """Calculate the locks.
10500 self.share_locks = _ShareAll()
10501 self.needed_locks = {}
10503 if self.op.iallocator:
10504 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10505 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
10508 for inst in self.op.instances:
10509 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
10510 nodeslist.append(inst.pnode)
10511 if inst.snode is not None:
10512 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
10513 nodeslist.append(inst.snode)
10515 self.needed_locks[locking.LEVEL_NODE] = nodeslist
10516 # Lock resources of instance's primary and secondary nodes (copy to
10517 # prevent accidental modification)
10518 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
10520 def CheckPrereq(self):
10521 """Check prerequisite.
10524 cluster = self.cfg.GetClusterInfo()
10525 default_vg = self.cfg.GetVGName()
10526 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
10527 _ComputeNics(op, cluster, None,
10528 self.cfg, self.proc),
10529 _ComputeFullBeParams(op, cluster))
10530 for op in self.op.instances]
10531 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
10532 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10534 ial.Run(self.op.iallocator)
10536 if not ial.success:
10537 raise errors.OpPrereqError("Can't compute nodes using"
10538 " iallocator '%s': %s" %
10539 (self.op.iallocator, ial.info),
10540 errors.ECODE_NORES)
10542 self.ia_result = ial.result
10544 if self.op.dry_run:
10545 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
10546 constants.JOB_IDS_KEY: [],
10549 def _ConstructPartialResult(self):
10550 """Contructs the partial result.
10553 (allocatable, failed) = self.ia_result
10555 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
10556 map(compat.fst, allocatable),
10557 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
10560 def Exec(self, feedback_fn):
10561 """Executes the opcode.
10564 op2inst = dict((op.instance_name, op) for op in self.op.instances)
10565 (allocatable, failed) = self.ia_result
10568 for (name, nodes) in allocatable:
10569 op = op2inst.pop(name)
10572 (op.pnode, op.snode) = nodes
10574 (op.pnode,) = nodes
10578 missing = set(op2inst.keys()) - set(failed)
10579 assert not missing, \
10580 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
10582 return ResultWithJobs(jobs, **self._ConstructPartialResult())
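# Illustrative sketch of the partial result (instance names assumed): if
# "inst1" is allocatable but "inst2" is not, Exec returns a ResultWithJobs
# whose extra payload resembles
#
#   {opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY: ["inst1"],
#    opcodes.OpInstanceMultiAlloc.FAILED_KEY: ["inst2"]}
#
# with the submitted creation jobs' IDs added under constants.JOB_IDS_KEY
# by the master processor.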
10585 def _CheckRADOSFreeSpace():
10586 """Compute disk size requirements inside the RADOS cluster.
10589 # For the RADOS cluster we assume there is always enough space.
10593 class LUInstanceConsole(NoHooksLU):
10594 """Connect to an instance's console.
10596 This is somewhat special in that it returns the command line that
10597 you need to run on the master node in order to connect to the console.
10603 def ExpandNames(self):
10604 self.share_locks = _ShareAll()
10605 self._ExpandAndLockInstance()
10607 def CheckPrereq(self):
10608 """Check prerequisites.
10610 This checks that the instance is in the cluster.
10613 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10614 assert self.instance is not None, \
10615 "Cannot retrieve locked instance %s" % self.op.instance_name
10616 _CheckNodeOnline(self, self.instance.primary_node)
10618 def Exec(self, feedback_fn):
10619 """Connect to the console of an instance
10622 instance = self.instance
10623 node = instance.primary_node
10625 node_insts = self.rpc.call_instance_list([node],
10626 [instance.hypervisor])[node]
10627 node_insts.Raise("Can't get node information from %s" % node)
10629 if instance.name not in node_insts.payload:
10630 if instance.admin_state == constants.ADMINST_UP:
10631 state = constants.INSTST_ERRORDOWN
10632 elif instance.admin_state == constants.ADMINST_DOWN:
10633 state = constants.INSTST_ADMINDOWN
10635 state = constants.INSTST_ADMINOFFLINE
10636 raise errors.OpExecError("Instance %s is not running (state %s)" %
10637 (instance.name, state))
10639 logging.debug("Connecting to console of %s on %s", instance.name, node)
10641 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10644 def _GetInstanceConsole(cluster, instance):
10645 """Returns console information for an instance.
10647 @type cluster: L{objects.Cluster}
10648 @type instance: L{objects.Instance}
10652 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10653 # beparams and hvparams are passed separately, to avoid editing the
10654 # instance and then saving the defaults in the instance itself.
10655 hvparams = cluster.FillHV(instance)
10656 beparams = cluster.FillBE(instance)
10657 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10659 assert console.instance == instance.name
10660 assert console.Validate()
10662 return console.ToDict()
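# Illustrative sketch (fields vary with the hypervisor's console kind): the
# returned dictionary is a serialized objects.InstanceConsole, roughly
#
#   {"instance": "inst1.example.com", "kind": constants.CONS_SSH,
#    "host": "node1.example.com", "command": [...]}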
10665 class LUInstanceReplaceDisks(LogicalUnit):
10666 """Replace the disks of an instance.
10669 HPATH = "mirrors-replace"
10670 HTYPE = constants.HTYPE_INSTANCE
10673 def CheckArguments(self):
10674 """Check arguments.
10677 remote_node = self.op.remote_node
10678 ialloc = self.op.iallocator
10679 if self.op.mode == constants.REPLACE_DISK_CHG:
10680 if remote_node is None and ialloc is None:
10681 raise errors.OpPrereqError("When changing the secondary either an"
10682 " iallocator script must be used or the"
10683 " new node given", errors.ECODE_INVAL)
10685 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10687 elif remote_node is not None or ialloc is not None:
10688 # Not replacing the secondary
10689 raise errors.OpPrereqError("The iallocator and new node options can"
10690 " only be used when changing the"
10691 " secondary node", errors.ECODE_INVAL)
10693 def ExpandNames(self):
10694 self._ExpandAndLockInstance()
10696 assert locking.LEVEL_NODE not in self.needed_locks
10697 assert locking.LEVEL_NODE_RES not in self.needed_locks
10698 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10700 assert self.op.iallocator is None or self.op.remote_node is None, \
10701 "Conflicting options"
10703 if self.op.remote_node is not None:
10704 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10706 # Warning: do not remove the locking of the new secondary here
10707 # unless DRBD8.AddChildren is changed to work in parallel;
10708 # currently it doesn't since parallel invocations of
10709 # FindUnusedMinor will conflict
10710 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10711 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10713 self.needed_locks[locking.LEVEL_NODE] = []
10714 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10716 if self.op.iallocator is not None:
10717 # iallocator will select a new node in the same group
10718 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10720 self.needed_locks[locking.LEVEL_NODE_RES] = []
10722 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10723 self.op.iallocator, self.op.remote_node,
10724 self.op.disks, False, self.op.early_release,
10725 self.op.ignore_ipolicy)
10727 self.tasklets = [self.replacer]
10729 def DeclareLocks(self, level):
10730 if level == locking.LEVEL_NODEGROUP:
10731 assert self.op.remote_node is None
10732 assert self.op.iallocator is not None
10733 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10735 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10736 # Lock all groups used by instance optimistically; this requires going
10737 # via the node before it's locked, requiring verification later on
10738 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10739 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10741 elif level == locking.LEVEL_NODE:
10742 if self.op.iallocator is not None:
10743 assert self.op.remote_node is None
10744 assert not self.needed_locks[locking.LEVEL_NODE]
10746 # Lock member nodes of all locked groups
10747 self.needed_locks[locking.LEVEL_NODE] = \
10749 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10750 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10752 self._LockInstancesNodes()
10753 elif level == locking.LEVEL_NODE_RES:
10755 self.needed_locks[locking.LEVEL_NODE_RES] = \
10756 self.needed_locks[locking.LEVEL_NODE]
10758 def BuildHooksEnv(self):
10759 """Build hooks env.
10761 This runs on the master, the primary and all the secondaries.
10764 instance = self.replacer.instance
10766 "MODE": self.op.mode,
10767 "NEW_SECONDARY": self.op.remote_node,
10768 "OLD_SECONDARY": instance.secondary_nodes[0],
10770 env.update(_BuildInstanceHookEnvByObject(self, instance))
10773 def BuildHooksNodes(self):
10774 """Build hooks nodes.
10777 instance = self.replacer.instance
10779 self.cfg.GetMasterNode(),
10780 instance.primary_node,
10782 if self.op.remote_node is not None:
10783 nl.append(self.op.remote_node)
10786 def CheckPrereq(self):
10787 """Check prerequisites.
10790 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10791 self.op.iallocator is None)
10793 # Verify if node group locks are still correct
10794 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10796 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10798 return LogicalUnit.CheckPrereq(self)
10801 class TLReplaceDisks(Tasklet):
10802 """Replaces disks for an instance.
10804 Note: Locking is not within the scope of this class.
10807 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10808 disks, delay_iallocator, early_release, ignore_ipolicy):
10809 """Initializes this class.
10812 Tasklet.__init__(self, lu)
10815 self.instance_name = instance_name
10817 self.iallocator_name = iallocator_name
10818 self.remote_node = remote_node
10820 self.delay_iallocator = delay_iallocator
10821 self.early_release = early_release
10822 self.ignore_ipolicy = ignore_ipolicy
10825 self.instance = None
10826 self.new_node = None
10827 self.target_node = None
10828 self.other_node = None
10829 self.remote_node_info = None
10830 self.node_secondary_ip = None
10833 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10834 """Compute a new secondary node using an IAllocator.
10837 req = iallocator.IAReqRelocate(name=instance_name,
10838 relocate_from=list(relocate_from))
10839 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
10841 ial.Run(iallocator_name)
10843 if not ial.success:
10844 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10845 " %s" % (iallocator_name, ial.info),
10846 errors.ECODE_NORES)
10848 remote_node_name = ial.result[0]
10850 lu.LogInfo("Selected new secondary for instance '%s': %s",
10851 instance_name, remote_node_name)
10853 return remote_node_name
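  # Sketch (mirrors _CheckPrereq2 below): when an allocator is configured,
  # the new secondary is computed by relocating the instance away from its
  # current secondary nodes:
  #
  #   remote_node = self._RunAllocator(self.lu, self.iallocator_name,
  #                                    instance.name,
  #                                    instance.secondary_nodes)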
10855 def _FindFaultyDisks(self, node_name):
10856 """Wrapper for L{_FindFaultyInstanceDisks}.
10859 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10862 def _CheckDisksActivated(self, instance):
10863 """Checks if the instance disks are activated.
10865 @param instance: The instance whose disks we should check
10866 @return: True if they are activated, False otherwise
10869 nodes = instance.all_nodes
10871 for idx, dev in enumerate(instance.disks):
10873 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10874 self.cfg.SetDiskID(dev, node)
10876 result = _BlockdevFind(self, node, dev, instance)
10880 elif result.fail_msg or not result.payload:
10885 def CheckPrereq(self):
10886 """Check prerequisites.
10888 This checks that the instance is in the cluster.
10891 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10892 assert instance is not None, \
10893 "Cannot retrieve locked instance %s" % self.instance_name
10895 if instance.disk_template != constants.DT_DRBD8:
10896 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10897 " instances", errors.ECODE_INVAL)
10899 if len(instance.secondary_nodes) != 1:
10900 raise errors.OpPrereqError("The instance has a strange layout,"
10901 " expected one secondary but found %d" %
10902 len(instance.secondary_nodes),
10903 errors.ECODE_FAULT)
10905 if not self.delay_iallocator:
10906 self._CheckPrereq2()
10908 def _CheckPrereq2(self):
10909 """Check prerequisites, second part.
10911 This function should always be part of CheckPrereq. It was separated and is
10912 now called from Exec because during node evacuation iallocator was only
10913 called with an unmodified cluster model, not taking planned changes into account.
10917 instance = self.instance
10918 secondary_node = instance.secondary_nodes[0]
10920 if self.iallocator_name is None:
10921 remote_node = self.remote_node
10923 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10924 instance.name, instance.secondary_nodes)
10926 if remote_node is None:
10927 self.remote_node_info = None
10929 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10930 "Remote node '%s' is not locked" % remote_node
10932 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10933 assert self.remote_node_info is not None, \
10934 "Cannot retrieve locked node %s" % remote_node
10936 if remote_node == self.instance.primary_node:
10937 raise errors.OpPrereqError("The specified node is the primary node of"
10938 " the instance", errors.ECODE_INVAL)
10940 if remote_node == secondary_node:
10941 raise errors.OpPrereqError("The specified node is already the"
10942 " secondary node of the instance",
10943 errors.ECODE_INVAL)
10945 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10946 constants.REPLACE_DISK_CHG):
10947 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10948 errors.ECODE_INVAL)
10950 if self.mode == constants.REPLACE_DISK_AUTO:
10951 if not self._CheckDisksActivated(instance):
10952 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10953 " first" % self.instance_name,
10954 errors.ECODE_STATE)
10955 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10956 faulty_secondary = self._FindFaultyDisks(secondary_node)
10958 if faulty_primary and faulty_secondary:
10959 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10960 " one node and can not be repaired"
10961 " automatically" % self.instance_name,
10962 errors.ECODE_STATE)
10965 self.disks = faulty_primary
10966 self.target_node = instance.primary_node
10967 self.other_node = secondary_node
10968 check_nodes = [self.target_node, self.other_node]
10969 elif faulty_secondary:
10970 self.disks = faulty_secondary
10971 self.target_node = secondary_node
10972 self.other_node = instance.primary_node
10973 check_nodes = [self.target_node, self.other_node]
10979 # Non-automatic modes
10980 if self.mode == constants.REPLACE_DISK_PRI:
10981 self.target_node = instance.primary_node
10982 self.other_node = secondary_node
10983 check_nodes = [self.target_node, self.other_node]
10985 elif self.mode == constants.REPLACE_DISK_SEC:
10986 self.target_node = secondary_node
10987 self.other_node = instance.primary_node
10988 check_nodes = [self.target_node, self.other_node]
10990 elif self.mode == constants.REPLACE_DISK_CHG:
10991 self.new_node = remote_node
10992 self.other_node = instance.primary_node
10993 self.target_node = secondary_node
10994 check_nodes = [self.new_node, self.other_node]
10996 _CheckNodeNotDrained(self.lu, remote_node)
10997 _CheckNodeVmCapable(self.lu, remote_node)
10999 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11000 assert old_node_info is not None
11001 if old_node_info.offline and not self.early_release:
11002 # doesn't make sense to delay the release
11003 self.early_release = True
11004 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11005 " early-release mode", secondary_node)
11008 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11011 # If not specified all disks should be replaced
11013 self.disks = range(len(self.instance.disks))
11015 # TODO: This is ugly, but right now we can't distinguish between
11016 # internally submitted opcodes and external ones. We should fix that.
11017 if self.remote_node_info:
11018 # We change the node, lets verify it still meets instance policy
11019 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11020 cluster = self.cfg.GetClusterInfo()
11021 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11023 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11024 ignore=self.ignore_ipolicy)
11026 for node in check_nodes:
11027 _CheckNodeOnline(self.lu, node)
11029 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11032 if node_name is not None)
11034 # Release unneeded node and node resource locks
11035 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11036 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11038 # Release any owned node group
11039 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
11040 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11042 # Check whether disks are valid
11043 for disk_idx in self.disks:
11044 instance.FindDisk(disk_idx)
11046 # Get secondary node IP addresses
11047 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11048 in self.cfg.GetMultiNodeInfo(touched_nodes))
11050 def Exec(self, feedback_fn):
11051 """Execute disk replacement.
11053 This dispatches the disk replacement to the appropriate handler.
11056 if self.delay_iallocator:
11057 self._CheckPrereq2()
11060 # Verify owned locks before starting operation
11061 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11062 assert set(owned_nodes) == set(self.node_secondary_ip), \
11063 ("Incorrect node locks, owning %s, expected %s" %
11064 (owned_nodes, self.node_secondary_ip.keys()))
11065 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11066 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11068 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11069 assert list(owned_instances) == [self.instance_name], \
11070 "Instance '%s' not locked" % self.instance_name
11072 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11073 "Should not own any node group lock at this point"
11076 feedback_fn("No disks need replacement for instance '%s'" %
11077 self.instance.name)
11080 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11081 (utils.CommaJoin(self.disks), self.instance.name))
11082 feedback_fn("Current primary node: %s", self.instance.primary_node)
11083 feedback_fn("Current seconary node: %s",
11084 utils.CommaJoin(self.instance.secondary_nodes))
11086 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11088 # Activate the instance disks if we're replacing them on a down instance
11090 _StartInstanceDisks(self.lu, self.instance, True)
11093 # Should we replace the secondary node?
11094 if self.new_node is not None:
11095 fn = self._ExecDrbd8Secondary
11097 fn = self._ExecDrbd8DiskOnly
11099 result = fn(feedback_fn)
11101 # Deactivate the instance disks if we're replacing them on a
11104 _SafeShutdownInstanceDisks(self.lu, self.instance)
11106 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11109 # Verify owned locks
11110 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11111 nodes = frozenset(self.node_secondary_ip)
11112 assert ((self.early_release and not owned_nodes) or
11113 (not self.early_release and not (set(owned_nodes) - nodes))), \
11114 ("Not owning the correct locks, early_release=%s, owned=%r,"
11115 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11119 def _CheckVolumeGroup(self, nodes):
11120 self.lu.LogInfo("Checking volume groups")
11122 vgname = self.cfg.GetVGName()
11124 # Make sure volume group exists on all involved nodes
11125 results = self.rpc.call_vg_list(nodes)
11127 raise errors.OpExecError("Can't list volume groups on the nodes")
11130 res = results[node]
11131 res.Raise("Error checking node %s" % node)
11132 if vgname not in res.payload:
11133 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11136 def _CheckDisksExistence(self, nodes):
11137 # Check disk existence
11138 for idx, dev in enumerate(self.instance.disks):
11139 if idx not in self.disks:
11143 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
11144 self.cfg.SetDiskID(dev, node)
11146 result = _BlockdevFind(self, node, dev, self.instance)
11148 msg = result.fail_msg
11149 if msg or not result.payload:
11151 msg = "disk not found"
11152 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11155 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11156 for idx, dev in enumerate(self.instance.disks):
11157 if idx not in self.disks:
11160 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11163 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11164 on_primary, ldisk=ldisk):
11165 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11166 " replace disks for instance %s" %
11167 (node_name, self.instance.name))
11169 def _CreateNewStorage(self, node_name):
11170 """Create new storage on the primary or secondary node.
11172 This is only used for same-node replaces, not for changing the
11173 secondary node, hence we don't want to modify the existing disk.
11178 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11179 for idx, dev in enumerate(disks):
11180 if idx not in self.disks:
11183 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
11185 self.cfg.SetDiskID(dev, node_name)
11187 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11188 names = _GenerateUniqueNames(self.lu, lv_names)
11190 (data_disk, meta_disk) = dev.children
11191 vg_data = data_disk.logical_id[0]
11192 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11193 logical_id=(vg_data, names[0]),
11194 params=data_disk.params)
11195 vg_meta = meta_disk.logical_id[0]
11196 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11197 size=constants.DRBD_META_SIZE,
11198 logical_id=(vg_meta, names[1]),
11199 params=meta_disk.params)
11201 new_lvs = [lv_data, lv_meta]
11202 old_lvs = [child.Copy() for child in dev.children]
11203 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11205 # we pass force_create=True to force the LVM creation
11206 for new_lv in new_lvs:
11207 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11208 _GetInstanceInfoText(self.instance), False)
11212 def _CheckDevices(self, node_name, iv_names):
11213 for name, (dev, _, _) in iv_names.iteritems():
11214 self.cfg.SetDiskID(dev, node_name)
11216 result = _BlockdevFind(self, node_name, dev, self.instance)
11218 msg = result.fail_msg
11219 if msg or not result.payload:
11221 msg = "disk not found"
11222 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11225 if result.payload.is_degraded:
11226 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11228 def _RemoveOldStorage(self, node_name, iv_names):
11229 for name, (_, old_lvs, _) in iv_names.iteritems():
11230 self.lu.LogInfo("Remove logical volumes for %s" % name)
11233 self.cfg.SetDiskID(lv, node_name)
11235 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11237 self.lu.LogWarning("Can't remove old LV: %s" % msg,
11238 hint="remove unused LVs manually")
11240 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11241 """Replace a disk on the primary or secondary for DRBD 8.
11243 The algorithm for replace is quite complicated:
11245 1. for each disk to be replaced:
11247 1. create new LVs on the target node with unique names
11248 1. detach old LVs from the drbd device
11249 1. rename old LVs to name_replaced.<time_t>
11250 1. rename new LVs to old LVs
11251 1. attach the new LVs (with the old names now) to the drbd device
11253 1. wait for sync across all devices
11255 1. for each modified disk:
11257 1. remove old LVs (which have the name name_replaced.<time_t>)
11259 Failures are not very well handled.
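# A minimal sketch of the detach/rename/rename/attach dance described
# above, reduced to plain string operations (names hypothetical; the real
# renames go through rpc.call_blockdev_rename):
#   old_lvs = ["xenvg/disk0_data", "xenvg/disk0_meta"]
#   new_lvs = ["xenvg/UUID.disk0_data", "xenvg/UUID.disk0_meta"]
#   suffix = 1234567890  # int(time.time())
#   renamed_old = ["%s_replaced-%s" % (name, suffix) for name in old_lvs]
#   # after renaming new_lvs to the original old names, the DRBD device is
#   # re-attached to LVs whose names never changed from its point of view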
11264 # Step: check device activation
11265 self.lu.LogStep(1, steps_total, "Check device existence")
11266 self._CheckDisksExistence([self.other_node, self.target_node])
11267 self._CheckVolumeGroup([self.target_node, self.other_node])
11269 # Step: check other node consistency
11270 self.lu.LogStep(2, steps_total, "Check peer consistency")
11271 self._CheckDisksConsistency(self.other_node,
11272 self.other_node == self.instance.primary_node,
11275 # Step: create new storage
11276 self.lu.LogStep(3, steps_total, "Allocate new storage")
11277 iv_names = self._CreateNewStorage(self.target_node)
11279 # Step: for each lv, detach+rename*2+attach
11280 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11281 for dev, old_lvs, new_lvs in iv_names.itervalues():
11282 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11284 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11286 result.Raise("Can't detach drbd from local storage on node"
11287 " %s for device %s" % (self.target_node, dev.iv_name))
11289 #cfg.Update(instance)
11291 # ok, we created the new LVs, so now we know we have the needed
11292 # storage; as such, we proceed on the target node to rename
11293 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11294 # using the assumption that logical_id == physical_id (which in
11295 # turn is the unique_id on that node)
11297 # FIXME(iustin): use a better name for the replaced LVs
11298 temp_suffix = int(time.time())
11299 ren_fn = lambda d, suff: (d.physical_id[0],
11300 d.physical_id[1] + "_replaced-%s" % suff)
11302 # Build the rename list based on what LVs exist on the node
11303 rename_old_to_new = []
11304 for to_ren in old_lvs:
11305 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11306 if not result.fail_msg and result.payload:
11308 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11310 self.lu.LogInfo("Renaming the old LVs on the target node")
11311 result = self.rpc.call_blockdev_rename(self.target_node,
11313 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11315 # Now we rename the new LVs to the old LVs
11316 self.lu.LogInfo("Renaming the new LVs on the target node")
11317 rename_new_to_old = [(new, old.physical_id)
11318 for old, new in zip(old_lvs, new_lvs)]
11319 result = self.rpc.call_blockdev_rename(self.target_node,
11321 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11323 # Intermediate steps of in memory modifications
11324 for old, new in zip(old_lvs, new_lvs):
11325 new.logical_id = old.logical_id
11326 self.cfg.SetDiskID(new, self.target_node)
11328 # We need to modify old_lvs so that removal later removes the
11329 # right LVs, not the newly added ones; note that old_lvs is a copy here
11331 for disk in old_lvs:
11332 disk.logical_id = ren_fn(disk, temp_suffix)
11333 self.cfg.SetDiskID(disk, self.target_node)
11335 # Now that the new lvs have the old name, we can add them to the device
11336 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11337 result = self.rpc.call_blockdev_addchildren(self.target_node,
11338 (dev, self.instance), new_lvs)
11339 msg = result.fail_msg
11341 for new_lv in new_lvs:
11342 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11345 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11346 hint=("cleanup manually the unused logical"
11348 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11350 cstep = itertools.count(5)
11352 if self.early_release:
11353 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11354 self._RemoveOldStorage(self.target_node, iv_names)
11355 # TODO: Check if releasing locks early still makes sense
11356 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11358 # Release all resource locks except those used by the instance
11359 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11360 keep=self.node_secondary_ip.keys())
11362 # Release all node locks while waiting for sync
11363 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11365 # TODO: Can the instance lock be downgraded here? Take the optional disk
11366 # shutdown in the caller into consideration.
11369 # This can fail as the old devices are degraded and _WaitForSync
11370 # does a combined result over all disks, so we don't check its return value
11371 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11372 _WaitForSync(self.lu, self.instance)
11374 # Check all devices manually
11375 self._CheckDevices(self.instance.primary_node, iv_names)
11377 # Step: remove old storage
11378 if not self.early_release:
11379 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11380 self._RemoveOldStorage(self.target_node, iv_names)
11382 def _ExecDrbd8Secondary(self, feedback_fn):
11383 """Replace the secondary node for DRBD 8.
11385 The algorithm for replace is quite complicated:
11386 - for all disks of the instance:
11387 - create new LVs on the new node with same names
11388 - shutdown the drbd device on the old secondary
11389 - disconnect the drbd network on the primary
11390 - create the drbd device on the new secondary
11391 - network attach the drbd on the primary, using an artifice:
11392 the drbd code for Attach() will connect to the network if it
11393 finds a device which is connected to the good local disks but
11394 not network enabled
11395 - wait for sync across all devices
11396 - remove all disks from the old secondary
11398 Failures are not very well handled.
11403 pnode = self.instance.primary_node
11405 # Step: check device activation
11406 self.lu.LogStep(1, steps_total, "Check device existence")
11407 self._CheckDisksExistence([self.instance.primary_node])
11408 self._CheckVolumeGroup([self.instance.primary_node])
11410 # Step: check other node consistency
11411 self.lu.LogStep(2, steps_total, "Check peer consistency")
11412 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11414 # Step: create new storage
11415 self.lu.LogStep(3, steps_total, "Allocate new storage")
11416 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11417 for idx, dev in enumerate(disks):
11418 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11419 (self.new_node, idx))
11420 # we pass force_create=True to force LVM creation
11421 for new_lv in dev.children:
11422 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11423 True, _GetInstanceInfoText(self.instance), False)
11425 # Step 4: drbd minors and drbd setup changes
11426 # after this, we must manually remove the drbd minors on both the
11427 # error and the success paths
11428 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11429 minors = self.cfg.AllocateDRBDMinor([self.new_node
11430 for dev in self.instance.disks],
11431 self.instance.name)
11432 logging.debug("Allocated minors %r", minors)
11435 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11436 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11437 (self.new_node, idx))
11438 # create new devices on new_node; note that we create two IDs:
11439 # one without port, so the drbd will be activated without
11440 # networking information on the new node at this stage, and one
11441 # with network, for the later activation in step 4
11442 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11443 if self.instance.primary_node == o_node1:
11446 assert self.instance.primary_node == o_node2, "Three-node instance?"
11449 new_alone_id = (self.instance.primary_node, self.new_node, None,
11450 p_minor, new_minor, o_secret)
11451 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11452 p_minor, new_minor, o_secret)
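# Illustration only (hypothetical nodes, port and minors): with primary
# "node1", new secondary "node3", DRBD port 11000, p_minor 0 and new_minor
# 5, the two IDs built above would be:
#   new_alone_id = ("node1", "node3", None, 0, 5, o_secret)
#   new_net_id = ("node1", "node3", 11000, 0, 5, o_secret)
# The port-less variant activates the device without networking; the full
# variant is kept in iv_names for the later re-attach.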
11454 iv_names[idx] = (dev, dev.children, new_net_id)
11455 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11457 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11458 logical_id=new_alone_id,
11459 children=dev.children,
11462 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11465 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11467 _GetInstanceInfoText(self.instance), False)
11468 except errors.GenericError:
11469 self.cfg.ReleaseDRBDMinors(self.instance.name)
11472 # We have new devices, shutdown the drbd on the old secondary
11473 for idx, dev in enumerate(self.instance.disks):
11474 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11475 self.cfg.SetDiskID(dev, self.target_node)
11476 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11477 (dev, self.instance)).fail_msg
11479 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11480 "node: %s" % (idx, msg),
11481 hint=("Please cleanup this device manually as"
11482 " soon as possible"))
11484 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11485 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11486 self.instance.disks)[pnode]
11488 msg = result.fail_msg
11490 # detaches didn't succeed (unlikely)
11491 self.cfg.ReleaseDRBDMinors(self.instance.name)
11492 raise errors.OpExecError("Can't detach the disks from the network on"
11493 " old node: %s" % (msg,))
11495 # if we managed to detach at least one, we update all the disks of
11496 # the instance to point to the new secondary
11497 self.lu.LogInfo("Updating instance configuration")
11498 for dev, _, new_logical_id in iv_names.itervalues():
11499 dev.logical_id = new_logical_id
11500 self.cfg.SetDiskID(dev, self.instance.primary_node)
11502 self.cfg.Update(self.instance, feedback_fn)
11504 # Release all node locks (the configuration has been updated)
11505 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11507 # and now perform the drbd attach
11508 self.lu.LogInfo("Attaching primary drbds to new secondary"
11509 " (standalone => connected)")
11510 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11512 self.node_secondary_ip,
11513 (self.instance.disks, self.instance),
11514 self.instance.name,
11516 for to_node, to_result in result.items():
11517 msg = to_result.fail_msg
11519 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11521 hint=("please do a gnt-instance info to see the"
11522 " status of disks"))
11524 cstep = itertools.count(5)
11526 if self.early_release:
11527 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11528 self._RemoveOldStorage(self.target_node, iv_names)
11529 # TODO: Check if releasing locks early still makes sense
11530 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11532 # Release all resource locks except those used by the instance
11533 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11534 keep=self.node_secondary_ip.keys())
11536 # TODO: Can the instance lock be downgraded here? Take the optional disk
11537 # shutdown in the caller into consideration.
11540 # This can fail as the old devices are degraded and _WaitForSync
11541 # does a combined result over all disks, so we don't check its return value
11542 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11543 _WaitForSync(self.lu, self.instance)
11545 # Check all devices manually
11546 self._CheckDevices(self.instance.primary_node, iv_names)
11548 # Step: remove old storage
11549 if not self.early_release:
11550 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11551 self._RemoveOldStorage(self.target_node, iv_names)
11554 class LURepairNodeStorage(NoHooksLU):
11555 """Repairs the volume group on a node.
11560 def CheckArguments(self):
11561 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11563 storage_type = self.op.storage_type
11565 if (constants.SO_FIX_CONSISTENCY not in
11566 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11567 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11568 " repaired" % storage_type,
11569 errors.ECODE_INVAL)
11571 def ExpandNames(self):
11572 self.needed_locks = {
11573 locking.LEVEL_NODE: [self.op.node_name],
11576 def _CheckFaultyDisks(self, instance, node_name):
11577 """Ensure faulty disks abort the opcode or at least warn."""
11579 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11581 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11582 " node '%s'" % (instance.name, node_name),
11583 errors.ECODE_STATE)
11584 except errors.OpPrereqError, err:
11585 if self.op.ignore_consistency:
11586 self.proc.LogWarning(str(err.args[0]))
11590 def CheckPrereq(self):
11591 """Check prerequisites.
11594 # Check whether any instance on this node has faulty disks
11595 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11596 if inst.admin_state != constants.ADMINST_UP:
11598 check_nodes = set(inst.all_nodes)
11599 check_nodes.discard(self.op.node_name)
11600 for inst_node_name in check_nodes:
11601 self._CheckFaultyDisks(inst, inst_node_name)
11603 def Exec(self, feedback_fn):
11604 feedback_fn("Repairing storage unit '%s' on %s ..." %
11605 (self.op.name, self.op.node_name))
11607 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11608 result = self.rpc.call_storage_execute(self.op.node_name,
11609 self.op.storage_type, st_args,
11611 constants.SO_FIX_CONSISTENCY)
11612 result.Raise("Failed to repair storage unit '%s' on %s" %
11613 (self.op.name, self.op.node_name))
11616 class LUNodeEvacuate(NoHooksLU):
11617 """Evacuates instances off a list of nodes.
11622 _MODE2IALLOCATOR = {
11623 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11624 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11625 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11627 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11628 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11629 constants.IALLOCATOR_NEVAC_MODES)
11631 def CheckArguments(self):
11632 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11634 def ExpandNames(self):
11635 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11637 if self.op.remote_node is not None:
11638 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11639 assert self.op.remote_node
11641 if self.op.remote_node == self.op.node_name:
11642 raise errors.OpPrereqError("Can not use evacuated node as a new"
11643 " secondary node", errors.ECODE_INVAL)
11645 if self.op.mode != constants.NODE_EVAC_SEC:
11646 raise errors.OpPrereqError("Without the use of an iallocator only"
11647 " secondary instances can be evacuated",
11648 errors.ECODE_INVAL)
11651 self.share_locks = _ShareAll()
11652 self.needed_locks = {
11653 locking.LEVEL_INSTANCE: [],
11654 locking.LEVEL_NODEGROUP: [],
11655 locking.LEVEL_NODE: [],
11658 # Determine nodes (via group) optimistically, needs verification once locks
11659 # have been acquired
11660 self.lock_nodes = self._DetermineNodes()
11662 def _DetermineNodes(self):
11663 """Gets the list of nodes to operate on.
11666 if self.op.remote_node is None:
11667 # Iallocator will choose any node(s) in the same group
11668 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11670 group_nodes = frozenset([self.op.remote_node])
11672 # Determine nodes to be locked
11673 return set([self.op.node_name]) | group_nodes
11675 def _DetermineInstances(self):
11676 """Builds list of instances to operate on.
11679 assert self.op.mode in constants.NODE_EVAC_MODES
11681 if self.op.mode == constants.NODE_EVAC_PRI:
11682 # Primary instances only
11683 inst_fn = _GetNodePrimaryInstances
11684 assert self.op.remote_node is None, \
11685 "Evacuating primary instances requires iallocator"
11686 elif self.op.mode == constants.NODE_EVAC_SEC:
11687 # Secondary instances only
11688 inst_fn = _GetNodeSecondaryInstances
11691 assert self.op.mode == constants.NODE_EVAC_ALL
11692 inst_fn = _GetNodeInstances
11693 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11695 raise errors.OpPrereqError("Due to an issue with the iallocator"
11696 " interface it is not possible to evacuate"
11697 " all instances at once; specify explicitly"
11698 " whether to evacuate primary or secondary"
11700 errors.ECODE_INVAL)
11702 return inst_fn(self.cfg, self.op.node_name)
11704 def DeclareLocks(self, level):
11705 if level == locking.LEVEL_INSTANCE:
11706 # Lock instances optimistically, needs verification once node and group
11707 # locks have been acquired
11708 self.needed_locks[locking.LEVEL_INSTANCE] = \
11709 set(i.name for i in self._DetermineInstances())
11711 elif level == locking.LEVEL_NODEGROUP:
11712 # Lock node groups for all potential target nodes optimistically, needs
11713 # verification once nodes have been acquired
11714 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11715 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11717 elif level == locking.LEVEL_NODE:
11718 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11720 def CheckPrereq(self):
11722 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11723 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11724 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11726 need_nodes = self._DetermineNodes()
11728 if not owned_nodes.issuperset(need_nodes):
11729 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11730 " locks were acquired, current nodes are"
11731 " are '%s', used to be '%s'; retry the"
11733 (self.op.node_name,
11734 utils.CommaJoin(need_nodes),
11735 utils.CommaJoin(owned_nodes)),
11736 errors.ECODE_STATE)
11738 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11739 if owned_groups != wanted_groups:
11740 raise errors.OpExecError("Node groups changed since locks were acquired,"
11741 " current groups are '%s', used to be '%s';"
11742 " retry the operation" %
11743 (utils.CommaJoin(wanted_groups),
11744 utils.CommaJoin(owned_groups)))
11746 # Determine affected instances
11747 self.instances = self._DetermineInstances()
11748 self.instance_names = [i.name for i in self.instances]
11750 if set(self.instance_names) != owned_instances:
11751 raise errors.OpExecError("Instances on node '%s' changed since locks"
11752 " were acquired, current instances are '%s',"
11753 " used to be '%s'; retry the operation" %
11754 (self.op.node_name,
11755 utils.CommaJoin(self.instance_names),
11756 utils.CommaJoin(owned_instances)))
11758 if self.instance_names:
11759 self.LogInfo("Evacuating instances from node '%s': %s",
11761 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11763 self.LogInfo("No instances to evacuate from node '%s'",
11766 if self.op.remote_node is not None:
11767 for i in self.instances:
11768 if i.primary_node == self.op.remote_node:
11769 raise errors.OpPrereqError("Node %s is the primary node of"
11770 " instance %s, cannot use it as"
11772 (self.op.remote_node, i.name),
11773 errors.ECODE_INVAL)
11775 def Exec(self, feedback_fn):
11776 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11778 if not self.instance_names:
11779 # No instances to evacuate
11782 elif self.op.iallocator is not None:
11783 # TODO: Implement relocation to other group
11784 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
11785 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
11786 instances=list(self.instance_names))
11787 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11789 ial.Run(self.op.iallocator)
11791 if not ial.success:
11792 raise errors.OpPrereqError("Can't compute node evacuation using"
11793 " iallocator '%s': %s" %
11794 (self.op.iallocator, ial.info),
11795 errors.ECODE_NORES)
11797 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11799 elif self.op.remote_node is not None:
11800 assert self.op.mode == constants.NODE_EVAC_SEC
11802 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11803 remote_node=self.op.remote_node,
11805 mode=constants.REPLACE_DISK_CHG,
11806 early_release=self.op.early_release)]
11807 for instance_name in self.instance_names
11811 raise errors.ProgrammerError("No iallocator or remote node")
11813 return ResultWithJobs(jobs)
11816 def _SetOpEarlyRelease(early_release, op):
11817 """Sets C{early_release} flag on opcodes if available.
11821 op.early_release = early_release
11822 except AttributeError:
11823 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11828 def _NodeEvacDest(use_nodes, group, nodes):
11829 """Returns group or nodes depending on caller's choice.
11833 return utils.CommaJoin(nodes)
11838 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11839 """Unpacks the result of change-group and node-evacuate iallocator requests.
11841 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11842 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11844 @type lu: L{LogicalUnit}
11845 @param lu: Logical unit instance
11846 @type alloc_result: tuple/list
11847 @param alloc_result: Result from iallocator
11848 @type early_release: bool
11849 @param early_release: Whether to release locks early if possible
11850 @type use_nodes: bool
11851 @param use_nodes: Whether to display node names instead of groups
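Example of the C{alloc_result} shape (all values hypothetical): C{moved}
is a list of C{(instance, group, nodes)} tuples, C{failed} a list of
C{(instance, reason)} tuples, and C{jobs} a list of lists of serialized
opcodes, e.g.::

  ([("inst1", "group1", ["node2", "node3"])],
   [("inst2", "disk template not supported")],
   [[{"OP_ID": "OP_INSTANCE_MIGRATE"}]])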
11854 (moved, failed, jobs) = alloc_result
11857 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11858 for (name, reason) in failed)
11859 lu.LogWarning("Unable to evacuate instances %s", failreason)
11860 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11863 lu.LogInfo("Instances to be moved: %s",
11864 utils.CommaJoin("%s (to %s)" %
11865 (name, _NodeEvacDest(use_nodes, group, nodes))
11866 for (name, group, nodes) in moved))
11868 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11869 map(opcodes.OpCode.LoadOpCode, ops))
11873 def _DiskSizeInBytesToMebibytes(lu, size):
11874 """Converts a disk size in bytes to mebibytes.
11876 Warns and rounds up if the size isn't an even multiple of 1 MiB.
11879 (mib, remainder) = divmod(size, 1024 * 1024)
11882 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
11883 " to not overwrite existing data (%s bytes will not be"
11884 " wiped)", (1024 * 1024) - remainder)
11890 class LUInstanceGrowDisk(LogicalUnit):
11891 """Grow a disk of an instance.
11894 HPATH = "disk-grow"
11895 HTYPE = constants.HTYPE_INSTANCE
11898 def ExpandNames(self):
11899 self._ExpandAndLockInstance()
11900 self.needed_locks[locking.LEVEL_NODE] = []
11901 self.needed_locks[locking.LEVEL_NODE_RES] = []
11902 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11903 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11905 def DeclareLocks(self, level):
11906 if level == locking.LEVEL_NODE:
11907 self._LockInstancesNodes()
11908 elif level == locking.LEVEL_NODE_RES:
11910 self.needed_locks[locking.LEVEL_NODE_RES] = \
11911 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
11913 def BuildHooksEnv(self):
11914 """Build hooks env.
11916 This runs on the master, the primary and all the secondaries.
11920 "DISK": self.op.disk,
11921 "AMOUNT": self.op.amount,
11922 "ABSOLUTE": self.op.absolute,
11924 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11927 def BuildHooksNodes(self):
11928 """Build hooks nodes.
11931 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11934 def CheckPrereq(self):
11935 """Check prerequisites.
11937 This checks that the instance is in the cluster.
11940 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11941 assert instance is not None, \
11942 "Cannot retrieve locked instance %s" % self.op.instance_name
11943 nodenames = list(instance.all_nodes)
11944 for node in nodenames:
11945 _CheckNodeOnline(self, node)
11947 self.instance = instance
11949 if instance.disk_template not in constants.DTS_GROWABLE:
11950 raise errors.OpPrereqError("Instance's disk layout does not support"
11951 " growing", errors.ECODE_INVAL)
11953 self.disk = instance.FindDisk(self.op.disk)
11955 if self.op.absolute:
11956 self.target = self.op.amount
11957 self.delta = self.target - self.disk.size
11959 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11960 "current disk size (%s)" %
11961 (utils.FormatUnit(self.target, "h"),
11962 utils.FormatUnit(self.disk.size, "h")),
11963 errors.ECODE_STATE)
11965 self.delta = self.op.amount
11966 self.target = self.disk.size + self.delta
11968 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11969 utils.FormatUnit(self.delta, "h"),
11970 errors.ECODE_INVAL)
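# Example of the two modes (hypothetical sizes, in MiB): with a current
# disk of 1024, absolute=True and amount=2048 give delta = 2048 - 1024 =
# 1024, while absolute=False and amount=1024 give target = 1024 + 1024 =
# 2048; both describe the same grow operation.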
11972 if instance.disk_template not in (constants.DT_FILE,
11973 constants.DT_SHARED_FILE,
11975 # TODO: check the free disk space for file, when that feature will be supported
11977 _CheckNodesFreeDiskPerVG(self, nodenames,
11978 self.disk.ComputeGrowth(self.delta))
11980 def Exec(self, feedback_fn):
11981 """Execute disk grow.
11984 instance = self.instance
11987 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11988 assert (self.owned_locks(locking.LEVEL_NODE) ==
11989 self.owned_locks(locking.LEVEL_NODE_RES))
11991 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
11993 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11995 raise errors.OpExecError("Cannot activate block device to grow")
11997 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11998 (self.op.disk, instance.name,
11999 utils.FormatUnit(self.delta, "h"),
12000 utils.FormatUnit(self.target, "h")))
12002 # First run all grow ops in dry-run mode
12003 for node in instance.all_nodes:
12004 self.cfg.SetDiskID(disk, node)
12005 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12007 result.Raise("Dry-run grow request failed to node %s" % node)
12010 # Get disk size from primary node for wiping
12011 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12012 result.Raise("Failed to retrieve disk size from node '%s'" %
12013 instance.primary_node)
12015 (disk_size_in_bytes, ) = result.payload
12017 if disk_size_in_bytes is None:
12018 raise errors.OpExecError("Failed to retrieve disk size from primary"
12019 " node '%s'" % instance.primary_node)
12021 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12023 assert old_disk_size >= disk.size, \
12024 ("Retrieved disk size too small (got %s, should be at least %s)" %
12025 (old_disk_size, disk.size))
12027 old_disk_size = None
12029 # We know that (as far as we can test) operations across different
12030 # nodes will succeed; time to run it for real on the backing storage
12031 for node in instance.all_nodes:
12032 self.cfg.SetDiskID(disk, node)
12033 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12035 result.Raise("Grow request failed to node %s" % node)
12037 # And now execute it for logical storage, on the primary node
12038 node = instance.primary_node
12039 self.cfg.SetDiskID(disk, node)
12040 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12042 result.Raise("Grow request failed to node %s" % node)
12044 disk.RecordGrow(self.delta)
12045 self.cfg.Update(instance, feedback_fn)
12047 # Changes have been recorded, release node lock
12048 _ReleaseLocks(self, locking.LEVEL_NODE)
12050 # Downgrade lock while waiting for sync
12051 self.glm.downgrade(locking.LEVEL_INSTANCE)
12053 assert wipe_disks ^ (old_disk_size is None)
12056 assert instance.disks[self.op.disk] == disk
12058 # Wipe newly added disk space
12059 _WipeDisks(self, instance,
12060 disks=[(self.op.disk, disk, old_disk_size)])
12062 if self.op.wait_for_sync:
12063 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12065 self.proc.LogWarning("Disk sync-ing has not returned a good"
12066 " status; please check the instance")
12067 if instance.admin_state != constants.ADMINST_UP:
12068 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12069 elif instance.admin_state != constants.ADMINST_UP:
12070 self.proc.LogWarning("Not shutting down the disk even if the instance is"
12071 " not supposed to be running because no wait for"
12072 " sync mode was requested")
12074 assert self.owned_locks(locking.LEVEL_NODE_RES)
12075 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12078 class LUInstanceQueryData(NoHooksLU):
12079 """Query runtime instance data.
12084 def ExpandNames(self):
12085 self.needed_locks = {}
12087 # Use locking if requested or when non-static information is wanted
12088 if not (self.op.static or self.op.use_locking):
12089 self.LogWarning("Non-static data requested, locks need to be acquired")
12090 self.op.use_locking = True
12092 if self.op.instances or not self.op.use_locking:
12093 # Expand instance names right here
12094 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12096 # Will use acquired locks
12097 self.wanted_names = None
12099 if self.op.use_locking:
12100 self.share_locks = _ShareAll()
12102 if self.wanted_names is None:
12103 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12105 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12107 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12108 self.needed_locks[locking.LEVEL_NODE] = []
12109 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12111 def DeclareLocks(self, level):
12112 if self.op.use_locking:
12113 if level == locking.LEVEL_NODEGROUP:
12114 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12116 # Lock all groups used by instances optimistically; this requires going
12117 # via the node before it's locked, requiring verification later on
12118 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12119 frozenset(group_uuid
12120 for instance_name in owned_instances
12122 self.cfg.GetInstanceNodeGroups(instance_name))
12124 elif level == locking.LEVEL_NODE:
12125 self._LockInstancesNodes()
12127 def CheckPrereq(self):
12128 """Check prerequisites.
12130 This only checks the optional instance list against the existing names.
12133 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12134 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12135 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12137 if self.wanted_names is None:
12138 assert self.op.use_locking, "Locking was not used"
12139 self.wanted_names = owned_instances
12141 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12143 if self.op.use_locking:
12144 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12147 assert not (owned_instances or owned_groups or owned_nodes)
12149 self.wanted_instances = instances.values()
12151 def _ComputeBlockdevStatus(self, node, instance, dev):
12152 """Returns the status of a block device
12155 if self.op.static or not node:
12158 self.cfg.SetDiskID(dev, node)
12160 result = self.rpc.call_blockdev_find(node, dev)
12164 result.Raise("Can't compute disk status for %s" % instance.name)
12166 status = result.payload
12170 return (status.dev_path, status.major, status.minor,
12171 status.sync_percent, status.estimated_time,
12172 status.is_degraded, status.ldisk_status)
12174 def _ComputeDiskStatus(self, instance, snode, dev):
12175 """Compute block device status.
12178 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12180 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12182 def _ComputeDiskStatusInner(self, instance, snode, dev):
12183 """Compute block device status.
12185 @attention: The device has to be annotated already.
12188 if dev.dev_type in constants.LDS_DRBD:
12189 # we change the snode then (otherwise we use the one passed in)
12190 if dev.logical_id[0] == instance.primary_node:
12191 snode = dev.logical_id[1]
12193 snode = dev.logical_id[0]
12195 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12197 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12200 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12207 "iv_name": dev.iv_name,
12208 "dev_type": dev.dev_type,
12209 "logical_id": dev.logical_id,
12210 "physical_id": dev.physical_id,
12211 "pstatus": dev_pstatus,
12212 "sstatus": dev_sstatus,
12213 "children": dev_children,
12218 def Exec(self, feedback_fn):
12219 """Gather and return data"""
12222 cluster = self.cfg.GetClusterInfo()
12224 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12225 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12227 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12228 for node in nodes.values()))
12230 group2name_fn = lambda uuid: groups[uuid].name
12232 for instance in self.wanted_instances:
12233 pnode = nodes[instance.primary_node]
12235 if self.op.static or pnode.offline:
12236 remote_state = None
12238 self.LogWarning("Primary node %s is marked offline, returning static"
12239 " information only for instance %s" %
12240 (pnode.name, instance.name))
12242 remote_info = self.rpc.call_instance_info(instance.primary_node,
12244 instance.hypervisor)
12245 remote_info.Raise("Error checking node %s" % instance.primary_node)
12246 remote_info = remote_info.payload
12247 if remote_info and "state" in remote_info:
12248 remote_state = "up"
12250 if instance.admin_state == constants.ADMINST_UP:
12251 remote_state = "down"
12253 remote_state = instance.admin_state
12255 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12258 snodes_group_uuids = [nodes[snode_name].group
12259 for snode_name in instance.secondary_nodes]
12261 result[instance.name] = {
12262 "name": instance.name,
12263 "config_state": instance.admin_state,
12264 "run_state": remote_state,
12265 "pnode": instance.primary_node,
12266 "pnode_group_uuid": pnode.group,
12267 "pnode_group_name": group2name_fn(pnode.group),
12268 "snodes": instance.secondary_nodes,
12269 "snodes_group_uuids": snodes_group_uuids,
12270 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12272 # this happens to be the same format used for hooks
12273 "nics": _NICListToTuple(self, instance.nics),
12274 "disk_template": instance.disk_template,
12276 "hypervisor": instance.hypervisor,
12277 "network_port": instance.network_port,
12278 "hv_instance": instance.hvparams,
12279 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12280 "be_instance": instance.beparams,
12281 "be_actual": cluster.FillBE(instance),
12282 "os_instance": instance.osparams,
12283 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12284 "serial_no": instance.serial_no,
12285 "mtime": instance.mtime,
12286 "ctime": instance.ctime,
12287 "uuid": instance.uuid,
12293 def PrepareContainerMods(mods, private_fn):
12294 """Prepares a list of container modifications by adding a private data field.
12296 @type mods: list of tuples; (operation, index, parameters)
12297 @param mods: List of modifications
12298 @type private_fn: callable or None
12299 @param private_fn: Callable for constructing a private data field for a modification; can be None
12304 if private_fn is None:
12309 return [(op, idx, params, fn()) for (op, idx, params) in mods]
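# A minimal sketch of the transformation (parameters hypothetical): with
# private_fn=None every 3-tuple is padded with None, otherwise with a
# fresh private object per modification:
#   PrepareContainerMods([(constants.DDM_ADD, -1, {"size": 1024})], None)
#   == [(constants.DDM_ADD, -1, {"size": 1024}, None)]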
12312 #: Type description for changes as returned by L{ApplyContainerMods}'s callbacks
12314 _TApplyContModsCbChanges = \
12315 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12316 ht.TNonEmptyString,
12321 def ApplyContainerMods(kind, container, chgdesc, mods,
12322 create_fn, modify_fn, remove_fn):
12323 """Applies descriptions in C{mods} to C{container}.
12326 @param kind: One-word item description
12327 @type container: list
12328 @param container: Container to modify
12329 @type chgdesc: None or list
12330 @param chgdesc: List of applied changes
12332 @param mods: Modifications as returned by L{PrepareContainerMods}
12333 @type create_fn: callable
12334 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12335 receives absolute item index, parameters and private data object as added
12336 by L{PrepareContainerMods}, returns tuple containing new item and changes
12338 @type modify_fn: callable
12339 @param modify_fn: Callback for modifying an existing item
12340 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12341 and private data object as added by L{PrepareContainerMods}, returns
12343 @type remove_fn: callable
12344 @param remove_fn: Callback on removing item; receives absolute item index,
12345 item and private data object as added by L{PrepareContainerMods}
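# A hedged usage sketch with toy callbacks (assumes the surrounding module
# context; "test" and _create are made up for illustration):
#   def _create(addidx, params, private):
#     return ("item%d" % addidx, [("test/%s" % addidx, "add")])
#   container = []
#   chgdesc = []
#   mods = PrepareContainerMods([(constants.DDM_ADD, -1, {})], None)
#   ApplyContainerMods("test", container, chgdesc, mods, _create, None, None)
#   # afterwards: container == ["item0"] and chgdesc == [("test/0", "add")]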
12348 for (op, idx, params, private) in mods:
12351 absidx = len(container) - 1
12353 raise IndexError("Not accepting negative indices other than -1")
12354 elif idx > len(container):
12355 raise IndexError("Got %s index %s, but there are only %s" %
12356 (kind, idx, len(container)))
12362 if op == constants.DDM_ADD:
12363 # Calculate where item will be added
12365 addidx = len(container)
12369 if create_fn is None:
12372 (item, changes) = create_fn(addidx, params, private)
12375 container.append(item)
12378 assert idx <= len(container)
12379 # list.insert does so before the specified index
12380 container.insert(idx, item)
12382 # Retrieve existing item
12384 item = container[absidx]
12386 raise IndexError("Invalid %s index %s" % (kind, idx))
12388 if op == constants.DDM_REMOVE:
12391 if remove_fn is not None:
12392 remove_fn(absidx, item, private)
12394 changes = [("%s/%s" % (kind, absidx), "remove")]
12396 assert container[absidx] == item
12397 del container[absidx]
12398 elif op == constants.DDM_MODIFY:
12399 if modify_fn is not None:
12400 changes = modify_fn(absidx, item, params, private)
12402 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12404 assert _TApplyContModsCbChanges(changes)
12406 if not (chgdesc is None or changes is None):
12407 chgdesc.extend(changes)
12410 def _UpdateIvNames(base_index, disks):
12411 """Updates the C{iv_name} attribute of disks.
12413 @type disks: list of L{objects.Disk}
12416 for (idx, disk) in enumerate(disks):
12417 disk.iv_name = "disk/%s" % (base_index + idx, )
12420 class _InstNicModPrivate:
12421 """Data structure for network interface modifications.
12423 Used by L{LUInstanceSetParams}.
12426 def __init__(self):
12431 class LUInstanceSetParams(LogicalUnit):
12432 """Modifies an instances's parameters.
12435 HPATH = "instance-modify"
12436 HTYPE = constants.HTYPE_INSTANCE
12440 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12441 assert ht.TList(mods)
12442 assert not mods or len(mods[0]) in (2, 3)
12444 if mods and len(mods[0]) == 2:
12448 for op, params in mods:
12449 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12450 result.append((op, -1, params))
12454 raise errors.OpPrereqError("Only one %s add or remove operation is"
12455 " supported at a time" % kind,
12456 errors.ECODE_INVAL)
12458 result.append((constants.DDM_MODIFY, op, params))
12460 assert verify_fn(result)
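# Sketch of the upgrade performed above (parameters hypothetical): the old
# 2-tuple format is padded with an index, so ("add", {...}) becomes
# ("add", -1, {...}), while a bare modify entry such as (0, {...}) is
# rewritten as ("modify", 0, {...}).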
12467 def _CheckMods(kind, mods, key_types, item_fn):
12468 """Ensures requested disk/NIC modifications are valid.
12471 for (op, _, params) in mods:
12472 assert ht.TDict(params)
12474 utils.ForceDictType(params, key_types)
12476 if op == constants.DDM_REMOVE:
12478 raise errors.OpPrereqError("No settings should be passed when"
12479 " removing a %s" % kind,
12480 errors.ECODE_INVAL)
12481 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12482 item_fn(op, params)
12484 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12487 def _VerifyDiskModification(op, params):
12488 """Verifies a disk modification.
12491 if op == constants.DDM_ADD:
12492 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12493 if mode not in constants.DISK_ACCESS_SET:
12494 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12495 errors.ECODE_INVAL)
12497 size = params.get(constants.IDISK_SIZE, None)
12499 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12500 constants.IDISK_SIZE, errors.ECODE_INVAL)
12504 except (TypeError, ValueError), err:
12505 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12506 errors.ECODE_INVAL)
12508 params[constants.IDISK_SIZE] = size
12510 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12511 raise errors.OpPrereqError("Disk size change not possible, use"
12512 " grow-disk", errors.ECODE_INVAL)
12515 def _VerifyNicModification(op, params):
12516 """Verifies a network interface modification.
12519 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12520 ip = params.get(constants.INIC_IP, None)
12523 elif ip.lower() == constants.VALUE_NONE:
12524 params[constants.INIC_IP] = None
12525 elif not netutils.IPAddress.IsValid(ip):
12526 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12527 errors.ECODE_INVAL)
12529 bridge = params.get("bridge", None)
12530 link = params.get(constants.INIC_LINK, None)
12531 if bridge and link:
12532 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12533 " at the same time", errors.ECODE_INVAL)
12534 elif bridge and bridge.lower() == constants.VALUE_NONE:
12535 params["bridge"] = None
12536 elif link and link.lower() == constants.VALUE_NONE:
12537 params[constants.INIC_LINK] = None
12539 if op == constants.DDM_ADD:
12540 macaddr = params.get(constants.INIC_MAC, None)
12541 if macaddr is None:
12542 params[constants.INIC_MAC] = constants.VALUE_AUTO
12544 if constants.INIC_MAC in params:
12545 macaddr = params[constants.INIC_MAC]
12546 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12547 macaddr = utils.NormalizeAndValidateMac(macaddr)
12549 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12550 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12551 " modifying an existing NIC",
12552 errors.ECODE_INVAL)
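# Example of NIC parameters that pass the checks above (values
# hypothetical):
#   (constants.DDM_ADD, -1, {constants.INIC_IP: "192.0.2.10",
#                            constants.INIC_LINK: "br0",
#                            constants.INIC_MAC: constants.VALUE_AUTO})
# For DDM_ADD a missing MAC defaults to VALUE_AUTO; for DDM_MODIFY the
# same VALUE_AUTO is rejected, since an existing NIC keeps its address.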
12554 def CheckArguments(self):
12555 if not (self.op.nics or self.op.disks or self.op.disk_template or
12556 self.op.hvparams or self.op.beparams or self.op.os_name or
12557 self.op.offline is not None or self.op.runtime_mem):
12558 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12560 if self.op.hvparams:
12561 _CheckGlobalHvParams(self.op.hvparams)
12563 self.op.disks = self._UpgradeDiskNicMods(
12564 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12565 self.op.nics = self._UpgradeDiskNicMods(
12566 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12568 # Check disk modifications
12569 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12570 self._VerifyDiskModification)
12572 if self.op.disks and self.op.disk_template is not None:
12573 raise errors.OpPrereqError("Disk template conversion and other disk"
12574 " changes not supported at the same time",
12575 errors.ECODE_INVAL)
12577 if (self.op.disk_template and
12578 self.op.disk_template in constants.DTS_INT_MIRROR and
12579 self.op.remote_node is None):
12580 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12581 " one requires specifying a secondary node",
12582 errors.ECODE_INVAL)
12584 # Check NIC modifications
12585 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12586 self._VerifyNicModification)
12588 def ExpandNames(self):
12589 self._ExpandAndLockInstance()
12590 # Can't even acquire node locks in shared mode as upcoming changes in
12591 # Ganeti 2.6 will start to modify the node object on disk conversion
12592 self.needed_locks[locking.LEVEL_NODE] = []
12593 self.needed_locks[locking.LEVEL_NODE_RES] = []
12594 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12596 def DeclareLocks(self, level):
12597 # TODO: Acquire group lock in shared mode (disk parameters)
12598 if level == locking.LEVEL_NODE:
12599 self._LockInstancesNodes()
12600 if self.op.disk_template and self.op.remote_node:
12601 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12602 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12603 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12605 self.needed_locks[locking.LEVEL_NODE_RES] = \
12606 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12608 def BuildHooksEnv(self):
12609 """Build hooks env.
12611 This runs on the master, primary and secondaries.
12615 if constants.BE_MINMEM in self.be_new:
12616 args["minmem"] = self.be_new[constants.BE_MINMEM]
12617 if constants.BE_MAXMEM in self.be_new:
12618 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12619 if constants.BE_VCPUS in self.be_new:
12620 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12621 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12622 # information at all.
12624 if self._new_nics is not None:
12627 for nic in self._new_nics:
12628 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12629 mode = nicparams[constants.NIC_MODE]
12630 link = nicparams[constants.NIC_LINK]
12631 nics.append((nic.ip, nic.mac, mode, link))
12633 args["nics"] = nics
12635 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12636 if self.op.disk_template:
12637 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12638 if self.op.runtime_mem:
12639 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12643 def BuildHooksNodes(self):
12644 """Build hooks nodes.
12647 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12650 def _PrepareNicModification(self, params, private, old_ip, old_params,
12652 update_params_dict = dict([(key, params[key])
12653 for key in constants.NICS_PARAMETERS
12656 if "bridge" in params:
12657 update_params_dict[constants.NIC_LINK] = params["bridge"]
12659 new_params = _GetUpdatedParams(old_params, update_params_dict)
12660 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12662 new_filled_params = cluster.SimpleFillNIC(new_params)
12663 objects.NIC.CheckParameterSyntax(new_filled_params)
12665 new_mode = new_filled_params[constants.NIC_MODE]
12666 if new_mode == constants.NIC_MODE_BRIDGED:
12667 bridge = new_filled_params[constants.NIC_LINK]
12668 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12670 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12672 self.warn.append(msg)
12674 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12676 elif new_mode == constants.NIC_MODE_ROUTED:
12677 ip = params.get(constants.INIC_IP, old_ip)
12679 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12680 " on a routed NIC", errors.ECODE_INVAL)
12682 if constants.INIC_MAC in params:
12683 mac = params[constants.INIC_MAC]
12685 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12686 errors.ECODE_INVAL)
12687 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12688 # otherwise generate the MAC address
12689 params[constants.INIC_MAC] = \
12690 self.cfg.GenerateMAC(self.proc.GetECId())
12692 # or validate/reserve the current one
12694 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12695 except errors.ReservationError:
12696 raise errors.OpPrereqError("MAC address '%s' already in use"
12697 " in cluster" % mac,
12698 errors.ECODE_NOTUNIQUE)
12700 private.params = new_params
12701 private.filled = new_filled_params
12703 def CheckPrereq(self):
12704 """Check prerequisites.
12706 This only checks the instance list against the existing names.
12709 # checking the new params on the primary/secondary nodes
12711 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12712 cluster = self.cluster = self.cfg.GetClusterInfo()
12713 assert self.instance is not None, \
12714 "Cannot retrieve locked instance %s" % self.op.instance_name
12715 pnode = instance.primary_node
12716 nodelist = list(instance.all_nodes)
12717 pnode_info = self.cfg.GetNodeInfo(pnode)
12718 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12720 # Prepare disk/NIC modifications
12721 self.diskmod = PrepareContainerMods(self.op.disks, None)
12722 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12725 if self.op.os_name and not self.op.force:
12726 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12727 self.op.force_variant)
12728 instance_os = self.op.os_name
12730 instance_os = instance.os
12732 assert not (self.op.disk_template and self.op.disks), \
12733 "Can't modify disk template and apply disk changes at the same time"
12735 if self.op.disk_template:
12736 if instance.disk_template == self.op.disk_template:
12737 raise errors.OpPrereqError("Instance already has disk template %s" %
12738 instance.disk_template, errors.ECODE_INVAL)
12740 if (instance.disk_template,
12741 self.op.disk_template) not in self._DISK_CONVERSIONS:
12742 raise errors.OpPrereqError("Unsupported disk template conversion from"
12743 " %s to %s" % (instance.disk_template,
12744 self.op.disk_template),
12745 errors.ECODE_INVAL)
12746 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12747 msg="cannot change disk template")
12748 if self.op.disk_template in constants.DTS_INT_MIRROR:
12749 if self.op.remote_node == pnode:
12750 raise errors.OpPrereqError("Given new secondary node %s is the same"
12751 " as the primary node of the instance" %
12752 self.op.remote_node, errors.ECODE_STATE)
12753 _CheckNodeOnline(self, self.op.remote_node)
12754 _CheckNodeNotDrained(self, self.op.remote_node)
12755 # FIXME: here we assume that the old instance type is DT_PLAIN
12756 assert instance.disk_template == constants.DT_PLAIN
12757 disks = [{constants.IDISK_SIZE: d.size,
12758 constants.IDISK_VG: d.logical_id[0]}
12759 for d in instance.disks]
12760 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12761 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12763 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12764 snode_group = self.cfg.GetNodeGroup(snode_info.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              snode_group)
12767 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12768 ignore=self.op.ignore_ipolicy)
12769 if pnode_info.group != snode_info.group:
12770 self.LogWarning("The primary and secondary nodes are in two"
12771 " different node groups; the disk parameters"
12772 " from the first disk's node group will be"
12775 # hvparams processing
12776 if self.op.hvparams:
12777 hv_type = instance.hypervisor
12778 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12779 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12780 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12783 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12784 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12785 self.hv_proposed = self.hv_new = hv_new # the new actual values
12786 self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
                                              instance.hvparams)
      self.hv_new = self.hv_inst = {}
12792 # beparams processing
12793 if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
12796 objects.UpgradeBeParams(i_bedict)
12797 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12798 be_new = cluster.SimpleFillBE(i_bedict)
12799 self.be_proposed = self.be_new = be_new # the new actual values
12800 self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
12803 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12804 be_old = cluster.FillBE(instance)
12806 # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # been changed
12809 if (constants.BE_VCPUS in self.be_proposed and
12810 constants.HV_CPU_MASK in self.hv_proposed):
      cpu_list = \
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12813 # Verify mask is consistent with number of vCPUs. Can skip this
12814 # test if only 1 entry in the CPU mask, which means same mask
12815 # is applied to all vCPUs.
12816 if (len(cpu_list) > 1 and
12817 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   " CPU mask [%s]" %
12820 (self.be_proposed[constants.BE_VCPUS],
12821 self.hv_proposed[constants.HV_CPU_MASK]),
12822 errors.ECODE_INVAL)
12824 # Only perform this test if a new CPU mask is given
12825 if constants.HV_CPU_MASK in self.hv_new:
12826 # Calculate the largest CPU number requested
12827 max_requested_cpu = max(map(max, cpu_list))
12828 # Check that all of the instance's nodes have enough physical CPUs to
12829 # satisfy the requested CPU mask
12830 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12831 max_requested_cpu + 1, instance.hypervisor)
12833 # osparams processing
12834 if self.op.osparams:
12835 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12836 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    #TODO(dynmem): do the appropriate check involving MINMEM
12844 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12845 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12846 mem_check_list = [pnode]
12847 if be_new[constants.BE_AUTO_BALANCE]:
12848 # either we changed auto_balance to yes or it was from before
12849 mem_check_list.extend(instance.secondary_nodes)
12850 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12851 instance.hypervisor)
12852 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12853 [instance.hypervisor])
12854 pninfo = nodeinfo[pnode]
12855 msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      else:
        (_, _, (pnhvinfo, )) = pninfo.payload
12862 if not isinstance(pnhvinfo.get("memory_free", None), int):
12863 self.warn.append("Node data from primary node %s doesn't contain"
12864 " free memory information" % pnode)
12865 elif instance_info.fail_msg:
12866 self.warn.append("Can't get instance runtime information: %s" %
12867 instance_info.fail_msg)
        else:
          if instance_info.payload:
            current_mem = int(instance_info.payload["memory"])
          else:
            # Assume instance not running
            # (there is a slight race condition here, but it's not very
            # probable, and we have no other way to check)
            # TODO: Describe race condition
            current_mem = 0
          #TODO(dynmem): do the appropriate check involving MINMEM
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
                      pnhvinfo["memory_free"])
          if miss_mem > 0:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from starting, due to %d MB of memory"
                                       " missing on its primary node" %
                                       miss_mem, errors.ECODE_NORES)
12886 if be_new[constants.BE_AUTO_BALANCE]:
12887 for node, nres in nodeinfo.items():
12888 if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
12892 (_, _, (nhvinfo, )) = nres.payload
12893 if not isinstance(nhvinfo.get("memory_free", None), int):
12894 raise errors.OpPrereqError("Secondary node %s didn't return free"
12895 " memory information" % node,
12896 errors.ECODE_STATE)
12897 #TODO(dynmem): do the appropriate check involving MINMEM
12898 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12899 raise errors.OpPrereqError("This change will prevent the instance"
12900 " from failover to its secondary node"
12901 " %s, due to not enough memory" % node,
12902 errors.ECODE_STATE)
12904 if self.op.runtime_mem:
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
12908 remote_info.Raise("Error checking node %s" % instance.primary_node)
12909 if not remote_info.payload: # not running already
12910 raise errors.OpPrereqError("Instance %s is not running" %
12911 instance.name, errors.ECODE_STATE)
12913 current_memory = remote_info.payload["memory"]
12914 if (not self.op.force and
12915 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12916 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12917 raise errors.OpPrereqError("Instance %s must have memory between %d"
12918 " and %d MB of memory unless --force is"
12921 self.be_proposed[constants.BE_MINMEM],
12922 self.be_proposed[constants.BE_MAXMEM]),
12923 errors.ECODE_INVAL)
12925 if self.op.runtime_mem > current_memory:
12926 _CheckNodeFreeMemory(self, instance.primary_node,
12927 "ballooning memory for instance %s" %
12929 self.op.memory - current_memory,
12930 instance.hypervisor)
12932 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12933 raise errors.OpPrereqError("Disk operations not supported for"
12934 " diskless instances", errors.ECODE_INVAL)
12936 def _PrepareNicCreate(_, params, private):
12937 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12938 return (None, None)
12940 def _PrepareNicMod(_, nic, params, private):
12941 self._PrepareNicModification(params, private, nic.ip,
                                   nic.nicparams, cluster, pnode)
      return None
12945 # Verify NIC changes (operating on copy)
12946 nics = instance.nics[:]
12947 ApplyContainerMods("NIC", nics, None, self.nicmod,
12948 _PrepareNicCreate, _PrepareNicMod, None)
12949 if len(nics) > constants.MAX_NICS:
12950 raise errors.OpPrereqError("Instance has too many network interfaces"
12951 " (%d), cannot add more" % constants.MAX_NICS,
12952 errors.ECODE_STATE)
12954 # Verify disk changes (operating on a copy)
12955 disks = instance.disks[:]
12956 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12957 if len(disks) > constants.MAX_DISKS:
12958 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12959 " more" % constants.MAX_DISKS,
12960 errors.ECODE_STATE)
12962 if self.op.offline is not None:
12963 if self.op.offline:
12964 msg = "can't change to offline"
      else:
        msg = "can't change to online"
12967 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12969 # Pre-compute NIC changes (necessary to use result in hooks)
12970 self._nic_chgdesc = []
    if self.nicmod:
      # Operate on copies as this is still in prereq
      nics = [nic.Copy() for nic in instance.nics]
      ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
                         self._CreateNewNic, self._ApplyNicMods, None)
      self._new_nics = nics
    else:
      self._new_nics = None
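  # Illustrative helper (an assumption for documentation purposes, not part of
  # the LU API): the runtime-memory check in CheckPrereq above boils down to
  # keeping the requested value within the instance's [minmem, maxmem] range
  # unless --force is given.
  @staticmethod
  def _ExampleRuntimeMemInBounds(runtime_mem, be_params):
    return (be_params[constants.BE_MINMEM] <= runtime_mem <=
            be_params[constants.BE_MAXMEM])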
12980 def _ConvertPlainToDrbd(self, feedback_fn):
12981 """Converts an instance from plain to drbd.
12984 feedback_fn("Converting template to drbd")
12985 instance = self.instance
12986 pnode = instance.primary_node
12987 snode = self.op.remote_node
12989 assert instance.disk_template == constants.DT_PLAIN
12991 # create a fake disk info for _GenerateDiskTemplate
12992 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12993 constants.IDISK_VG: d.logical_id[0]}
12994 for d in instance.disks]
12995 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12996 instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn,
                                      self.diskparams)
    anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
                                        self.diskparams)
13001 info = _GetInstanceInfoText(instance)
13002 feedback_fn("Creating additional volumes...")
13003 # first, create the missing data and meta devices
13004 for disk in anno_disks:
13005 # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
13008 for child in disk.children:
13009 _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
13012 feedback_fn("Renaming original volumes...")
13013 rename_list = [(o, n.children[0].logical_id)
13014 for (o, n) in zip(instance.disks, new_disks)]
13015 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13016 result.Raise("Failed to rename original LVs")
13018 feedback_fn("Initializing DRBD devices...")
13019 # all child devices are in place, we can now create the DRBD devices
13020 for disk in anno_disks:
13021 for node in [pnode, snode]:
13022 f_create = node == pnode
13023 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
13025 # at this point, the instance has been modified
13026 instance.disk_template = constants.DT_DRBD8
13027 instance.disks = new_disks
13028 self.cfg.Update(instance, feedback_fn)
13030 # Release node locks while waiting for sync
13031 _ReleaseLocks(self, locking.LEVEL_NODE)
13033 # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")
13040 # Node resource locks will be released by caller
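  # A small sketch of the per-disk dicts fed to _GenerateDiskTemplate during
  # the plain->drbd conversion above; it assumes a plain LV's logical_id is a
  # (vg_name, lv_name) tuple, so logical_id[0] is the volume group. This
  # helper is illustrative only and not used by the conversion itself.
  @staticmethod
  def _ExampleDrbdDiskInfo(disks):
    return [{constants.IDISK_SIZE: d.size,
             constants.IDISK_MODE: d.mode,
             constants.IDISK_VG: d.logical_id[0]}
            for d in disks]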
13042 def _ConvertDrbdToPlain(self, feedback_fn):
13043 """Converts an instance from drbd to plain.
13046 instance = self.instance
13048 assert len(instance.secondary_nodes) == 1
13049 assert instance.disk_template == constants.DT_DRBD8
13051 pnode = instance.primary_node
13052 snode = instance.secondary_nodes[0]
13053 feedback_fn("Converting template to plain")
13055 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13056 new_disks = [d.children[0] for d in instance.disks]
13058 # copy over size and mode
13059 for parent, child in zip(old_disks, new_disks):
13060 child.size = parent.size
13061 child.mode = parent.mode
13063 # this is a DRBD disk, return its port to the pool
13064 # NOTE: this must be done right before the call to cfg.Update!
13065 for disk in old_disks:
13066 tcp_port = disk.logical_id[2]
13067 self.cfg.AddTcpUdpPort(tcp_port)
13069 # update instance structure
13070 instance.disks = new_disks
13071 instance.disk_template = constants.DT_PLAIN
13072 self.cfg.Update(instance, feedback_fn)
13074 # Release locks in case removing disks takes a while
13075 _ReleaseLocks(self, locking.LEVEL_NODE)
13077 feedback_fn("Removing volumes on the secondary node...")
13078 for disk in old_disks:
13079 self.cfg.SetDiskID(disk, snode)
13080 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)
13085 feedback_fn("Removing unneeded volumes on the primary node...")
13086 for idx, disk in enumerate(old_disks):
13087 meta = disk.children[1]
13088 self.cfg.SetDiskID(meta, pnode)
13089 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)
13094 def _CreateNewDisk(self, idx, params, _):
13095 """Creates a new disk.
13098 instance = self.instance
13101 if instance.disk_template in constants.DTS_FILEBASED:
13102 (file_driver, file_path) = instance.disks[0].logical_id
13103 file_path = os.path.dirname(file_path)
    else:
      file_driver = file_path = None
    disk = \
      _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13109 instance.primary_node, instance.secondary_nodes,
13110 [params], file_path, file_driver, idx,
13111 self.Log, self.diskparams)[0]
13113 info = _GetInstanceInfoText(instance)
13115 logging.info("Creating volume %s for instance %s",
13116 disk.iv_name, instance.name)
13117 # Note: this needs to be kept in sync with _CreateDisks
13119 for node in instance.all_nodes:
13120 f_create = (node == instance.primary_node)
      try:
        _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13123 except errors.OpExecError, err:
13124 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13125 disk.iv_name, disk, node, err)
13128 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13132 def _ModifyDisk(idx, disk, params, _):
13133 """Modifies a disk.
13136 disk.mode = params[constants.IDISK_MODE]
13139 ("disk.mode/%d" % idx, disk.mode),
13142 def _RemoveDisk(self, idx, root, _):
13146 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13147 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13148 self.cfg.SetDiskID(disk, node)
13149 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove disk/%d on node '%s': %s,"
                        " continuing anyway", idx, node, msg)
13154 # if this is a DRBD disk, return its port to the pool
13155 if root.dev_type in constants.LDS_DRBD:
13156 self.cfg.AddTcpUdpPort(root.logical_id[2])
  @staticmethod
  def _CreateNewNic(idx, params, private):
13160 """Creates data structure for a new network interface.
13163 mac = params[constants.INIC_MAC]
13164 ip = params.get(constants.INIC_IP, None)
13165 nicparams = private.params
    return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
      ("nic.%d" % idx,
       "add:mac=%s,ip=%s,mode=%s,link=%s" %
       (mac, ip, private.filled[constants.NIC_MODE],
        private.filled[constants.NIC_LINK])),
      ])

  @staticmethod
  def _ApplyNicMods(idx, nic, params, private):
13176 """Modifies a network interface.
13181 for key in [constants.INIC_MAC, constants.INIC_IP]:
      if key in params:
        changes.append(("nic.%s/%d" % (key, idx), params[key]))
        setattr(nic, key, params[key])
    if private.params:
      nic.nicparams = private.params

13189 for (key, val) in params.items():
        changes.append(("nic.%s/%d" % (key, idx), val))

    return changes

  def Exec(self, feedback_fn):
13195 """Modifies an instance.
13197 All parameters take effect only at the next restart of the instance.
13200 # Process here the warnings from CheckPrereq, as we don't have a
13201 # feedback_fn there.
13202 # TODO: Replace with self.LogWarning
13203 for warn in self.warn:
13204 feedback_fn("WARNING: %s" % warn)
13206 assert ((self.op.disk_template is None) ^
13207 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13208 "Not owning any node resource locks"
    result = []
    instance = self.instance
13214 if self.op.runtime_mem:
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.op.runtime_mem)
13218 rpcres.Raise("Cannot modify instance runtime memory")
13219 result.append(("runtime_memory", self.op.runtime_mem))
13221 # Apply disk changes
13222 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13223 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13224 _UpdateIvNames(0, instance.disks)
13226 if self.op.disk_template:
      if __debug__:
        check_nodes = set(instance.all_nodes)
13229 if self.op.remote_node:
13230 check_nodes.add(self.op.remote_node)
13231 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13232 owned = self.owned_locks(level)
13233 assert not (check_nodes - owned), \
13234 ("Not owning the correct locks, owning %r, expected at least %r" %
13235 (owned, check_nodes))
13237 r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
13241 mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))
13249 assert instance.disk_template == self.op.disk_template, \
13250 ("Expected disk template '%s', found '%s'" %
13251 (self.op.disk_template, instance.disk_template))
13253 # Release node and resource locks if there are any (they might already have
13254 # been released during disk conversion)
13255 _ReleaseLocks(self, locking.LEVEL_NODE)
13256 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13258 # Apply NIC changes
13259 if self._new_nics is not None:
13260 instance.nics = self._new_nics
13261 result.extend(self._nic_chgdesc)
13264 if self.op.hvparams:
13265 instance.hvparams = self.hv_inst
13266 for key, val in self.op.hvparams.iteritems():
13267 result.append(("hv/%s" % key, val))
13270 if self.op.beparams:
13271 instance.beparams = self.be_inst
13272 for key, val in self.op.beparams.iteritems():
13273 result.append(("be/%s" % key, val))
13276 if self.op.os_name:
13277 instance.os = self.op.os_name
13280 if self.op.osparams:
13281 instance.osparams = self.os_inst
13282 for key, val in self.op.osparams.iteritems():
13283 result.append(("os/%s" % key, val))
    if self.op.offline is None:
      # Ignore
      pass
13288 elif self.op.offline:
13289 # Mark instance as offline
13290 self.cfg.MarkInstanceOffline(instance.name)
13291 result.append(("admin_state", constants.ADMINST_OFFLINE))
    else:
      # Mark instance as online, but stopped
13294 self.cfg.MarkInstanceDown(instance.name)
13295 result.append(("admin_state", constants.ADMINST_DOWN))
13297 self.cfg.Update(instance, feedback_fn)
13299 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13300 self.owned_locks(locking.LEVEL_NODE)), \
13301 "All node locks should have been released by now"
13305 _DISK_CONVERSIONS = {
13306 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
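# A short sketch of the dispatch pattern behind _DISK_CONVERSIONS above: the
# (current_template, target_template) pair selects the conversion routine;
# CheckPrereq already rejects pairs missing from the table, but a defensive
# lookup would look like this (hypothetical helper, our naming).
def _ExampleDispatchConversion(conversions, lu, feedback_fn, mode):
  fn = conversions.get(mode)
  if fn is None:
    raise errors.OpPrereqError("Unsupported disk template conversion %r" %
                               (mode, ), errors.ECODE_INVAL)
  return fn(lu, feedback_fn)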
13311 class LUInstanceChangeGroup(LogicalUnit):
13312 HPATH = "instance-change-group"
13313 HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
13317 self.share_locks = _ShareAll()
13318 self.needed_locks = {
13319 locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

13323 self._ExpandAndLockInstance()
13325 if self.op.target_groups:
13326 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13327 self.op.target_groups)
    else:
      self.req_target_uuids = None
13331 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13333 def DeclareLocks(self, level):
13334 if level == locking.LEVEL_NODEGROUP:
13335 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13337 if self.req_target_uuids:
13338 lock_groups = set(self.req_target_uuids)
13340 # Lock all groups used by instance optimistically; this requires going
13341 # via the node before it's locked, requiring verification later on
13342 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13343 lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
13346 lock_groups = locking.ALL_SET
13348 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13350 elif level == locking.LEVEL_NODE:
13351 if self.req_target_uuids:
13352 # Lock all nodes used by instances
13353 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13354 self._LockInstancesNodes()
13356 # Lock all nodes in all potential target groups
13357 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13358 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13359 member_nodes = [node_name
13360 for group in lock_groups
13361 for node_name in self.cfg.GetNodeGroup(group).members]
13362 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
13365 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13367 def CheckPrereq(self):
13368 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13369 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13370 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13372 assert (self.req_target_uuids is None or
13373 owned_groups.issuperset(self.req_target_uuids))
13374 assert owned_instances == set([self.op.instance_name])
13376 # Get instance information
13377 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13379 # Check if node groups for locked instance are still correct
13380 assert owned_nodes.issuperset(self.instance.all_nodes), \
13381 ("Instance %s's nodes changed while we kept the lock" %
13382 self.op.instance_name)
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)
13387 if self.req_target_uuids:
13388 # User requested specific target groups
13389 self.target_uuids = frozenset(self.req_target_uuids)
    else:
      # All groups except those used by the instance are potential targets
13392 self.target_uuids = owned_groups - inst_groups
13394 conflicting_groups = self.target_uuids & inst_groups
13395 if conflicting_groups:
13396 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13397 " used by the instance '%s'" %
13398 (utils.CommaJoin(conflicting_groups),
13399 self.op.instance_name),
13400 errors.ECODE_INVAL)
13402 if not self.target_uuids:
13403 raise errors.OpPrereqError("There are no possible target groups",
13404 errors.ECODE_INVAL)
13406 def BuildHooksEnv(self):
13407 """Build hooks env.
13410 assert self.target_uuids
13413 "TARGET_GROUPS": " ".join(self.target_uuids),
13416 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13420 def BuildHooksNodes(self):
13421 """Build hooks nodes.
13424 mn = self.cfg.GetMasterNode()
13425 return ([mn], [mn])
13427 def Exec(self, feedback_fn):
13428 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13430 assert instances == [self.op.instance_name], "Instance not locked"
13432 req = iallocator.IAReqGroupChange(instances=instances,
13433 target_groups=list(self.target_uuids))
13434 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
13436 ial.Run(self.op.iallocator)
13438 if not ial.success:
13439 raise errors.OpPrereqError("Can't compute solution for changing group of"
13440 " instance '%s' using iallocator '%s': %s" %
13441 (self.op.instance_name, self.op.iallocator,
13442 ial.info), errors.ECODE_NORES)
13444 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13446 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13447 " instance '%s'", len(jobs), self.op.instance_name)
13449 return ResultWithJobs(jobs)
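# A minimal sketch of the value returned by Exec above: ResultWithJobs wraps a
# list of jobs, each job itself being a list of opcodes (here produced by
# _LoadNodeEvacResult from the iallocator output); the processor submits them
# and reports the job IDs back to the caller. The helper below is
# illustrative only.
def _ExampleWrapJobs(opcode_lists):
  # e.g. two single-opcode jobs: [[op_a], [op_b]]
  return ResultWithJobs([list(ops) for ops in opcode_lists])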
13452 class LUBackupQuery(NoHooksLU):
13453 """Query the exports list
13458 def CheckArguments(self):
13459 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13460 ["node", "export"], self.op.use_locking)
13462 def ExpandNames(self):
13463 self.expq.ExpandNames(self)
13465 def DeclareLocks(self, level):
13466 self.expq.DeclareLocks(self, level)
  def Exec(self, feedback_fn):
    result = {}

    for (node, expname) in self.expq.OldStyleQuery(self):
      if expname is None:
        result[node] = False
      else:
        result.setdefault(node, []).append(expname)

    return result
13480 class _ExportQuery(_QueryBase):
13481 FIELDS = query.EXPORT_FIELDS
13483 #: The node name is not a unique key for this query
13484 SORT_FIELD = "node"
13486 def ExpandNames(self, lu):
13487 lu.needed_locks = {}
13489 # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET
13495 self.do_locking = self.use_locking
13497 if self.do_locking:
13498 lu.share_locks = _ShareAll()
13499 lu.needed_locks = {
        locking.LEVEL_NODE: self.wanted,
        }
  def DeclareLocks(self, lu, level):
    pass
13506 def _GetQueryData(self, lu):
13507 """Computes the list of nodes and their attributes.
13510 # Locking is not used
13512 assert not (compat.any(lu.glm.is_owned(level)
13513 for level in locking.LEVELS
13514 if level != locking.LEVEL_CLUSTER) or
13515 self.do_locking or self.use_locking)
    nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)

    result = []

13521 for (node, nres) in lu.rpc.call_export_list(nodes).items():
      if nres.fail_msg:
        result.append((node, None))
      else:
        result.extend((node, expname) for expname in nres.payload)

    return result
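# A small sketch of the old-style result shape produced by _ExportQuery above:
# a flat list of (node, export_name) pairs with (node, None) marking nodes
# that failed to answer. LUBackupQuery.Exec folds it into a per-node dict in
# exactly this way (illustrative helper, our naming):
def _ExampleFoldExportList(pairs):
  result = {}
  for (node, expname) in pairs:
    if expname is None:
      result[node] = False
    else:
      result.setdefault(node, []).append(expname)
  return result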
13530 class LUBackupPrepare(NoHooksLU):
13531 """Prepares an instance for an export and returns useful information.
13536 def ExpandNames(self):
13537 self._ExpandAndLockInstance()
13539 def CheckPrereq(self):
13540 """Check prerequisites.
13543 instance_name = self.op.instance_name
13545 self.instance = self.cfg.GetInstanceInfo(instance_name)
13546 assert self.instance is not None, \
13547 "Cannot retrieve locked instance %s" % self.op.instance_name
13548 _CheckNodeOnline(self, self.instance.primary_node)
13550 self._cds = _GetClusterDomainSecret()
13552 def Exec(self, feedback_fn):
13553 """Prepares an instance for an export.
13556 instance = self.instance
13558 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13559 salt = utils.GenerateSecret(8)
13561 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13562 result = self.rpc.call_x509_cert_create(instance.primary_node,
13563 constants.RIE_CERT_VALIDITY)
13564 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13566 (name, cert_pem) = result.payload
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None
13581 class LUBackupExport(LogicalUnit):
13582 """Export an instance to an image in the cluster.
13585 HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
13589 def CheckArguments(self):
13590 """Check the arguments.
13593 self.x509_key_name = self.op.x509_key_name
13594 self.dest_x509_ca_pem = self.op.destination_x509_ca
13596 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13597 if not self.x509_key_name:
13598 raise errors.OpPrereqError("Missing X509 key name for encryption",
13599 errors.ECODE_INVAL)
13601 if not self.dest_x509_ca_pem:
13602 raise errors.OpPrereqError("Missing destination X509 CA",
13603 errors.ECODE_INVAL)
13605 def ExpandNames(self):
13606 self._ExpandAndLockInstance()
13608 # Lock all nodes for local exports
13609 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13610 # FIXME: lock only instance primary and destination node
      # Sad but true, for now we have to lock all nodes, as we don't know where
13613 # the previous export might be, and in this LU we search for it and
13614 # remove it from its current node. In the future we could fix this by:
13615 # - making a tasklet to search (share-lock all), then create the
13616 # new one, then one to remove, after
13617 # - removing the removal operation altogether
13618 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13620 def DeclareLocks(self, level):
13621 """Last minute lock declaration."""
13622 # All nodes are locked anyway, so nothing to do here.
13624 def BuildHooksEnv(self):
13625 """Build hooks env.
13627 This will run on the master, primary node and target node.
13631 "EXPORT_MODE": self.op.mode,
13632 "EXPORT_NODE": self.op.target_node,
13633 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13634 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13635 # TODO: Generic function for boolean env variables
13636 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env
13643 def BuildHooksNodes(self):
13644 """Build hooks nodes.
13647 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13649 if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return (nl, nl)
13654 def CheckPrereq(self):
13655 """Check prerequisites.
13657 This checks that the instance and node names are valid.
13660 instance_name = self.op.instance_name
13662 self.instance = self.cfg.GetInstanceInfo(instance_name)
13663 assert self.instance is not None, \
13664 "Cannot retrieve locked instance %s" % self.op.instance_name
13665 _CheckNodeOnline(self, self.instance.primary_node)
13667 if (self.op.remove_instance and
13668 self.instance.admin_state == constants.ADMINST_UP and
13669 not self.op.shutdown):
13670 raise errors.OpPrereqError("Can not remove instance without shutting it"
13671 " down before", errors.ECODE_STATE)
13673 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13674 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13675 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13676 assert self.dst_node is not None
13678 _CheckNodeOnline(self, self.dst_node.name)
13679 _CheckNodeNotDrained(self, self.dst_node.name)
      self._cds = None
      self.dest_disk_info = None
13683 self.dest_x509_ca = None
13685 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13686 self.dst_node = None
13688 if len(self.op.target_node) != len(self.instance.disks):
13689 raise errors.OpPrereqError(("Received destination information for %s"
13690 " disks, but instance %s has %s disks") %
13691 (len(self.op.target_node), instance_name,
13692 len(self.instance.disks)),
13693 errors.ECODE_INVAL)
13695 cds = _GetClusterDomainSecret()
13697 # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13700 except (TypeError, ValueError), err:
13701 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
13702 errors.ECODE_INVAL)
13704 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13705 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13706 errors.ECODE_INVAL)
13708 # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13711 except OpenSSL.crypto.Error, err:
13712 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13713 (err, ), errors.ECODE_INVAL)
13715 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13716 if errcode is not None:
13717 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13718 (msg, ), errors.ECODE_INVAL)
13720 self.dest_x509_ca = cert
13722 # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
13727 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13728 except errors.GenericError, err:
13729 raise errors.OpPrereqError("Target info for disk %s: %s" %
13730 (idx, err), errors.ECODE_INVAL)
13732 disk_info.append((host, port, magic))
13734 assert len(disk_info) == len(self.op.target_node)
13735 self.dest_disk_info = disk_info
    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)
13741 # instance disk type verification
13742 # TODO: Implement export support for file-based disks
13743 for disk in self.instance.disks:
13744 if disk.dev_type == constants.LD_FILE:
13745 raise errors.OpPrereqError("Export not supported for instances with"
13746 " file-based disks", errors.ECODE_INVAL)
13748 def _CleanupExports(self, feedback_fn):
13749 """Removes exports of current instance from all other nodes.
13751 If an instance in a cluster with nodes A..D was exported to node C, its
13752 exports will be removed from the nodes A, B and D.
13755 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13757 nodelist = self.cfg.GetNodeList()
13758 nodelist.remove(self.dst_node.name)
13760 # on one-node clusters nodelist will be empty after the removal
13761 # if we proceed the backup would be removed because OpBackupQuery
13762 # substitutes an empty list with the full cluster node list.
13763 iname = self.instance.name
13765 feedback_fn("Removing old exports for instance %s" % iname)
13766 exportlist = self.rpc.call_export_list(nodelist)
13767 for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
13771 msg = self.rpc.call_export_remove(node, iname).fail_msg
13773 self.LogWarning("Could not remove older export for instance %s"
13774 " on node %s: %s", iname, node, msg)
13776 def Exec(self, feedback_fn):
13777 """Export an instance to an image in the cluster.
13780 assert self.op.mode in constants.EXPORT_MODES
13782 instance = self.instance
13783 src_node = instance.primary_node
13785 if self.op.shutdown:
13786 # shutdown the instance, but not the disks
13787 feedback_fn("Shutting down instance %s" % instance.name)
13788 result = self.rpc.call_instance_shutdown(src_node, instance,
13789 self.op.shutdown_timeout)
13790 # TODO: Maybe ignore failures if ignore_remove_failures is set
13791 result.Raise("Could not shutdown instance %s on"
13792 " node %s" % (instance.name, src_node))
13794 # set the disks ID correctly since call_instance_start needs the
13795 # correct drbd minor to create the symlinks
13796 for disk in instance.disks:
13797 self.cfg.SetDiskID(disk, src_node)
13799 activate_disks = (instance.admin_state != constants.ADMINST_UP)
    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
13803 feedback_fn("Activating disks for %s" % instance.name)
13804 _StartInstanceDisks(self, instance, None)
    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and
13813 instance.admin_state == constants.ADMINST_UP and
13814 not self.op.remove_instance):
13815 assert not activate_disks
13816 feedback_fn("Starting instance %s" % instance.name)
13817 result = self.rpc.call_instance_start(src_node,
13818 (instance, None, None), False)
13819 msg = result.fail_msg
13821 feedback_fn("Failed to start instance: %s" % msg)
13822 _ShutdownInstanceDisks(self, instance)
13823 raise errors.OpExecError("Could not start instance: %s" % msg)
13825 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13826 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13827 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13828 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13829 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13831 (key_name, _, _) = self.x509_key_name
          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)
13837 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()
13843 # Check for backwards compatibility
13844 assert len(dresults) == len(instance.disks)
13845 assert compat.all(isinstance(i, bool) for i in dresults), \
13846 "Not all results are boolean: %r" % dresults
13850 feedback_fn("Deactivating disks for %s" % instance.name)
13851 _ShutdownInstanceDisks(self, instance)
13853 if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
13857 if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
13860 failures.append("disk export: disk(s) %s" % fdsk)
13862 raise errors.OpExecError("Export failed, errors in %s" %
13863 utils.CommaJoin(failures))
13865 # At this point, the export was successful, we can cleanup/finish
13867 # Remove instance if requested
13868 if self.op.remove_instance:
13869 feedback_fn("Removing instance %s" % instance.name)
13870 _RemoveInstance(self, feedback_fn, instance,
13871 self.op.ignore_remove_failures)
13873 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13874 self._CleanupExports(feedback_fn)
13876 return fin_resu, dresults
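# Sketch: how a caller might interpret the (fin_resu, dresults) pair returned
# by Exec above. The export only succeeded if finalization worked and every
# per-disk result is true (illustrative helper, our naming).
def _ExampleExportSucceeded(fin_resu, dresults):
  return bool(fin_resu) and compat.all(dresults)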
13879 class LUBackupRemove(NoHooksLU):
13880 """Remove exports related to the named instance.
13885 def ExpandNames(self):
13886 self.needed_locks = {}
13887 # We need all nodes to be locked in order for RemoveExport to work, but we
13888 # don't need to lock the instance itself, as nothing will happen to it (and
13889 # we can remove exports also for a removed instance)
13890 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13892 def Exec(self, feedback_fn):
13893 """Remove any export.
13896 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13897 # If the instance was not found we'll try with the name that was passed in.
13898 # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name
13904 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
13907 for node in exportlist:
13908 msg = exportlist[node].fail_msg
13910 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13912 if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
13915 msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)
13920 if fqdn_warn and not found:
13921 feedback_fn("Export not found. If trying to remove an export belonging"
13922 " to a deleted instance please use its Fully Qualified"
13926 class LUGroupAdd(LogicalUnit):
13927 """Logical unit for creating node groups.
13930 HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False
13934 def ExpandNames(self):
13935 # We need the new group's UUID here so that we can create and acquire the
13936 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13937 # that it should not check whether the UUID exists in the configuration.
13938 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13939 self.needed_locks = {}
13940 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13942 def CheckPrereq(self):
13943 """Check prerequisites.
13945 This checks that the given group name is not an existing node group
13950 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      # we're fine with not existing groups
      pass
    else:
13954 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13955 " node group (UUID: %s)" %
13956 (self.op.group_name, existing_uuid),
13957 errors.ECODE_EXISTS)
13959 if self.op.ndparams:
13960 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13962 if self.op.hv_state:
13963 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
    else:
      self.new_hv_state = None
13967 if self.op.disk_state:
13968 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
    else:
      self.new_disk_state = None
13972 if self.op.diskparams:
13973 for templ in constants.DISK_TEMPLATES:
13974 if templ in self.op.diskparams:
13975 utils.ForceDictType(self.op.diskparams[templ],
13976 constants.DISK_DT_TYPES)
13977 self.new_diskparams = self.op.diskparams
      try:
        utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13980 except errors.OpPrereqError, err:
13981 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
13982 errors.ECODE_INVAL)
    else:
      self.new_diskparams = {}
13986 if self.op.ipolicy:
13987 cluster = self.cfg.GetClusterInfo()
13988 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
      try:
        objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13991 except errors.ConfigurationError, err:
13992 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13993 errors.ECODE_INVAL)
13995 def BuildHooksEnv(self):
13996 """Build hooks env.
14000 "GROUP_NAME": self.op.group_name,
14003 def BuildHooksNodes(self):
14004 """Build hooks nodes.
14007 mn = self.cfg.GetMasterNode()
14008 return ([mn], [mn])
14010 def Exec(self, feedback_fn):
14011 """Add the node group to the cluster.
14014 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14015 uuid=self.group_uuid,
14016 alloc_policy=self.op.alloc_policy,
14017 ndparams=self.op.ndparams,
14018 diskparams=self.new_diskparams,
14019 ipolicy=self.op.ipolicy,
14020 hv_state_static=self.new_hv_state,
14021 disk_state_static=self.new_disk_state)
14023 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14024 del self.remove_locks[locking.LEVEL_NODEGROUP]
14027 class LUGroupAssignNodes(NoHooksLU):
14028 """Logical unit for assigning nodes to groups.
14033 def ExpandNames(self):
14034 # These raise errors.OpPrereqError on their own:
14035 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14036 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14038 # We want to lock all the affected nodes and groups. We have readily
14039 # available the list of nodes, and the *destination* group. To gather the
14040 # list of "source" groups, we need to fetch node information later on.
14041 self.needed_locks = {
14042 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      }
14046 def DeclareLocks(self, level):
14047 if level == locking.LEVEL_NODEGROUP:
14048 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14050 # Try to get all affected nodes' groups without having the group or node
14051 # lock yet. Needs verification later in the code flow.
14052 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14054 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14056 def CheckPrereq(self):
14057 """Check prerequisites.
14060 assert self.needed_locks[locking.LEVEL_NODEGROUP]
14061 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
14062 frozenset(self.op.nodes))
14064 expected_locks = (set([self.group_uuid]) |
14065 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
14066 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
14067 if actual_locks != expected_locks:
14068 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
14069 " current groups are '%s', used to be '%s'" %
14070 (utils.CommaJoin(expected_locks),
14071 utils.CommaJoin(actual_locks)))
14073 self.node_data = self.cfg.GetAllNodesInfo()
14074 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14075 instance_data = self.cfg.GetAllInstancesInfo()
14077 if self.group is None:
14078 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14079 (self.op.group_name, self.group_uuid))
14081 (new_splits, previous_splits) = \
14082 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
14083 for node in self.op.nodes],
14084 self.node_data, instance_data)
    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

14089 if not self.op.force:
14090 raise errors.OpExecError("The following instances get split by this"
14091 " change and --force was not given: %s" %
14094 self.LogWarning("This operation will split the following instances: %s",
14097 if previous_splits:
14098 self.LogWarning("In addition, these already-split instances continue"
14099 " to be split across groups: %s",
14100 utils.CommaJoin(utils.NiceSort(previous_splits)))
14102 def Exec(self, feedback_fn):
14103 """Assign nodes to a new group.
14106 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
14108 self.cfg.AssignGroupNodes(mods)
  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
14112 """Check for split instances after a node assignment.
14114 This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

14118 In particular, it returns information about newly split instances, and
14119 instances that were already split, and remain so after the change.
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

14124 @type changes: list of (node_name, new_group_uuid) pairs.
14125 @param changes: list of node assignments to consider.
14126 @param node_data: a dict with data for all nodes
14127 @param instance_data: a dict with all instances to consider
14128 @rtype: a two-tuple
14129 @return: a list of instances that were previously okay and result split as a
14130 consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
14134 changed_nodes = dict((node, group) for node, group in changes
14135 if node_data[node].group != group)
14137 all_split_instances = set()
14138 previously_split_instances = set()
14140 def InstanceNodes(instance):
14141 return [instance.primary_node] + list(instance.secondary_nodes)
14143 for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

14147 instance_nodes = InstanceNodes(inst)
14149 if len(set(node_data[node].group for node in instance_nodes)) > 1:
14150 previously_split_instances.add(inst.name)
14152 if len(set(changed_nodes.get(node, node_data[node].group)
14153 for node in instance_nodes)) > 1:
14154 all_split_instances.add(inst.name)
14156 return (list(all_split_instances - previously_split_instances),
14157 list(previously_split_instances & all_split_instances))
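# A toy walk-through of CheckAssignmentForSplitInstances above, with
# hypothetical node/group/instance names: moving node2 from g1 to g2 newly
# splits a DRBD instance mirrored across (node1, node2). Illustrative only.
def _ExampleSplitCheck():
  import collections
  node = collections.namedtuple("FakeNode", ["group"])
  inst = collections.namedtuple("FakeInstance",
                                ["name", "disk_template", "primary_node",
                                 "secondary_nodes"])
  node_data = {"node1": node("g1"), "node2": node("g1")}
  instance_data = {"inst1": inst("inst1", constants.DT_DRBD8, "node1",
                                 ["node2"])}
  # Expected result: (["inst1"], []) -- newly split, none previously split
  return LUGroupAssignNodes.CheckAssignmentForSplitInstances(
    [("node2", "g2")], node_data, instance_data)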
14160 class _GroupQuery(_QueryBase):
14161 FIELDS = query.GROUP_FIELDS
14163 def ExpandNames(self, lu):
14164 lu.needed_locks = {}
14166 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
14167 self._cluster = lu.cfg.GetClusterInfo()
14168 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())
14179 for name in self.names:
14180 if name in all_uuid:
14181 self.wanted.append(name)
14182 elif name in name_to_uuid:
14183 self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)
14188 raise errors.OpPrereqError("Some groups do not exist: %s" %
14189 utils.CommaJoin(missing),
14190 errors.ECODE_NOENT)
  def DeclareLocks(self, lu, level):
    pass
14195 def _GetQueryData(self, lu):
14196 """Computes the list of node groups and their attributes.
14199 do_nodes = query.GQ_NODE in self.requested_data
14200 do_instances = query.GQ_INST in self.requested_data
14202 group_to_nodes = None
14203 group_to_instances = None
14205 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
14206 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
14207 # latter GetAllInstancesInfo() is not enough, for we have to go through
14208 # instance->node. Hence, we will need to process nodes even if we only need
14209 # instance information.
14210 if do_nodes or do_instances:
14211 all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

14215 for node in all_nodes.values():
14216 if node.group in group_to_nodes:
14217 group_to_nodes[node.group].append(node.name)
14218 node_to_group[node.name] = node.group
      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
14222 group_to_instances = dict((uuid, []) for uuid in self.wanted)
14224 for instance in all_instances.values():
14225 node = instance.primary_node
14226 if node in node_to_group:
14227 group_to_instances[node_to_group[node]].append(instance.name)
        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None
14233 return query.GroupQueryData(self._cluster,
14234 [self._all_groups[uuid]
14235 for uuid in self.wanted],
14236 group_to_nodes, group_to_instances,
14237 query.GQ_DISKPARAMS in self.requested_data)
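# A compact sketch of the two mappings _GetQueryData builds above: nodes are
# walked once to fill group->[node names] for the wanted groups, and the
# node->group index is kept so instances can later be attributed to groups via
# their primary node (illustrative helper, our naming).
def _ExampleGroupMappings(all_nodes, wanted_uuids):
  group_to_nodes = dict((uuid, []) for uuid in wanted_uuids)
  node_to_group = {}
  for (name, node) in all_nodes.items():
    if node.group in group_to_nodes:
      group_to_nodes[node.group].append(name)
      node_to_group[name] = node.group
  return (group_to_nodes, node_to_group)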
14240 class LUGroupQuery(NoHooksLU):
14241 """Logical unit for querying node groups.
14246 def CheckArguments(self):
14247 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14248 self.op.output_fields, False)
14250 def ExpandNames(self):
14251 self.gq.ExpandNames(self)
14253 def DeclareLocks(self, level):
14254 self.gq.DeclareLocks(self, level)
14256 def Exec(self, feedback_fn):
14257 return self.gq.OldStyleQuery(self)
14260 class LUGroupSetParams(LogicalUnit):
14261 """Modifies the parameters of a node group.
14264 HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False
14268 def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.diskparams,
      self.op.alloc_policy,
      self.op.hv_state,
      self.op.disk_state,
      self.op.ipolicy,
      ]

14278 if all_changes.count(None) == len(all_changes):
14279 raise errors.OpPrereqError("Please pass at least one modification",
14280 errors.ECODE_INVAL)
14282 def ExpandNames(self):
14283 # This raises errors.OpPrereqError on its own:
14284 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14286 self.needed_locks = {
14287 locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

14291 self.share_locks[locking.LEVEL_INSTANCE] = 1
14293 def DeclareLocks(self, level):
14294 if level == locking.LEVEL_INSTANCE:
14295 assert not self.needed_locks[locking.LEVEL_INSTANCE]
      # Lock instances optimistically, needs verification once group lock has
      # been acquired
14299 self.needed_locks[locking.LEVEL_INSTANCE] = \
14300 self.cfg.GetNodeGroupInstances(self.group_uuid)
  @staticmethod
  def _UpdateAndVerifyDiskParams(old, new):
14304 """Updates and verifies disk parameters.
14307 new_params = _GetUpdatedParams(old, new)
    utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
    return new_params
14311 def CheckPrereq(self):
14312 """Check prerequisites.
14315 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14317 # Check if locked instances are still correct
14318 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14320 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14321 cluster = self.cfg.GetClusterInfo()
14323 if self.group is None:
14324 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14325 (self.op.group_name, self.group_uuid))
14327 if self.op.ndparams:
14328 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14329 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14330 self.new_ndparams = new_ndparams
14332 if self.op.diskparams:
14333 diskparams = self.group.diskparams
14334 uavdp = self._UpdateAndVerifyDiskParams
14335 # For each disktemplate subdict update and verify the values
14336 new_diskparams = dict((dt,
14337 uavdp(diskparams.get(dt, {}),
14338 self.op.diskparams[dt]))
14339 for dt in constants.DISK_TEMPLATES
14340 if dt in self.op.diskparams)
14341 # As we've all subdicts of diskparams ready, lets merge the actual
14342 # dict with all updated subdicts
14343 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
      try:
        utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14346 except errors.OpPrereqError, err:
14347 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14348 errors.ECODE_INVAL)
14350 if self.op.hv_state:
14351 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14352 self.group.hv_state_static)
14354 if self.op.disk_state:
14355 self.new_disk_state = \
14356 _MergeAndVerifyDiskState(self.op.disk_state,
14357 self.group.disk_state_static)
14359 if self.op.ipolicy:
      self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
                                            self.op.ipolicy,
                                            group_policy=True)

14364 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14365 inst_filter = lambda inst: inst.name in owned_instances
14366 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14367 gmi = ganeti.masterd.instance
      violations = \
          _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
                                                                  self.group),
                                        new_ipolicy, instances)
14374 self.LogWarning("After the ipolicy change the following instances"
14375 " violate them: %s",
14376 utils.CommaJoin(violations))
14378 def BuildHooksEnv(self):
14379 """Build hooks env.
14383 "GROUP_NAME": self.op.group_name,
14384 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14387 def BuildHooksNodes(self):
14388 """Build hooks nodes.
14391 mn = self.cfg.GetMasterNode()
14392 return ([mn], [mn])
14394 def Exec(self, feedback_fn):
14395 """Modifies the node group.
14400 if self.op.ndparams:
14401 self.group.ndparams = self.new_ndparams
14402 result.append(("ndparams", str(self.group.ndparams)))
14404 if self.op.diskparams:
14405 self.group.diskparams = self.new_diskparams
14406 result.append(("diskparams", str(self.group.diskparams)))
14408 if self.op.alloc_policy:
14409 self.group.alloc_policy = self.op.alloc_policy
14411 if self.op.hv_state:
14412 self.group.hv_state_static = self.new_hv_state
14414 if self.op.disk_state:
14415 self.group.disk_state_static = self.new_disk_state
14417 if self.op.ipolicy:
14418 self.group.ipolicy = self.new_ipolicy
    self.cfg.Update(self.group, feedback_fn)
    return result
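# Sketch of the change-descriptor list that Exec above returns: a list of
# (parameter, new value) pairs which the client side renders for the user.
# The helper and its values are purely illustrative.
def _ExampleChangeList(group):
  result = []
  result.append(("ndparams", str(group.ndparams)))
  result.append(("alloc_policy", group.alloc_policy))
  return result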
14424 class LUGroupRemove(LogicalUnit):
14425 HPATH = "group-remove"
14426 HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
14431 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14432 self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
14436 def CheckPrereq(self):
14437 """Check prerequisites.
14439 This checks that the given group name exists as a node group, that is
14440 empty (i.e., contains no nodes), and that is not the last group of the
14444 # Verify that the group is empty.
14445 group_nodes = [node.name
14446 for node in self.cfg.GetAllNodesInfo().values()
14447 if node.group == self.group_uuid]
    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
14452 (self.op.group_name,
14453 utils.CommaJoin(utils.NiceSort(group_nodes))),
14454 errors.ECODE_STATE)
14456 # Verify the cluster would not be left group-less.
14457 if len(self.cfg.GetNodeGroupList()) == 1:
14458 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
14459 " removed" % self.op.group_name,
14460 errors.ECODE_STATE)
14462 def BuildHooksEnv(self):
14463 """Build hooks env.
14467 "GROUP_NAME": self.op.group_name,
14470 def BuildHooksNodes(self):
14471 """Build hooks nodes.
14474 mn = self.cfg.GetMasterNode()
14475 return ([mn], [mn])
14477 def Exec(self, feedback_fn):
14478 """Remove the node group.
14482 self.cfg.RemoveNodeGroup(self.group_uuid)
14483 except errors.ConfigurationError:
14484 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14485 (self.op.group_name, self.group_uuid))
14487 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14490 class LUGroupRename(LogicalUnit):
14491 HPATH = "group-rename"
14492 HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
14496 # This raises errors.OpPrereqError on its own:
14497 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14499 self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
14503 def CheckPrereq(self):
14504 """Check prerequisites.
14506 Ensures requested new name is not yet used.
14510 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
14514 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14515 " node group (UUID: %s)" %
14516 (self.op.new_name, new_name_uuid),
14517 errors.ECODE_EXISTS)
14519 def BuildHooksEnv(self):
14520 """Build hooks env.
14524 "OLD_NAME": self.op.group_name,
14525 "NEW_NAME": self.op.new_name,
14528 def BuildHooksNodes(self):
14529 """Build hooks nodes.
14532 mn = self.cfg.GetMasterNode()
14534 all_nodes = self.cfg.GetAllNodesInfo()
14535 all_nodes.pop(mn, None)
    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
14539 if node.group == self.group_uuid)
14541 return (run_nodes, run_nodes)
14543 def Exec(self, feedback_fn):
14544 """Rename the node group.
14547 group = self.cfg.GetNodeGroup(self.group_uuid)
    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14551 (self.op.group_name, self.group_uuid))
14553 group.name = self.op.new_name
14554 self.cfg.Update(group, feedback_fn)
14556 return self.op.new_name
14559 class LUGroupEvacuate(LogicalUnit):
14560 HPATH = "group-evacuate"
14561 HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
14565 # This raises errors.OpPrereqError on its own:
14566 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14568 if self.op.target_groups:
14569 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14570 self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) cannot be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }
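
    # The processor acquires lock levels in ascending order (instance, then
    # nodegroup, then node), calling DeclareLocks below once per level; the
    # optimistic locking scheme relies on that ordering.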

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              owned_groups, owned_nodes, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    req = iallocator.IAReqGroupChange(instances=instances,
                                      target_groups=self.target_uuids)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)


class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}

    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      lock_level = locking.LEVEL_NODE
      lock_name = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      lock_level = locking.LEVEL_INSTANCE
      lock_name = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
      lock_level = locking.LEVEL_NODEGROUP
      lock_name = self.group_uuid
    else:
      lock_level = None
      lock_name = None

    if lock_level and getattr(self.op, "use_locking", True):
      self.needed_locks[lock_level] = lock_name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
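    # Search targets are (path, object) pairs; matches come back as
    # (path, tag) tuples, e.g. ("/instances/inst1.example.com", "web")
    # (instance name and tag here are made-up example values).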
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False
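
  # Usage sketch (assuming the standard CLI wrapper): "gnt-debug delay
  # -n node1.example.com 5" would sleep for five seconds on node1 via this
  # LU; the node name is a made-up example.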

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
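      # Raise turns a failed per-node RPC result into an OpExecError and is a
      # no-op for successful calls.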
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0
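
  # Handshake overview: the LU opens a Unix socket, publishes its path to the
  # test client via a job-queue log message, waits for the client to connect,
  # and finally waits for the client to close the connection as confirmation.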

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0
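    # These counters let ExpandNames and Exec check that the processor called
    # the LU entry points in the expected order.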

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode test.

    """
    if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
                        constants.IALLOCATOR_MODE_MULTI_ALLOC):
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      req = iallocator.IAReqInstanceAlloc(name=self.op.name,
                                          memory=self.op.memory,
                                          disks=self.op.disks,
                                          disk_template=self.op.disk_template,
                                          os=self.op.os,
                                          tags=self.op.tags,
                                          nics=self.op.nics,
                                          vcpus=self.op.vcpus,
                                          spindle_use=self.op.spindle_use,
                                          hypervisor=self.op.hypervisor)
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      req = iallocator.IAReqRelocate(name=self.op.name,
                                     relocate_from=list(self.relocate_from))
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      req = iallocator.IAReqGroupChange(instances=self.op.instances,
                                        target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      req = iallocator.IAReqNodeEvac(instances=self.op.instances,
                                     evac_mode=self.op.evac_mode)
    elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
      disk_template = self.op.disk_template
      insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
                                             memory=self.op.memory,
                                             disks=self.op.disks,
                                             disk_template=disk_template,
                                             os=self.op.os,
                                             tags=self.op.tags,
                                             nics=self.op.nics,
                                             vcpus=self.op.vcpus,
                                             spindle_use=self.op.spindle_use,
                                             hypervisor=self.op.hypervisor)
               for idx in range(self.op.count)]
      req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    ial = iallocator.IAllocator(self.cfg, self.rpc, req)
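    # For direction "in" only the serialized request (ial.in_text, a JSON
    # document) is returned; for "out" the named allocator is actually run
    # and its raw response text is returned instead.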
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)