4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
61 from ganeti import rpc
62 from ganeti import runtime
63 from ganeti import pathutils
64 from ganeti import vcluster
65 from ganeti import network
66 from ganeti.masterd import iallocator
68 import ganeti.masterd.instance # pylint: disable=W0611
72 INSTANCE_DOWN = [constants.ADMINST_DOWN]
73 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
74 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
76 #: Instance status in which an instance can be marked as offline/online
77 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
78 constants.ADMINST_OFFLINE,
83 """Data container for LU results with jobs.
85 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
86 by L{mcpu._ProcessResult}. The latter will then submit the jobs
87 contained in the C{jobs} attribute and include the job IDs in the opcode
91 def __init__(self, jobs, **kwargs):
92 """Initializes this class.
94 Additional return values can be specified as keyword arguments.
96     @type jobs: list of lists of L{opcodes.OpCode}
97 @param jobs: A list of lists of opcode objects
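# Usage sketch (not part of the original module): an LU's Exec method can hand
# follow-up work to the job queue by returning something like
#
#   return ResultWithJobs([[opcodes.OpInstanceStartup(instance_name=name)]
#                          for name in affected_names])
#
# where each inner list becomes one job submitted after this LU finishes;
# "affected_names" is a hypothetical variable used only for illustration.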
104 class LogicalUnit(object):
105 """Logical Unit base class.
107 Subclasses must follow these rules:
108 - implement ExpandNames
109 - implement CheckPrereq (except when tasklets are used)
110 - implement Exec (except when tasklets are used)
111 - implement BuildHooksEnv
112 - implement BuildHooksNodes
113 - redefine HPATH and HTYPE
114 - optionally redefine their run requirements:
115 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
117 Note that all commands require root permissions.
119 @ivar dry_run_result: the value (if any) that will be returned to the caller
120 in dry-run mode (signalled by opcode dry_run parameter)
127 def __init__(self, processor, op, context, rpc_runner):
128 """Constructor for LogicalUnit.
130 This needs to be overridden in derived classes in order to check op
134 self.proc = processor
136 self.cfg = context.cfg
137 self.glm = context.glm
139 self.owned_locks = context.glm.list_owned
140 self.context = context
141 self.rpc = rpc_runner
142 # Dicts used to declare locking needs to mcpu
143 self.needed_locks = None
144 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
146 self.remove_locks = {}
147 # Used to force good behavior when calling helper functions
148 self.recalculate_locks = {}
150 self.Log = processor.Log # pylint: disable=C0103
151 self.LogWarning = processor.LogWarning # pylint: disable=C0103
152 self.LogInfo = processor.LogInfo # pylint: disable=C0103
153 self.LogStep = processor.LogStep # pylint: disable=C0103
154 # support for dry-run
155 self.dry_run_result = None
156 # support for generic debug attribute
157 if (not hasattr(self.op, "debug_level") or
158 not isinstance(self.op.debug_level, int)):
159 self.op.debug_level = 0
164 # Validate opcode parameters and set defaults
165 self.op.Validate(True)
167 self.CheckArguments()
169 def CheckArguments(self):
170 """Check syntactic validity for the opcode arguments.
172     This method is for doing a simple syntactic check and ensuring the
173 validity of opcode parameters, without any cluster-related
174 checks. While the same can be accomplished in ExpandNames and/or
175     CheckPrereq, doing these separately is better because:
177       - ExpandNames is left as purely a lock-related function
178       - CheckPrereq is run after we have acquired locks (and possibly
181 The function is allowed to change the self.op attribute so that
182     later methods need no longer worry about missing parameters.
187 def ExpandNames(self):
188 """Expand names for this LU.
190 This method is called before starting to execute the opcode, and it should
191 update all the parameters of the opcode to their canonical form (e.g. a
192 short node name must be fully expanded after this method has successfully
193 completed). This way locking, hooks, logging, etc. can work correctly.
195 LUs which implement this method must also populate the self.needed_locks
196 member, as a dict with lock levels as keys, and a list of needed lock names
199 - use an empty dict if you don't need any lock
200 - if you don't need any lock at a particular level omit that
201 level (note that in this case C{DeclareLocks} won't be called
202 at all for that level)
203 - if you need locks at a level, but you can't calculate it in
204 this function, initialise that level with an empty list and do
205 further processing in L{LogicalUnit.DeclareLocks} (see that
206 function's docstring)
207 - don't put anything for the BGL level
208 - if you want all locks at a level use L{locking.ALL_SET} as a value
210 If you need to share locks (rather than acquire them exclusively) at one
211 level you can modify self.share_locks, setting a true value (usually 1) for
212 that level. By default locks are not shared.
214 This function can also define a list of tasklets, which then will be
215 executed in order instead of the usual LU-level CheckPrereq and Exec
216 functions, if those are not defined by the LU.
220 # Acquire all nodes and one instance
221 self.needed_locks = {
222 locking.LEVEL_NODE: locking.ALL_SET,
223 locking.LEVEL_INSTANCE: ['instance1.example.com'],
225 # Acquire just two nodes
226 self.needed_locks = {
227 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
230 self.needed_locks = {} # No, you can't leave it to the default value None
233 # The implementation of this method is mandatory only if the new LU is
234 # concurrent, so that old LUs don't need to be changed all at the same
237 self.needed_locks = {} # Exclusive LUs don't need locks.
239 raise NotImplementedError
241 def DeclareLocks(self, level):
242 """Declare LU locking needs for a level
244 While most LUs can just declare their locking needs at ExpandNames time,
245 sometimes there's the need to calculate some locks after having acquired
246 the ones before. This function is called just before acquiring locks at a
247 particular level, but after acquiring the ones at lower levels, and permits
248 such calculations. It can be used to modify self.needed_locks, and by
249 default it does nothing.
251 This function is only called if you have something already set in
252 self.needed_locks for the level.
254 @param level: Locking level which is going to be locked
255 @type level: member of L{ganeti.locking.LEVELS}
259 def CheckPrereq(self):
260 """Check prerequisites for this LU.
262 This method should check that the prerequisites for the execution
263 of this LU are fulfilled. It can do internode communication, but
264 it should be idempotent - no cluster or system changes are
267 The method should raise errors.OpPrereqError in case something is
268 not fulfilled. Its return value is ignored.
270 This method should also update all the parameters of the opcode to
271 their canonical form if it hasn't been done by ExpandNames before.
274 if self.tasklets is not None:
275 for (idx, tl) in enumerate(self.tasklets):
276 logging.debug("Checking prerequisites for tasklet %s/%s",
277 idx + 1, len(self.tasklets))
282 def Exec(self, feedback_fn):
285 This method should implement the actual work. It should raise
286 errors.OpExecError for failures that are somewhat dealt with in
290 if self.tasklets is not None:
291 for (idx, tl) in enumerate(self.tasklets):
292 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
295 raise NotImplementedError
297 def BuildHooksEnv(self):
298 """Build hooks environment for this LU.
301 @return: Dictionary containing the environment that will be used for
302 running the hooks for this LU. The keys of the dict must not be prefixed
303 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
304 will extend the environment with additional variables. If no environment
305 should be defined, an empty dictionary should be returned (not C{None}).
306 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
310 raise NotImplementedError
312 def BuildHooksNodes(self):
313 """Build list of nodes to run LU's hooks.
315 @rtype: tuple; (list, list)
316 @return: Tuple containing a list of node names on which the hook
317 should run before the execution and a list of node names on which the
318       hook should run after the execution. If no nodes are needed, an empty
319       list should be returned (and not None).
320 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
324 raise NotImplementedError
326 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
327 """Notify the LU about the results of its hooks.
329 This method is called every time a hooks phase is executed, and notifies
330 the Logical Unit about the hooks' result. The LU can then use it to alter
331 its result based on the hooks. By default the method does nothing and the
332 previous result is passed back unchanged but any LU can define it if it
333 wants to use the local cluster hook-scripts somehow.
335 @param phase: one of L{constants.HOOKS_PHASE_POST} or
336 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
337 @param hook_results: the results of the multi-node hooks rpc call
338     @param feedback_fn: function used to send feedback back to the caller
339 @param lu_result: the previous Exec result this LU had, or None
341 @return: the new Exec result, based on the previous result
345     # API must be kept, thus we ignore the 'unused argument' and 'could
346     # be a function' warnings
347 # pylint: disable=W0613,R0201
350 def _ExpandAndLockInstance(self):
351 """Helper function to expand and lock an instance.
353 Many LUs that work on an instance take its name in self.op.instance_name
354 and need to expand it and then declare the expanded name for locking. This
355 function does it, and then updates self.op.instance_name to the expanded
356 name. It also initializes needed_locks as a dict, if this hasn't been done
360 if self.needed_locks is None:
361 self.needed_locks = {}
363 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
364 "_ExpandAndLockInstance called with instance-level locks set"
365 self.op.instance_name = _ExpandInstanceName(self.cfg,
366 self.op.instance_name)
367 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
369 def _LockInstancesNodes(self, primary_only=False,
370 level=locking.LEVEL_NODE):
371 """Helper function to declare instances' nodes for locking.
373 This function should be called after locking one or more instances to lock
374 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
375 with all primary or secondary nodes for instances already locked and
376 present in self.needed_locks[locking.LEVEL_INSTANCE].
378 It should be called from DeclareLocks, and for safety only works if
379 self.recalculate_locks[locking.LEVEL_NODE] is set.
381 In the future it may grow parameters to just lock some instance's nodes, or
382 to just lock primaries or secondary nodes, if needed.
384     It should be called in DeclareLocks in a way similar to::
386 if level == locking.LEVEL_NODE:
387 self._LockInstancesNodes()
389 @type primary_only: boolean
390 @param primary_only: only lock primary nodes of locked instances
391 @param level: Which lock level to use for locking nodes
394 assert level in self.recalculate_locks, \
395 "_LockInstancesNodes helper function called with no nodes to recalculate"
397     # TODO: check if we've really been called with the instance locks held
399 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
400 # future we might want to have different behaviors depending on the value
401 # of self.recalculate_locks[locking.LEVEL_NODE]
403 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
404 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
405 wanted_nodes.append(instance.primary_node)
407 wanted_nodes.extend(instance.secondary_nodes)
409 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
410 self.needed_locks[level] = wanted_nodes
411 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
412 self.needed_locks[level].extend(wanted_nodes)
414 raise errors.ProgrammerError("Unknown recalculation mode")
416 del self.recalculate_locks[level]
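# Illustrative sketch (not part of the original module): a minimal LU showing
# how ExpandNames, DeclareLocks and _LockInstancesNodes cooperate. The class
# name and the opcode's instance_name field are hypothetical; hooks-related
# attributes and methods are omitted for brevity.
class _ExampleInstanceLU(LogicalUnit): # pylint: disable=W0223
  def ExpandNames(self):
    # Lock the instance itself; its node locks are computed in DeclareLocks
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Replace the empty list with the locked instance's nodes
      self._LockInstancesNodes()

  def CheckPrereq(self):
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

  def Exec(self, feedback_fn):
    feedback_fn("Instance %s is locked together with its nodes" %
                self.instance.name)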
419 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
420 """Simple LU which runs no hooks.
422 This LU is intended as a parent for other LogicalUnits which will
423 run no hooks, in order to reduce duplicate code.
429 def BuildHooksEnv(self):
430 """Empty BuildHooksEnv for NoHooksLu.
432 This just raises an error.
435 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
437 def BuildHooksNodes(self):
438 """Empty BuildHooksNodes for NoHooksLU.
441 raise AssertionError("BuildHooksNodes called for NoHooksLU")
445 """Tasklet base class.
447 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
448   they can mix legacy code with tasklets. Locking needs to be done in the LU;
449 tasklets know nothing about locks.
451 Subclasses must follow these rules:
452 - Implement CheckPrereq
456 def __init__(self, lu):
463 def CheckPrereq(self):
464 """Check prerequisites for this tasklets.
466 This method should check whether the prerequisites for the execution of
467 this tasklet are fulfilled. It can do internode communication, but it
468 should be idempotent - no cluster or system changes are allowed.
470 The method should raise errors.OpPrereqError in case something is not
471 fulfilled. Its return value is ignored.
473 This method should also update all parameters to their canonical form if it
474 hasn't been done before.
479 def Exec(self, feedback_fn):
480 """Execute the tasklet.
482 This method should implement the actual work. It should raise
483 errors.OpExecError for failures that are somewhat dealt with in code, or
487 raise NotImplementedError
491 """Base for query utility classes.
494 #: Attribute holding field definitions
500 def __init__(self, qfilter, fields, use_locking):
501 """Initializes this class.
504 self.use_locking = use_locking
506 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
507 namefield=self.SORT_FIELD)
508 self.requested_data = self.query.RequestedData()
509 self.names = self.query.RequestedNames()
511 # Sort only if no names were requested
512 self.sort_by_name = not self.names
514 self.do_locking = None
517 def _GetNames(self, lu, all_names, lock_level):
518 """Helper function to determine names asked for in the query.
522 names = lu.owned_locks(lock_level)
526 if self.wanted == locking.ALL_SET:
527 assert not self.names
528 # caller didn't specify names, so ordering is not important
529 return utils.NiceSort(names)
531 # caller specified names and we must keep the same order
533 assert not self.do_locking or lu.glm.is_owned(lock_level)
535 missing = set(self.wanted).difference(names)
537 raise errors.OpExecError("Some items were removed before retrieving"
538 " their data: %s" % missing)
540 # Return expanded names
543 def ExpandNames(self, lu):
544 """Expand names for this query.
546 See L{LogicalUnit.ExpandNames}.
549 raise NotImplementedError()
551 def DeclareLocks(self, lu, level):
552 """Declare locks for this query.
554 See L{LogicalUnit.DeclareLocks}.
557 raise NotImplementedError()
559 def _GetQueryData(self, lu):
560 """Collects all data for this query.
562 @return: Query data object
565 raise NotImplementedError()
567 def NewStyleQuery(self, lu):
568 """Collect data and execute query.
571 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
572 sort_by_name=self.sort_by_name)
574 def OldStyleQuery(self, lu):
575 """Collect data and execute query.
578 return self.query.OldStyleQuery(self._GetQueryData(lu),
579 sort_by_name=self.sort_by_name)
583 """Returns a dict declaring all lock levels shared.
586 return dict.fromkeys(locking.LEVELS, 1)
589 def _AnnotateDiskParams(instance, devs, cfg):
590 """Little helper wrapper to the rpc annotation method.
592 @param instance: The instance object
593 @type devs: List of L{objects.Disk}
594 @param devs: The root devices (not any of its children!)
595 @param cfg: The config object
596   @return: The annotated disk copies
597 @see L{rpc.AnnotateDiskParams}
600 return rpc.AnnotateDiskParams(instance.disk_template, devs,
601 cfg.GetInstanceDiskParams(instance))
604 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
606 """Checks if node groups for locked instances are still correct.
608 @type cfg: L{config.ConfigWriter}
609 @param cfg: Cluster configuration
610 @type instances: dict; string as key, L{objects.Instance} as value
611 @param instances: Dictionary, instance name as key, instance object as value
612 @type owned_groups: iterable of string
613 @param owned_groups: List of owned groups
614 @type owned_nodes: iterable of string
615 @param owned_nodes: List of owned nodes
616 @type cur_group_uuid: string or None
617 @param cur_group_uuid: Optional group UUID to check against instance's groups
620 for (name, inst) in instances.items():
621 assert owned_nodes.issuperset(inst.all_nodes), \
622 "Instance %s's nodes changed while we kept the lock" % name
624 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
626 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
627 "Instance %s has no node in group %s" % (name, cur_group_uuid)
630 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
632 """Checks if the owned node groups are still correct for an instance.
634 @type cfg: L{config.ConfigWriter}
635 @param cfg: The cluster configuration
636 @type instance_name: string
637 @param instance_name: Instance name
638 @type owned_groups: set or frozenset
639 @param owned_groups: List of currently owned node groups
640 @type primary_only: boolean
641 @param primary_only: Whether to check node groups for only the primary node
644 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
646 if not owned_groups.issuperset(inst_groups):
647 raise errors.OpPrereqError("Instance %s's node groups changed since"
648                                " locks were acquired, current groups are"
649                                " '%s', owning groups '%s'; retry the"
652 utils.CommaJoin(inst_groups),
653 utils.CommaJoin(owned_groups)),
659 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
660 """Checks if the instances in a node group are still correct.
662 @type cfg: L{config.ConfigWriter}
663 @param cfg: The cluster configuration
664 @type group_uuid: string
665 @param group_uuid: Node group UUID
666 @type owned_instances: set or frozenset
667 @param owned_instances: List of currently owned instances
670 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
671 if owned_instances != wanted_instances:
672 raise errors.OpPrereqError("Instances in node group '%s' changed since"
673 " locks were acquired, wanted '%s', have '%s';"
674 " retry the operation" %
676 utils.CommaJoin(wanted_instances),
677 utils.CommaJoin(owned_instances)),
680 return wanted_instances
683 def _SupportsOob(cfg, node):
684 """Tells if node supports OOB.
686 @type cfg: L{config.ConfigWriter}
687 @param cfg: The cluster configuration
688 @type node: L{objects.Node}
689 @param node: The node
690 @return: The OOB script if supported or an empty string otherwise
693 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
696 def _CopyLockList(names):
697 """Makes a copy of a list of lock names.
699 Handles L{locking.ALL_SET} correctly.
702 if names == locking.ALL_SET:
703 return locking.ALL_SET
708 def _GetWantedNodes(lu, nodes):
709 """Returns list of checked and expanded node names.
711 @type lu: L{LogicalUnit}
712 @param lu: the logical unit on whose behalf we execute
714 @param nodes: list of node names or None for all nodes
716 @return: the list of nodes, sorted
717   @raise errors.ProgrammerError: if the nodes parameter is of the wrong type
721 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
723 return utils.NiceSort(lu.cfg.GetNodeList())
726 def _GetWantedInstances(lu, instances):
727 """Returns list of checked and expanded instance names.
729 @type lu: L{LogicalUnit}
730 @param lu: the logical unit on whose behalf we execute
731 @type instances: list
732 @param instances: list of instance names or None for all instances
734 @return: the list of instances, sorted
735   @raise errors.OpPrereqError: if the instances parameter is of the wrong type
736 @raise errors.OpPrereqError: if any of the passed instances is not found
740 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
742 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
746 def _GetUpdatedParams(old_params, update_dict,
747 use_default=True, use_none=False):
748 """Return the new version of a parameter dictionary.
750 @type old_params: dict
751 @param old_params: old parameters
752 @type update_dict: dict
753 @param update_dict: dict containing new parameter values, or
754 constants.VALUE_DEFAULT to reset the parameter to its default
756   @type use_default: boolean
757   @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
758       values as 'to be deleted' values
759   @type use_none: boolean
760   @param use_none: whether to recognise C{None} values as 'to be
763 @return: the new parameter dictionary
766 params_copy = copy.deepcopy(old_params)
767 for key, val in update_dict.iteritems():
768 if ((use_default and val == constants.VALUE_DEFAULT) or
769 (use_none and val is None)):
775 params_copy[key] = val
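# Illustrative usage sketch (added; not part of the original module), showing
# the update semantics described above with made-up hypervisor parameters.
def _ExampleGetUpdatedParams():
  """Illustrative sketch (not part of the original module).

  C{VALUE_DEFAULT} removes a key from the copy, other values overwrite or
  extend it, and the input dict is left untouched.

  """
  old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/xvda1"}
  new = _GetUpdatedParams(old, {"kernel_path": constants.VALUE_DEFAULT,
                                "serial_console": True})
  # new == {"root_path": "/dev/xvda1", "serial_console": True}
  return new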
779 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
780 """Return the new version of a instance policy.
782 @param group_policy: whether this policy applies to a group and thus
783 we should support removal of policy entries
786 use_none = use_default = group_policy
787 ipolicy = copy.deepcopy(old_ipolicy)
788 for key, value in new_ipolicy.items():
789 if key not in constants.IPOLICY_ALL_KEYS:
790 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
792 if key in constants.IPOLICY_ISPECS:
793 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
794 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
796 use_default=use_default)
798 if (not value or value == [constants.VALUE_DEFAULT] or
799 value == constants.VALUE_DEFAULT):
803 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
804                                      " on the cluster" % key,
807 if key in constants.IPOLICY_PARAMETERS:
808 # FIXME: we assume all such values are float
810 ipolicy[key] = float(value)
811 except (TypeError, ValueError), err:
812 raise errors.OpPrereqError("Invalid value for attribute"
813 " '%s': '%s', error: %s" %
814 (key, value, err), errors.ECODE_INVAL)
816 # FIXME: we assume all others are lists; this should be redone
818 ipolicy[key] = list(value)
820 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
821 except errors.ConfigurationError, err:
822 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
827 def _UpdateAndVerifySubDict(base, updates, type_check):
828 """Updates and verifies a dict with sub dicts of the same type.
830 @param base: The dict with the old data
831 @param updates: The dict with the new data
832 @param type_check: Dict suitable to ForceDictType to verify correct types
833 @returns: A new dict with updated and verified values
837 new = _GetUpdatedParams(old, value)
838 utils.ForceDictType(new, type_check)
841 ret = copy.deepcopy(base)
842 ret.update(dict((key, fn(base.get(key, {}), value))
843 for key, value in updates.items()))
847 def _MergeAndVerifyHvState(op_input, obj_input):
848 """Combines the hv state from an opcode with the one of the object
850 @param op_input: The input dict from the opcode
851 @param obj_input: The input dict from the objects
852 @return: The verified and updated dict
856 invalid_hvs = set(op_input) - constants.HYPER_TYPES
858 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
859 " %s" % utils.CommaJoin(invalid_hvs),
861 if obj_input is None:
863 type_check = constants.HVSTS_PARAMETER_TYPES
864 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
869 def _MergeAndVerifyDiskState(op_input, obj_input):
870 """Combines the disk state from an opcode with the one of the object
872 @param op_input: The input dict from the opcode
873 @param obj_input: The input dict from the objects
874 @return: The verified and updated dict
877 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
879 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
880 utils.CommaJoin(invalid_dst),
882 type_check = constants.DSS_PARAMETER_TYPES
883 if obj_input is None:
885 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
887 for key, value in op_input.items())
892 def _ReleaseLocks(lu, level, names=None, keep=None):
893 """Releases locks owned by an LU.
895 @type lu: L{LogicalUnit}
896 @param level: Lock level
897 @type names: list or None
898 @param names: Names of locks to release
899 @type keep: list or None
900 @param keep: Names of locks to retain
903 assert not (keep is not None and names is not None), \
904 "Only one of the 'names' and the 'keep' parameters can be given"
906 if names is not None:
907 should_release = names.__contains__
909 should_release = lambda name: name not in keep
911 should_release = None
913 owned = lu.owned_locks(level)
915 # Not owning any lock at this level, do nothing
922 # Determine which locks to release
924 if should_release(name):
929 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
931 # Release just some locks
932 lu.glm.release(level, names=release)
934 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
937 lu.glm.release(level)
939 assert not lu.glm.is_owned(level), "No locks should be owned"
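# Illustrative usage sketch (added; not part of the original module), showing
# the keep= form of _ReleaseLocks; the helper name is hypothetical.
def _ExampleKeepOnlyPrimaryNodeLock(lu, instance):
  """Illustrative sketch (not part of the original module).

  Releases every node lock the LU owns except the instance's primary node;
  passing C{names=} instead of C{keep=} would release exactly the listed
  locks.

  """
  _ReleaseLocks(lu, locking.LEVEL_NODE, keep=[instance.primary_node])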
942 def _MapInstanceDisksToNodes(instances):
943 """Creates a map from (node, volume) to instance name.
945 @type instances: list of L{objects.Instance}
946 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
949 return dict(((node, vol), inst.name)
950 for inst in instances
951 for (node, vols) in inst.MapLVsByNode().items()
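# Example of the resulting mapping (made-up node, volume and instance names):
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
#    ("node2.example.com", "xenvg/disk0"): "inst1.example.com"}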
955 def _RunPostHook(lu, node_name):
956 """Runs the post-hook for an opcode on a single node.
959 hm = lu.proc.BuildHooksManager(lu)
961 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
962 except Exception, err: # pylint: disable=W0703
963 lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
966 def _CheckOutputFields(static, dynamic, selected):
967 """Checks whether all selected fields are valid.
969 @type static: L{utils.FieldSet}
970 @param static: static fields set
971 @type dynamic: L{utils.FieldSet}
972 @param dynamic: dynamic fields set
979 delta = f.NonMatching(selected)
981 raise errors.OpPrereqError("Unknown output fields selected: %s"
982 % ",".join(delta), errors.ECODE_INVAL)
985 def _CheckGlobalHvParams(params):
986 """Validates that given hypervisor params are not global ones.
988 This will ensure that instances don't get customised versions of
992 used_globals = constants.HVC_GLOBALS.intersection(params)
994 msg = ("The following hypervisor parameters are global and cannot"
995 " be customized at instance level, please modify them at"
996 " cluster level: %s" % utils.CommaJoin(used_globals))
997 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1000 def _CheckNodeOnline(lu, node, msg=None):
1001 """Ensure that a given node is online.
1003 @param lu: the LU on behalf of which we make the check
1004 @param node: the node to check
1005 @param msg: if passed, should be a message to replace the default one
1006 @raise errors.OpPrereqError: if the node is offline
1010 msg = "Can't use offline node"
1011 if lu.cfg.GetNodeInfo(node).offline:
1012 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1015 def _CheckNodeNotDrained(lu, node):
1016 """Ensure that a given node is not drained.
1018 @param lu: the LU on behalf of which we make the check
1019 @param node: the node to check
1020 @raise errors.OpPrereqError: if the node is drained
1023 if lu.cfg.GetNodeInfo(node).drained:
1024 raise errors.OpPrereqError("Can't use drained node %s" % node,
1028 def _CheckNodeVmCapable(lu, node):
1029 """Ensure that a given node is vm capable.
1031 @param lu: the LU on behalf of which we make the check
1032 @param node: the node to check
1033 @raise errors.OpPrereqError: if the node is not vm capable
1036 if not lu.cfg.GetNodeInfo(node).vm_capable:
1037 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1041 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1042 """Ensure that a node supports a given OS.
1044 @param lu: the LU on behalf of which we make the check
1045 @param node: the node to check
1046 @param os_name: the OS to query about
1047 @param force_variant: whether to ignore variant errors
1048 @raise errors.OpPrereqError: if the node is not supporting the OS
1051 result = lu.rpc.call_os_get(node, os_name)
1052 result.Raise("OS '%s' not in supported OS list for node %s" %
1054 prereq=True, ecode=errors.ECODE_INVAL)
1055 if not force_variant:
1056 _CheckOSVariant(result.payload, os_name)
1059 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1060 """Ensure that a node has the given secondary ip.
1062 @type lu: L{LogicalUnit}
1063 @param lu: the LU on behalf of which we make the check
1065 @param node: the node to check
1066 @type secondary_ip: string
1067 @param secondary_ip: the ip to check
1068 @type prereq: boolean
1069 @param prereq: whether to throw a prerequisite or an execute error
1070 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1071 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1074 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1075 result.Raise("Failure checking secondary ip on node %s" % node,
1076 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1077 if not result.payload:
1078 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1079 " please fix and re-run this command" % secondary_ip)
1081 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1083 raise errors.OpExecError(msg)
1086 def _GetClusterDomainSecret():
1087 """Reads the cluster domain secret.
1090 return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
1094 def _CheckInstanceState(lu, instance, req_states, msg=None):
1095 """Ensure that an instance is in one of the required states.
1097 @param lu: the LU on behalf of which we make the check
1098 @param instance: the instance to check
1099 @param msg: if passed, should be a message to replace the default one
1100 @raise errors.OpPrereqError: if the instance is not in the required state
1104 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1105 if instance.admin_state not in req_states:
1106 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1107 (instance.name, instance.admin_state, msg),
1110 if constants.ADMINST_UP not in req_states:
1111 pnode = instance.primary_node
1112 if not lu.cfg.GetNodeInfo(pnode).offline:
1113 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1114 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1115 prereq=True, ecode=errors.ECODE_ENVIRON)
1116 if instance.name in ins_l.payload:
1117 raise errors.OpPrereqError("Instance %s is running, %s" %
1118 (instance.name, msg), errors.ECODE_STATE)
1120 lu.LogWarning("Primary node offline, ignoring check that instance"
1124 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1125 """Computes if value is in the desired range.
1127 @param name: name of the parameter for which we perform the check
1128 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1130 @param ipolicy: dictionary containing min, max and std values
1131 @param value: actual value that we want to use
1132 @return: None or element not meeting the criteria
1136 if value in [None, constants.VALUE_AUTO]:
1138 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1139 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1140 if value > max_v or min_v > value:
1142 fqn = "%s/%s" % (name, qualifier)
1145 return ("%s value %s is not in range [%s, %s]" %
1146 (fqn, value, min_v, max_v))
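# Illustrative usage sketch (added; not part of the original module), showing
# the range check above with a minimal hand-built policy dict.
def _ExampleMinMaxSpecCheck():
  """Illustrative sketch (not part of the original module).

  With a policy allowing 1 to 8 VCPUs, a value of 4 passes (C{None} is
  returned) while 12 yields a violation message.

  """
  ipolicy = {
    constants.ISPECS_MIN: {constants.ISPEC_CPU_COUNT: 1},
    constants.ISPECS_MAX: {constants.ISPEC_CPU_COUNT: 8},
    }
  assert _ComputeMinMaxSpec(constants.ISPEC_CPU_COUNT, "", ipolicy, 4) is None
  return _ComputeMinMaxSpec(constants.ISPEC_CPU_COUNT, "", ipolicy, 12)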
1150 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1151 nic_count, disk_sizes, spindle_use,
1152 _compute_fn=_ComputeMinMaxSpec):
1153 """Verifies ipolicy against provided specs.
1156 @param ipolicy: The ipolicy
1158 @param mem_size: The memory size
1159 @type cpu_count: int
1160 @param cpu_count: Used cpu cores
1161 @type disk_count: int
1162 @param disk_count: Number of disks used
1163 @type nic_count: int
1164 @param nic_count: Number of nics used
1165 @type disk_sizes: list of ints
1166 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1167 @type spindle_use: int
1168 @param spindle_use: The number of spindles this instance uses
1169 @param _compute_fn: The compute function (unittest only)
1170   @return: A list of violations, or an empty list if no violations are found
1173 assert disk_count == len(disk_sizes)
1176 (constants.ISPEC_MEM_SIZE, "", mem_size),
1177 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1178 (constants.ISPEC_DISK_COUNT, "", disk_count),
1179 (constants.ISPEC_NIC_COUNT, "", nic_count),
1180 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1181 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1182 for idx, d in enumerate(disk_sizes)]
1185 (_compute_fn(name, qualifier, ipolicy, value)
1186 for (name, qualifier, value) in test_settings))
1189 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1190 _compute_fn=_ComputeIPolicySpecViolation):
1191 """Compute if instance meets the specs of ipolicy.
1194 @param ipolicy: The ipolicy to verify against
1195 @type instance: L{objects.Instance}
1196 @param instance: The instance to verify
1197 @param _compute_fn: The function to verify ipolicy (unittest only)
1198 @see: L{_ComputeIPolicySpecViolation}
1201 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1202 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1203 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1204 disk_count = len(instance.disks)
1205 disk_sizes = [disk.size for disk in instance.disks]
1206 nic_count = len(instance.nics)
1208 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1209 disk_sizes, spindle_use)
1212 def _ComputeIPolicyInstanceSpecViolation(
1213 ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
1214 """Compute if instance specs meets the specs of ipolicy.
1217 @param ipolicy: The ipolicy to verify against
1218   @type instance_spec: dict
1219 @param instance_spec: The instance spec to verify
1220 @param _compute_fn: The function to verify ipolicy (unittest only)
1221 @see: L{_ComputeIPolicySpecViolation}
1224 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1225 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1226 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1227 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1228 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1229 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1231 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1232 disk_sizes, spindle_use)
1235 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1237 _compute_fn=_ComputeIPolicyInstanceViolation):
1238 """Compute if instance meets the specs of the new target group.
1240 @param ipolicy: The ipolicy to verify
1241 @param instance: The instance object to verify
1242 @param current_group: The current group of the instance
1243 @param target_group: The new group of the instance
1244 @param _compute_fn: The function to verify ipolicy (unittest only)
1245 @see: L{_ComputeIPolicySpecViolation}
1248 if current_group == target_group:
1251 return _compute_fn(ipolicy, instance)
1254 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1255 _compute_fn=_ComputeIPolicyNodeViolation):
1256 """Checks that the target node is correct in terms of instance policy.
1258 @param ipolicy: The ipolicy to verify
1259 @param instance: The instance object to verify
1260   @param node: The new node the instance will be relocated to
1261 @param ignore: Ignore violations of the ipolicy
1262 @param _compute_fn: The function to verify ipolicy (unittest only)
1263 @see: L{_ComputeIPolicySpecViolation}
1266 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1267 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1270 msg = ("Instance does not meet target node group's (%s) instance"
1271 " policy: %s") % (node.group, utils.CommaJoin(res))
1275 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1278 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1279 """Computes a set of any instances that would violate the new ipolicy.
1281 @param old_ipolicy: The current (still in-place) ipolicy
1282 @param new_ipolicy: The new (to become) ipolicy
1283 @param instances: List of instances to verify
1284   @return: A list of instances which violate the new ipolicy but
1288 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1289 _ComputeViolatingInstances(old_ipolicy, instances))
1292 def _ExpandItemName(fn, name, kind):
1293 """Expand an item name.
1295 @param fn: the function to use for expansion
1296 @param name: requested item name
1297 @param kind: text description ('Node' or 'Instance')
1298 @return: the resolved (full) name
1299 @raise errors.OpPrereqError: if the item is not found
1302 full_name = fn(name)
1303 if full_name is None:
1304 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1309 def _ExpandNodeName(cfg, name):
1310 """Wrapper over L{_ExpandItemName} for nodes."""
1311 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1314 def _ExpandInstanceName(cfg, name):
1315 """Wrapper over L{_ExpandItemName} for instance."""
1316 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1319 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1320 minmem, maxmem, vcpus, nics, disk_template, disks,
1321 bep, hvp, hypervisor_name, tags):
1322 """Builds instance related env variables for hooks
1324 This builds the hook environment from individual variables.
1327 @param name: the name of the instance
1328 @type primary_node: string
1329 @param primary_node: the name of the instance's primary node
1330 @type secondary_nodes: list
1331 @param secondary_nodes: list of secondary nodes as strings
1332 @type os_type: string
1333 @param os_type: the name of the instance's OS
1334 @type status: string
1335 @param status: the desired status of the instance
1336 @type minmem: string
1337 @param minmem: the minimum memory size of the instance
1338 @type maxmem: string
1339 @param maxmem: the maximum memory size of the instance
1341 @param vcpus: the count of VCPUs the instance has
1343 @param nics: list of tuples (ip, mac, mode, link, network) representing
1344 the NICs the instance has
1345 @type disk_template: string
1346 @param disk_template: the disk template of the instance
1348 @param disks: the list of (size, mode) pairs
1350 @param bep: the backend parameters for the instance
1352 @param hvp: the hypervisor parameters for the instance
1353 @type hypervisor_name: string
1354 @param hypervisor_name: the hypervisor for the instance
1356 @param tags: list of instance tags as strings
1358 @return: the hook environment for this instance
1363 "INSTANCE_NAME": name,
1364 "INSTANCE_PRIMARY": primary_node,
1365 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1366 "INSTANCE_OS_TYPE": os_type,
1367 "INSTANCE_STATUS": status,
1368 "INSTANCE_MINMEM": minmem,
1369 "INSTANCE_MAXMEM": maxmem,
1370 # TODO(2.7) remove deprecated "memory" value
1371 "INSTANCE_MEMORY": maxmem,
1372 "INSTANCE_VCPUS": vcpus,
1373 "INSTANCE_DISK_TEMPLATE": disk_template,
1374 "INSTANCE_HYPERVISOR": hypervisor_name,
1377 nic_count = len(nics)
1378 for idx, (ip, mac, mode, link, network) in enumerate(nics):
1381 env["INSTANCE_NIC%d_IP" % idx] = ip
1382 env["INSTANCE_NIC%d_MAC" % idx] = mac
1383 env["INSTANCE_NIC%d_MODE" % idx] = mode
1384 env["INSTANCE_NIC%d_LINK" % idx] = link
1385 env["INSTANCE_NIC%d_NETWORK" % idx] = network
1386 if mode == constants.NIC_MODE_BRIDGED:
1387 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1391 env["INSTANCE_NIC_COUNT"] = nic_count
1394 disk_count = len(disks)
1395 for idx, (size, mode) in enumerate(disks):
1396 env["INSTANCE_DISK%d_SIZE" % idx] = size
1397 env["INSTANCE_DISK%d_MODE" % idx] = mode
1401 env["INSTANCE_DISK_COUNT"] = disk_count
1406 env["INSTANCE_TAGS"] = " ".join(tags)
1408 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1409 for key, value in source.items():
1410 env["INSTANCE_%s_%s" % (kind, key)] = value
1415 def _NICListToTuple(lu, nics):
1416 """Build a list of nic information tuples.
1418 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1419 value in LUInstanceQueryData.
1421 @type lu: L{LogicalUnit}
1422 @param lu: the logical unit on whose behalf we execute
1423 @type nics: list of L{objects.NIC}
1424 @param nics: list of nics to convert to hooks tuples
1428 cluster = lu.cfg.GetClusterInfo()
1432 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1433 mode = filled_params[constants.NIC_MODE]
1434 link = filled_params[constants.NIC_LINK]
1435 network = nic.network
1436 hooks_nics.append((ip, mac, mode, link, network))
1440 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1441 """Builds instance related env variables for hooks from an object.
1443 @type lu: L{LogicalUnit}
1444 @param lu: the logical unit on whose behalf we execute
1445 @type instance: L{objects.Instance}
1446 @param instance: the instance for which we should build the
1448 @type override: dict
1449 @param override: dictionary with key/values that will override
1452 @return: the hook environment dictionary
1455 cluster = lu.cfg.GetClusterInfo()
1456 bep = cluster.FillBE(instance)
1457 hvp = cluster.FillHV(instance)
1459 "name": instance.name,
1460 "primary_node": instance.primary_node,
1461 "secondary_nodes": instance.secondary_nodes,
1462 "os_type": instance.os,
1463 "status": instance.admin_state,
1464 "maxmem": bep[constants.BE_MAXMEM],
1465 "minmem": bep[constants.BE_MINMEM],
1466 "vcpus": bep[constants.BE_VCPUS],
1467 "nics": _NICListToTuple(lu, instance.nics),
1468 "disk_template": instance.disk_template,
1469 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1472 "hypervisor_name": instance.hypervisor,
1473 "tags": instance.tags,
1476 args.update(override)
1477 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1480 def _AdjustCandidatePool(lu, exceptions):
1481 """Adjust the candidate pool after node operations.
1484 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1486 lu.LogInfo("Promoted nodes to master candidate role: %s",
1487 utils.CommaJoin(node.name for node in mod_list))
1488 for name in mod_list:
1489 lu.context.ReaddNode(name)
1490 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1492 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1496 def _DecideSelfPromotion(lu, exceptions=None):
1497 """Decide whether I should promote myself as a master candidate.
1500 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1501 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1502   # the new node will increase mc_max by one, so:
1503 mc_should = min(mc_should + 1, cp_size)
1504 return mc_now < mc_should
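# Worked example (added; not from the original file): with candidate_pool_size
# = 10 and GetMasterCandidateStats reporting mc_now = 4, mc_should = 5, the
# node being added raises the target to min(5 + 1, 10) = 6; since 4 < 6 the
# method returns True and the new node promotes itself.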
1507 def _ComputeViolatingInstances(ipolicy, instances):
1508 """Computes a set of instances who violates given ipolicy.
1510 @param ipolicy: The ipolicy to verify
1511   @type instances: list of L{objects.Instance}
1512 @param instances: List of instances to verify
1513 @return: A frozenset of instance names violating the ipolicy
1516 return frozenset([inst.name for inst in instances
1517 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1520 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1521 """Check that the brigdes needed by a list of nics exist.
1524 cluster = lu.cfg.GetClusterInfo()
1525 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1526 brlist = [params[constants.NIC_LINK] for params in paramslist
1527 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1529 result = lu.rpc.call_bridges_exist(target_node, brlist)
1530 result.Raise("Error checking bridges on destination node '%s'" %
1531 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1534 def _CheckInstanceBridgesExist(lu, instance, node=None):
1535 """Check that the brigdes needed by an instance exist.
1539 node = instance.primary_node
1540 _CheckNicsBridgesExist(lu, instance.nics, node)
1543 def _CheckOSVariant(os_obj, name):
1544 """Check whether an OS name conforms to the os variants specification.
1546 @type os_obj: L{objects.OS}
1547 @param os_obj: OS object to check
1549 @param name: OS name passed by the user, to check for validity
1552 variant = objects.OS.GetVariant(name)
1553 if not os_obj.supported_variants:
1555 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1556 " passed)" % (os_obj.name, variant),
1560 raise errors.OpPrereqError("OS name must include a variant",
1563 if variant not in os_obj.supported_variants:
1564 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1567 def _GetNodeInstancesInner(cfg, fn):
1568 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1571 def _GetNodeInstances(cfg, node_name):
1572 """Returns a list of all primary and secondary instances on a node.
1576 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1579 def _GetNodePrimaryInstances(cfg, node_name):
1580 """Returns primary instances on a node.
1583 return _GetNodeInstancesInner(cfg,
1584 lambda inst: node_name == inst.primary_node)
1587 def _GetNodeSecondaryInstances(cfg, node_name):
1588 """Returns secondary instances on a node.
1591 return _GetNodeInstancesInner(cfg,
1592 lambda inst: node_name in inst.secondary_nodes)
1595 def _GetStorageTypeArgs(cfg, storage_type):
1596 """Returns the arguments for a storage type.
1599 # Special case for file storage
1600 if storage_type == constants.ST_FILE:
1601 # storage.FileStorage wants a list of storage directories
1602 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1607 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1610 for dev in instance.disks:
1611 cfg.SetDiskID(dev, node_name)
1613 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1615 result.Raise("Failed to get disk status from node %s" % node_name,
1616 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1618 for idx, bdev_status in enumerate(result.payload):
1619 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1625 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1626 """Check the sanity of iallocator and node arguments and use the
1627 cluster-wide iallocator if appropriate.
1629 Check that at most one of (iallocator, node) is specified. If none is
1630 specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
1631 then the LU's opcode's iallocator slot is filled with the cluster-wide
1634 @type iallocator_slot: string
1635 @param iallocator_slot: the name of the opcode iallocator slot
1636 @type node_slot: string
1637 @param node_slot: the name of the opcode target node slot
1640 node = getattr(lu.op, node_slot, None)
1641 ialloc = getattr(lu.op, iallocator_slot, None)
1645 if node is not None and ialloc is not None:
1646 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1648 elif ((node is None and ialloc is None) or
1649 ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
1650 default_iallocator = lu.cfg.GetDefaultIAllocator()
1651 if default_iallocator:
1652 setattr(lu.op, iallocator_slot, default_iallocator)
1654 raise errors.OpPrereqError("No iallocator or node given and no"
1655 " cluster-wide default iallocator found;"
1656 " please specify either an iallocator or a"
1657 " node, or set a cluster-wide default"
1658 " iallocator", errors.ECODE_INVAL)
1661 def _GetDefaultIAllocator(cfg, ialloc):
1662 """Decides on which iallocator to use.
1664 @type cfg: L{config.ConfigWriter}
1665 @param cfg: Cluster configuration object
1666 @type ialloc: string or None
1667 @param ialloc: Iallocator specified in opcode
1669 @return: Iallocator name
1673 # Use default iallocator
1674 ialloc = cfg.GetDefaultIAllocator()
1677 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1678 " opcode nor as a cluster-wide default",
1684 def _CheckHostnameSane(lu, name):
1685 """Ensures that a given hostname resolves to a 'sane' name.
1687 The given name is required to be a prefix of the resolved hostname,
1688 to prevent accidental mismatches.
1690 @param lu: the logical unit on behalf of which we're checking
1691 @param name: the name we should resolve and check
1692 @return: the resolved hostname object
1695 hostname = netutils.GetHostname(name=name)
1696 if hostname.name != name:
1697 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1698 if not utils.MatchNameComponent(name, [hostname.name]):
1699 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1700 " same as given hostname '%s'") %
1701 (hostname.name, name), errors.ECODE_INVAL)
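# Example (added; made-up names): a request for "inst7" that resolves to
# "inst7.example.com" is accepted (the expansion is logged via LogInfo), while
# a resolution to "mail.example.com" raises OpPrereqError because the given
# name is not a prefix component of the resolved hostname.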
1705 class LUClusterPostInit(LogicalUnit):
1706 """Logical unit for running hooks after cluster initialization.
1709 HPATH = "cluster-init"
1710 HTYPE = constants.HTYPE_CLUSTER
1712 def BuildHooksEnv(self):
1717 "OP_TARGET": self.cfg.GetClusterName(),
1720 def BuildHooksNodes(self):
1721 """Build hooks nodes.
1724 return ([], [self.cfg.GetMasterNode()])
1726 def Exec(self, feedback_fn):
1733 class LUClusterDestroy(LogicalUnit):
1734 """Logical unit for destroying the cluster.
1737 HPATH = "cluster-destroy"
1738 HTYPE = constants.HTYPE_CLUSTER
1740 def BuildHooksEnv(self):
1745 "OP_TARGET": self.cfg.GetClusterName(),
1748 def BuildHooksNodes(self):
1749 """Build hooks nodes.
1754 def CheckPrereq(self):
1755 """Check prerequisites.
1757 This checks whether the cluster is empty.
1759 Any errors are signaled by raising errors.OpPrereqError.
1762 master = self.cfg.GetMasterNode()
1764 nodelist = self.cfg.GetNodeList()
1765 if len(nodelist) != 1 or nodelist[0] != master:
1766 raise errors.OpPrereqError("There are still %d node(s) in"
1767 " this cluster." % (len(nodelist) - 1),
1769 instancelist = self.cfg.GetInstanceList()
1771 raise errors.OpPrereqError("There are still %d instance(s) in"
1772 " this cluster." % len(instancelist),
1775 def Exec(self, feedback_fn):
1776 """Destroys the cluster.
1779 master_params = self.cfg.GetMasterNetworkParameters()
1781 # Run post hooks on master node before it's removed
1782 _RunPostHook(self, master_params.name)
1784 ems = self.cfg.GetUseExternalMipScript()
1785 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1788 self.LogWarning("Error disabling the master IP address: %s",
1791 return master_params.name
1794 def _VerifyCertificate(filename):
1795 """Verifies a certificate for L{LUClusterVerifyConfig}.
1797 @type filename: string
1798 @param filename: Path to PEM file
1802 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1803 utils.ReadFile(filename))
1804 except Exception, err: # pylint: disable=W0703
1805 return (LUClusterVerifyConfig.ETYPE_ERROR,
1806 "Failed to load X509 certificate %s: %s" % (filename, err))
1809 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1810 constants.SSL_CERT_EXPIRATION_ERROR)
1813 fnamemsg = "While verifying %s: %s" % (filename, msg)
1818 return (None, fnamemsg)
1819 elif errcode == utils.CERT_WARNING:
1820 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1821 elif errcode == utils.CERT_ERROR:
1822 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1824 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1827 def _GetAllHypervisorParameters(cluster, instances):
1828 """Compute the set of all hypervisor parameters.
1830 @type cluster: L{objects.Cluster}
1831 @param cluster: the cluster object
1832   @type instances: list of L{objects.Instance}
1833 @param instances: additional instances from which to obtain parameters
1834 @rtype: list of (origin, hypervisor, parameters)
1835 @return: a list with all parameters found, indicating the hypervisor they
1836 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1841 for hv_name in cluster.enabled_hypervisors:
1842 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1844 for os_name, os_hvp in cluster.os_hvp.items():
1845 for hv_name, hv_params in os_hvp.items():
1847 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1848 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1850 # TODO: collapse identical parameter values in a single one
1851 for instance in instances:
1852 if instance.hvparams:
1853 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1854 cluster.FillHV(instance)))
1859 class _VerifyErrors(object):
1860 """Mix-in for cluster/group verify LUs.
1862 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1863 self.op and self._feedback_fn to be available.)
1867 ETYPE_FIELD = "code"
1868 ETYPE_ERROR = "ERROR"
1869 ETYPE_WARNING = "WARNING"
1871 def _Error(self, ecode, item, msg, *args, **kwargs):
1872 """Format an error message.
1874 Based on the opcode's error_codes parameter, either format a
1875 parseable error code, or a simpler error string.
1877 This must be called only from Exec and functions called from Exec.
1880 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1881 itype, etxt, _ = ecode
1882 # first complete the msg
1885 # then format the whole message
1886 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1887 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1893 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1894 # and finally report it via the feedback_fn
1895 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1897 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1898 """Log an error message if the passed condition is True.
1902 or self.op.debug_simulate_errors) # pylint: disable=E1101
1904 # If the error code is in the list of ignored errors, demote the error to a
1906 (_, etxt, _) = ecode
1907 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1908 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1911 self._Error(ecode, *args, **kwargs)
1913 # do not mark the operation as failed for WARN cases only
1914 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1915 self.bad = self.bad or cond
1918 class LUClusterVerify(NoHooksLU):
1919 """Submits all jobs necessary to verify the cluster.
1924 def ExpandNames(self):
1925 self.needed_locks = {}
1927 def Exec(self, feedback_fn):
1930 if self.op.group_name:
1931 groups = [self.op.group_name]
1932 depends_fn = lambda: None
1934 groups = self.cfg.GetNodeGroupList()
1936 # Verify global configuration
1938 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1941 # Always depend on global verification
1942 depends_fn = lambda: [(-len(jobs), [])]
1945 [opcodes.OpClusterVerifyGroup(group_name=group,
1946 ignore_errors=self.op.ignore_errors,
1947 depends=depends_fn())]
1948 for group in groups)
1950 # Fix up all parameters
1951 for op in itertools.chain(*jobs): # pylint: disable=W0142
1952 op.debug_simulate_errors = self.op.debug_simulate_errors
1953 op.verbose = self.op.verbose
1954 op.error_codes = self.op.error_codes
1956 op.skip_checks = self.op.skip_checks
1957 except AttributeError:
1958 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1960 return ResultWithJobs(jobs)
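# When no specific group is requested, this submits one single-opcode job per
# entry, schematically (group names hypothetical):
#   [[OpClusterVerifyConfig(...)],
#    [OpClusterVerifyGroup(group_name="default", depends=[(-1, [])], ...)],
#    [OpClusterVerifyGroup(group_name="other", depends=[(-2, [])], ...)]]
# Because jobs.extend() consumes the generator lazily, -len(jobs) always
# resolves back to the global configuration-verification job.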
1963 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1964 """Verifies the cluster config.
1969 def _VerifyHVP(self, hvp_data):
1970 """Verifies locally the syntax of the hypervisor parameters.
1973 for item, hv_name, hv_params in hvp_data:
1974 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1977 hv_class = hypervisor.GetHypervisor(hv_name)
1978 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1979 hv_class.CheckParameterSyntax(hv_params)
1980 except errors.GenericError, err:
1981 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1983 def ExpandNames(self):
1984 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1985 self.share_locks = _ShareAll()
1987 def CheckPrereq(self):
1988 """Check prerequisites.
1991 # Retrieve all information
1992 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1993 self.all_node_info = self.cfg.GetAllNodesInfo()
1994 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1996 def Exec(self, feedback_fn):
1997 """Verify integrity of cluster, performing various test on nodes.
2001 self._feedback_fn = feedback_fn
2003 feedback_fn("* Verifying cluster config")
2005 for msg in self.cfg.VerifyConfig():
2006 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2008 feedback_fn("* Verifying cluster certificate files")
2010 for cert_filename in pathutils.ALL_CERT_FILES:
2011 (errcode, msg) = _VerifyCertificate(cert_filename)
2012 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2014 feedback_fn("* Verifying hypervisor parameters")
2016 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2017 self.all_inst_info.values()))
2019 feedback_fn("* Verifying all nodes belong to an existing group")
2021 # We do this verification here because, should this bogus circumstance
2022 # occur, it would never be caught by VerifyGroup, which only acts on
2023 # nodes/instances reachable from existing node groups.
2025 dangling_nodes = set(node.name for node in self.all_node_info.values()
2026 if node.group not in self.all_group_info)
2028 dangling_instances = {}
2029 no_node_instances = []
2031 for inst in self.all_inst_info.values():
2032 if inst.primary_node in dangling_nodes:
2033 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2034 elif inst.primary_node not in self.all_node_info:
2035 no_node_instances.append(inst.name)
2040 utils.CommaJoin(dangling_instances.get(node.name,
2042 for node in dangling_nodes]
2044 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2046 "the following nodes (and their instances) belong to a non"
2047 " existing group: %s", utils.CommaJoin(pretty_dangling))
2049 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2051 "the following instances have a non-existing primary-node:"
2052 " %s", utils.CommaJoin(no_node_instances))
2057 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2058 """Verifies the status of a node group.
2061 HPATH = "cluster-verify"
2062 HTYPE = constants.HTYPE_CLUSTER
2065 _HOOKS_INDENT_RE = re.compile("^", re.M)
2067 class NodeImage(object):
2068 """A class representing the logical and physical status of a node.
2071 @ivar name: the node name to which this object refers
2072 @ivar volumes: a structure as returned from
2073 L{ganeti.backend.GetVolumeList} (runtime)
2074 @ivar instances: a list of running instances (runtime)
2075 @ivar pinst: list of configured primary instances (config)
2076 @ivar sinst: list of configured secondary instances (config)
2077 @ivar sbp: dictionary of {primary-node: list of instances} for all
2078 instances for which this node is secondary (config)
2079 @ivar mfree: free memory, as reported by hypervisor (runtime)
2080 @ivar dfree: free disk, as reported by the node (runtime)
2081 @ivar offline: the offline status (config)
2082 @type rpc_fail: boolean
2083 @ivar rpc_fail: whether the RPC verify call failed (overall,
2084 not whether the individual keys were correct) (runtime)
2085 @type lvm_fail: boolean
2086 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2087 @type hyp_fail: boolean
2088 @ivar hyp_fail: whether the RPC call didn't return the instance list
2089 @type ghost: boolean
2090 @ivar ghost: whether this is a known node or not (config)
2091 @type os_fail: boolean
2092 @ivar os_fail: whether the RPC call didn't return valid OS data
2094 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2095 @type vm_capable: boolean
2096 @ivar vm_capable: whether the node can host instances
2099 def __init__(self, offline=False, name=None, vm_capable=True):
2108 self.offline = offline
2109 self.vm_capable = vm_capable
2110 self.rpc_fail = False
2111 self.lvm_fail = False
2112 self.hyp_fail = False
2114 self.os_fail = False
2117 def ExpandNames(self):
2118 # This raises errors.OpPrereqError on its own:
2119 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2121 # Get instances in node group; this is unsafe and needs verification later
2123 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2125 self.needed_locks = {
2126 locking.LEVEL_INSTANCE: inst_names,
2127 locking.LEVEL_NODEGROUP: [self.group_uuid],
2128 locking.LEVEL_NODE: [],
2131 self.share_locks = _ShareAll()
2133 def DeclareLocks(self, level):
2134 if level == locking.LEVEL_NODE:
2135 # Get members of node group; this is unsafe and needs verification later
2136 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2138 all_inst_info = self.cfg.GetAllInstancesInfo()
2140 # In Exec(), we warn about mirrored instances that have primary and
2141 # secondary living in separate node groups. To fully verify that
2142 # volumes for these instances are healthy, we will need to do an
2143 # extra call to their secondaries. We ensure here that those nodes will be locked.
2145 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2146 # Important: access only the instances whose lock is owned
2147 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2148 nodes.update(all_inst_info[inst].secondary_nodes)
2150 self.needed_locks[locking.LEVEL_NODE] = nodes
2152 def CheckPrereq(self):
2153 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2154 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2156 group_nodes = set(self.group_info.members)
2158 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2161 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2163 unlocked_instances = \
2164 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2167 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2168 utils.CommaJoin(unlocked_nodes),
2171 if unlocked_instances:
2172 raise errors.OpPrereqError("Missing lock for instances: %s" %
2173 utils.CommaJoin(unlocked_instances),
2176 self.all_node_info = self.cfg.GetAllNodesInfo()
2177 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2179 self.my_node_names = utils.NiceSort(group_nodes)
2180 self.my_inst_names = utils.NiceSort(group_instances)
2182 self.my_node_info = dict((name, self.all_node_info[name])
2183 for name in self.my_node_names)
2185 self.my_inst_info = dict((name, self.all_inst_info[name])
2186 for name in self.my_inst_names)
2188 # We detect here the nodes that will need the extra RPC calls for verifying
2189 # split LV volumes; they should be locked.
2190 extra_lv_nodes = set()
2192 for inst in self.my_inst_info.values():
2193 if inst.disk_template in constants.DTS_INT_MIRROR:
2194 for nname in inst.all_nodes:
2195 if self.all_node_info[nname].group != self.group_uuid:
2196 extra_lv_nodes.add(nname)
2198 unlocked_lv_nodes = \
2199 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2201 if unlocked_lv_nodes:
2202 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2203 utils.CommaJoin(unlocked_lv_nodes),
2205 self.extra_lv_nodes = list(extra_lv_nodes)
2207 def _VerifyNode(self, ninfo, nresult):
2208 """Perform some basic validation on data returned from a node.
2210 - check the result data structure is well formed and has all the
2212 - check ganeti version
2214 @type ninfo: L{objects.Node}
2215 @param ninfo: the node to check
2216 @param nresult: the results from the node
2218 @return: whether overall this call was successful (and we can expect
2219 reasonable values in the response)
2223 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2225 # main result, nresult should be a non-empty dict
2226 test = not nresult or not isinstance(nresult, dict)
2227 _ErrorIf(test, constants.CV_ENODERPC, node,
2228 "unable to verify node: no data returned")
2232 # compares ganeti version
2233 local_version = constants.PROTOCOL_VERSION
2234 remote_version = nresult.get("version", None)
2235 test = not (remote_version and
2236 isinstance(remote_version, (list, tuple)) and
2237 len(remote_version) == 2)
2238 _ErrorIf(test, constants.CV_ENODERPC, node,
2239 "connection to node returned invalid data")
2243 test = local_version != remote_version[0]
2244 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2245 "incompatible protocol versions: master %s,"
2246 " node %s", local_version, remote_version[0])
2250 # node seems compatible, we can actually try to look into its results
2252 # full package version
2253 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2254 constants.CV_ENODEVERSION, node,
2255 "software version mismatch: master %s, node %s",
2256 constants.RELEASE_VERSION, remote_version[1],
2257 code=self.ETYPE_WARNING)
2259 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2260 if ninfo.vm_capable and isinstance(hyp_result, dict):
2261 for hv_name, hv_result in hyp_result.iteritems():
2262 test = hv_result is not None
2263 _ErrorIf(test, constants.CV_ENODEHV, node,
2264 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2266 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2267 if ninfo.vm_capable and isinstance(hvp_result, list):
2268 for item, hv_name, hv_result in hvp_result:
2269 _ErrorIf(True, constants.CV_ENODEHV, node,
2270 "hypervisor %s parameter verify failure (source %s): %s",
2271 hv_name, item, hv_result)
2273 test = nresult.get(constants.NV_NODESETUP,
2274 ["Missing NODESETUP results"])
2275 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2280 def _VerifyNodeTime(self, ninfo, nresult,
2281 nvinfo_starttime, nvinfo_endtime):
2282 """Check the node time.
2284 @type ninfo: L{objects.Node}
2285 @param ninfo: the node to check
2286 @param nresult: the remote results for the node
2287 @param nvinfo_starttime: the start time of the RPC call
2288 @param nvinfo_endtime: the end time of the RPC call
2292 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2294 ntime = nresult.get(constants.NV_TIME, None)
2296 ntime_merged = utils.MergeTime(ntime)
2297 except (ValueError, TypeError):
2298 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2301 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2302 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2303 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2304 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2308 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2309 "Node time diverges by at least %s from master node time",
2312 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2313 """Check the node LVM results.
2315 @type ninfo: L{objects.Node}
2316 @param ninfo: the node to check
2317 @param nresult: the remote results for the node
2318 @param vg_name: the configured VG name
2325 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2327 # checks vg existence and size > 20G
2328 vglist = nresult.get(constants.NV_VGLIST, None)
2330 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2332 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2333 constants.MIN_VG_SIZE)
2334 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2337 pvlist = nresult.get(constants.NV_PVLIST, None)
2338 test = pvlist is None
2339 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2341 # check that ':' is not present in PV names, since it's a
2342 # special character for lvcreate (denotes the range of PEs to
2344 for _, pvname, owner_vg in pvlist:
2345 test = ":" in pvname
2346 _ErrorIf(test, constants.CV_ENODELVM, node,
2347 "Invalid character ':' in PV '%s' of VG '%s'",
2350 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2351 """Check the node bridges.
2353 @type ninfo: L{objects.Node}
2354 @param ninfo: the node to check
2355 @param nresult: the remote results for the node
2356 @param bridges: the expected list of bridges
2363 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2365 missing = nresult.get(constants.NV_BRIDGES, None)
2366 test = not isinstance(missing, list)
2367 _ErrorIf(test, constants.CV_ENODENET, node,
2368 "did not return valid bridge information")
2370 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2371 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2373 def _VerifyNodeUserScripts(self, ninfo, nresult):
2374 """Check the results of user scripts presence and executability on the node
2376 @type ninfo: L{objects.Node}
2377 @param ninfo: the node to check
2378 @param nresult: the remote results for the node
2383 test = not constants.NV_USERSCRIPTS in nresult
2384 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2385 "did not return user scripts information")
2387 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2389 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2390 "user scripts not present or not executable: %s" %
2391 utils.CommaJoin(sorted(broken_scripts)))
2393 def _VerifyNodeNetwork(self, ninfo, nresult):
2394 """Check the node network connectivity results.
2396 @type ninfo: L{objects.Node}
2397 @param ninfo: the node to check
2398 @param nresult: the remote results for the node
2402 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2404 test = constants.NV_NODELIST not in nresult
2405 _ErrorIf(test, constants.CV_ENODESSH, node,
2406 "node hasn't returned node ssh connectivity data")
2408 if nresult[constants.NV_NODELIST]:
2409 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2410 _ErrorIf(True, constants.CV_ENODESSH, node,
2411 "ssh communication with node '%s': %s", a_node, a_msg)
2413 test = constants.NV_NODENETTEST not in nresult
2414 _ErrorIf(test, constants.CV_ENODENET, node,
2415 "node hasn't returned node tcp connectivity data")
2417 if nresult[constants.NV_NODENETTEST]:
2418 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2420 _ErrorIf(True, constants.CV_ENODENET, node,
2421 "tcp communication with node '%s': %s",
2422 anode, nresult[constants.NV_NODENETTEST][anode])
2424 test = constants.NV_MASTERIP not in nresult
2425 _ErrorIf(test, constants.CV_ENODENET, node,
2426 "node hasn't returned node master IP reachability data")
2428 if not nresult[constants.NV_MASTERIP]:
2429 if node == self.master_node:
2430 msg = "the master node cannot reach the master IP (not configured?)"
2432 msg = "cannot reach the master IP"
2433 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2435 def _VerifyInstance(self, instance, instanceconfig, node_image,
2437 """Verify an instance.
2439 This function checks to see if the required block devices are
2440 available on the instance's node.
2443 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2444 node_current = instanceconfig.primary_node
2446 node_vol_should = {}
2447 instanceconfig.MapLVsByNode(node_vol_should)
2449 cluster = self.cfg.GetClusterInfo()
2450 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2452 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2453 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2455 for node in node_vol_should:
2456 n_img = node_image[node]
2457 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2458 # ignore missing volumes on offline or broken nodes
2460 for volume in node_vol_should[node]:
2461 test = volume not in n_img.volumes
2462 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2463 "volume %s missing on node %s", volume, node)
2465 if instanceconfig.admin_state == constants.ADMINST_UP:
2466 pri_img = node_image[node_current]
2467 test = instance not in pri_img.instances and not pri_img.offline
2468 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2469 "instance not running on its primary node %s",
2472 diskdata = [(nname, success, status, idx)
2473 for (nname, disks) in diskstatus.items()
2474 for idx, (success, status) in enumerate(disks)]
2476 for nname, success, bdev_status, idx in diskdata:
2477 # the 'ghost node' construction in Exec() ensures that we have a
2479 snode = node_image[nname]
2480 bad_snode = snode.ghost or snode.offline
2481 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2482 not success and not bad_snode,
2483 constants.CV_EINSTANCEFAULTYDISK, instance,
2484 "couldn't retrieve status for disk/%s on %s: %s",
2485 idx, nname, bdev_status)
2486 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2487 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2488 constants.CV_EINSTANCEFAULTYDISK, instance,
2489 "disk/%s on %s is faulty", idx, nname)
2491 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2492 """Verify if there are any unknown volumes in the cluster.
2494 The .os, .swap and backup volumes are ignored. All other volumes are
2495 reported as unknown.
2497 @type reserved: L{ganeti.utils.FieldSet}
2498 @param reserved: a FieldSet of reserved volume names
2501 for node, n_img in node_image.items():
2502 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2503 self.all_node_info[node].group != self.group_uuid):
2504 # skip non-healthy nodes
2506 for volume in n_img.volumes:
2507 test = ((node not in node_vol_should or
2508 volume not in node_vol_should[node]) and
2509 not reserved.Matches(volume))
2510 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2511 "volume %s is unknown", volume)
2513 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2514 """Verify N+1 Memory Resilience.
2516 Check that if one single node dies we can still start all the
2517 instances it was primary for.
2520 cluster_info = self.cfg.GetClusterInfo()
2521 for node, n_img in node_image.items():
2522 # This code checks that every node which is now listed as
2523 # secondary has enough memory to host all instances it is
2524 # supposed to should a single other node in the cluster fail.
2525 # FIXME: not ready for failover to an arbitrary node
2526 # FIXME: does not support file-backed instances
2527 # WARNING: we currently take into account down instances as well
2528 # as up ones, considering that even if they're down someone
2529 # might want to start them even in the event of a node failure.
2530 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2531 # we're skipping nodes marked offline and nodes in other groups from
2532 # the N+1 warning, since most likely we don't have good memory
2533 # information from them; we already list instances living on such
2534 # nodes, and that's enough warning
2536 #TODO(dynmem): also consider ballooning out other instances
2537 for prinode, instances in n_img.sbp.items():
2539 for instance in instances:
2540 bep = cluster_info.FillBE(instance_cfg[instance])
2541 if bep[constants.BE_AUTO_BALANCE]:
2542 needed_mem += bep[constants.BE_MINMEM]
2543 test = n_img.mfree < needed_mem
2544 self._ErrorIf(test, constants.CV_ENODEN1, node,
2545 "not enough memory to accomodate instance failovers"
2546 " should node %s fail (%dMiB needed, %dMiB available)",
2547 prinode, needed_mem, n_img.mfree)
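# Worked example (memory figures invented): if this node is secondary for two
# auto-balanced instances of prinode "nodeB" with BE_MINMEM of 2048 and 1024
# MiB, needed_mem is 3072; with mfree of 2500 MiB the node would be flagged
# with CV_ENODEN1 for "nodeB".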
2550 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2551 (files_all, files_opt, files_mc, files_vm)):
2552 """Verifies file checksums collected from all nodes.
2554 @param errorif: Callback for reporting errors
2555 @param nodeinfo: List of L{objects.Node} objects
2556 @param master_node: Name of master node
2557 @param all_nvinfo: RPC results
2560 # Define functions determining which nodes to consider for a file
2563 (files_mc, lambda node: (node.master_candidate or
2564 node.name == master_node)),
2565 (files_vm, lambda node: node.vm_capable),
2568 # Build mapping from filename to list of nodes which should have the file
2570 for (files, fn) in files2nodefn:
2572 filenodes = nodeinfo
2574 filenodes = filter(fn, nodeinfo)
2575 nodefiles.update((filename,
2576 frozenset(map(operator.attrgetter("name"), filenodes)))
2577 for filename in files)
2579 assert set(nodefiles) == (files_all | files_mc | files_vm)
2581 fileinfo = dict((filename, {}) for filename in nodefiles)
2582 ignore_nodes = set()
2584 for node in nodeinfo:
2586 ignore_nodes.add(node.name)
2589 nresult = all_nvinfo[node.name]
2591 if nresult.fail_msg or not nresult.payload:
2594 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2595 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2596 for (key, value) in fingerprints.items())
2599 test = not (node_files and isinstance(node_files, dict))
2600 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2601 "Node did not return file checksum data")
2603 ignore_nodes.add(node.name)
2606 # Build per-checksum mapping from filename to nodes having it
2607 for (filename, checksum) in node_files.items():
2608 assert filename in nodefiles
2609 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2611 for (filename, checksums) in fileinfo.items():
2612 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2614 # Nodes having the file
2615 with_file = frozenset(node_name
2616 for nodes in fileinfo[filename].values()
2617 for node_name in nodes) - ignore_nodes
2619 expected_nodes = nodefiles[filename] - ignore_nodes
2621 # Nodes missing file
2622 missing_file = expected_nodes - with_file
2624 if filename in files_opt:
2626 errorif(missing_file and missing_file != expected_nodes,
2627 constants.CV_ECLUSTERFILECHECK, None,
2628 "File %s is optional, but it must exist on all or no"
2629 " nodes (not found on %s)",
2630 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2632 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2633 "File %s is missing from node(s) %s", filename,
2634 utils.CommaJoin(utils.NiceSort(missing_file)))
2636 # Warn if a node has a file it shouldn't
2637 unexpected = with_file - expected_nodes
2639 constants.CV_ECLUSTERFILECHECK, None,
2640 "File %s should not exist on node(s) %s",
2641 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2643 # See if there are multiple versions of the file
2644 test = len(checksums) > 1
2646 variants = ["variant %s on %s" %
2647 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2648 for (idx, (checksum, nodes)) in
2649 enumerate(sorted(checksums.items()))]
2653 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2654 "File %s found with %s different checksums (%s)",
2655 filename, len(checksums), "; ".join(variants))
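# The bookkeeping above has, schematically, the shape (paths and checksums
# hypothetical):
#   fileinfo["/var/lib/ganeti/config.data"] = {"abc...": set(["node1"]),
#                                              "def...": set(["node2"])}
# and it is exactly a filename with more than one checksum key that triggers
# the "different checksums" error just emitted.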
2657 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2659 """Verifies and the node DRBD status.
2661 @type ninfo: L{objects.Node}
2662 @param ninfo: the node to check
2663 @param nresult: the remote results for the node
2664 @param instanceinfo: the dict of instances
2665 @param drbd_helper: the configured DRBD usermode helper
2666 @param drbd_map: the DRBD map as returned by
2667 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2671 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2674 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2675 test = (helper_result is None)
2676 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2677 "no drbd usermode helper returned")
2679 status, payload = helper_result
2681 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2682 "drbd usermode helper check unsuccessful: %s", payload)
2683 test = status and (payload != drbd_helper)
2684 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2685 "wrong drbd usermode helper: %s", payload)
2687 # compute the DRBD minors
2689 for minor, instance in drbd_map[node].items():
2690 test = instance not in instanceinfo
2691 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2692 "ghost instance '%s' in temporary DRBD map", instance)
2693 # ghost instance should not be running, but otherwise we
2694 # don't give double warnings (both ghost instance and
2695 # unallocated minor in use)
2697 node_drbd[minor] = (instance, False)
2699 instance = instanceinfo[instance]
2700 node_drbd[minor] = (instance.name,
2701 instance.admin_state == constants.ADMINST_UP)
2703 # and now check them
2704 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2705 test = not isinstance(used_minors, (tuple, list))
2706 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2707 "cannot parse drbd status file: %s", str(used_minors))
2709 # we cannot check drbd status
2712 for minor, (iname, must_exist) in node_drbd.items():
2713 test = minor not in used_minors and must_exist
2714 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2715 "drbd minor %d of instance %s is not active", minor, iname)
2716 for minor in used_minors:
2717 test = minor not in node_drbd
2718 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2719 "unallocated drbd minor %d is in use", minor)
2721 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2722 """Builds the node OS structures.
2724 @type ninfo: L{objects.Node}
2725 @param ninfo: the node to check
2726 @param nresult: the remote results for the node
2727 @param nimg: the node image object
2731 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2733 remote_os = nresult.get(constants.NV_OSLIST, None)
2734 test = (not isinstance(remote_os, list) or
2735 not compat.all(isinstance(v, list) and len(v) == 7
2736 for v in remote_os))
2738 _ErrorIf(test, constants.CV_ENODEOS, node,
2739 "node hasn't returned valid OS data")
2748 for (name, os_path, status, diagnose,
2749 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2751 if name not in os_dict:
2754 # parameters is a list of lists instead of list of tuples due to
2755 # JSON lacking a real tuple type, fix it:
2756 parameters = [tuple(v) for v in parameters]
2757 os_dict[name].append((os_path, status, diagnose,
2758 set(variants), set(parameters), set(api_ver)))
2760 nimg.oslist = os_dict
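# After this, nimg.oslist maps each OS name to a list of
# (path, status, diagnose, variants, parameters, api_versions) tuples, e.g.
# (values illustrative):
#   {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
#                     set(["default"]), set(), set([20]))]}
# More than one entry per name means duplicate OS definitions on the node.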
2762 def _VerifyNodeOS(self, ninfo, nimg, base):
2763 """Verifies the node OS list.
2765 @type ninfo: L{objects.Node}
2766 @param ninfo: the node to check
2767 @param nimg: the node image object
2768 @param base: the 'template' node we match against (e.g. from the master)
2772 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2774 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2776 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2777 for os_name, os_data in nimg.oslist.items():
2778 assert os_data, "Empty OS status for OS %s?!" % os_name
2779 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2780 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2781 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2782 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2783 "OS '%s' has multiple entries (first one shadows the rest): %s",
2784 os_name, utils.CommaJoin([v[0] for v in os_data]))
2785 # comparisons with the 'base' image
2786 test = os_name not in base.oslist
2787 _ErrorIf(test, constants.CV_ENODEOS, node,
2788 "Extra OS %s not present on reference node (%s)",
2792 assert base.oslist[os_name], "Base node has empty OS status?"
2793 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2795 # base OS is invalid, skipping
2797 for kind, a, b in [("API version", f_api, b_api),
2798 ("variants list", f_var, b_var),
2799 ("parameters", beautify_params(f_param),
2800 beautify_params(b_param))]:
2801 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2802 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2803 kind, os_name, base.name,
2804 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2806 # check any missing OSes
2807 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2808 _ErrorIf(missing, constants.CV_ENODEOS, node,
2809 "OSes present on reference node %s but missing on this node: %s",
2810 base.name, utils.CommaJoin(missing))
2812 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
2813 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2815 @type ninfo: L{objects.Node}
2816 @param ninfo: the node to check
2817 @param nresult: the remote results for the node
2818 @type is_master: bool
2819 @param is_master: Whether node is the master node
2825 (constants.ENABLE_FILE_STORAGE or
2826 constants.ENABLE_SHARED_FILE_STORAGE)):
2828 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
2830 # This should never happen
2831 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
2832 "Node did not return forbidden file storage paths")
2834 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
2835 "Found forbidden file storage paths: %s",
2836 utils.CommaJoin(fspaths))
2838 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
2839 constants.CV_ENODEFILESTORAGEPATHS, node,
2840 "Node should not have returned forbidden file storage"
2843 def _VerifyOob(self, ninfo, nresult):
2844 """Verifies out of band functionality of a node.
2846 @type ninfo: L{objects.Node}
2847 @param ninfo: the node to check
2848 @param nresult: the remote results for the node
2852 # We just have to verify the paths on master and/or master candidates
2853 # as the oob helper is invoked on the master
2854 if ((ninfo.master_candidate or ninfo.master_capable) and
2855 constants.NV_OOB_PATHS in nresult):
2856 for path_result in nresult[constants.NV_OOB_PATHS]:
2857 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2859 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2860 """Verifies and updates the node volume data.
2862 This function will update a L{NodeImage}'s internal structures
2863 with data from the remote call.
2865 @type ninfo: L{objects.Node}
2866 @param ninfo: the node to check
2867 @param nresult: the remote results for the node
2868 @param nimg: the node image object
2869 @param vg_name: the configured VG name
2873 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2875 nimg.lvm_fail = True
2876 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2879 elif isinstance(lvdata, basestring):
2880 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2881 utils.SafeEncode(lvdata))
2882 elif not isinstance(lvdata, dict):
2883 _ErrorIf(True, constants.CV_ENODELVM, node,
2884 "rpc call to node failed (lvlist)")
2886 nimg.volumes = lvdata
2887 nimg.lvm_fail = False
2889 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2890 """Verifies and updates the node instance list.
2892 If the listing was successful, then updates this node's instance
2893 list. Otherwise, it marks the RPC call as failed for the instance
2896 @type ninfo: L{objects.Node}
2897 @param ninfo: the node to check
2898 @param nresult: the remote results for the node
2899 @param nimg: the node image object
2902 idata = nresult.get(constants.NV_INSTANCELIST, None)
2903 test = not isinstance(idata, list)
2904 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2905 "rpc call to node failed (instancelist): %s",
2906 utils.SafeEncode(str(idata)))
2908 nimg.hyp_fail = True
2910 nimg.instances = idata
2912 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2913 """Verifies and computes a node information map
2915 @type ninfo: L{objects.Node}
2916 @param ninfo: the node to check
2917 @param nresult: the remote results for the node
2918 @param nimg: the node image object
2919 @param vg_name: the configured VG name
2923 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2925 # try to read free memory (from the hypervisor)
2926 hv_info = nresult.get(constants.NV_HVINFO, None)
2927 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2928 _ErrorIf(test, constants.CV_ENODEHV, node,
2929 "rpc call to node failed (hvinfo)")
2932 nimg.mfree = int(hv_info["memory_free"])
2933 except (ValueError, TypeError):
2934 _ErrorIf(True, constants.CV_ENODERPC, node,
2935 "node returned invalid nodeinfo, check hypervisor")
2937 # FIXME: devise a free space model for file based instances as well
2938 if vg_name is not None:
2939 test = (constants.NV_VGLIST not in nresult or
2940 vg_name not in nresult[constants.NV_VGLIST])
2941 _ErrorIf(test, constants.CV_ENODELVM, node,
2942 "node didn't return data for the volume group '%s'"
2943 " - it is either missing or broken", vg_name)
2946 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2947 except (ValueError, TypeError):
2948 _ErrorIf(True, constants.CV_ENODERPC, node,
2949 "node returned invalid LVM info, check LVM status")
2951 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2952 """Gets per-disk status information for all instances.
2954 @type nodelist: list of strings
2955 @param nodelist: Node names
2956 @type node_image: dict of (name, L{objects.Node})
2957 @param node_image: Node objects
2958 @type instanceinfo: dict of (name, L{objects.Instance})
2959 @param instanceinfo: Instance objects
2960 @rtype: {instance: {node: [(success, payload)]}}
2961 @return: a dictionary of per-instance dictionaries with nodes as
2962 keys and disk information as values; the disk information is a
2963 list of tuples (success, payload)
2966 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2969 node_disks_devonly = {}
2970 diskless_instances = set()
2971 diskless = constants.DT_DISKLESS
2973 for nname in nodelist:
2974 node_instances = list(itertools.chain(node_image[nname].pinst,
2975 node_image[nname].sinst))
2976 diskless_instances.update(inst for inst in node_instances
2977 if instanceinfo[inst].disk_template == diskless)
2978 disks = [(inst, disk)
2979 for inst in node_instances
2980 for disk in instanceinfo[inst].disks]
2983 # No need to collect data
2986 node_disks[nname] = disks
2988 # _AnnotateDiskParams already makes copies of the disks
2990 for (inst, dev) in disks:
2991 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2992 self.cfg.SetDiskID(anno_disk, nname)
2993 devonly.append(anno_disk)
2995 node_disks_devonly[nname] = devonly
2997 assert len(node_disks) == len(node_disks_devonly)
2999 # Collect data from all nodes with disks
3000 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3003 assert len(result) == len(node_disks)
3007 for (nname, nres) in result.items():
3008 disks = node_disks[nname]
3011 # No data from this node
3012 data = len(disks) * [(False, "node offline")]
3015 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3016 "while getting disk information: %s", msg)
3018 # No data from this node
3019 data = len(disks) * [(False, msg)]
3022 for idx, i in enumerate(nres.payload):
3023 if isinstance(i, (tuple, list)) and len(i) == 2:
3026 logging.warning("Invalid result from node %s, entry %d: %s",
3028 data.append((False, "Invalid result from the remote node"))
3030 for ((inst, _), status) in zip(disks, data):
3031 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3033 # Add empty entries for diskless instances.
3034 for inst in diskless_instances:
3035 assert inst not in instdisk
3038 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3039 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3040 compat.all(isinstance(s, (tuple, list)) and
3041 len(s) == 2 for s in statuses)
3042 for inst, nnames in instdisk.items()
3043 for nname, statuses in nnames.items())
3044 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
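# The returned instdisk structure looks schematically like (names invented):
#   {"inst1": {"nodeA": [(True, status0), (True, status1)],
#              "nodeB": [(True, status0), (True, status1)]}}
# with one (success, payload) pair per disk and node, and an empty dict for
# every diskless instance, as checked by the assertions above.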
3049 def _SshNodeSelector(group_uuid, all_nodes):
3050 """Create endless iterators for all potential SSH check hosts.
3053 nodes = [node for node in all_nodes
3054 if (node.group != group_uuid and
3056 keyfunc = operator.attrgetter("group")
3058 return map(itertools.cycle,
3059 [sorted(map(operator.attrgetter("name"), names))
3060 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3064 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3065 """Choose which nodes should talk to which other nodes.
3067 We will make nodes contact all nodes in their group, and one node from
3070 @warning: This algorithm has a known issue if one node group is much
3071 smaller than others (e.g. just one node). In such a case all other
3072 nodes will talk to the single node.
3075 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3076 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3078 return (online_nodes,
3079 dict((name, sorted([i.next() for i in sel]))
3080 for name in online_nodes))
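# A sketch of the result (node names hypothetical): for a group with online
# nodes "n1" and "n2" and one foreign group containing "n3" and "n4", this
# returns something like
#   (["n1", "n2"], {"n1": ["n3"], "n2": ["n4"]})
# so every online node checks its whole group plus one rotating
# representative per foreign group.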
3082 def BuildHooksEnv(self):
3085 Cluster-Verify hooks are run only in the post phase; their failure is
3086 logged in the verify output and causes the verification to fail.
3090 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3093 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3094 for node in self.my_node_info.values())
3098 def BuildHooksNodes(self):
3099 """Build hooks nodes.
3102 return ([], self.my_node_names)
3104 def Exec(self, feedback_fn):
3105 """Verify integrity of the node group, performing various test on nodes.
3108 # This method has too many local variables. pylint: disable=R0914
3109 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3111 if not self.my_node_names:
3113 feedback_fn("* Empty node group, skipping verification")
3117 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3118 verbose = self.op.verbose
3119 self._feedback_fn = feedback_fn
3121 vg_name = self.cfg.GetVGName()
3122 drbd_helper = self.cfg.GetDRBDHelper()
3123 cluster = self.cfg.GetClusterInfo()
3124 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3125 hypervisors = cluster.enabled_hypervisors
3126 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3128 i_non_redundant = [] # Non redundant instances
3129 i_non_a_balanced = [] # Non auto-balanced instances
3130 i_offline = 0 # Count of offline instances
3131 n_offline = 0 # Count of offline nodes
3132 n_drained = 0 # Count of nodes being drained
3133 node_vol_should = {}
3135 # FIXME: verify OS list
3138 filemap = _ComputeAncillaryFiles(cluster, False)
3140 # do local checksums
3141 master_node = self.master_node = self.cfg.GetMasterNode()
3142 master_ip = self.cfg.GetMasterIP()
3144 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3147 if self.cfg.GetUseExternalMipScript():
3148 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3150 node_verify_param = {
3151 constants.NV_FILELIST:
3152 map(vcluster.MakeVirtualPath,
3153 utils.UniqueSequence(filename
3154 for files in filemap
3155 for filename in files)),
3156 constants.NV_NODELIST:
3157 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3158 self.all_node_info.values()),
3159 constants.NV_HYPERVISOR: hypervisors,
3160 constants.NV_HVPARAMS:
3161 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3162 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3163 for node in node_data_list
3164 if not node.offline],
3165 constants.NV_INSTANCELIST: hypervisors,
3166 constants.NV_VERSION: None,
3167 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3168 constants.NV_NODESETUP: None,
3169 constants.NV_TIME: None,
3170 constants.NV_MASTERIP: (master_node, master_ip),
3171 constants.NV_OSLIST: None,
3172 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3173 constants.NV_USERSCRIPTS: user_scripts,
3176 if vg_name is not None:
3177 node_verify_param[constants.NV_VGLIST] = None
3178 node_verify_param[constants.NV_LVLIST] = vg_name
3179 node_verify_param[constants.NV_PVLIST] = [vg_name]
3182 node_verify_param[constants.NV_DRBDLIST] = None
3183 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3185 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3186 # Load file storage paths only from master node
3187 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3190 # FIXME: this needs to be changed per node-group, not cluster-wide
3192 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3193 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3194 bridges.add(default_nicpp[constants.NIC_LINK])
3195 for instance in self.my_inst_info.values():
3196 for nic in instance.nics:
3197 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3198 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3199 bridges.add(full_nic[constants.NIC_LINK])
3202 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3204 # Build our expected cluster state
3205 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3207 vm_capable=node.vm_capable))
3208 for node in node_data_list)
3212 for node in self.all_node_info.values():
3213 path = _SupportsOob(self.cfg, node)
3214 if path and path not in oob_paths:
3215 oob_paths.append(path)
3218 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3220 for instance in self.my_inst_names:
3221 inst_config = self.my_inst_info[instance]
3222 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3225 for nname in inst_config.all_nodes:
3226 if nname not in node_image:
3227 gnode = self.NodeImage(name=nname)
3228 gnode.ghost = (nname not in self.all_node_info)
3229 node_image[nname] = gnode
3231 inst_config.MapLVsByNode(node_vol_should)
3233 pnode = inst_config.primary_node
3234 node_image[pnode].pinst.append(instance)
3236 for snode in inst_config.secondary_nodes:
3237 nimg = node_image[snode]
3238 nimg.sinst.append(instance)
3239 if pnode not in nimg.sbp:
3240 nimg.sbp[pnode] = []
3241 nimg.sbp[pnode].append(instance)
3243 # At this point, we have the in-memory data structures complete,
3244 # except for the runtime information, which we'll gather next
3246 # Due to the way our RPC system works, exact response times cannot be
3247 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3248 # time before and after executing the request, we can at least have a time
3250 nvinfo_starttime = time.time()
3251 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3253 self.cfg.GetClusterName())
3254 nvinfo_endtime = time.time()
3256 if self.extra_lv_nodes and vg_name is not None:
3258 self.rpc.call_node_verify(self.extra_lv_nodes,
3259 {constants.NV_LVLIST: vg_name},
3260 self.cfg.GetClusterName())
3262 extra_lv_nvinfo = {}
3264 all_drbd_map = self.cfg.ComputeDRBDMap()
3266 feedback_fn("* Gathering disk information (%s nodes)" %
3267 len(self.my_node_names))
3268 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3271 feedback_fn("* Verifying configuration file consistency")
3273 # If not all nodes are being checked, we need to make sure the master node
3274 # and a non-checked vm_capable node are in the list.
3275 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3277 vf_nvinfo = all_nvinfo.copy()
3278 vf_node_info = list(self.my_node_info.values())
3279 additional_nodes = []
3280 if master_node not in self.my_node_info:
3281 additional_nodes.append(master_node)
3282 vf_node_info.append(self.all_node_info[master_node])
3283 # Add the first vm_capable node we find which is not included,
3284 # excluding the master node (which we already have)
3285 for node in absent_nodes:
3286 nodeinfo = self.all_node_info[node]
3287 if (nodeinfo.vm_capable and not nodeinfo.offline and
3288 node != master_node):
3289 additional_nodes.append(node)
3290 vf_node_info.append(self.all_node_info[node])
3292 key = constants.NV_FILELIST
3293 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3294 {key: node_verify_param[key]},
3295 self.cfg.GetClusterName()))
3297 vf_nvinfo = all_nvinfo
3298 vf_node_info = self.my_node_info.values()
3300 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3302 feedback_fn("* Verifying node status")
3306 for node_i in node_data_list:
3308 nimg = node_image[node]
3312 feedback_fn("* Skipping offline node %s" % (node,))
3316 if node == master_node:
3318 elif node_i.master_candidate:
3319 ntype = "master candidate"
3320 elif node_i.drained:
3326 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3328 msg = all_nvinfo[node].fail_msg
3329 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3332 nimg.rpc_fail = True
3335 nresult = all_nvinfo[node].payload
3337 nimg.call_ok = self._VerifyNode(node_i, nresult)
3338 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3339 self._VerifyNodeNetwork(node_i, nresult)
3340 self._VerifyNodeUserScripts(node_i, nresult)
3341 self._VerifyOob(node_i, nresult)
3342 self._VerifyFileStoragePaths(node_i, nresult,
3343 node == master_node)
3346 self._VerifyNodeLVM(node_i, nresult, vg_name)
3347 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3350 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3351 self._UpdateNodeInstances(node_i, nresult, nimg)
3352 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3353 self._UpdateNodeOS(node_i, nresult, nimg)
3355 if not nimg.os_fail:
3356 if refos_img is None:
3358 self._VerifyNodeOS(node_i, nimg, refos_img)
3359 self._VerifyNodeBridges(node_i, nresult, bridges)
3361 # Check whether all running instances are primary for the node. (This
3362 # can no longer be done from _VerifyInstance below, since some of the
3363 # wrong instances could be from other node groups.)
3364 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3366 for inst in non_primary_inst:
3367 test = inst in self.all_inst_info
3368 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3369 "instance should not run on node %s", node_i.name)
3370 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3371 "node is running unknown instance %s", inst)
3373 for node, result in extra_lv_nvinfo.items():
3374 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3375 node_image[node], vg_name)
3377 feedback_fn("* Verifying instance status")
3378 for instance in self.my_inst_names:
3380 feedback_fn("* Verifying instance %s" % instance)
3381 inst_config = self.my_inst_info[instance]
3382 self._VerifyInstance(instance, inst_config, node_image,
3384 inst_nodes_offline = []
3386 pnode = inst_config.primary_node
3387 pnode_img = node_image[pnode]
3388 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3389 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3390 " primary node failed", instance)
3392 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3394 constants.CV_EINSTANCEBADNODE, instance,
3395 "instance is marked as running and lives on offline node %s",
3396 inst_config.primary_node)
3398 # If the instance is non-redundant we cannot survive losing its primary
3399 # node, so we are not N+1 compliant.
3400 if inst_config.disk_template not in constants.DTS_MIRRORED:
3401 i_non_redundant.append(instance)
3403 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3404 constants.CV_EINSTANCELAYOUT,
3405 instance, "instance has multiple secondary nodes: %s",
3406 utils.CommaJoin(inst_config.secondary_nodes),
3407 code=self.ETYPE_WARNING)
3409 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3410 pnode = inst_config.primary_node
3411 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3412 instance_groups = {}
3414 for node in instance_nodes:
3415 instance_groups.setdefault(self.all_node_info[node].group,
3419 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3420 # Sort so that we always list the primary node first.
3421 for group, nodes in sorted(instance_groups.items(),
3422 key=lambda (_, nodes): pnode in nodes,
3425 self._ErrorIf(len(instance_groups) > 1,
3426 constants.CV_EINSTANCESPLITGROUPS,
3427 instance, "instance has primary and secondary nodes in"
3428 " different groups: %s", utils.CommaJoin(pretty_list),
3429 code=self.ETYPE_WARNING)
3431 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3432 i_non_a_balanced.append(instance)
3434 for snode in inst_config.secondary_nodes:
3435 s_img = node_image[snode]
3436 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3437 snode, "instance %s, connection to secondary node failed",
3441 inst_nodes_offline.append(snode)
3443 # warn that the instance lives on offline nodes
3444 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3445 "instance has offline secondary node(s) %s",
3446 utils.CommaJoin(inst_nodes_offline))
3447 # ... or ghost/non-vm_capable nodes
3448 for node in inst_config.all_nodes:
3449 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3450 instance, "instance lives on ghost node %s", node)
3451 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3452 instance, "instance lives on non-vm_capable node %s", node)
3454 feedback_fn("* Verifying orphan volumes")
3455 reserved = utils.FieldSet(*cluster.reserved_lvs)
3457 # We will get spurious "unknown volume" warnings if any node of this group
3458 # is secondary for an instance whose primary is in another group. To avoid
3459 # them, we find these instances and add their volumes to node_vol_should.
3460 for inst in self.all_inst_info.values():
3461 for secondary in inst.secondary_nodes:
3462 if (secondary in self.my_node_info
3463 and inst.name not in self.my_inst_info):
3464 inst.MapLVsByNode(node_vol_should)
3467 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3469 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3470 feedback_fn("* Verifying N+1 Memory redundancy")
3471 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3473 feedback_fn("* Other Notes")
3475 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3476 % len(i_non_redundant))
3478 if i_non_a_balanced:
3479 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3480 % len(i_non_a_balanced))
3483 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3486 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3489 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3493 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3494 """Analyze the post-hooks' result
3496 This method analyses the hook result, handles it, and sends some
3497 nicely-formatted feedback back to the user.
3499 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3500 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3501 @param hooks_results: the results of the multi-node hooks rpc call
3502 @param feedback_fn: function used to send feedback back to the caller
3503 @param lu_result: previous Exec result
3504 @return: the new Exec result, based on the previous result
3508 # We only really run POST phase hooks, only for non-empty groups,
3509 # and are only interested in their results
3510 if not self.my_node_names:
3513 elif phase == constants.HOOKS_PHASE_POST:
3514 # Used to change hooks' output to proper indentation
3515 feedback_fn("* Hooks Results")
3516 assert hooks_results, "invalid result from hooks"
3518 for node_name in hooks_results:
3519 res = hooks_results[node_name]
3521 test = msg and not res.offline
3522 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3523 "Communication failure in hooks execution: %s", msg)
3524 if res.offline or msg:
3525 # No need to investigate payload if node is offline or gave
3528 for script, hkr, output in res.payload:
3529 test = hkr == constants.HKR_FAIL
3530 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3531 "Script %s failed, output:", script)
3533 output = self._HOOKS_INDENT_RE.sub(" ", output)
3534 feedback_fn("%s" % output)
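# _HOOKS_INDENT_RE matches the start of every line (re.M), so the
# substitution above prefixes each line of the hook output with indentation;
# e.g. a two-line script output "check failed\nsee syslog" is reported as an
# indented two-line block, keeping the verify report readable.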
3540 class LUClusterVerifyDisks(NoHooksLU):
3541 """Verifies the cluster disks status.
3546 def ExpandNames(self):
3547 self.share_locks = _ShareAll()
3548 self.needed_locks = {
3549 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3552 def Exec(self, feedback_fn):
3553 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3555 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3556 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3557 for group in group_names])
3560 class LUGroupVerifyDisks(NoHooksLU):
3561 """Verifies the status of all disks in a node group.
3566 def ExpandNames(self):
3567 # Raises errors.OpPrereqError on its own if group can't be found
3568 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3570 self.share_locks = _ShareAll()
3571 self.needed_locks = {
3572 locking.LEVEL_INSTANCE: [],
3573 locking.LEVEL_NODEGROUP: [],
3574 locking.LEVEL_NODE: [],
3577 def DeclareLocks(self, level):
3578 if level == locking.LEVEL_INSTANCE:
3579 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3581 # Lock instances optimistically, needs verification once node and group
3582 # locks have been acquired
3583 self.needed_locks[locking.LEVEL_INSTANCE] = \
3584 self.cfg.GetNodeGroupInstances(self.group_uuid)
3586 elif level == locking.LEVEL_NODEGROUP:
3587 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3589 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3590 set([self.group_uuid] +
3591 # Lock all groups used by instances optimistically; this requires
3592 # going via the node before it's locked, requiring verification
3595 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3596 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3598 elif level == locking.LEVEL_NODE:
3599 # This will only lock the nodes in the group to be verified which contain
3601 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3602 self._LockInstancesNodes()
3604 # Lock all nodes in group to be verified
3605 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3606 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3607 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3609 def CheckPrereq(self):
3610 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3611 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3612 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3614 assert self.group_uuid in owned_groups
3616 # Check if locked instances are still correct
3617 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3619 # Get instance information
3620 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3622 # Check if node groups for locked instances are still correct
3623 _CheckInstancesNodeGroups(self.cfg, self.instances,
3624 owned_groups, owned_nodes, self.group_uuid)
3626 def Exec(self, feedback_fn):
3627 """Verify integrity of cluster disks.
3629 @rtype: tuple of three items
3630 @return: a tuple of (dict of node-to-node_error, list of instances
3631 which need activate-disks, dict of instance: (node, volume) for
3636 res_instances = set()
3639 nv_dict = _MapInstanceDisksToNodes(
3640 [inst for inst in self.instances.values()
3641 if inst.admin_state == constants.ADMINST_UP])
3644 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3645 set(self.cfg.GetVmCapableNodeList()))
3647 node_lvs = self.rpc.call_lv_list(nodes, [])
3649 for (node, node_res) in node_lvs.items():
3650 if node_res.offline:
3653 msg = node_res.fail_msg
3655 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3656 res_nodes[node] = msg
3659 for lv_name, (_, _, lv_online) in node_res.payload.items():
3660 inst = nv_dict.pop((node, lv_name), None)
3661 if not (lv_online or inst is None):
3662 res_instances.add(inst)
3664 # any leftover items in nv_dict are missing LVs, let's arrange the data
3666 for key, inst in nv_dict.iteritems():
3667 res_missing.setdefault(inst, []).append(list(key))
3669 return (res_nodes, list(res_instances), res_missing)
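# Illustrative return value (all names and messages hypothetical):
#   ({"node3": "Error while listing LVs"},        # nodes that failed the query
#    ["inst-web"],                                 # instances needing activate-disks
#    {"inst-db": [["node1", "xenvg/disk0"]]})      # missing (node, volume) pairs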
3672 class LUClusterRepairDiskSizes(NoHooksLU):
3673 """Verifies the cluster disks sizes.
3678 def ExpandNames(self):
3679 if self.op.instances:
3680 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3681 self.needed_locks = {
3682 locking.LEVEL_NODE_RES: [],
3683 locking.LEVEL_INSTANCE: self.wanted_names,
3685 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3687 self.wanted_names = None
3688 self.needed_locks = {
3689 locking.LEVEL_NODE_RES: locking.ALL_SET,
3690 locking.LEVEL_INSTANCE: locking.ALL_SET,
3692 self.share_locks = {
3693 locking.LEVEL_NODE_RES: 1,
3694 locking.LEVEL_INSTANCE: 0,
3697 def DeclareLocks(self, level):
3698 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3699 self._LockInstancesNodes(primary_only=True, level=level)
3701 def CheckPrereq(self):
3702 """Check prerequisites.
3704 This only checks the optional instance list against the existing names.
3707 if self.wanted_names is None:
3708 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3710 self.wanted_instances = \
3711 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3713 def _EnsureChildSizes(self, disk):
3714 """Ensure children of the disk have the needed disk size.
3716 This is valid mainly for DRBD8 and fixes an issue where the
3717 children have smaller disk size.
3719 @param disk: an L{ganeti.objects.Disk} object
3722 if disk.dev_type == constants.LD_DRBD8:
3723 assert disk.children, "Empty children for DRBD8?"
3724 fchild = disk.children[0]
3725 mismatch = fchild.size < disk.size
3726 if mismatch:
3727 self.LogInfo("Child disk has size %d, parent %d, fixing",
3728 fchild.size, disk.size)
3729 fchild.size = disk.size
3731 # and we recurse on this child only, not on the metadev
3732 return self._EnsureChildSizes(fchild) or mismatch
3733 else:
3734 return False
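# Rough illustration (not part of the upstream code): for a DRBD8 disk of
# 10240 MiB whose data child LV was recorded at 10236 MiB, the check above
# bumps the child to 10240 and returns True, telling the caller in Exec that
# the instance configuration needs to be written back via cfg.Update.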
3736 def Exec(self, feedback_fn):
3737 """Verify the size of cluster disks.
3740 # TODO: check child disks too
3741 # TODO: check differences in size between primary/secondary nodes
3743 for instance in self.wanted_instances:
3744 pnode = instance.primary_node
3745 if pnode not in per_node_disks:
3746 per_node_disks[pnode] = []
3747 for idx, disk in enumerate(instance.disks):
3748 per_node_disks[pnode].append((instance, idx, disk))
3750 assert not (frozenset(per_node_disks.keys()) -
3751 self.owned_locks(locking.LEVEL_NODE_RES)), \
3752 "Not owning correct locks"
3753 assert not self.owned_locks(locking.LEVEL_NODE)
3755 changed = []
3756 for node, dskl in per_node_disks.items():
3757 newl = [v[2].Copy() for v in dskl]
3758 for dsk in newl:
3759 self.cfg.SetDiskID(dsk, node)
3760 result = self.rpc.call_blockdev_getsize(node, newl)
3761 if result.fail_msg:
3762 self.LogWarning("Failure in blockdev_getsize call to node"
3763 " %s, ignoring", node)
3764 continue
3765 if len(result.payload) != len(dskl):
3766 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3767 " result.payload=%s", node, len(dskl), result.payload)
3768 self.LogWarning("Invalid result from node %s, ignoring node results",
3769 node)
3770 continue
3771 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3772 if size is None:
3773 self.LogWarning("Disk %d of instance %s did not return size"
3774 " information, ignoring", idx, instance.name)
3775 continue
3776 if not isinstance(size, (int, long)):
3777 self.LogWarning("Disk %d of instance %s did not return valid"
3778 " size information, ignoring", idx, instance.name)
3779 continue
3781 if size != disk.size:
3782 self.LogInfo("Disk %d of instance %s has mismatched size,"
3783 " correcting: recorded %d, actual %d", idx,
3784 instance.name, disk.size, size)
3785 disk.size = size
3786 self.cfg.Update(instance, feedback_fn)
3787 changed.append((instance.name, idx, size))
3788 if self._EnsureChildSizes(disk):
3789 self.cfg.Update(instance, feedback_fn)
3790 changed.append((instance.name, idx, disk.size))
3792 return changed
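# Hypothetical shape of the value returned by Exec (instance name invented
# for illustration): a list of (instance name, disk index, new size) tuples,
# e.g. [("instance1.example.com", 0, 10240)], empty when nothing was fixed.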
3794 class LUClusterRename(LogicalUnit):
3795 """Rename the cluster.
3798 HPATH = "cluster-rename"
3799 HTYPE = constants.HTYPE_CLUSTER
3801 def BuildHooksEnv(self):
3806 "OP_TARGET": self.cfg.GetClusterName(),
3807 "NEW_NAME": self.op.name,
3810 def BuildHooksNodes(self):
3811 """Build hooks nodes.
3814 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3816 def CheckPrereq(self):
3817 """Verify that the passed name is a valid one.
3820 hostname = netutils.GetHostname(name=self.op.name,
3821 family=self.cfg.GetPrimaryIPFamily())
3823 new_name = hostname.name
3824 self.ip = new_ip = hostname.ip
3825 old_name = self.cfg.GetClusterName()
3826 old_ip = self.cfg.GetMasterIP()
3827 if new_name == old_name and new_ip == old_ip:
3828 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3829 " cluster has changed",
3831 if new_ip != old_ip:
3832 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3833 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3834 " reachable on the network" %
3835 new_ip, errors.ECODE_NOTUNIQUE)
3837 self.op.name = new_name
3839 def Exec(self, feedback_fn):
3840 """Rename the cluster.
3843 clustername = self.op.name
3844 new_ip = self.ip
3846 # shutdown the master IP
3847 master_params = self.cfg.GetMasterNetworkParameters()
3848 ems = self.cfg.GetUseExternalMipScript()
3849 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3850 master_params, ems)
3851 result.Raise("Could not disable the master role")
3854 cluster = self.cfg.GetClusterInfo()
3855 cluster.cluster_name = clustername
3856 cluster.master_ip = new_ip
3857 self.cfg.Update(cluster, feedback_fn)
3859 # update the known hosts file
3860 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
3861 node_list = self.cfg.GetOnlineNodeList()
3863 node_list.remove(master_params.name)
3866 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
3868 master_params.ip = new_ip
3869 result = self.rpc.call_node_activate_master_ip(master_params.name,
3871 msg = result.fail_msg
3873 self.LogWarning("Could not re-enable the master role on"
3874 " the master, please restart manually: %s", msg)
3879 def _ValidateNetmask(cfg, netmask):
3880 """Checks if a netmask is valid.
3882 @type cfg: L{config.ConfigWriter}
3883 @param cfg: The cluster configuration
3885 @param netmask: the netmask to be verified
3886 @raise errors.OpPrereqError: if the validation fails
3889 ip_family = cfg.GetPrimaryIPFamily()
3890 try:
3891 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3892 except errors.ProgrammerError:
3893 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3894 ip_family, errors.ECODE_INVAL)
3895 if not ipcls.ValidateNetmask(netmask):
3896 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3897 (netmask), errors.ECODE_INVAL)
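# Minimal usage sketch (assumed, not taken from the module): on an IPv4
# cluster the netmask is a prefix length, so
#
#   _ValidateNetmask(self.cfg, 24)   # passes silently
#   _ValidateNetmask(self.cfg, 33)   # raises errors.OpPrereqError
#
# while an IPv6 cluster accepts prefix lengths up to 128 instead.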
3900 class LUClusterSetParams(LogicalUnit):
3901 """Change the parameters of the cluster.
3904 HPATH = "cluster-modify"
3905 HTYPE = constants.HTYPE_CLUSTER
3908 def CheckArguments(self):
3912 if self.op.uid_pool:
3913 uidpool.CheckUidPool(self.op.uid_pool)
3915 if self.op.add_uids:
3916 uidpool.CheckUidPool(self.op.add_uids)
3918 if self.op.remove_uids:
3919 uidpool.CheckUidPool(self.op.remove_uids)
3921 if self.op.master_netmask is not None:
3922 _ValidateNetmask(self.cfg, self.op.master_netmask)
3924 if self.op.diskparams:
3925 for dt_params in self.op.diskparams.values():
3926 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3927 try:
3928 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3929 except errors.OpPrereqError, err:
3930 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
3931 errors.ECODE_INVAL)
3933 def ExpandNames(self):
3934 # FIXME: in the future maybe other cluster params won't require checking on
3935 # all nodes to be modified.
3936 self.needed_locks = {
3937 locking.LEVEL_NODE: locking.ALL_SET,
3938 locking.LEVEL_INSTANCE: locking.ALL_SET,
3939 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3941 self.share_locks = {
3942 locking.LEVEL_NODE: 1,
3943 locking.LEVEL_INSTANCE: 1,
3944 locking.LEVEL_NODEGROUP: 1,
3947 def BuildHooksEnv(self):
3952 "OP_TARGET": self.cfg.GetClusterName(),
3953 "NEW_VG_NAME": self.op.vg_name,
3956 def BuildHooksNodes(self):
3957 """Build hooks nodes.
3960 mn = self.cfg.GetMasterNode()
3963 def CheckPrereq(self):
3964 """Check prerequisites.
3966 This checks whether the given params don't conflict and
3967 if the given volume group is valid.
3970 if self.op.vg_name is not None and not self.op.vg_name:
3971 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3972 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3973 " instances exist", errors.ECODE_INVAL)
3975 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3976 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3977 raise errors.OpPrereqError("Cannot disable drbd helper while"
3978 " drbd-based instances exist",
3981 node_list = self.owned_locks(locking.LEVEL_NODE)
3983 # if vg_name is not None, check the given volume group on all nodes
3984 if self.op.vg_name:
3985 vglist = self.rpc.call_vg_list(node_list)
3986 for node in node_list:
3987 msg = vglist[node].fail_msg
3988 if msg:
3989 # ignoring down node
3990 self.LogWarning("Error while gathering data on node %s"
3991 " (ignoring node): %s", node, msg)
3992 continue
3993 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3994 self.op.vg_name,
3995 constants.MIN_VG_SIZE)
3996 if vgstatus:
3997 raise errors.OpPrereqError("Error on node '%s': %s" %
3998 (node, vgstatus), errors.ECODE_ENVIRON)
4000 if self.op.drbd_helper:
4001 # checks given drbd helper on all nodes
4002 helpers = self.rpc.call_drbd_helper(node_list)
4003 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4005 self.LogInfo("Not checking drbd helper on offline node %s", node)
4007 msg = helpers[node].fail_msg
4009 raise errors.OpPrereqError("Error checking drbd helper on node"
4010 " '%s': %s" % (node, msg),
4011 errors.ECODE_ENVIRON)
4012 node_helper = helpers[node].payload
4013 if node_helper != self.op.drbd_helper:
4014 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4015 (node, node_helper), errors.ECODE_ENVIRON)
4017 self.cluster = cluster = self.cfg.GetClusterInfo()
4018 # validate params changes
4019 if self.op.beparams:
4020 objects.UpgradeBeParams(self.op.beparams)
4021 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4022 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4024 if self.op.ndparams:
4025 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4026 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4028 # TODO: we need a more general way to handle resetting
4029 # cluster-level parameters to default values
4030 if self.new_ndparams["oob_program"] == "":
4031 self.new_ndparams["oob_program"] = \
4032 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4034 if self.op.hv_state:
4035 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4036 self.cluster.hv_state_static)
4037 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4038 for hv, values in new_hv_state.items())
4040 if self.op.disk_state:
4041 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4042 self.cluster.disk_state_static)
4043 self.new_disk_state = \
4044 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4045 for name, values in svalues.items()))
4046 for storage, svalues in new_disk_state.items())
4049 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4052 all_instances = self.cfg.GetAllInstancesInfo().values()
4053 violations = set()
4054 for group in self.cfg.GetAllNodeGroupsInfo().values():
4055 instances = frozenset([inst for inst in all_instances
4056 if compat.any(node in group.members
4057 for node in inst.all_nodes)])
4058 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4059 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4060 new = _ComputeNewInstanceViolations(ipol,
4061 new_ipolicy, instances)
4063 violations.update(new)
4065 if violations:
4066 self.LogWarning("After the ipolicy change the following instances"
4067 " violate them: %s",
4068 utils.CommaJoin(utils.NiceSort(violations)))
4070 if self.op.nicparams:
4071 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4072 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4073 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4076 # check all instances for consistency
4077 for instance in self.cfg.GetAllInstancesInfo().values():
4078 for nic_idx, nic in enumerate(instance.nics):
4079 params_copy = copy.deepcopy(nic.nicparams)
4080 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4082 # check parameter syntax
4084 objects.NIC.CheckParameterSyntax(params_filled)
4085 except errors.ConfigurationError, err:
4086 nic_errors.append("Instance %s, nic/%d: %s" %
4087 (instance.name, nic_idx, err))
4089 # if we're moving instances to routed, check that they have an ip
4090 target_mode = params_filled[constants.NIC_MODE]
4091 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4092 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4093 " address" % (instance.name, nic_idx))
4095 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4096 "\n".join(nic_errors), errors.ECODE_INVAL)
4098 # hypervisor list/parameters
4099 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4100 if self.op.hvparams:
4101 for hv_name, hv_dict in self.op.hvparams.items():
4102 if hv_name not in self.new_hvparams:
4103 self.new_hvparams[hv_name] = hv_dict
4104 else:
4105 self.new_hvparams[hv_name].update(hv_dict)
4107 # disk template parameters
4108 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4109 if self.op.diskparams:
4110 for dt_name, dt_params in self.op.diskparams.items():
4111 if dt_name not in self.new_diskparams:
4112 self.new_diskparams[dt_name] = dt_params
4113 else:
4114 self.new_diskparams[dt_name].update(dt_params)
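# Merge semantics sketch (hypothetical values, not from the original code):
# with cluster diskparams {"drbd": {"metavg": "xenvg"}} and an opcode passing
# {"drbd": {"resync-rate": 61440}}, new_diskparams keeps both keys for "drbd";
# per-template dicts are updated in place rather than replaced wholesale.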
4116 # os hypervisor parameters
4117 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4118 if self.op.os_hvp:
4119 for os_name, hvs in self.op.os_hvp.items():
4120 if os_name not in self.new_os_hvp:
4121 self.new_os_hvp[os_name] = hvs
4122 else:
4123 for hv_name, hv_dict in hvs.items():
4124 if hv_name not in self.new_os_hvp[os_name]:
4125 self.new_os_hvp[os_name][hv_name] = hv_dict
4126 else:
4127 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4130 self.new_osp = objects.FillDict(cluster.osparams, {})
4131 if self.op.osparams:
4132 for os_name, osp in self.op.osparams.items():
4133 if os_name not in self.new_osp:
4134 self.new_osp[os_name] = {}
4136 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4139 if not self.new_osp[os_name]:
4140 # we removed all parameters
4141 del self.new_osp[os_name]
4143 # check the parameter validity (remote check)
4144 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4145 os_name, self.new_osp[os_name])
4147 # changes to the hypervisor list
4148 if self.op.enabled_hypervisors is not None:
4149 self.hv_list = self.op.enabled_hypervisors
4150 for hv in self.hv_list:
4151 # if the hypervisor doesn't already exist in the cluster
4152 # hvparams, we initialize it to empty, and then (in both
4153 # cases) we make sure to fill the defaults, as we might not
4154 # have a complete defaults list if the hypervisor wasn't
4155 # enabled before
4156 if hv not in new_hvp:
4157 new_hvp[hv] = {}
4158 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4159 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4160 else:
4161 self.hv_list = cluster.enabled_hypervisors
4163 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4164 # either the enabled list has changed, or the parameters have, validate
4165 for hv_name, hv_params in self.new_hvparams.items():
4166 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4167 (self.op.enabled_hypervisors and
4168 hv_name in self.op.enabled_hypervisors)):
4169 # either this is a new hypervisor, or its parameters have changed
4170 hv_class = hypervisor.GetHypervisor(hv_name)
4171 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4172 hv_class.CheckParameterSyntax(hv_params)
4173 _CheckHVParams(self, node_list, hv_name, hv_params)
4176 # no need to check any newly-enabled hypervisors, since the
4177 # defaults have already been checked in the above code-block
4178 for os_name, os_hvp in self.new_os_hvp.items():
4179 for hv_name, hv_params in os_hvp.items():
4180 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4181 # we need to fill in the new os_hvp on top of the actual hv_p
4182 cluster_defaults = self.new_hvparams.get(hv_name, {})
4183 new_osp = objects.FillDict(cluster_defaults, hv_params)
4184 hv_class = hypervisor.GetHypervisor(hv_name)
4185 hv_class.CheckParameterSyntax(new_osp)
4186 _CheckHVParams(self, node_list, hv_name, new_osp)
4188 if self.op.default_iallocator:
4189 alloc_script = utils.FindFile(self.op.default_iallocator,
4190 constants.IALLOCATOR_SEARCH_PATH,
4192 if alloc_script is None:
4193 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4194 " specified" % self.op.default_iallocator,
4197 def Exec(self, feedback_fn):
4198 """Change the parameters of the cluster.
4201 if self.op.vg_name is not None:
4202 new_volume = self.op.vg_name
4205 if new_volume != self.cfg.GetVGName():
4206 self.cfg.SetVGName(new_volume)
4208 feedback_fn("Cluster LVM configuration already in desired"
4209 " state, not changing")
4210 if self.op.drbd_helper is not None:
4211 new_helper = self.op.drbd_helper
4214 if new_helper != self.cfg.GetDRBDHelper():
4215 self.cfg.SetDRBDHelper(new_helper)
4217 feedback_fn("Cluster DRBD helper already in desired state,"
4219 if self.op.hvparams:
4220 self.cluster.hvparams = self.new_hvparams
4221 if self.op.os_hvp:
4222 self.cluster.os_hvp = self.new_os_hvp
4223 if self.op.enabled_hypervisors is not None:
4224 self.cluster.hvparams = self.new_hvparams
4225 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4226 if self.op.beparams:
4227 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4228 if self.op.nicparams:
4229 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4230 if self.op.ipolicy:
4231 self.cluster.ipolicy = self.new_ipolicy
4232 if self.op.osparams:
4233 self.cluster.osparams = self.new_osp
4234 if self.op.ndparams:
4235 self.cluster.ndparams = self.new_ndparams
4236 if self.op.diskparams:
4237 self.cluster.diskparams = self.new_diskparams
4238 if self.op.hv_state:
4239 self.cluster.hv_state_static = self.new_hv_state
4240 if self.op.disk_state:
4241 self.cluster.disk_state_static = self.new_disk_state
4243 if self.op.candidate_pool_size is not None:
4244 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4245 # we need to update the pool size here, otherwise the save will fail
4246 _AdjustCandidatePool(self, [])
4248 if self.op.maintain_node_health is not None:
4249 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4250 feedback_fn("Note: CONFD was disabled at build time, node health"
4251 " maintenance is not useful (still enabling it)")
4252 self.cluster.maintain_node_health = self.op.maintain_node_health
4254 if self.op.prealloc_wipe_disks is not None:
4255 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4257 if self.op.add_uids is not None:
4258 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4260 if self.op.remove_uids is not None:
4261 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4263 if self.op.uid_pool is not None:
4264 self.cluster.uid_pool = self.op.uid_pool
4266 if self.op.default_iallocator is not None:
4267 self.cluster.default_iallocator = self.op.default_iallocator
4269 if self.op.reserved_lvs is not None:
4270 self.cluster.reserved_lvs = self.op.reserved_lvs
4272 if self.op.use_external_mip_script is not None:
4273 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4275 def helper_os(aname, mods, desc):
4276 desc += " OS list"
4277 lst = getattr(self.cluster, aname)
4278 for key, val in mods:
4279 if key == constants.DDM_ADD:
4280 if val in lst:
4281 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4282 else:
4283 lst.append(val)
4284 elif key == constants.DDM_REMOVE:
4285 if val in lst:
4286 lst.remove(val)
4287 else:
4288 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4289 else:
4290 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4292 if self.op.hidden_os:
4293 helper_os("hidden_os", self.op.hidden_os, "hidden")
4295 if self.op.blacklisted_os:
4296 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4298 if self.op.master_netdev:
4299 master_params = self.cfg.GetMasterNetworkParameters()
4300 ems = self.cfg.GetUseExternalMipScript()
4301 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4302 self.cluster.master_netdev)
4303 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4305 result.Raise("Could not disable the master ip")
4306 feedback_fn("Changing master_netdev from %s to %s" %
4307 (master_params.netdev, self.op.master_netdev))
4308 self.cluster.master_netdev = self.op.master_netdev
4310 if self.op.master_netmask:
4311 master_params = self.cfg.GetMasterNetworkParameters()
4312 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4313 result = self.rpc.call_node_change_master_netmask(master_params.name,
4314 master_params.netmask,
4315 self.op.master_netmask,
4317 master_params.netdev)
4319 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4322 self.cluster.master_netmask = self.op.master_netmask
4324 self.cfg.Update(self.cluster, feedback_fn)
4326 if self.op.master_netdev:
4327 master_params = self.cfg.GetMasterNetworkParameters()
4328 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4329 self.op.master_netdev)
4330 ems = self.cfg.GetUseExternalMipScript()
4331 result = self.rpc.call_node_activate_master_ip(master_params.name,
4334 self.LogWarning("Could not re-enable the master ip on"
4335 " the master, please restart manually: %s",
4339 def _UploadHelper(lu, nodes, fname):
4340 """Helper for uploading a file and showing warnings.
4343 if os.path.exists(fname):
4344 result = lu.rpc.call_upload_file(nodes, fname)
4345 for to_node, to_result in result.items():
4346 msg = to_result.fail_msg
4347 if msg:
4348 msg = ("Copy of file %s to node %s failed: %s" %
4349 (fname, to_node, msg))
4350 lu.proc.LogWarning(msg)
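# Hedged usage note: callers pass an already filtered node list, e.g.
#
#   _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
#
# Files missing locally are skipped (the os.path.exists check above) and
# per-node copy failures only produce warnings, so distribution never aborts
# the calling LU.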
4353 def _ComputeAncillaryFiles(cluster, redist):
4354 """Compute files external to Ganeti which need to be consistent.
4356 @type redist: boolean
4357 @param redist: Whether to include files which need to be redistributed
4360 # Compute files for all nodes
4362 pathutils.SSH_KNOWN_HOSTS_FILE,
4363 pathutils.CONFD_HMAC_KEY,
4364 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4365 pathutils.SPICE_CERT_FILE,
4366 pathutils.SPICE_CACERT_FILE,
4367 pathutils.RAPI_USERS_FILE,
4371 # we need to ship at least the RAPI certificate
4372 files_all.add(pathutils.RAPI_CERT_FILE)
4374 files_all.update(pathutils.ALL_CERT_FILES)
4375 files_all.update(ssconf.SimpleStore().GetFileList())
4377 if cluster.modify_etc_hosts:
4378 files_all.add(pathutils.ETC_HOSTS)
4380 if cluster.use_external_mip_script:
4381 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4383 # Files which are optional, these must:
4384 # - be present in one other category as well
4385 # - either exist or not exist on all nodes of that category (mc, vm all)
4387 pathutils.RAPI_USERS_FILE,
4390 # Files which should only be on master candidates
4394 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4398 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4399 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4400 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4402 # Files which should only be on VM-capable nodes
4405 for hv_name in cluster.enabled_hypervisors
4406 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4410 for hv_name in cluster.enabled_hypervisors
4411 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4413 # Filenames in each category must be unique
4414 all_files_set = files_all | files_mc | files_vm
4415 assert (len(all_files_set) ==
4416 sum(map(len, [files_all, files_mc, files_vm]))), \
4417 "Found file listed in more than one file list"
4419 # Optional files must be present in one other category
4420 assert all_files_set.issuperset(files_opt), \
4421 "Optional file not in a different required list"
4423 # This one file should never ever be re-distributed via RPC
4424 assert not (redist and
4425 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4427 return (files_all, files_opt, files_mc, files_vm)
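# Indicative split of the four sets returned above (the exact contents depend
# on build options and cluster settings, so treat this as a sketch only):
#   files_all - distributed everywhere, e.g. known_hosts and the HMAC key
#   files_opt - may legitimately be missing, e.g. the RAPI users file
#   files_mc  - master candidates only, e.g. the cluster configuration file
#   files_vm  - VM-capable nodes only, i.e. hypervisor ancillary files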
4430 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4431 """Distribute additional files which are part of the cluster configuration.
4433 ConfigWriter takes care of distributing the config and ssconf files, but
4434 there are more files which should be distributed to all nodes. This function
4435 makes sure those are copied.
4437 @param lu: calling logical unit
4438 @param additional_nodes: list of nodes not in the config to distribute to
4439 @type additional_vm: boolean
4440 @param additional_vm: whether the additional nodes are vm-capable or not
4443 # Gather target nodes
4444 cluster = lu.cfg.GetClusterInfo()
4445 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4447 online_nodes = lu.cfg.GetOnlineNodeList()
4448 online_set = frozenset(online_nodes)
4449 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4451 if additional_nodes is not None:
4452 online_nodes.extend(additional_nodes)
4454 vm_nodes.extend(additional_nodes)
4456 # Never distribute to master node
4457 for nodelist in [online_nodes, vm_nodes]:
4458 if master_info.name in nodelist:
4459 nodelist.remove(master_info.name)
4462 (files_all, _, files_mc, files_vm) = \
4463 _ComputeAncillaryFiles(cluster, True)
4465 # Never re-distribute configuration file from here
4466 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4467 pathutils.CLUSTER_CONF_FILE in files_vm)
4468 assert not files_mc, "Master candidates not handled in this function"
4471 (online_nodes, files_all),
4472 (vm_nodes, files_vm),
4476 for (node_list, files) in filemap:
4478 _UploadHelper(lu, node_list, fname)
4481 class LUClusterRedistConf(NoHooksLU):
4482 """Force the redistribution of cluster configuration.
4484 This is a very simple LU.
4489 def ExpandNames(self):
4490 self.needed_locks = {
4491 locking.LEVEL_NODE: locking.ALL_SET,
4493 self.share_locks[locking.LEVEL_NODE] = 1
4495 def Exec(self, feedback_fn):
4496 """Redistribute the configuration.
4499 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4500 _RedistributeAncillaryFiles(self)
4503 class LUClusterActivateMasterIp(NoHooksLU):
4504 """Activate the master IP on the master node.
4507 def Exec(self, feedback_fn):
4508 """Activate the master IP.
4511 master_params = self.cfg.GetMasterNetworkParameters()
4512 ems = self.cfg.GetUseExternalMipScript()
4513 result = self.rpc.call_node_activate_master_ip(master_params.name,
4515 result.Raise("Could not activate the master IP")
4518 class LUClusterDeactivateMasterIp(NoHooksLU):
4519 """Deactivate the master IP on the master node.
4522 def Exec(self, feedback_fn):
4523 """Deactivate the master IP.
4526 master_params = self.cfg.GetMasterNetworkParameters()
4527 ems = self.cfg.GetUseExternalMipScript()
4528 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4530 result.Raise("Could not deactivate the master IP")
4533 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4534 """Sleep and poll for an instance's disk to sync.
4537 if not instance.disks or disks is not None and not disks:
4538 return True
4540 disks = _ExpandCheckDisks(instance, disks)
4542 if not oneshot:
4543 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4545 node = instance.primary_node
4547 for dev in disks:
4548 lu.cfg.SetDiskID(dev, node)
4550 # TODO: Convert to utils.Retry
4553 degr_retries = 10 # in seconds, as we sleep 1 second each time
4557 cumul_degraded = False
4558 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4559 msg = rstats.fail_msg
4561 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4564 raise errors.RemoteError("Can't contact node %s for mirror data,"
4565 " aborting." % node)
4568 rstats = rstats.payload
4570 for i, mstat in enumerate(rstats):
4572 lu.LogWarning("Can't compute data for node %s/%s",
4573 node, disks[i].iv_name)
4576 cumul_degraded = (cumul_degraded or
4577 (mstat.is_degraded and mstat.sync_percent is None))
4578 if mstat.sync_percent is not None:
4580 if mstat.estimated_time is not None:
4581 rem_time = ("%s remaining (estimated)" %
4582 utils.FormatSeconds(mstat.estimated_time))
4583 max_time = mstat.estimated_time
4585 rem_time = "no time estimate"
4586 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4587 (disks[i].iv_name, mstat.sync_percent, rem_time))
4589 # if we're done but degraded, let's do a few small retries, to
4590 # make sure we see a stable and not transient situation; therefore
4591 # we force restart of the loop
4592 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4593 logging.info("Degraded disks found, %d retries left", degr_retries)
4594 degr_retries -= 1
4595 time.sleep(1)
4596 continue
4598 if done or oneshot:
4599 break
4601 time.sleep(min(60, max_time))
4604 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4605 return not cumul_degraded
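# Illustrative call pattern (an assumption based on how sibling LUs in this
# module use the helper):
#
#   if not _WaitForSync(self, instance):
#     raise errors.OpExecError("Disks of instance %s are degraded" %
#                              instance.name)
#
# i.e. a False return value means at least one mirror is still degraded.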
4608 def _BlockdevFind(lu, node, dev, instance):
4609 """Wrapper around call_blockdev_find to annotate diskparams.
4611 @param lu: A reference to the lu object
4612 @param node: The node to call out to
4613 @param dev: The device to find
4614 @param instance: The instance object the device belongs to
4615 @returns The result of the rpc call
4618 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4619 return lu.rpc.call_blockdev_find(node, disk)
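# Usage sketch (assumed): this stands in for a bare
# lu.rpc.call_blockdev_find(node, dev) whenever dev may still carry
# unfilled disk parameters, e.g.
#
#   result = _BlockdevFind(self, node, dev, instance)
#   result.Raise("Can't find device %s on node %s" % (dev.iv_name, node))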
4622 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4623 """Wrapper around L{_CheckDiskConsistencyInner}.
4626 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4627 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4631 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4633 """Check that mirrors are not degraded.
4635 @attention: The device has to be annotated already.
4637 The ldisk parameter, if True, will change the test from the
4638 is_degraded attribute (which represents overall non-ok status for
4639 the device(s)) to the ldisk (representing the local storage status).
4642 lu.cfg.SetDiskID(dev, node)
4644 result = True
4646 if on_primary or dev.AssembleOnSecondary():
4647 rstats = lu.rpc.call_blockdev_find(node, dev)
4648 msg = rstats.fail_msg
4649 if msg:
4650 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4651 result = False
4652 elif not rstats.payload:
4653 lu.LogWarning("Can't find disk on node %s", node)
4654 result = False
4655 else:
4656 if ldisk:
4657 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4658 else:
4659 result = result and not rstats.payload.is_degraded
4661 if dev.children:
4662 for child in dev.children:
4663 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4664 on_primary)
4666 return result
4669 class LUOobCommand(NoHooksLU):
4670 """Logical unit for OOB handling.
4674 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4676 def ExpandNames(self):
4677 """Gather locks we need.
4680 if self.op.node_names:
4681 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4682 lock_names = self.op.node_names
4684 lock_names = locking.ALL_SET
4686 self.needed_locks = {
4687 locking.LEVEL_NODE: lock_names,
4690 def CheckPrereq(self):
4691 """Check prerequisites.
4694 - the node exists in the configuration
4697 Any errors are signaled by raising errors.OpPrereqError.
4699 """
4700 self.nodes = []
4701 self.master_node = self.cfg.GetMasterNode()
4703 assert self.op.power_delay >= 0.0
4705 if self.op.node_names:
4706 if (self.op.command in self._SKIP_MASTER and
4707 self.master_node in self.op.node_names):
4708 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4709 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4711 if master_oob_handler:
4712 additional_text = ("run '%s %s %s' if you want to operate on the"
4713 " master regardless") % (master_oob_handler,
4717 additional_text = "it does not support out-of-band operations"
4719 raise errors.OpPrereqError(("Operating on the master node %s is not"
4720 " allowed for %s; %s") %
4721 (self.master_node, self.op.command,
4722 additional_text), errors.ECODE_INVAL)
4724 self.op.node_names = self.cfg.GetNodeList()
4725 if self.op.command in self._SKIP_MASTER:
4726 self.op.node_names.remove(self.master_node)
4728 if self.op.command in self._SKIP_MASTER:
4729 assert self.master_node not in self.op.node_names
4731 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4733 raise errors.OpPrereqError("Node %s not found" % node_name,
4736 self.nodes.append(node)
4738 if (not self.op.ignore_status and
4739 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4740 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4741 " not marked offline") % node_name,
4744 def Exec(self, feedback_fn):
4745 """Execute OOB and return result if we expect any.
4748 master_node = self.master_node
4749 ret = []
4751 for idx, node in enumerate(utils.NiceSort(self.nodes,
4752 key=lambda node: node.name)):
4753 node_entry = [(constants.RS_NORMAL, node.name)]
4754 ret.append(node_entry)
4756 oob_program = _SupportsOob(self.cfg, node)
4759 node_entry.append((constants.RS_UNAVAIL, None))
4762 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4763 self.op.command, oob_program, node.name)
4764 result = self.rpc.call_run_oob(master_node, oob_program,
4765 self.op.command, node.name,
4769 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4770 node.name, result.fail_msg)
4771 node_entry.append((constants.RS_NODATA, None))
4774 self._CheckPayload(result)
4775 except errors.OpExecError, err:
4776 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4778 node_entry.append((constants.RS_NODATA, None))
4780 if self.op.command == constants.OOB_HEALTH:
4781 # For health we should log important events
4782 for item, status in result.payload:
4783 if status in [constants.OOB_STATUS_WARNING,
4784 constants.OOB_STATUS_CRITICAL]:
4785 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4786 item, node.name, status)
4788 if self.op.command == constants.OOB_POWER_ON:
4789 node.powered = True
4790 elif self.op.command == constants.OOB_POWER_OFF:
4791 node.powered = False
4792 elif self.op.command == constants.OOB_POWER_STATUS:
4793 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4794 if powered != node.powered:
4795 logging.warning(("Recorded power state (%s) of node '%s' does not"
4796 " match actual power state (%s)"), node.powered,
4799 # For configuration changing commands we should update the node
4800 if self.op.command in (constants.OOB_POWER_ON,
4801 constants.OOB_POWER_OFF):
4802 self.cfg.Update(node, feedback_fn)
4804 node_entry.append((constants.RS_NORMAL, result.payload))
4806 if (self.op.command == constants.OOB_POWER_ON and
4807 idx < len(self.nodes) - 1):
4808 time.sleep(self.op.power_delay)
4810 return ret
4812 def _CheckPayload(self, result):
4813 """Checks if the payload is valid.
4815 @param result: RPC result
4816 @raises errors.OpExecError: If payload is not valid
4820 if self.op.command == constants.OOB_HEALTH:
4821 if not isinstance(result.payload, list):
4822 errs.append("command 'health' is expected to return a list but got %s" %
4823 type(result.payload))
4825 for item, status in result.payload:
4826 if status not in constants.OOB_STATUSES:
4827 errs.append("health item '%s' has invalid status '%s'" %
4830 if self.op.command == constants.OOB_POWER_STATUS:
4831 if not isinstance(result.payload, dict):
4832 errs.append("power-status is expected to return a dict but got %s" %
4833 type(result.payload))
4835 if self.op.command in [
4836 constants.OOB_POWER_ON,
4837 constants.OOB_POWER_OFF,
4838 constants.OOB_POWER_CYCLE,
4840 if result.payload is not None:
4841 errs.append("%s is expected to not return payload but got '%s'" %
4842 (self.op.command, result.payload))
4845 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4846 utils.CommaJoin(errs))
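# Summary of the payload shapes enforced above, for illustration only
# (derived from these checks, not from an external specification):
#   OOB_HEALTH             -> list of (item, status) pairs, with status in
#                             constants.OOB_STATUSES
#   OOB_POWER_STATUS       -> dict containing the "powered" boolean
#   OOB_POWER_ON/OFF/CYCLE -> no payload at all (None)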
4849 class _OsQuery(_QueryBase):
4850 FIELDS = query.OS_FIELDS
4852 def ExpandNames(self, lu):
4853 # Lock all nodes in shared mode
4854 # Temporary removal of locks, should be reverted later
4855 # TODO: reintroduce locks when they are lighter-weight
4856 lu.needed_locks = {}
4857 #self.share_locks[locking.LEVEL_NODE] = 1
4858 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4860 # The following variables interact with _QueryBase._GetNames
4862 self.wanted = self.names
4864 self.wanted = locking.ALL_SET
4866 self.do_locking = self.use_locking
4868 def DeclareLocks(self, lu, level):
4872 def _DiagnoseByOS(rlist):
4873 """Remaps a per-node return list into an a per-os per-node dictionary
4875 @param rlist: a map with node names as keys and OS objects as values
4878 @return: a dictionary with osnames as keys and as value another
4879 map, with nodes as keys and tuples of (path, status, diagnose,
4880 variants, parameters, api_versions) as values, eg::
4882 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4883 (/srv/..., False, "invalid api")],
4884 "node2": [(/srv/..., True, "", [], [])]}
4889 # we build here the list of nodes that didn't fail the RPC (at RPC
4890 # level), so that nodes with a non-responding node daemon don't
4891 # make all OSes invalid
4892 good_nodes = [node_name for node_name in rlist
4893 if not rlist[node_name].fail_msg]
4894 for node_name, nr in rlist.items():
4895 if nr.fail_msg or not nr.payload:
4897 for (name, path, status, diagnose, variants,
4898 params, api_versions) in nr.payload:
4899 if name not in all_os:
4900 # build a list of nodes for this os containing empty lists
4901 # for each node in node_list
4903 for nname in good_nodes:
4904 all_os[name][nname] = []
4905 # convert params from [name, help] to (name, help)
4906 params = [tuple(v) for v in params]
4907 all_os[name][node_name].append((path, status, diagnose,
4908 variants, params, api_versions))
4910 return all_os
4911 def _GetQueryData(self, lu):
4912 """Computes the list of nodes and their attributes.
4915 # Locking is not used
4916 assert not (compat.any(lu.glm.is_owned(level)
4917 for level in locking.LEVELS
4918 if level != locking.LEVEL_CLUSTER) or
4919 self.do_locking or self.use_locking)
4921 valid_nodes = [node.name
4922 for node in lu.cfg.GetAllNodesInfo().values()
4923 if not node.offline and node.vm_capable]
4924 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4925 cluster = lu.cfg.GetClusterInfo()
4929 for (os_name, os_data) in pol.items():
4930 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4931 hidden=(os_name in cluster.hidden_os),
4932 blacklisted=(os_name in cluster.blacklisted_os))
4936 api_versions = set()
4938 for idx, osl in enumerate(os_data.values()):
4939 info.valid = bool(info.valid and osl and osl[0][1])
4943 (node_variants, node_params, node_api) = osl[0][3:6]
4946 variants.update(node_variants)
4947 parameters.update(node_params)
4948 api_versions.update(node_api)
4950 # Filter out inconsistent values
4951 variants.intersection_update(node_variants)
4952 parameters.intersection_update(node_params)
4953 api_versions.intersection_update(node_api)
4955 info.variants = list(variants)
4956 info.parameters = list(parameters)
4957 info.api_versions = list(api_versions)
4959 data[os_name] = info
4961 # Prepare data in requested order
4962 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4966 class LUOsDiagnose(NoHooksLU):
4967 """Logical unit for OS diagnose/query.
4973 def _BuildFilter(fields, names):
4974 """Builds a filter for querying OSes.
4977 name_filter = qlang.MakeSimpleFilter("name", names)
4979 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4980 # respective field is not requested
4981 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4982 for fname in ["hidden", "blacklisted"]
4983 if fname not in fields]
4984 if "valid" not in fields:
4985 status_filter.append([qlang.OP_TRUE, "valid"])
4987 if status_filter:
4988 status_filter.insert(0, qlang.OP_AND)
4989 else:
4990 status_filter = None
4992 if name_filter and status_filter:
4993 return [qlang.OP_AND, name_filter, status_filter]
4994 elif name_filter:
4995 return name_filter
4996 else:
4997 return status_filter
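# Example of the resulting filter structure (illustrative): for
# fields=["name"] and names=["debian-8"] the return value is roughly
#
#   [qlang.OP_AND,
#    <name filter built by qlang.MakeSimpleFilter("name", ["debian-8"])>,
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#     [qlang.OP_TRUE, "valid"]]]
#
# so hidden, blacklisted and invalid OSes stay out of the default listing.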
4999 def CheckArguments(self):
5000 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5001 self.op.output_fields, False)
5003 def ExpandNames(self):
5004 self.oq.ExpandNames(self)
5006 def Exec(self, feedback_fn):
5007 return self.oq.OldStyleQuery(self)
5010 class LUNodeRemove(LogicalUnit):
5011 """Logical unit for removing a node.
5014 HPATH = "node-remove"
5015 HTYPE = constants.HTYPE_NODE
5017 def BuildHooksEnv(self):
5022 "OP_TARGET": self.op.node_name,
5023 "NODE_NAME": self.op.node_name,
5026 def BuildHooksNodes(self):
5027 """Build hooks nodes.
5029 This doesn't run on the target node in the pre phase as a failed
5030 node would then be impossible to remove.
5033 all_nodes = self.cfg.GetNodeList()
5035 all_nodes.remove(self.op.node_name)
5038 return (all_nodes, all_nodes)
5040 def CheckPrereq(self):
5041 """Check prerequisites.
5044 - the node exists in the configuration
5045 - it does not have primary or secondary instances
5046 - it's not the master
5048 Any errors are signaled by raising errors.OpPrereqError.
5051 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5052 node = self.cfg.GetNodeInfo(self.op.node_name)
5053 assert node is not None
5055 masternode = self.cfg.GetMasterNode()
5056 if node.name == masternode:
5057 raise errors.OpPrereqError("Node is the master node, failover to another"
5058 " node is required", errors.ECODE_INVAL)
5060 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5061 if node.name in instance.all_nodes:
5062 raise errors.OpPrereqError("Instance %s is still running on the node,"
5063 " please remove first" % instance_name,
5065 self.op.node_name = node.name
5068 def Exec(self, feedback_fn):
5069 """Removes the node from the cluster.
5073 logging.info("Stopping the node daemon and removing configs from node %s",
5076 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5078 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5081 # Promote nodes to master candidate as needed
5082 _AdjustCandidatePool(self, exceptions=[node.name])
5083 self.context.RemoveNode(node.name)
5085 # Run post hooks on the node before it's removed
5086 _RunPostHook(self, node.name)
5088 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5089 msg = result.fail_msg
5091 self.LogWarning("Errors encountered on the remote node while leaving"
5092 " the cluster: %s", msg)
5094 # Remove node from our /etc/hosts
5095 if self.cfg.GetClusterInfo().modify_etc_hosts:
5096 master_node = self.cfg.GetMasterNode()
5097 result = self.rpc.call_etc_hosts_modify(master_node,
5098 constants.ETC_HOSTS_REMOVE,
5100 result.Raise("Can't update hosts file with new host data")
5101 _RedistributeAncillaryFiles(self)
5104 class _NodeQuery(_QueryBase):
5105 FIELDS = query.NODE_FIELDS
5107 def ExpandNames(self, lu):
5108 lu.needed_locks = {}
5109 lu.share_locks = _ShareAll()
5112 self.wanted = _GetWantedNodes(lu, self.names)
5114 self.wanted = locking.ALL_SET
5116 self.do_locking = (self.use_locking and
5117 query.NQ_LIVE in self.requested_data)
5120 # If any non-static field is requested we need to lock the nodes
5121 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5123 def DeclareLocks(self, lu, level):
5126 def _GetQueryData(self, lu):
5127 """Computes the list of nodes and their attributes.
5130 all_info = lu.cfg.GetAllNodesInfo()
5132 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5134 # Gather data as requested
5135 if query.NQ_LIVE in self.requested_data:
5136 # filter out non-vm_capable nodes
5137 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5139 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5140 [lu.cfg.GetHypervisorType()])
5141 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5142 for (name, nresult) in node_data.items()
5143 if not nresult.fail_msg and nresult.payload)
5147 if query.NQ_INST in self.requested_data:
5148 node_to_primary = dict([(name, set()) for name in nodenames])
5149 node_to_secondary = dict([(name, set()) for name in nodenames])
5151 inst_data = lu.cfg.GetAllInstancesInfo()
5153 for inst in inst_data.values():
5154 if inst.primary_node in node_to_primary:
5155 node_to_primary[inst.primary_node].add(inst.name)
5156 for secnode in inst.secondary_nodes:
5157 if secnode in node_to_secondary:
5158 node_to_secondary[secnode].add(inst.name)
5160 node_to_primary = None
5161 node_to_secondary = None
5163 if query.NQ_OOB in self.requested_data:
5164 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5165 for name, node in all_info.iteritems())
5169 if query.NQ_GROUP in self.requested_data:
5170 groups = lu.cfg.GetAllNodeGroupsInfo()
5174 return query.NodeQueryData([all_info[name] for name in nodenames],
5175 live_data, lu.cfg.GetMasterNode(),
5176 node_to_primary, node_to_secondary, groups,
5177 oob_support, lu.cfg.GetClusterInfo())
5180 class LUNodeQuery(NoHooksLU):
5181 """Logical unit for querying nodes.
5184 # pylint: disable=W0142
5187 def CheckArguments(self):
5188 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5189 self.op.output_fields, self.op.use_locking)
5191 def ExpandNames(self):
5192 self.nq.ExpandNames(self)
5194 def DeclareLocks(self, level):
5195 self.nq.DeclareLocks(self, level)
5197 def Exec(self, feedback_fn):
5198 return self.nq.OldStyleQuery(self)
5201 class LUNodeQueryvols(NoHooksLU):
5202 """Logical unit for getting volumes on node(s).
5206 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5207 _FIELDS_STATIC = utils.FieldSet("node")
5209 def CheckArguments(self):
5210 _CheckOutputFields(static=self._FIELDS_STATIC,
5211 dynamic=self._FIELDS_DYNAMIC,
5212 selected=self.op.output_fields)
5214 def ExpandNames(self):
5215 self.share_locks = _ShareAll()
5216 self.needed_locks = {}
5218 if not self.op.nodes:
5219 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5221 self.needed_locks[locking.LEVEL_NODE] = \
5222 _GetWantedNodes(self, self.op.nodes)
5224 def Exec(self, feedback_fn):
5225 """Computes the list of nodes and their attributes.
5228 nodenames = self.owned_locks(locking.LEVEL_NODE)
5229 volumes = self.rpc.call_node_volumes(nodenames)
5231 ilist = self.cfg.GetAllInstancesInfo()
5232 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5235 for node in nodenames:
5236 nresult = volumes[node]
5239 msg = nresult.fail_msg
5241 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5244 node_vols = sorted(nresult.payload,
5245 key=operator.itemgetter("dev"))
5247 for vol in node_vols:
5249 for field in self.op.output_fields:
5252 elif field == "phys":
5256 elif field == "name":
5258 elif field == "size":
5259 val = int(float(vol["size"]))
5260 elif field == "instance":
5261 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5263 raise errors.ParameterError(field)
5264 node_output.append(str(val))
5266 output.append(node_output)
5271 class LUNodeQueryStorage(NoHooksLU):
5272 """Logical unit for getting information on storage units on node(s).
5275 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5278 def CheckArguments(self):
5279 _CheckOutputFields(static=self._FIELDS_STATIC,
5280 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5281 selected=self.op.output_fields)
5283 def ExpandNames(self):
5284 self.share_locks = _ShareAll()
5285 self.needed_locks = {}
5288 self.needed_locks[locking.LEVEL_NODE] = \
5289 _GetWantedNodes(self, self.op.nodes)
5291 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5293 def Exec(self, feedback_fn):
5294 """Computes the list of nodes and their attributes.
5297 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5299 # Always get name to sort by
5300 if constants.SF_NAME in self.op.output_fields:
5301 fields = self.op.output_fields[:]
5303 fields = [constants.SF_NAME] + self.op.output_fields
5305 # Never ask for node or type as it's only known to the LU
5306 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5307 while extra in fields:
5308 fields.remove(extra)
5310 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5311 name_idx = field_idx[constants.SF_NAME]
5313 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5314 data = self.rpc.call_storage_list(self.nodes,
5315 self.op.storage_type, st_args,
5316 self.op.name, fields)
5320 for node in utils.NiceSort(self.nodes):
5321 nresult = data[node]
5325 msg = nresult.fail_msg
5327 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5330 rows = dict([(row[name_idx], row) for row in nresult.payload])
5332 for name in utils.NiceSort(rows.keys()):
5337 for field in self.op.output_fields:
5338 if field == constants.SF_NODE:
5340 elif field == constants.SF_TYPE:
5341 val = self.op.storage_type
5342 elif field in field_idx:
5343 val = row[field_idx[field]]
5345 raise errors.ParameterError(field)
5354 class _InstanceQuery(_QueryBase):
5355 FIELDS = query.INSTANCE_FIELDS
5357 def ExpandNames(self, lu):
5358 lu.needed_locks = {}
5359 lu.share_locks = _ShareAll()
5362 self.wanted = _GetWantedInstances(lu, self.names)
5364 self.wanted = locking.ALL_SET
5366 self.do_locking = (self.use_locking and
5367 query.IQ_LIVE in self.requested_data)
5369 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5370 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5371 lu.needed_locks[locking.LEVEL_NODE] = []
5372 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5374 self.do_grouplocks = (self.do_locking and
5375 query.IQ_NODES in self.requested_data)
5377 def DeclareLocks(self, lu, level):
5379 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5380 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5382 # Lock all groups used by instances optimistically; this requires going
5383 # via the node before it's locked, requiring verification later on
5384 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5386 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5387 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5388 elif level == locking.LEVEL_NODE:
5389 lu._LockInstancesNodes() # pylint: disable=W0212
5392 def _CheckGroupLocks(lu):
5393 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5394 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5396 # Check if node groups for locked instances are still correct
5397 for instance_name in owned_instances:
5398 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5400 def _GetQueryData(self, lu):
5401 """Computes the list of instances and their attributes.
5404 if self.do_grouplocks:
5405 self._CheckGroupLocks(lu)
5407 cluster = lu.cfg.GetClusterInfo()
5408 all_info = lu.cfg.GetAllInstancesInfo()
5410 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5412 instance_list = [all_info[name] for name in instance_names]
5413 nodes = frozenset(itertools.chain(*(inst.all_nodes
5414 for inst in instance_list)))
5415 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5418 wrongnode_inst = set()
5420 # Gather data as requested
5421 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5423 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5425 result = node_data[name]
5427 # offline nodes will be in both lists
5428 assert result.fail_msg
5429 offline_nodes.append(name)
5431 bad_nodes.append(name)
5432 elif result.payload:
5433 for inst in result.payload:
5434 if inst in all_info:
5435 if all_info[inst].primary_node == name:
5436 live_data.update(result.payload)
5438 wrongnode_inst.add(inst)
5440 # orphan instance; we don't list it here as we don't
5441 # handle this case yet in the output of instance listing
5442 logging.warning("Orphan instance '%s' found on node %s",
5444 # else no instance is alive
5448 if query.IQ_DISKUSAGE in self.requested_data:
5449 gmi = ganeti.masterd.instance
5450 disk_usage = dict((inst.name,
5451 gmi.ComputeDiskSize(inst.disk_template,
5452 [{constants.IDISK_SIZE: disk.size}
5453 for disk in inst.disks]))
5454 for inst in instance_list)
5458 if query.IQ_CONSOLE in self.requested_data:
5460 for inst in instance_list:
5461 if inst.name in live_data:
5462 # Instance is running
5463 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5465 consinfo[inst.name] = None
5466 assert set(consinfo.keys()) == set(instance_names)
5470 if query.IQ_NODES in self.requested_data:
5471 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5473 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5474 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5475 for uuid in set(map(operator.attrgetter("group"),
5481 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5482 disk_usage, offline_nodes, bad_nodes,
5483 live_data, wrongnode_inst, consinfo,
5487 class LUQuery(NoHooksLU):
5488 """Query for resources/items of a certain kind.
5491 # pylint: disable=W0142
5494 def CheckArguments(self):
5495 qcls = _GetQueryImplementation(self.op.what)
5497 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5499 def ExpandNames(self):
5500 self.impl.ExpandNames(self)
5502 def DeclareLocks(self, level):
5503 self.impl.DeclareLocks(self, level)
5505 def Exec(self, feedback_fn):
5506 return self.impl.NewStyleQuery(self)
5509 class LUQueryFields(NoHooksLU):
5510 """Query for resources/items of a certain kind.
5513 # pylint: disable=W0142
5516 def CheckArguments(self):
5517 self.qcls = _GetQueryImplementation(self.op.what)
5519 def ExpandNames(self):
5520 self.needed_locks = {}
5522 def Exec(self, feedback_fn):
5523 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5526 class LUNodeModifyStorage(NoHooksLU):
5527 """Logical unit for modifying a storage volume on a node.
5532 def CheckArguments(self):
5533 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5535 storage_type = self.op.storage_type
5538 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5540 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5541 " modified" % storage_type,
5544 diff = set(self.op.changes.keys()) - modifiable
5546 raise errors.OpPrereqError("The following fields can not be modified for"
5547 " storage units of type '%s': %r" %
5548 (storage_type, list(diff)),
5551 def ExpandNames(self):
5552 self.needed_locks = {
5553 locking.LEVEL_NODE: self.op.node_name,
5556 def Exec(self, feedback_fn):
5557 """Computes the list of nodes and their attributes.
5560 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5561 result = self.rpc.call_storage_modify(self.op.node_name,
5562 self.op.storage_type, st_args,
5563 self.op.name, self.op.changes)
5564 result.Raise("Failed to modify storage unit '%s' on %s" %
5565 (self.op.name, self.op.node_name))
5568 class LUNodeAdd(LogicalUnit):
5569 """Logical unit for adding node to the cluster.
5573 HTYPE = constants.HTYPE_NODE
5574 _NFLAGS = ["master_capable", "vm_capable"]
5576 def CheckArguments(self):
5577 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5578 # validate/normalize the node name
5579 self.hostname = netutils.GetHostname(name=self.op.node_name,
5580 family=self.primary_ip_family)
5581 self.op.node_name = self.hostname.name
5583 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5584 raise errors.OpPrereqError("Cannot readd the master node",
5587 if self.op.readd and self.op.group:
5588 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5589 " being readded", errors.ECODE_INVAL)
5591 def BuildHooksEnv(self):
5594 This will run on all nodes before, and on all nodes + the new node after.
5598 "OP_TARGET": self.op.node_name,
5599 "NODE_NAME": self.op.node_name,
5600 "NODE_PIP": self.op.primary_ip,
5601 "NODE_SIP": self.op.secondary_ip,
5602 "MASTER_CAPABLE": str(self.op.master_capable),
5603 "VM_CAPABLE": str(self.op.vm_capable),
5606 def BuildHooksNodes(self):
5607 """Build hooks nodes.
5610 # Exclude added node
5611 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5612 post_nodes = pre_nodes + [self.op.node_name, ]
5614 return (pre_nodes, post_nodes)
5616 def CheckPrereq(self):
5617 """Check prerequisites.
5620 - the new node is not already in the config
5622 - its parameters (single/dual homed) matches the cluster
5624 Any errors are signaled by raising errors.OpPrereqError.
5628 hostname = self.hostname
5629 node = hostname.name
5630 primary_ip = self.op.primary_ip = hostname.ip
5631 if self.op.secondary_ip is None:
5632 if self.primary_ip_family == netutils.IP6Address.family:
5633 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5634 " IPv4 address must be given as secondary",
5636 self.op.secondary_ip = primary_ip
5638 secondary_ip = self.op.secondary_ip
5639 if not netutils.IP4Address.IsValid(secondary_ip):
5640 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5641 " address" % secondary_ip, errors.ECODE_INVAL)
5643 node_list = cfg.GetNodeList()
5644 if not self.op.readd and node in node_list:
5645 raise errors.OpPrereqError("Node %s is already in the configuration" %
5646 node, errors.ECODE_EXISTS)
5647 elif self.op.readd and node not in node_list:
5648 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5651 self.changed_primary_ip = False
5653 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5654 if self.op.readd and node == existing_node_name:
5655 if existing_node.secondary_ip != secondary_ip:
5656 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5657 " address configuration as before",
5659 if existing_node.primary_ip != primary_ip:
5660 self.changed_primary_ip = True
5664 if (existing_node.primary_ip == primary_ip or
5665 existing_node.secondary_ip == primary_ip or
5666 existing_node.primary_ip == secondary_ip or
5667 existing_node.secondary_ip == secondary_ip):
5668 raise errors.OpPrereqError("New node ip address(es) conflict with"
5669 " existing node %s" % existing_node.name,
5670 errors.ECODE_NOTUNIQUE)
5672 # After this 'if' block, None is no longer a valid value for the
5673 # _capable op attributes
5675 old_node = self.cfg.GetNodeInfo(node)
5676 assert old_node is not None, "Can't retrieve locked node %s" % node
5677 for attr in self._NFLAGS:
5678 if getattr(self.op, attr) is None:
5679 setattr(self.op, attr, getattr(old_node, attr))
5681 for attr in self._NFLAGS:
5682 if getattr(self.op, attr) is None:
5683 setattr(self.op, attr, True)
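# Worked example of the flag defaulting above (illustrative only): when
# readding a node whose config entry has vm_capable=False, the opcode keeps
# vm_capable=False unless it explicitly overrides it; for a brand-new node
# with no flags given in the opcode, both master_capable and vm_capable
# default to True.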
5685 if self.op.readd and not self.op.vm_capable:
5686 pri, sec = cfg.GetNodeInstances(node)
5688 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5689 " flag set to false, but it already holds"
5690 " instances" % node,
5693 # check that the type of the node (single versus dual homed) is the
5694 # same as for the master
5695 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5696 master_singlehomed = myself.secondary_ip == myself.primary_ip
5697 newbie_singlehomed = secondary_ip == primary_ip
5698 if master_singlehomed != newbie_singlehomed:
5699 if master_singlehomed:
5700 raise errors.OpPrereqError("The master has no secondary ip but the"
5701 " new node has one",
5704 raise errors.OpPrereqError("The master has a secondary ip but the"
5705 " new node doesn't have one",
5708 # checks reachability
5709 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5710 raise errors.OpPrereqError("Node not reachable by ping",
5711 errors.ECODE_ENVIRON)
5713 if not newbie_singlehomed:
5714 # check reachability from my secondary ip to newbie's secondary ip
5715 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5716 source=myself.secondary_ip):
5717 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5718 " based ping to node daemon port",
5719 errors.ECODE_ENVIRON)
5726 if self.op.master_capable:
5727 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5729 self.master_candidate = False
5732 self.new_node = old_node
5734 node_group = cfg.LookupNodeGroup(self.op.group)
5735 self.new_node = objects.Node(name=node,
5736 primary_ip=primary_ip,
5737 secondary_ip=secondary_ip,
5738 master_candidate=self.master_candidate,
5739 offline=False, drained=False,
5742 if self.op.ndparams:
5743 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5745 if self.op.hv_state:
5746 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5748 if self.op.disk_state:
5749 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5751 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5752 # it a property on the base class.
5753 result = rpc.DnsOnlyRunner().call_version([node])[node]
5754 result.Raise("Can't get version information from node %s" % node)
5755 if constants.PROTOCOL_VERSION == result.payload:
5756 logging.info("Communication to node %s fine, sw version %s match",
5757 node, result.payload)
5759 raise errors.OpPrereqError("Version mismatch master version %s,"
5760 " node version %s" %
5761 (constants.PROTOCOL_VERSION, result.payload),
5762 errors.ECODE_ENVIRON)
5764 def Exec(self, feedback_fn):
5765 """Adds the new node to the cluster.
5768 new_node = self.new_node
5769 node = new_node.name
5771 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5774 # We are adding a new node, so we assume it's powered
5775 new_node.powered = True
5777 # for re-adds, reset the offline/drained/master-candidate flags;
5778 # we need to reset here, otherwise offline would prevent RPC calls
5779 # later in the procedure; this also means that if the re-add
5780 # fails, we are left with a non-offlined, broken node
5782 new_node.drained = new_node.offline = False # pylint: disable=W0201
5783 self.LogInfo("Readding a node, the offline/drained flags were reset")
5784 # if we demote the node, we do cleanup later in the procedure
5785 new_node.master_candidate = self.master_candidate
5786 if self.changed_primary_ip:
5787 new_node.primary_ip = self.op.primary_ip
5789 # copy the master/vm_capable flags
5790 for attr in self._NFLAGS:
5791 setattr(new_node, attr, getattr(self.op, attr))
5793 # notify the user about any possible mc promotion
5794 if new_node.master_candidate:
5795 self.LogInfo("Node will be a master candidate")
5797 if self.op.ndparams:
5798 new_node.ndparams = self.op.ndparams
5800 new_node.ndparams = {}
5802 if self.op.hv_state:
5803 new_node.hv_state_static = self.new_hv_state
5805 if self.op.disk_state:
5806 new_node.disk_state_static = self.new_disk_state
5808 # Add node to our /etc/hosts, and add key to known_hosts
5809 if self.cfg.GetClusterInfo().modify_etc_hosts:
5810 master_node = self.cfg.GetMasterNode()
5811 result = self.rpc.call_etc_hosts_modify(master_node,
5812 constants.ETC_HOSTS_ADD,
5815 result.Raise("Can't update hosts file with new host data")
5817 if new_node.secondary_ip != new_node.primary_ip:
5818 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5821 node_verify_list = [self.cfg.GetMasterNode()]
5822 node_verify_param = {
5823 constants.NV_NODELIST: ([node], {}),
5824 # TODO: do a node-net-test as well?
5827 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5828 self.cfg.GetClusterName())
5829 for verifier in node_verify_list:
5830 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5831 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5833 for failed in nl_payload:
5834 feedback_fn("ssh/hostname verification failed"
5835 " (checking from %s): %s" %
5836 (verifier, nl_payload[failed]))
5837 raise errors.OpExecError("ssh/hostname verification failed")
5840 _RedistributeAncillaryFiles(self)
5841 self.context.ReaddNode(new_node)
5842 # make sure we redistribute the config
5843 self.cfg.Update(new_node, feedback_fn)
5844 # and make sure the new node will not have old files around
5845 if not new_node.master_candidate:
5846 result = self.rpc.call_node_demote_from_mc(new_node.name)
5847 msg = result.fail_msg
5849 self.LogWarning("Node failed to demote itself from master"
5850 " candidate status: %s" % msg)
5852 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5853 additional_vm=self.op.vm_capable)
5854 self.context.AddNode(new_node, self.proc.GetECId())
5857 class LUNodeSetParams(LogicalUnit):
5858 """Modifies the parameters of a node.
5860 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5861 to the node role (as _ROLE_*)
5862 @cvar _R2F: a dictionary from node role to tuples of flags
5863 @cvar _FLAGS: a list of attribute names corresponding to the flags
5866 HPATH = "node-modify"
5867 HTYPE = constants.HTYPE_NODE
5869 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5871 (True, False, False): _ROLE_CANDIDATE,
5872 (False, True, False): _ROLE_DRAINED,
5873 (False, False, True): _ROLE_OFFLINE,
5874 (False, False, False): _ROLE_REGULAR,
5876 _R2F = dict((v, k) for k, v in _F2R.items())
5877 _FLAGS = ["master_candidate", "drained", "offline"]
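# Illustrative example (not part of the class): the two mappings above
# translate a node's (master_candidate, drained, offline) flag tuple into a
# single role and back, e.g.
#
#   LUNodeSetParams._F2R[(True, False, False)]    # -> _ROLE_CANDIDATE
#   LUNodeSetParams._F2R[(False, False, False)]   # -> _ROLE_REGULAR
#   LUNodeSetParams._R2F[LUNodeSetParams._ROLE_DRAINED]  # -> (False, True, False)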
5879 def CheckArguments(self):
5880 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5881 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5882 self.op.master_capable, self.op.vm_capable,
5883 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5885 if all_mods.count(None) == len(all_mods):
5886 raise errors.OpPrereqError("Please pass at least one modification",
5888 if all_mods.count(True) > 1:
5889 raise errors.OpPrereqError("Can't set the node into more than one"
5890 " state at the same time",
5893 # Boolean value that tells us whether we might be demoting from MC
5894 self.might_demote = (self.op.master_candidate is False or
5895 self.op.offline is True or
5896 self.op.drained is True or
5897 self.op.master_capable is False)
5899 if self.op.secondary_ip:
5900 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5901 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5902 " address" % self.op.secondary_ip,
5905 self.lock_all = self.op.auto_promote and self.might_demote
5906 self.lock_instances = self.op.secondary_ip is not None
5908 def _InstanceFilter(self, instance):
5909 """Filter for getting affected instances.
5912 return (instance.disk_template in constants.DTS_INT_MIRROR and
5913 self.op.node_name in instance.all_nodes)
5915 def ExpandNames(self):
5917 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5919 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5921 # Since modifying a node can have severe effects on currently running
5922 # operations the resource lock is at least acquired in shared mode
5923 self.needed_locks[locking.LEVEL_NODE_RES] = \
5924 self.needed_locks[locking.LEVEL_NODE]
5926 # Get node resource and instance locks in shared mode; they are not used
5927 # for anything but read-only access
5928 self.share_locks[locking.LEVEL_NODE_RES] = 1
5929 self.share_locks[locking.LEVEL_INSTANCE] = 1
5931 if self.lock_instances:
5932 self.needed_locks[locking.LEVEL_INSTANCE] = \
5933 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5935 def BuildHooksEnv(self):
5938 This runs on the master node.
5942 "OP_TARGET": self.op.node_name,
5943 "MASTER_CANDIDATE": str(self.op.master_candidate),
5944 "OFFLINE": str(self.op.offline),
5945 "DRAINED": str(self.op.drained),
5946 "MASTER_CAPABLE": str(self.op.master_capable),
5947 "VM_CAPABLE": str(self.op.vm_capable),
5950 def BuildHooksNodes(self):
5951 """Build hooks nodes.
5954 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5957 def CheckPrereq(self):
5958 """Check prerequisites.
5960 This only checks the instance list against the existing names.
5963 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5965 if self.lock_instances:
5966 affected_instances = \
5967 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5969 # Verify instance locks
5970 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5971 wanted_instances = frozenset(affected_instances.keys())
5972 if wanted_instances - owned_instances:
5973 raise errors.OpPrereqError("Instances affected by changing node %s's"
5974 " secondary IP address have changed since"
5975 " locks were acquired, wanted '%s', have"
5976 " '%s'; retry the operation" %
5978 utils.CommaJoin(wanted_instances),
5979 utils.CommaJoin(owned_instances)),
5982 affected_instances = None
5984 if (self.op.master_candidate is not None or
5985 self.op.drained is not None or
5986 self.op.offline is not None):
5987 # we can't change the master's node flags
5988 if self.op.node_name == self.cfg.GetMasterNode():
5989 raise errors.OpPrereqError("The master role can be changed"
5990 " only via master-failover",
5993 if self.op.master_candidate and not node.master_capable:
5994 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5995 " it a master candidate" % node.name,
5998 if self.op.vm_capable is False:
5999 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6001 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6002 " the vm_capable flag" % node.name,
6005 if node.master_candidate and self.might_demote and not self.lock_all:
6006 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6007 # check if after removing the current node, we're missing master candidates
6009 (mc_remaining, mc_should, _) = \
6010 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6011 if mc_remaining < mc_should:
6012 raise errors.OpPrereqError("Not enough master candidates, please"
6013 " pass auto promote option to allow"
6014 " promotion (--auto-promote or RAPI"
6015 " auto_promote=True)", errors.ECODE_STATE)
6017 self.old_flags = old_flags = (node.master_candidate,
6018 node.drained, node.offline)
6019 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6020 self.old_role = old_role = self._F2R[old_flags]
6022 # Check for ineffective changes
6023 for attr in self._FLAGS:
6024 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6025 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6026 setattr(self.op, attr, None)
6028 # Past this point, any flag change to False means a transition
6029 # away from the respective state, as only real changes are kept
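# Worked example of the "ineffective change" handling above (illustrative
# only): a request with offline=False for a node whose offline flag is
# already False gets self.op.offline reset to None here, so it is not
# interpreted as a role transition further down.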
6031 # TODO: We might query the real power state if it supports OOB
6032 if _SupportsOob(self.cfg, node):
6033 if self.op.offline is False and not (node.powered or
6034 self.op.powered is True):
6035 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6036 " offline status can be reset") %
6037 self.op.node_name, errors.ECODE_STATE)
6038 elif self.op.powered is not None:
6039 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6040 " as it does not support out-of-band"
6041 " handling") % self.op.node_name,
6044 # If we're being deofflined/drained, we'll MC ourself if needed
6045 if (self.op.drained is False or self.op.offline is False or
6046 (self.op.master_capable and not node.master_capable)):
6047 if _DecideSelfPromotion(self):
6048 self.op.master_candidate = True
6049 self.LogInfo("Auto-promoting node to master candidate")
6051 # If we're no longer master capable, we'll demote ourselves from MC
6052 if self.op.master_capable is False and node.master_candidate:
6053 self.LogInfo("Demoting from master candidate")
6054 self.op.master_candidate = False
6057 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6058 if self.op.master_candidate:
6059 new_role = self._ROLE_CANDIDATE
6060 elif self.op.drained:
6061 new_role = self._ROLE_DRAINED
6062 elif self.op.offline:
6063 new_role = self._ROLE_OFFLINE
6064 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6065 # False is still in new flags, which means we're un-setting (the
6067 new_role = self._ROLE_REGULAR
6068 else: # no new flags, nothing, keep old role
6071 self.new_role = new_role
6073 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6074 # Trying to transition out of offline status
6075 result = self.rpc.call_version([node.name])[node.name]
6077 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6078 " to report its version: %s" %
6079 (node.name, result.fail_msg),
6082 self.LogWarning("Transitioning node from offline to online state"
6083 " without using re-add. Please make sure the node"
6086 # When changing the secondary ip, verify if this is a single-homed to
6087 # multi-homed transition or vice versa, and apply the relevant
6089 if self.op.secondary_ip:
6090 # Ok even without locking, because this can't be changed by any LU
6091 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6092 master_singlehomed = master.secondary_ip == master.primary_ip
6093 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6094 if self.op.force and node.name == master.name:
6095 self.LogWarning("Transitioning from single-homed to multi-homed"
6096 " cluster. All nodes will require a secondary ip.")
6098 raise errors.OpPrereqError("Changing the secondary ip on a"
6099 " single-homed cluster requires the"
6100 " --force option to be passed, and the"
6101 " target node to be the master",
6103 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6104 if self.op.force and node.name == master.name:
6105 self.LogWarning("Transitioning from multi-homed to single-homed"
6106 " cluster. Secondary IPs will have to be removed.")
6108 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6109 " same as the primary IP on a multi-homed"
6110 " cluster, unless the --force option is"
6111 " passed, and the target node is the"
6112 " master", errors.ECODE_INVAL)
6114 assert not (frozenset(affected_instances) -
6115 self.owned_locks(locking.LEVEL_INSTANCE))
6118 if affected_instances:
6119 msg = ("Cannot change secondary IP address: offline node has"
6120 " instances (%s) configured to use it" %
6121 utils.CommaJoin(affected_instances.keys()))
6122 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6124 # On online nodes, check that no instances are running, and that
6125 # the node has the new ip and we can reach it.
6126 for instance in affected_instances.values():
6127 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6128 msg="cannot change secondary ip")
6130 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6131 if master.name != node.name:
6132 # check reachability from master secondary ip to new secondary ip
6133 if not netutils.TcpPing(self.op.secondary_ip,
6134 constants.DEFAULT_NODED_PORT,
6135 source=master.secondary_ip):
6136 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6137 " based ping to node daemon port",
6138 errors.ECODE_ENVIRON)
6140 if self.op.ndparams:
6141 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6142 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6143 self.new_ndparams = new_ndparams
6145 if self.op.hv_state:
6146 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6147 self.node.hv_state_static)
6149 if self.op.disk_state:
6150 self.new_disk_state = \
6151 _MergeAndVerifyDiskState(self.op.disk_state,
6152 self.node.disk_state_static)
6154 def Exec(self, feedback_fn):
6159 old_role = self.old_role
6160 new_role = self.new_role
6164 if self.op.ndparams:
6165 node.ndparams = self.new_ndparams
6167 if self.op.powered is not None:
6168 node.powered = self.op.powered
6170 if self.op.hv_state:
6171 node.hv_state_static = self.new_hv_state
6173 if self.op.disk_state:
6174 node.disk_state_static = self.new_disk_state
6176 for attr in ["master_capable", "vm_capable"]:
6177 val = getattr(self.op, attr)
6179 setattr(node, attr, val)
6180 result.append((attr, str(val)))
6182 if new_role != old_role:
6183 # Tell the node to demote itself, if no longer MC and not offline
6184 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6185 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6187 self.LogWarning("Node failed to demote itself: %s", msg)
6189 new_flags = self._R2F[new_role]
6190 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6192 result.append((desc, str(nf)))
6193 (node.master_candidate, node.drained, node.offline) = new_flags
6195 # we locked all nodes, we adjust the CP before updating this node
6197 _AdjustCandidatePool(self, [node.name])
6199 if self.op.secondary_ip:
6200 node.secondary_ip = self.op.secondary_ip
6201 result.append(("secondary_ip", self.op.secondary_ip))
6203 # this will trigger configuration file update, if needed
6204 self.cfg.Update(node, feedback_fn)
6206 # this will trigger job queue propagation or cleanup if the mc
6208 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6209 self.context.ReaddNode(node)
6214 class LUNodePowercycle(NoHooksLU):
6215 """Powercycles a node.
6220 def CheckArguments(self):
6221 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6222 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6223 raise errors.OpPrereqError("The node is the master and the force"
6224 " parameter was not set",
6227 def ExpandNames(self):
6228 """Locking for PowercycleNode.
6230 This is a last-resort option and shouldn't block on other
6231 jobs. Therefore, we grab no locks.
6234 self.needed_locks = {}
6236 def Exec(self, feedback_fn):
6240 result = self.rpc.call_node_powercycle(self.op.node_name,
6241 self.cfg.GetHypervisorType())
6242 result.Raise("Failed to schedule the reboot")
6243 return result.payload
6246 class LUClusterQuery(NoHooksLU):
6247 """Query cluster configuration.
6252 def ExpandNames(self):
6253 self.needed_locks = {}
6255 def Exec(self, feedback_fn):
6256 """Return cluster config.
6259 cluster = self.cfg.GetClusterInfo()
6262 # Filter just for enabled hypervisors
6263 for os_name, hv_dict in cluster.os_hvp.items():
6264 os_hvp[os_name] = {}
6265 for hv_name, hv_params in hv_dict.items():
6266 if hv_name in cluster.enabled_hypervisors:
6267 os_hvp[os_name][hv_name] = hv_params
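# Illustrative example (the hypervisor names are only an assumption): if
# cluster.os_hvp has entries for both "kvm" and "xen-pvm" but only "kvm" is
# in enabled_hypervisors, the filtered os_hvp built above contains only the
# "kvm" sub-dictionaries.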
6269 # Convert ip_family to ip_version
6270 primary_ip_version = constants.IP4_VERSION
6271 if cluster.primary_ip_family == netutils.IP6Address.family:
6272 primary_ip_version = constants.IP6_VERSION
6275 "software_version": constants.RELEASE_VERSION,
6276 "protocol_version": constants.PROTOCOL_VERSION,
6277 "config_version": constants.CONFIG_VERSION,
6278 "os_api_version": max(constants.OS_API_VERSIONS),
6279 "export_version": constants.EXPORT_VERSION,
6280 "architecture": runtime.GetArchInfo(),
6281 "name": cluster.cluster_name,
6282 "master": cluster.master_node,
6283 "default_hypervisor": cluster.primary_hypervisor,
6284 "enabled_hypervisors": cluster.enabled_hypervisors,
6285 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6286 for hypervisor_name in cluster.enabled_hypervisors]),
6288 "beparams": cluster.beparams,
6289 "osparams": cluster.osparams,
6290 "ipolicy": cluster.ipolicy,
6291 "nicparams": cluster.nicparams,
6292 "ndparams": cluster.ndparams,
6293 "diskparams": cluster.diskparams,
6294 "candidate_pool_size": cluster.candidate_pool_size,
6295 "master_netdev": cluster.master_netdev,
6296 "master_netmask": cluster.master_netmask,
6297 "use_external_mip_script": cluster.use_external_mip_script,
6298 "volume_group_name": cluster.volume_group_name,
6299 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6300 "file_storage_dir": cluster.file_storage_dir,
6301 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6302 "maintain_node_health": cluster.maintain_node_health,
6303 "ctime": cluster.ctime,
6304 "mtime": cluster.mtime,
6305 "uuid": cluster.uuid,
6306 "tags": list(cluster.GetTags()),
6307 "uid_pool": cluster.uid_pool,
6308 "default_iallocator": cluster.default_iallocator,
6309 "reserved_lvs": cluster.reserved_lvs,
6310 "primary_ip_version": primary_ip_version,
6311 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6312 "hidden_os": cluster.hidden_os,
6313 "blacklisted_os": cluster.blacklisted_os,
6319 class LUClusterConfigQuery(NoHooksLU):
6320 """Return configuration values.
6325 def CheckArguments(self):
6326 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6328 def ExpandNames(self):
6329 self.cq.ExpandNames(self)
6331 def DeclareLocks(self, level):
6332 self.cq.DeclareLocks(self, level)
6334 def Exec(self, feedback_fn):
6335 result = self.cq.OldStyleQuery(self)
6337 assert len(result) == 1
6342 class _ClusterQuery(_QueryBase):
6343 FIELDS = query.CLUSTER_FIELDS
6345 #: Do not sort (there is only one item)
6348 def ExpandNames(self, lu):
6349 lu.needed_locks = {}
6351 # The following variables interact with _QueryBase._GetNames
6352 self.wanted = locking.ALL_SET
6353 self.do_locking = self.use_locking
6356 raise errors.OpPrereqError("Can not use locking for cluster queries",
6359 def DeclareLocks(self, lu, level):
6362 def _GetQueryData(self, lu):
6363 """Computes the list of nodes and their attributes.
6366 # Locking is not used
6367 assert not (compat.any(lu.glm.is_owned(level)
6368 for level in locking.LEVELS
6369 if level != locking.LEVEL_CLUSTER) or
6370 self.do_locking or self.use_locking)
6372 if query.CQ_CONFIG in self.requested_data:
6373 cluster = lu.cfg.GetClusterInfo()
6375 cluster = NotImplemented
6377 if query.CQ_QUEUE_DRAINED in self.requested_data:
6378 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6380 drain_flag = NotImplemented
6382 if query.CQ_WATCHER_PAUSE in self.requested_data:
6383 watcher_pause = utils.ReadWatcherPauseFile(pathutils.WATCHER_PAUSEFILE)
6385 watcher_pause = NotImplemented
6387 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
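# Illustrative note on the gating above (a sketch of the intended behaviour,
# not authoritative): a query that only needs the queue drain flag sets just
# CQ_QUEUE_DRAINED in requested_data, so "cluster" and "watcher_pause" stay
# NotImplemented and only the drain-file check is performed.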
6390 class LUInstanceActivateDisks(NoHooksLU):
6391 """Bring up an instance's disks.
6396 def ExpandNames(self):
6397 self._ExpandAndLockInstance()
6398 self.needed_locks[locking.LEVEL_NODE] = []
6399 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6401 def DeclareLocks(self, level):
6402 if level == locking.LEVEL_NODE:
6403 self._LockInstancesNodes()
6405 def CheckPrereq(self):
6406 """Check prerequisites.
6408 This checks that the instance is in the cluster.
6411 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6412 assert self.instance is not None, \
6413 "Cannot retrieve locked instance %s" % self.op.instance_name
6414 _CheckNodeOnline(self, self.instance.primary_node)
6416 def Exec(self, feedback_fn):
6417 """Activate the disks.
6420 disks_ok, disks_info = \
6421 _AssembleInstanceDisks(self, self.instance,
6422 ignore_size=self.op.ignore_size)
6424 raise errors.OpExecError("Cannot activate block devices")
6426 if self.op.wait_for_sync:
6427 if not _WaitForSync(self, self.instance):
6428 raise errors.OpExecError("Some disks of the instance are degraded!")
6433 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6435 """Prepare the block devices for an instance.
6437 This sets up the block devices on all nodes.
6439 @type lu: L{LogicalUnit}
6440 @param lu: the logical unit on whose behalf we execute
6441 @type instance: L{objects.Instance}
6442 @param instance: the instance for whose disks we assemble
6443 @type disks: list of L{objects.Disk} or None
6444 @param disks: which disks to assemble (or all, if None)
6445 @type ignore_secondaries: boolean
6446 @param ignore_secondaries: if true, errors on secondary nodes
6447 won't result in an error return from the function
6448 @type ignore_size: boolean
6449 @param ignore_size: if true, the current known size of the disk
6450 will not be used during the disk activation, useful for cases
6451 when the size is wrong
6452 @return: False if the operation failed, otherwise a list of
6453 (host, instance_visible_name, node_visible_name)
6454 with the mapping from node devices to instance devices
6459 iname = instance.name
6460 disks = _ExpandCheckDisks(instance, disks)
6462 # With the two-pass mechanism we try to reduce the window of
6463 # opportunity for the race condition of switching DRBD to primary
6464 # before handshaking occurred, but we do not eliminate it
6466 # The proper fix would be to wait (with some limits) until the
6467 # connection has been made and drbd transitions from WFConnection
6468 # into any other network-connected state (Connected, SyncTarget,
6471 # 1st pass, assemble on all nodes in secondary mode
6472 for idx, inst_disk in enumerate(disks):
6473 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6475 node_disk = node_disk.Copy()
6476 node_disk.UnsetSize()
6477 lu.cfg.SetDiskID(node_disk, node)
6478 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6480 msg = result.fail_msg
6482 is_offline_secondary = (node in instance.secondary_nodes and
6484 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6485 " (is_primary=False, pass=1): %s",
6486 inst_disk.iv_name, node, msg)
6487 if not (ignore_secondaries or is_offline_secondary):
6490 # FIXME: race condition on drbd migration to primary
6492 # 2nd pass, do only the primary node
6493 for idx, inst_disk in enumerate(disks):
6496 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6497 if node != instance.primary_node:
6500 node_disk = node_disk.Copy()
6501 node_disk.UnsetSize()
6502 lu.cfg.SetDiskID(node_disk, node)
6503 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6505 msg = result.fail_msg
6507 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6508 " (is_primary=True, pass=2): %s",
6509 inst_disk.iv_name, node, msg)
6512 dev_path = result.payload
6514 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6516 # leave the disks configured for the primary node
6517 # this is a workaround that would be better fixed by
6518 # improving the logical/physical id handling
6520 lu.cfg.SetDiskID(disk, instance.primary_node)
6522 return disks_ok, device_info
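# Minimal usage sketch for _AssembleInstanceDisks (illustrative; it mirrors
# the call in LUInstanceActivateDisks.Exec above, and the LogInfo line is
# only an example of consuming the returned tuples):
#
#   disks_ok, disks_info = _AssembleInstanceDisks(lu, instance,
#                                                 ignore_size=False)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in disks_info:
#     lu.LogInfo("Disk %s visible on %s as %s", iv_name, node, dev_path)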
6525 def _StartInstanceDisks(lu, instance, force):
6526 """Start the disks of an instance.
6529 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6530 ignore_secondaries=force)
6532 _ShutdownInstanceDisks(lu, instance)
6533 if force is not None and not force:
6534 lu.proc.LogWarning("", hint="If the message above refers to a"
6536 " you can retry the operation using '--force'.")
6537 raise errors.OpExecError("Disk consistency error")
6540 class LUInstanceDeactivateDisks(NoHooksLU):
6541 """Shutdown an instance's disks.
6546 def ExpandNames(self):
6547 self._ExpandAndLockInstance()
6548 self.needed_locks[locking.LEVEL_NODE] = []
6549 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6551 def DeclareLocks(self, level):
6552 if level == locking.LEVEL_NODE:
6553 self._LockInstancesNodes()
6555 def CheckPrereq(self):
6556 """Check prerequisites.
6558 This checks that the instance is in the cluster.
6561 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6562 assert self.instance is not None, \
6563 "Cannot retrieve locked instance %s" % self.op.instance_name
6565 def Exec(self, feedback_fn):
6566 """Deactivate the disks
6569 instance = self.instance
6571 _ShutdownInstanceDisks(self, instance)
6573 _SafeShutdownInstanceDisks(self, instance)
6576 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6577 """Shutdown block devices of an instance.
6579 This function checks if an instance is running, before calling
6580 _ShutdownInstanceDisks.
6583 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6584 _ShutdownInstanceDisks(lu, instance, disks=disks)
6587 def _ExpandCheckDisks(instance, disks):
6588 """Return the instance disks selected by the disks list
6590 @type disks: list of L{objects.Disk} or None
6591 @param disks: selected disks
6592 @rtype: list of L{objects.Disk}
6593 @return: selected instance disks to act on
6597 return instance.disks
6599 if not set(disks).issubset(instance.disks):
6600 raise errors.ProgrammerError("Can only act on disks belonging to the"
6605 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6606 """Shutdown block devices of an instance.
6608 This does the shutdown on all nodes of the instance.
6610 If ignore_primary is false, errors on the primary node are
6615 disks = _ExpandCheckDisks(instance, disks)
6618 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6619 lu.cfg.SetDiskID(top_disk, node)
6620 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6621 msg = result.fail_msg
6623 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6624 disk.iv_name, node, msg)
6625 if ((node == instance.primary_node and not ignore_primary) or
6626 (node != instance.primary_node and not result.offline)):
6631 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6632 """Checks if a node has enough free memory.
6634 This function checks if a given node has the needed amount of free
6635 memory. In case the node has less memory or we cannot get the
6636 information from the node, this function raises an OpPrereqError
6639 @type lu: C{LogicalUnit}
6640 @param lu: a logical unit from which we get configuration data
6642 @param node: the node to check
6643 @type reason: C{str}
6644 @param reason: string to use in the error message
6645 @type requested: C{int}
6646 @param requested: the amount of memory in MiB to check for
6647 @type hypervisor_name: C{str}
6648 @param hypervisor_name: the hypervisor to ask for memory stats
6650 @return: node current free memory
6651 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6652 we cannot check the node
6655 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6656 nodeinfo[node].Raise("Can't get data from node %s" % node,
6657 prereq=True, ecode=errors.ECODE_ENVIRON)
6658 (_, _, (hv_info, )) = nodeinfo[node].payload
6660 free_mem = hv_info.get("memory_free", None)
6661 if not isinstance(free_mem, int):
6662 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6663 " was '%s'" % (node, free_mem),
6664 errors.ECODE_ENVIRON)
6665 if requested > free_mem:
6666 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6667 " needed %s MiB, available %s MiB" %
6668 (node, reason, requested, free_mem),
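# Usage sketch for _CheckNodeFreeMemory (illustrative; it mirrors the call
# made from LUInstanceStartup.CheckPrereq later in this module):
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MINMEM], instance.hypervisor)
#
# On success the helper returns the node's current free memory in MiB.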
6673 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6674 """Checks if nodes have enough free disk space in the all VGs.
6676 This function checks if all given nodes have the needed amount of
6677 free disk. In case any node has less disk or we cannot get the
6678 information from the node, this function raises an OpPrereqError
6681 @type lu: C{LogicalUnit}
6682 @param lu: a logical unit from which we get configuration data
6683 @type nodenames: C{list}
6684 @param nodenames: the list of node names to check
6685 @type req_sizes: C{dict}
6686 @param req_sizes: the hash of vg and corresponding amount of disk in
6688 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6689 or we cannot check the node
6692 for vg, req_size in req_sizes.items():
6693 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
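# Illustrative example of the req_sizes argument (the VG and node names are
# hypothetical): checking for 10 GiB of free space in volume group "xenvg"
# on two nodes would look like
#
#   _CheckNodesFreeDiskPerVG(self, ["node1", "node2"], {"xenvg": 10240})
#
# i.e. req_sizes maps each VG name to the required amount of disk in MiB.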
6696 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6697 """Checks if nodes have enough free disk space in the specified VG.
6699 This function checks if all given nodes have the needed amount of
6700 free disk. In case any node has less disk or we cannot get the
6701 information from the node, this function raises an OpPrereqError
6704 @type lu: C{LogicalUnit}
6705 @param lu: a logical unit from which we get configuration data
6706 @type nodenames: C{list}
6707 @param nodenames: the list of node names to check
6709 @param vg: the volume group to check
6710 @type requested: C{int}
6711 @param requested: the amount of disk in MiB to check for
6712 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6713 or we cannot check the node
6716 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6717 for node in nodenames:
6718 info = nodeinfo[node]
6719 info.Raise("Cannot get current information from node %s" % node,
6720 prereq=True, ecode=errors.ECODE_ENVIRON)
6721 (_, (vg_info, ), _) = info.payload
6722 vg_free = vg_info.get("vg_free", None)
6723 if not isinstance(vg_free, int):
6724 raise errors.OpPrereqError("Can't compute free disk space on node"
6725 " %s for vg %s, result was '%s'" %
6726 (node, vg, vg_free), errors.ECODE_ENVIRON)
6727 if requested > vg_free:
6728 raise errors.OpPrereqError("Not enough disk space on target node %s"
6729 " vg %s: required %d MiB, available %d MiB" %
6730 (node, vg, requested, vg_free),
6734 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6735 """Checks if nodes have enough physical CPUs
6737 This function checks if all given nodes have the needed number of
6738 physical CPUs. In case any node has fewer CPUs or we cannot get the
6739 information from the node, this function raises an OpPrereqError
6742 @type lu: C{LogicalUnit}
6743 @param lu: a logical unit from which we get configuration data
6744 @type nodenames: C{list}
6745 @param nodenames: the list of node names to check
6746 @type requested: C{int}
6747 @param requested: the minimum acceptable number of physical CPUs
6748 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6749 or we cannot check the node
6752 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6753 for node in nodenames:
6754 info = nodeinfo[node]
6755 info.Raise("Cannot get current information from node %s" % node,
6756 prereq=True, ecode=errors.ECODE_ENVIRON)
6757 (_, _, (hv_info, )) = info.payload
6758 num_cpus = hv_info.get("cpu_total", None)
6759 if not isinstance(num_cpus, int):
6760 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6761 " on node %s, result was '%s'" %
6762 (node, num_cpus), errors.ECODE_ENVIRON)
6763 if requested > num_cpus:
6764 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6765 "required" % (node, num_cpus, requested),
6769 class LUInstanceStartup(LogicalUnit):
6770 """Starts an instance.
6773 HPATH = "instance-start"
6774 HTYPE = constants.HTYPE_INSTANCE
6777 def CheckArguments(self):
6779 if self.op.beparams:
6780 # fill the beparams dict
6781 objects.UpgradeBeParams(self.op.beparams)
6782 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6784 def ExpandNames(self):
6785 self._ExpandAndLockInstance()
6786 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6788 def DeclareLocks(self, level):
6789 if level == locking.LEVEL_NODE_RES:
6790 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6792 def BuildHooksEnv(self):
6795 This runs on master, primary and secondary nodes of the instance.
6799 "FORCE": self.op.force,
6802 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6806 def BuildHooksNodes(self):
6807 """Build hooks nodes.
6810 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6813 def CheckPrereq(self):
6814 """Check prerequisites.
6816 This checks that the instance is in the cluster.
6819 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6820 assert self.instance is not None, \
6821 "Cannot retrieve locked instance %s" % self.op.instance_name
6824 if self.op.hvparams:
6825 # check hypervisor parameter syntax (locally)
6826 cluster = self.cfg.GetClusterInfo()
6827 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6828 filled_hvp = cluster.FillHV(instance)
6829 filled_hvp.update(self.op.hvparams)
6830 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6831 hv_type.CheckParameterSyntax(filled_hvp)
6832 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6834 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6836 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6838 if self.primary_offline and self.op.ignore_offline_nodes:
6839 self.proc.LogWarning("Ignoring offline primary node")
6841 if self.op.hvparams or self.op.beparams:
6842 self.proc.LogWarning("Overridden parameters are ignored")
6844 _CheckNodeOnline(self, instance.primary_node)
6846 bep = self.cfg.GetClusterInfo().FillBE(instance)
6847 bep.update(self.op.beparams)
6849 # check bridges existence
6850 _CheckInstanceBridgesExist(self, instance)
6852 remote_info = self.rpc.call_instance_info(instance.primary_node,
6854 instance.hypervisor)
6855 remote_info.Raise("Error checking node %s" % instance.primary_node,
6856 prereq=True, ecode=errors.ECODE_ENVIRON)
6857 if not remote_info.payload: # not running already
6858 _CheckNodeFreeMemory(self, instance.primary_node,
6859 "starting instance %s" % instance.name,
6860 bep[constants.BE_MINMEM], instance.hypervisor)
6862 def Exec(self, feedback_fn):
6863 """Start the instance.
6866 instance = self.instance
6867 force = self.op.force
6869 if not self.op.no_remember:
6870 self.cfg.MarkInstanceUp(instance.name)
6872 if self.primary_offline:
6873 assert self.op.ignore_offline_nodes
6874 self.proc.LogInfo("Primary node offline, marked instance as started")
6876 node_current = instance.primary_node
6878 _StartInstanceDisks(self, instance, force)
6881 self.rpc.call_instance_start(node_current,
6882 (instance, self.op.hvparams,
6884 self.op.startup_paused)
6885 msg = result.fail_msg
6887 _ShutdownInstanceDisks(self, instance)
6888 raise errors.OpExecError("Could not start instance: %s" % msg)
6891 class LUInstanceReboot(LogicalUnit):
6892 """Reboot an instance.
6895 HPATH = "instance-reboot"
6896 HTYPE = constants.HTYPE_INSTANCE
6899 def ExpandNames(self):
6900 self._ExpandAndLockInstance()
6902 def BuildHooksEnv(self):
6905 This runs on master, primary and secondary nodes of the instance.
6909 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6910 "REBOOT_TYPE": self.op.reboot_type,
6911 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6914 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6918 def BuildHooksNodes(self):
6919 """Build hooks nodes.
6922 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6925 def CheckPrereq(self):
6926 """Check prerequisites.
6928 This checks that the instance is in the cluster.
6931 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6932 assert self.instance is not None, \
6933 "Cannot retrieve locked instance %s" % self.op.instance_name
6934 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6935 _CheckNodeOnline(self, instance.primary_node)
6937 # check bridges existence
6938 _CheckInstanceBridgesExist(self, instance)
6940 def Exec(self, feedback_fn):
6941 """Reboot the instance.
6944 instance = self.instance
6945 ignore_secondaries = self.op.ignore_secondaries
6946 reboot_type = self.op.reboot_type
6948 remote_info = self.rpc.call_instance_info(instance.primary_node,
6950 instance.hypervisor)
6951 remote_info.Raise("Error checking node %s" % instance.primary_node)
6952 instance_running = bool(remote_info.payload)
6954 node_current = instance.primary_node
6956 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6957 constants.INSTANCE_REBOOT_HARD]:
6958 for disk in instance.disks:
6959 self.cfg.SetDiskID(disk, node_current)
6960 result = self.rpc.call_instance_reboot(node_current, instance,
6962 self.op.shutdown_timeout)
6963 result.Raise("Could not reboot instance")
6965 if instance_running:
6966 result = self.rpc.call_instance_shutdown(node_current, instance,
6967 self.op.shutdown_timeout)
6968 result.Raise("Could not shutdown instance for full reboot")
6969 _ShutdownInstanceDisks(self, instance)
6971 self.LogInfo("Instance %s was already stopped, starting now",
6973 _StartInstanceDisks(self, instance, ignore_secondaries)
6974 result = self.rpc.call_instance_start(node_current,
6975 (instance, None, None), False)
6976 msg = result.fail_msg
6978 _ShutdownInstanceDisks(self, instance)
6979 raise errors.OpExecError("Could not start instance for"
6980 " full reboot: %s" % msg)
6982 self.cfg.MarkInstanceUp(instance.name)
6985 class LUInstanceShutdown(LogicalUnit):
6986 """Shutdown an instance.
6989 HPATH = "instance-stop"
6990 HTYPE = constants.HTYPE_INSTANCE
6993 def ExpandNames(self):
6994 self._ExpandAndLockInstance()
6996 def BuildHooksEnv(self):
6999 This runs on master, primary and secondary nodes of the instance.
7002 env = _BuildInstanceHookEnvByObject(self, self.instance)
7003 env["TIMEOUT"] = self.op.timeout
7006 def BuildHooksNodes(self):
7007 """Build hooks nodes.
7010 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7013 def CheckPrereq(self):
7014 """Check prerequisites.
7016 This checks that the instance is in the cluster.
7019 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7020 assert self.instance is not None, \
7021 "Cannot retrieve locked instance %s" % self.op.instance_name
7023 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7025 self.primary_offline = \
7026 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7028 if self.primary_offline and self.op.ignore_offline_nodes:
7029 self.proc.LogWarning("Ignoring offline primary node")
7031 _CheckNodeOnline(self, self.instance.primary_node)
7033 def Exec(self, feedback_fn):
7034 """Shutdown the instance.
7037 instance = self.instance
7038 node_current = instance.primary_node
7039 timeout = self.op.timeout
7041 if not self.op.no_remember:
7042 self.cfg.MarkInstanceDown(instance.name)
7044 if self.primary_offline:
7045 assert self.op.ignore_offline_nodes
7046 self.proc.LogInfo("Primary node offline, marked instance as stopped")
7048 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7049 msg = result.fail_msg
7051 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
7053 _ShutdownInstanceDisks(self, instance)
7056 class LUInstanceReinstall(LogicalUnit):
7057 """Reinstall an instance.
7060 HPATH = "instance-reinstall"
7061 HTYPE = constants.HTYPE_INSTANCE
7064 def ExpandNames(self):
7065 self._ExpandAndLockInstance()
7067 def BuildHooksEnv(self):
7070 This runs on master, primary and secondary nodes of the instance.
7073 return _BuildInstanceHookEnvByObject(self, self.instance)
7075 def BuildHooksNodes(self):
7076 """Build hooks nodes.
7079 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7082 def CheckPrereq(self):
7083 """Check prerequisites.
7085 This checks that the instance is in the cluster and is not running.
7088 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7089 assert instance is not None, \
7090 "Cannot retrieve locked instance %s" % self.op.instance_name
7091 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7092 " offline, cannot reinstall")
7094 if instance.disk_template == constants.DT_DISKLESS:
7095 raise errors.OpPrereqError("Instance '%s' has no disks" %
7096 self.op.instance_name,
7098 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7100 if self.op.os_type is not None:
7102 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7103 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7104 instance_os = self.op.os_type
7106 instance_os = instance.os
7108 nodelist = list(instance.all_nodes)
7110 if self.op.osparams:
7111 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7112 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7113 self.os_inst = i_osdict # the new dict (without defaults)
7117 self.instance = instance
7119 def Exec(self, feedback_fn):
7120 """Reinstall the instance.
7123 inst = self.instance
7125 if self.op.os_type is not None:
7126 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7127 inst.os = self.op.os_type
7128 # Write to configuration
7129 self.cfg.Update(inst, feedback_fn)
7131 _StartInstanceDisks(self, inst, None)
7133 feedback_fn("Running the instance OS create scripts...")
7134 # FIXME: pass debug option from opcode to backend
7135 result = self.rpc.call_instance_os_add(inst.primary_node,
7136 (inst, self.os_inst), True,
7137 self.op.debug_level)
7138 result.Raise("Could not install OS for instance %s on node %s" %
7139 (inst.name, inst.primary_node))
7141 _ShutdownInstanceDisks(self, inst)
7144 class LUInstanceRecreateDisks(LogicalUnit):
7145 """Recreate an instance's missing disks.
7148 HPATH = "instance-recreate-disks"
7149 HTYPE = constants.HTYPE_INSTANCE
7152 _MODIFYABLE = frozenset([
7153 constants.IDISK_SIZE,
7154 constants.IDISK_MODE,
7157 # New or changed disk parameters may have different semantics
7158 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7159 constants.IDISK_ADOPT,
7161 # TODO: Implement support changing VG while recreating
7163 constants.IDISK_METAVG,
7166 def _RunAllocator(self):
7167 """Run the allocator based on input opcode.
7170 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7173 # The allocator should actually run in "relocate" mode, but current
7174 # allocators don't support relocating all the nodes of an instance at
7175 # the same time. As a workaround we use "allocate" mode, but this is
7176 # suboptimal for two reasons:
7177 # - The instance name passed to the allocator is present in the list of
7178 # existing instances, so there could be a conflict within the
7179 # internal structures of the allocator. This doesn't happen with the
7180 # current allocators, but it's a liability.
7181 # - The allocator counts the resources used by the instance twice: once
7182 # because the instance exists already, and once because it tries to
7183 # allocate a new instance.
7184 # The allocator could choose some of the nodes on which the instance is
7185 # running, but that's not a problem. If the instance nodes are broken,
7186 # they should already be marked as drained or offline, and hence
7187 # skipped by the allocator. If instance disks have been lost for other
7188 # reasons, then recreating the disks on the same nodes should be fine.
7189 disk_template = self.instance.disk_template
7190 spindle_use = be_full[constants.BE_SPINDLE_USE]
7191 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7192 disk_template=disk_template,
7193 tags=list(self.instance.GetTags()),
7194 os=self.instance.os,
7196 vcpus=be_full[constants.BE_VCPUS],
7197 memory=be_full[constants.BE_MAXMEM],
7198 spindle_use=spindle_use,
7199 disks=[{constants.IDISK_SIZE: d.size,
7200 constants.IDISK_MODE: d.mode}
7201 for d in self.instance.disks],
7202 hypervisor=self.instance.hypervisor)
7203 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7205 ial.Run(self.op.iallocator)
7207 assert req.RequiredNodes() == len(self.instance.all_nodes)
7210 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7211 " %s" % (self.op.iallocator, ial.info),
7214 self.op.nodes = ial.result
7215 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7216 self.op.instance_name, self.op.iallocator,
7217 utils.CommaJoin(ial.result))
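# Illustrative note (based on the allocate-mode request built above; node
# names are hypothetical): for a DRBD8 instance the allocator returns a
# two-element list such as ["node1", "node2"], which then replaces
# self.op.nodes and is later checked against the instance's node count.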
7219 def CheckArguments(self):
7220 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7221 # Normalize and convert deprecated list of disk indices
7222 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
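# Worked example of the normalization above (illustrative only): a
# deprecated opcode value of disks=[2, 0] becomes disks=[(0, {}), (2, {})],
# i.e. a sorted list of (disk index, parameter overrides) pairs with empty
# overrides.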
7224 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7226 raise errors.OpPrereqError("Some disks have been specified more than"
7227 " once: %s" % utils.CommaJoin(duplicates),
7230 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7231 # when neither iallocator nor nodes are specified
7232 if self.op.iallocator or self.op.nodes:
7233 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7235 for (idx, params) in self.op.disks:
7236 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7237 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7239 raise errors.OpPrereqError("Parameters for disk %s try to change"
7240 " unmodifyable parameter(s): %s" %
7241 (idx, utils.CommaJoin(unsupported)),
7244 def ExpandNames(self):
7245 self._ExpandAndLockInstance()
7246 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7248 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7249 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7251 self.needed_locks[locking.LEVEL_NODE] = []
7252 if self.op.iallocator:
7253 # iallocator will select a new node in the same group
7254 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7255 self.needed_locks[locking.LEVEL_NODE_RES] = []
7257 def DeclareLocks(self, level):
7258 if level == locking.LEVEL_NODEGROUP:
7259 assert self.op.iallocator is not None
7260 assert not self.op.nodes
7261 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7262 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7263 # Lock the primary group used by the instance optimistically; this
7264 # requires going via the node before it's locked, requiring
7265 # verification later on
7266 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7267 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7269 elif level == locking.LEVEL_NODE:
7270 # If an allocator is used, then we lock all the nodes in the current
7271 # instance group, as we don't know yet which ones will be selected;
7272 # if we replace the nodes without using an allocator, locks are
7273 # already declared in ExpandNames; otherwise, we need to lock all the
7274 # instance nodes for disk re-creation
7275 if self.op.iallocator:
7276 assert not self.op.nodes
7277 assert not self.needed_locks[locking.LEVEL_NODE]
7278 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7280 # Lock member nodes of the group of the primary node
7281 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7282 self.needed_locks[locking.LEVEL_NODE].extend(
7283 self.cfg.GetNodeGroup(group_uuid).members)
7284 elif not self.op.nodes:
7285 self._LockInstancesNodes(primary_only=False)
7286 elif level == locking.LEVEL_NODE_RES:
7288 self.needed_locks[locking.LEVEL_NODE_RES] = \
7289 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7291 def BuildHooksEnv(self):
7294 This runs on master, primary and secondary nodes of the instance.
7297 return _BuildInstanceHookEnvByObject(self, self.instance)
7299 def BuildHooksNodes(self):
7300 """Build hooks nodes.
7303 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7306 def CheckPrereq(self):
7307 """Check prerequisites.
7309 This checks that the instance is in the cluster and is not running.
7312 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7313 assert instance is not None, \
7314 "Cannot retrieve locked instance %s" % self.op.instance_name
7316 if len(self.op.nodes) != len(instance.all_nodes):
7317 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7318 " %d replacement nodes were specified" %
7319 (instance.name, len(instance.all_nodes),
7320 len(self.op.nodes)),
7322 assert instance.disk_template != constants.DT_DRBD8 or \
7323 len(self.op.nodes) == 2
7324 assert instance.disk_template != constants.DT_PLAIN or \
7325 len(self.op.nodes) == 1
7326 primary_node = self.op.nodes[0]
7328 primary_node = instance.primary_node
7329 if not self.op.iallocator:
7330 _CheckNodeOnline(self, primary_node)
7332 if instance.disk_template == constants.DT_DISKLESS:
7333 raise errors.OpPrereqError("Instance '%s' has no disks" %
7334 self.op.instance_name, errors.ECODE_INVAL)
7336 # Verify if node group locks are still correct
7337 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7339 # Node group locks are acquired only for the primary node (and only
7340 # when the allocator is used)
7341 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7344 # if we replace nodes *and* the old primary is offline, we don't
7345 # check the instance state
7346 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7347 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7348 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7349 msg="cannot recreate disks")
7352 self.disks = dict(self.op.disks)
7354 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7356 maxidx = max(self.disks.keys())
7357 if maxidx >= len(instance.disks):
7358 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7361 if ((self.op.nodes or self.op.iallocator) and
7362 sorted(self.disks.keys()) != range(len(instance.disks))):
7363 raise errors.OpPrereqError("Can't recreate disks partially and"
7364 " change the nodes at the same time",
7367 self.instance = instance
7369 if self.op.iallocator:
7370 self._RunAllocator()
7371 # Release unneeded node and node resource locks
7372 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7373 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7375 def Exec(self, feedback_fn):
7376 """Recreate the disks.
7379 instance = self.instance
7381 assert (self.owned_locks(locking.LEVEL_NODE) ==
7382 self.owned_locks(locking.LEVEL_NODE_RES))
7385 mods = [] # keeps track of needed changes
7387 for idx, disk in enumerate(instance.disks):
7389 changes = self.disks[idx]
7391 # Disk should not be recreated
7395 # update secondaries for disks, if needed
7396 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7397 # need to update the nodes and minors
7398 assert len(self.op.nodes) == 2
7399 assert len(disk.logical_id) == 6 # otherwise disk internals
7401 (_, _, old_port, _, _, old_secret) = disk.logical_id
7402 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7403 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7404 new_minors[0], new_minors[1], old_secret)
7405 assert len(disk.logical_id) == len(new_id)
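# For reference (labels derived from the unpacking above, descriptive rather
# than authoritative): a DRBD8 logical_id is a 6-tuple roughly of the form
#   (node_a, node_b, port, minor_a, minor_b, shared_secret)
# and only the node names and minors are replaced here; the port and secret
# are carried over unchanged.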
7409 mods.append((idx, new_id, changes))
7411 # now that we have passed all asserts above, we can apply the mods
7412 # in a single run (to avoid partial changes)
7413 for idx, new_id, changes in mods:
7414 disk = instance.disks[idx]
7415 if new_id is not None:
7416 assert disk.dev_type == constants.LD_DRBD8
7417 disk.logical_id = new_id
7419 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7420 mode=changes.get(constants.IDISK_MODE, None))
7422 # change primary node, if needed
7424 instance.primary_node = self.op.nodes[0]
7425 self.LogWarning("Changing the instance's nodes, you will have to"
7426 " remove any disks left on the older nodes manually")
7429 self.cfg.Update(instance, feedback_fn)
7431 # All touched nodes must be locked
7432 mylocks = self.owned_locks(locking.LEVEL_NODE)
7433 assert mylocks.issuperset(frozenset(instance.all_nodes))
7434 _CreateDisks(self, instance, to_skip=to_skip)
7437 class LUInstanceRename(LogicalUnit):
7438 """Rename an instance.
7441 HPATH = "instance-rename"
7442 HTYPE = constants.HTYPE_INSTANCE
7444 def CheckArguments(self):
7448 if self.op.ip_check and not self.op.name_check:
7449 # TODO: make the ip check more flexible and not depend on the name check
7450 raise errors.OpPrereqError("IP address check requires a name check",
7453 def BuildHooksEnv(self):
7456 This runs on master, primary and secondary nodes of the instance.
7459 env = _BuildInstanceHookEnvByObject(self, self.instance)
7460 env["INSTANCE_NEW_NAME"] = self.op.new_name
7463 def BuildHooksNodes(self):
7464 """Build hooks nodes.
7467 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7470 def CheckPrereq(self):
7471 """Check prerequisites.
7473 This checks that the instance is in the cluster and is not running.
7476 self.op.instance_name = _ExpandInstanceName(self.cfg,
7477 self.op.instance_name)
7478 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7479 assert instance is not None
7480 _CheckNodeOnline(self, instance.primary_node)
7481 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7482 msg="cannot rename")
7483 self.instance = instance
7485 new_name = self.op.new_name
7486 if self.op.name_check:
7487 hostname = _CheckHostnameSane(self, new_name)
7488 new_name = self.op.new_name = hostname.name
7489 if (self.op.ip_check and
7490 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7491 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7492 (hostname.ip, new_name),
7493 errors.ECODE_NOTUNIQUE)
7495 instance_list = self.cfg.GetInstanceList()
7496 if new_name in instance_list and new_name != instance.name:
7497 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7498 new_name, errors.ECODE_EXISTS)
7500 def Exec(self, feedback_fn):
7501 """Rename the instance.
7504 inst = self.instance
7505 old_name = inst.name
7507 rename_file_storage = False
7508 if (inst.disk_template in constants.DTS_FILEBASED and
7509 self.op.new_name != inst.name):
7510 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7511 rename_file_storage = True
7513 self.cfg.RenameInstance(inst.name, self.op.new_name)
7514 # Change the instance lock. This is definitely safe while we hold the BGL.
7515 # Otherwise the new lock would have to be added in acquired mode.
7517 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7518 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7520 # re-read the instance from the configuration after rename
7521 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7523 if rename_file_storage:
7524 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7525 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7526 old_file_storage_dir,
7527 new_file_storage_dir)
7528 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7529 " (but the instance has been renamed in Ganeti)" %
7530 (inst.primary_node, old_file_storage_dir,
7531 new_file_storage_dir))
7533 _StartInstanceDisks(self, inst, None)
7534 # update info on disks
7535 info = _GetInstanceInfoText(inst)
7536 for (idx, disk) in enumerate(inst.disks):
7537 for node in inst.all_nodes:
7538 self.cfg.SetDiskID(disk, node)
7539 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7541 self.LogWarning("Error setting info on node %s for disk %s: %s",
7542 node, idx, result.fail_msg)
7544 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7545 old_name, self.op.debug_level)
7546 msg = result.fail_msg
7548 msg = ("Could not run OS rename script for instance %s on node %s"
7549 " (but the instance has been renamed in Ganeti): %s" %
7550 (inst.name, inst.primary_node, msg))
7551 self.proc.LogWarning(msg)
7553 _ShutdownInstanceDisks(self, inst)
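# For illustration: for file-based disk templates the logical_id is a
# (file_driver, path) pair (see _GenerateDiskTemplate below), so the storage
# directory renamed above is simply the dirname of the first disk's path.
# The driver and path values here are hypothetical.
#
#   logical_id = ("loop", "/srv/ganeti/file-storage/inst1/disk0")
#   os.path.dirname(logical_id[1])   # -> "/srv/ganeti/file-storage/inst1"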
7558 class LUInstanceRemove(LogicalUnit):
7559 """Remove an instance.
7562 HPATH = "instance-remove"
7563 HTYPE = constants.HTYPE_INSTANCE
7566 def ExpandNames(self):
7567 self._ExpandAndLockInstance()
7568 self.needed_locks[locking.LEVEL_NODE] = []
7569 self.needed_locks[locking.LEVEL_NODE_RES] = []
7570 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7572 def DeclareLocks(self, level):
7573 if level == locking.LEVEL_NODE:
7574 self._LockInstancesNodes()
7575 elif level == locking.LEVEL_NODE_RES:
7577 self.needed_locks[locking.LEVEL_NODE_RES] = \
7578 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7580 def BuildHooksEnv(self):
7583 This runs on master, primary and secondary nodes of the instance.
7586 env = _BuildInstanceHookEnvByObject(self, self.instance)
7587 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7590 def BuildHooksNodes(self):
7591 """Build hooks nodes.
7594 nl = [self.cfg.GetMasterNode()]
7595 nl_post = list(self.instance.all_nodes) + nl
7596 return (nl, nl_post)
7598 def CheckPrereq(self):
7599 """Check prerequisites.
7601 This checks that the instance is in the cluster.
7604 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7605 assert self.instance is not None, \
7606 "Cannot retrieve locked instance %s" % self.op.instance_name
7608 def Exec(self, feedback_fn):
7609 """Remove the instance.
7612 instance = self.instance
7613 logging.info("Shutting down instance %s on node %s",
7614 instance.name, instance.primary_node)
7616 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7617 self.op.shutdown_timeout)
7618 msg = result.fail_msg
7620 if self.op.ignore_failures:
7621 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7623 raise errors.OpExecError("Could not shutdown instance %s on"
7625 (instance.name, instance.primary_node, msg))
7627 assert (self.owned_locks(locking.LEVEL_NODE) ==
7628 self.owned_locks(locking.LEVEL_NODE_RES))
7629 assert not (set(instance.all_nodes) -
7630 self.owned_locks(locking.LEVEL_NODE)), \
7631 "Not owning correct locks"
7633 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7636 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7637 """Utility function to remove an instance.
7640 logging.info("Removing block devices for instance %s", instance.name)
7642 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7643 if not ignore_failures:
7644 raise errors.OpExecError("Can't remove instance's disks")
7645 feedback_fn("Warning: can't remove instance's disks")
7647 logging.info("Removing instance %s out of cluster config", instance.name)
7649 lu.cfg.RemoveInstance(instance.name)
7651 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7652 "Instance lock removal conflict"
7654 # Remove lock for the instance
7655 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7658 class LUInstanceQuery(NoHooksLU):
7659 """Logical unit for querying instances.
7662 # pylint: disable=W0142
7665 def CheckArguments(self):
7666 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7667 self.op.output_fields, self.op.use_locking)
7669 def ExpandNames(self):
7670 self.iq.ExpandNames(self)
7672 def DeclareLocks(self, level):
7673 self.iq.DeclareLocks(self, level)
7675 def Exec(self, feedback_fn):
7676 return self.iq.OldStyleQuery(self)
7679 class LUInstanceFailover(LogicalUnit):
7680 """Failover an instance.
7683 HPATH = "instance-failover"
7684 HTYPE = constants.HTYPE_INSTANCE
7687 def CheckArguments(self):
7688 """Check the arguments.
7691 self.iallocator = getattr(self.op, "iallocator", None)
7692 self.target_node = getattr(self.op, "target_node", None)
7694 def ExpandNames(self):
7695 self._ExpandAndLockInstance()
7697 if self.op.target_node is not None:
7698 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7700 self.needed_locks[locking.LEVEL_NODE] = []
7701 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7703 self.needed_locks[locking.LEVEL_NODE_RES] = []
7704 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7706 ignore_consistency = self.op.ignore_consistency
7707 shutdown_timeout = self.op.shutdown_timeout
7708 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7711 ignore_consistency=ignore_consistency,
7712 shutdown_timeout=shutdown_timeout,
7713 ignore_ipolicy=self.op.ignore_ipolicy)
7714 self.tasklets = [self._migrater]
7716 def DeclareLocks(self, level):
7717 if level == locking.LEVEL_NODE:
7718 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7719 if instance.disk_template in constants.DTS_EXT_MIRROR:
7720 if self.op.target_node is None:
7721 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7723 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7724 self.op.target_node]
7725 del self.recalculate_locks[locking.LEVEL_NODE]
7727 self._LockInstancesNodes()
7728 elif level == locking.LEVEL_NODE_RES:
7730 self.needed_locks[locking.LEVEL_NODE_RES] = \
7731 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7733 def BuildHooksEnv(self):
7736 This runs on master, primary and secondary nodes of the instance.
7739 instance = self._migrater.instance
7740 source_node = instance.primary_node
7741 target_node = self.op.target_node
7743 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7744 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7745 "OLD_PRIMARY": source_node,
7746 "NEW_PRIMARY": target_node,
7749 if instance.disk_template in constants.DTS_INT_MIRROR:
7750 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7751 env["NEW_SECONDARY"] = source_node
7753 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7755 env.update(_BuildInstanceHookEnvByObject(self, instance))
7759 def BuildHooksNodes(self):
7760 """Build hooks nodes.
7763 instance = self._migrater.instance
7764 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7765 return (nl, nl + [instance.primary_node])
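# For illustration, the failover-specific hook environment built above for a
# hypothetical DRBD instance failing over from node1 (primary) to node2
# (its secondary); the exact values are examples only:
#
#   {
#     "IGNORE_CONSISTENCY": False,
#     "SHUTDOWN_TIMEOUT": 120,
#     "OLD_PRIMARY": "node1",
#     "NEW_PRIMARY": "node2",
#     "OLD_SECONDARY": "node2",   # the current secondary
#     "NEW_SECONDARY": "node1",   # the old primary becomes the secondary
#   }
#
# For externally mirrored templates both *_SECONDARY values are empty strings.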
7768 class LUInstanceMigrate(LogicalUnit):
7769 """Migrate an instance.
7771 This is migration without shutting down, compared to the failover,
7772 which is done with shutdown.
7775 HPATH = "instance-migrate"
7776 HTYPE = constants.HTYPE_INSTANCE
7779 def ExpandNames(self):
7780 self._ExpandAndLockInstance()
7782 if self.op.target_node is not None:
7783 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7785 self.needed_locks[locking.LEVEL_NODE] = []
7786 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7788 self.needed_locks[locking.LEVEL_NODE] = []
7789 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7792 TLMigrateInstance(self, self.op.instance_name,
7793 cleanup=self.op.cleanup,
7795 fallback=self.op.allow_failover,
7796 allow_runtime_changes=self.op.allow_runtime_changes,
7797 ignore_ipolicy=self.op.ignore_ipolicy)
7798 self.tasklets = [self._migrater]
7800 def DeclareLocks(self, level):
7801 if level == locking.LEVEL_NODE:
7802 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7803 if instance.disk_template in constants.DTS_EXT_MIRROR:
7804 if self.op.target_node is None:
7805 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7807 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7808 self.op.target_node]
7809 del self.recalculate_locks[locking.LEVEL_NODE]
7811 self._LockInstancesNodes()
7812 elif level == locking.LEVEL_NODE_RES:
7814 self.needed_locks[locking.LEVEL_NODE_RES] = \
7815 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7817 def BuildHooksEnv(self):
7820 This runs on master, primary and secondary nodes of the instance.
7823 instance = self._migrater.instance
7824 source_node = instance.primary_node
7825 target_node = self.op.target_node
7826 env = _BuildInstanceHookEnvByObject(self, instance)
7828 "MIGRATE_LIVE": self._migrater.live,
7829 "MIGRATE_CLEANUP": self.op.cleanup,
7830 "OLD_PRIMARY": source_node,
7831 "NEW_PRIMARY": target_node,
7832 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7835 if instance.disk_template in constants.DTS_INT_MIRROR:
7836 env["OLD_SECONDARY"] = target_node
7837 env["NEW_SECONDARY"] = source_node
7839 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7843 def BuildHooksNodes(self):
7844 """Build hooks nodes.
7847 instance = self._migrater.instance
7848 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7849 return (nl, nl + [instance.primary_node])
7852 class LUInstanceMove(LogicalUnit):
7853 """Move an instance by data-copying.
7856 HPATH = "instance-move"
7857 HTYPE = constants.HTYPE_INSTANCE
7860 def ExpandNames(self):
7861 self._ExpandAndLockInstance()
7862 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7863 self.op.target_node = target_node
7864 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7865 self.needed_locks[locking.LEVEL_NODE_RES] = []
7866 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7868 def DeclareLocks(self, level):
7869 if level == locking.LEVEL_NODE:
7870 self._LockInstancesNodes(primary_only=True)
7871 elif level == locking.LEVEL_NODE_RES:
7873 self.needed_locks[locking.LEVEL_NODE_RES] = \
7874 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7876 def BuildHooksEnv(self):
7879 This runs on master, primary and secondary nodes of the instance.
7883 "TARGET_NODE": self.op.target_node,
7884 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7886 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7889 def BuildHooksNodes(self):
7890 """Build hooks nodes.
7894 self.cfg.GetMasterNode(),
7895 self.instance.primary_node,
7896 self.op.target_node,
7900 def CheckPrereq(self):
7901 """Check prerequisites.
7903 This checks that the instance is in the cluster.
7906 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7907 assert self.instance is not None, \
7908 "Cannot retrieve locked instance %s" % self.op.instance_name
7910 node = self.cfg.GetNodeInfo(self.op.target_node)
7911 assert node is not None, \
7912 "Cannot retrieve locked node %s" % self.op.target_node
7914 self.target_node = target_node = node.name
7916 if target_node == instance.primary_node:
7917 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7918 (instance.name, target_node),
7921 bep = self.cfg.GetClusterInfo().FillBE(instance)
7923 for idx, dsk in enumerate(instance.disks):
7924 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7925 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7926 " cannot copy" % idx, errors.ECODE_STATE)
7928 _CheckNodeOnline(self, target_node)
7929 _CheckNodeNotDrained(self, target_node)
7930 _CheckNodeVmCapable(self, target_node)
7931 cluster = self.cfg.GetClusterInfo()
7932 group_info = self.cfg.GetNodeGroup(node.group)
7933 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
7934 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7935 ignore=self.op.ignore_ipolicy)
7937 if instance.admin_state == constants.ADMINST_UP:
7938 # check memory requirements on the secondary node
7939 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7940 instance.name, bep[constants.BE_MAXMEM],
7941 instance.hypervisor)
7943 self.LogInfo("Not checking memory on the secondary node as"
7944 " instance will not be started")
7946 # check bridge existence
7947 _CheckInstanceBridgesExist(self, instance, node=target_node)
7949 def Exec(self, feedback_fn):
7950 """Move an instance.
7952 The move is done by shutting it down on its present node, copying
7953 the data over (slow) and starting it on the new node.
7956 instance = self.instance
7958 source_node = instance.primary_node
7959 target_node = self.target_node
7961 self.LogInfo("Shutting down instance %s on source node %s",
7962 instance.name, source_node)
7964 assert (self.owned_locks(locking.LEVEL_NODE) ==
7965 self.owned_locks(locking.LEVEL_NODE_RES))
7967 result = self.rpc.call_instance_shutdown(source_node, instance,
7968 self.op.shutdown_timeout)
7969 msg = result.fail_msg
7971 if self.op.ignore_consistency:
7972 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7973 " Proceeding anyway. Please make sure node"
7974 " %s is down. Error details: %s",
7975 instance.name, source_node, source_node, msg)
7977 raise errors.OpExecError("Could not shutdown instance %s on"
7979 (instance.name, source_node, msg))
7981 # create the target disks
7983 _CreateDisks(self, instance, target_node=target_node)
7984 except errors.OpExecError:
7985 self.LogWarning("Device creation failed, reverting...")
7987 _RemoveDisks(self, instance, target_node=target_node)
7989 self.cfg.ReleaseDRBDMinors(instance.name)
7992 cluster_name = self.cfg.GetClusterInfo().cluster_name
7995 # activate, get path, copy the data over
7996 for idx, disk in enumerate(instance.disks):
7997 self.LogInfo("Copying data for disk %d", idx)
7998 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7999 instance.name, True, idx)
8001 self.LogWarning("Can't assemble newly created disk %d: %s",
8002 idx, result.fail_msg)
8003 errs.append(result.fail_msg)
8005 dev_path = result.payload
8006 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8007 target_node, dev_path,
8010 self.LogWarning("Can't copy data over for disk %d: %s",
8011 idx, result.fail_msg)
8012 errs.append(result.fail_msg)
8016 self.LogWarning("Some disks failed to copy, aborting")
8018 _RemoveDisks(self, instance, target_node=target_node)
8020 self.cfg.ReleaseDRBDMinors(instance.name)
8021 raise errors.OpExecError("Errors during disk copy: %s" %
8024 instance.primary_node = target_node
8025 self.cfg.Update(instance, feedback_fn)
8027 self.LogInfo("Removing the disks on the original node")
8028 _RemoveDisks(self, instance, target_node=source_node)
8030 # Only start the instance if it's marked as up
8031 if instance.admin_state == constants.ADMINST_UP:
8032 self.LogInfo("Starting instance %s on node %s",
8033 instance.name, target_node)
8035 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8036 ignore_secondaries=True)
8038 _ShutdownInstanceDisks(self, instance)
8039 raise errors.OpExecError("Can't activate the instance's disks")
8041 result = self.rpc.call_instance_start(target_node,
8042 (instance, None, None), False)
8043 msg = result.fail_msg
8045 _ShutdownInstanceDisks(self, instance)
8046 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8047 (instance.name, target_node, msg))
8050 class LUNodeMigrate(LogicalUnit):
8051 """Migrate all instances from a node.
8054 HPATH = "node-migrate"
8055 HTYPE = constants.HTYPE_NODE
8058 def CheckArguments(self):
8061 def ExpandNames(self):
8062 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8064 self.share_locks = _ShareAll()
8065 self.needed_locks = {
8066 locking.LEVEL_NODE: [self.op.node_name],
8069 def BuildHooksEnv(self):
8072 This runs on the master, the primary and all the secondaries.
8076 "NODE_NAME": self.op.node_name,
8077 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8080 def BuildHooksNodes(self):
8081 """Build hooks nodes.
8084 nl = [self.cfg.GetMasterNode()]
8087 def CheckPrereq(self):
8090 def Exec(self, feedback_fn):
8091 # Prepare jobs for migration instances
8092 allow_runtime_changes = self.op.allow_runtime_changes
8094 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8097 iallocator=self.op.iallocator,
8098 target_node=self.op.target_node,
8099 allow_runtime_changes=allow_runtime_changes,
8100 ignore_ipolicy=self.op.ignore_ipolicy)]
8101 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
8104 # TODO: Run iallocator in this opcode and pass correct placement options to
8105 # OpInstanceMigrate. Since other jobs can modify the cluster between
8106 # running the iallocator and the actual migration, a good consistency model
8107 # will have to be found.
8109 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8110 frozenset([self.op.node_name]))
8112 return ResultWithJobs(jobs)
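# For illustration, the job list built above contains one single-opcode job
# per primary instance of the node being migrated (instance names below are
# hypothetical):
#
#   jobs = [
#     [opcodes.OpInstanceMigrate(instance_name="inst1.example.com", ...)],
#     [opcodes.OpInstanceMigrate(instance_name="inst2.example.com", ...)],
#   ]
#   return ResultWithJobs(jobs)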
8115 class TLMigrateInstance(Tasklet):
8116 """Tasklet class for instance migration.
8119 @ivar live: whether the migration will be done live or non-live;
8120 this variable is initialized only after CheckPrereq has run
8121 @type cleanup: boolean
8122 @ivar cleanup: Whether we clean up after a failed migration
8123 @type iallocator: string
8124 @ivar iallocator: The iallocator used to determine target_node
8125 @type target_node: string
8126 @ivar target_node: If given, the target_node to reallocate the instance to
8127 @type failover: boolean
8128 @ivar failover: Whether operation results in failover or migration
8129 @type fallback: boolean
8130 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
8132 @type ignore_consistency: boolean
8133 @ivar ignore_consistency: Whether we should ignore consistency between source and target node
8135 @type shutdown_timeout: int
8136 @ivar shutdown_timeout: In case of failover, the timeout to use for the shutdown
8137 @type ignore_ipolicy: bool
8138 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8143 _MIGRATION_POLL_INTERVAL = 1 # seconds
8144 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8146 def __init__(self, lu, instance_name, cleanup=False,
8147 failover=False, fallback=False,
8148 ignore_consistency=False,
8149 allow_runtime_changes=True,
8150 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
8151 ignore_ipolicy=False):
8152 """Initializes this class.
8155 Tasklet.__init__(self, lu)
8158 self.instance_name = instance_name
8159 self.cleanup = cleanup
8160 self.live = False # will be overridden later
8161 self.failover = failover
8162 self.fallback = fallback
8163 self.ignore_consistency = ignore_consistency
8164 self.shutdown_timeout = shutdown_timeout
8165 self.ignore_ipolicy = ignore_ipolicy
8166 self.allow_runtime_changes = allow_runtime_changes
8168 def CheckPrereq(self):
8169 """Check prerequisites.
8171 This checks that the instance is in the cluster.
8174 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8175 instance = self.cfg.GetInstanceInfo(instance_name)
8176 assert instance is not None
8177 self.instance = instance
8178 cluster = self.cfg.GetClusterInfo()
8180 if (not self.cleanup and
8181 not instance.admin_state == constants.ADMINST_UP and
8182 not self.failover and self.fallback):
8183 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8184 " switching to failover")
8185 self.failover = True
8187 if instance.disk_template not in constants.DTS_MIRRORED:
8192 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8193 " %s" % (instance.disk_template, text),
8196 if instance.disk_template in constants.DTS_EXT_MIRROR:
8197 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8199 if self.lu.op.iallocator:
8200 self._RunAllocator()
8202 # We set self.target_node as it is required by
8204 self.target_node = self.lu.op.target_node
8206 # Check that the target node is correct in terms of instance policy
8207 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8208 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8209 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8211 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8212 ignore=self.ignore_ipolicy)
8214 # self.target_node is already populated, either directly or by the
8216 target_node = self.target_node
8217 if self.target_node == instance.primary_node:
8218 raise errors.OpPrereqError("Cannot migrate instance %s"
8219 " to its primary (%s)" %
8220 (instance.name, instance.primary_node),
8223 if len(self.lu.tasklets) == 1:
8224 # It is safe to release locks only when we're the only tasklet
8226 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8227 keep=[instance.primary_node, self.target_node])
8230 secondary_nodes = instance.secondary_nodes
8231 if not secondary_nodes:
8232 raise errors.ConfigurationError("No secondary node but using"
8233 " %s disk template" %
8234 instance.disk_template)
8235 target_node = secondary_nodes[0]
8236 if self.lu.op.iallocator or (self.lu.op.target_node and
8237 self.lu.op.target_node != target_node):
8239 text = "failed over"
8242 raise errors.OpPrereqError("Instances with disk template %s cannot"
8243 " be %s to arbitrary nodes"
8244 " (neither an iallocator nor a target"
8245 " node can be passed)" %
8246 (instance.disk_template, text),
8248 nodeinfo = self.cfg.GetNodeInfo(target_node)
8249 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8250 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8252 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8253 ignore=self.ignore_ipolicy)
8255 i_be = cluster.FillBE(instance)
8257 # check memory requirements on the secondary node
8258 if (not self.cleanup and
8259 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8260 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8261 "migrating instance %s" %
8263 i_be[constants.BE_MINMEM],
8264 instance.hypervisor)
8266 self.lu.LogInfo("Not checking memory on the secondary node as"
8267 " instance will not be started")
8269 # check if failover must be forced instead of migration
8270 if (not self.cleanup and not self.failover and
8271 i_be[constants.BE_ALWAYS_FAILOVER]):
8272 self.lu.LogInfo("Instance configured to always failover; fallback"
8274 self.failover = True
8276 # check bridge existence
8277 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8279 if not self.cleanup:
8280 _CheckNodeNotDrained(self.lu, target_node)
8281 if not self.failover:
8282 result = self.rpc.call_instance_migratable(instance.primary_node,
8284 if result.fail_msg and self.fallback:
8285 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8287 self.failover = True
8289 result.Raise("Can't migrate, please use failover",
8290 prereq=True, ecode=errors.ECODE_STATE)
8292 assert not (self.failover and self.cleanup)
8294 if not self.failover:
8295 if self.lu.op.live is not None and self.lu.op.mode is not None:
8296 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8297 " parameters are accepted",
8299 if self.lu.op.live is not None:
8301 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8303 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8304 # reset the 'live' parameter to None so that repeated
8305 # invocations of CheckPrereq do not raise an exception
8306 self.lu.op.live = None
8307 elif self.lu.op.mode is None:
8308 # read the default value from the hypervisor
8309 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8310 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8312 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8314 # Failover is never live
8317 if not (self.failover or self.cleanup):
8318 remote_info = self.rpc.call_instance_info(instance.primary_node,
8320 instance.hypervisor)
8321 remote_info.Raise("Error checking instance on node %s" %
8322 instance.primary_node)
8323 instance_running = bool(remote_info.payload)
8324 if instance_running:
8325 self.current_mem = int(remote_info.payload["memory"])
8327 def _RunAllocator(self):
8328 """Run the allocator based on input opcode.
8331 # FIXME: add a self.ignore_ipolicy option
8332 req = iallocator.IAReqRelocate(name=self.instance_name,
8333 relocate_from=[self.instance.primary_node])
8334 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8336 ial.Run(self.lu.op.iallocator)
8339 raise errors.OpPrereqError("Can't compute nodes using"
8340 " iallocator '%s': %s" %
8341 (self.lu.op.iallocator, ial.info),
8343 self.target_node = ial.result[0]
8344 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8345 self.instance_name, self.lu.op.iallocator,
8346 utils.CommaJoin(ial.result))
8348 def _WaitUntilSync(self):
8349 """Poll with custom rpc for disk sync.
8351 This uses our own step-based rpc call.
8354 self.feedback_fn("* wait until resync is done")
8358 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8360 (self.instance.disks,
8363 for node, nres in result.items():
8364 nres.Raise("Cannot resync disks on node %s" % node)
8365 node_done, node_percent = nres.payload
8366 all_done = all_done and node_done
8367 if node_percent is not None:
8368 min_percent = min(min_percent, node_percent)
8370 if min_percent < 100:
8371 self.feedback_fn(" - progress: %.1f%%" % min_percent)
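# For illustration, the aggregation done in _WaitUntilSync above: the wait
# ends only when every node reports its disks as done, and the progress shown
# to the user is the minimum per-node percentage (payload values hypothetical):
#
#   payloads = {"node1": (False, 87.5), "node2": (True, 100.0)}
#   all_done = all(done for (done, _) in payloads.values())           # False
#   min_percent = min(pct for (_, pct) in payloads.values()
#                     if pct is not None)                             # 87.5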
8374 def _EnsureSecondary(self, node):
8375 """Demote a node to secondary.
8378 self.feedback_fn("* switching node %s to secondary mode" % node)
8380 for dev in self.instance.disks:
8381 self.cfg.SetDiskID(dev, node)
8383 result = self.rpc.call_blockdev_close(node, self.instance.name,
8384 self.instance.disks)
8385 result.Raise("Cannot change disk to secondary on node %s" % node)
8387 def _GoStandalone(self):
8388 """Disconnect from the network.
8391 self.feedback_fn("* changing into standalone mode")
8392 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8393 self.instance.disks)
8394 for node, nres in result.items():
8395 nres.Raise("Cannot disconnect disks on node %s" % node)
8397 def _GoReconnect(self, multimaster):
8398 """Reconnect to the network.
8404 msg = "single-master"
8405 self.feedback_fn("* changing disks into %s mode" % msg)
8406 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8407 (self.instance.disks, self.instance),
8408 self.instance.name, multimaster)
8409 for node, nres in result.items():
8410 nres.Raise("Cannot change disks config on node %s" % node)
8412 def _ExecCleanup(self):
8413 """Try to cleanup after a failed migration.
8415 The cleanup is done by:
8416 - check that the instance is running only on one node
8417 (and update the config if needed)
8418 - change disks on its secondary node to secondary
8419 - wait until disks are fully synchronized
8420 - disconnect from the network
8421 - change disks into single-master mode
8422 - wait again until disks are fully synchronized
8425 instance = self.instance
8426 target_node = self.target_node
8427 source_node = self.source_node
8429 # check running on only one node
8430 self.feedback_fn("* checking where the instance actually runs"
8431 " (if this hangs, the hypervisor might be in"
8433 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8434 for node, result in ins_l.items():
8435 result.Raise("Can't contact node %s" % node)
8437 runningon_source = instance.name in ins_l[source_node].payload
8438 runningon_target = instance.name in ins_l[target_node].payload
8440 if runningon_source and runningon_target:
8441 raise errors.OpExecError("Instance seems to be running on two nodes,"
8442 " or the hypervisor is confused; you will have"
8443 " to ensure manually that it runs only on one"
8444 " and restart this operation")
8446 if not (runningon_source or runningon_target):
8447 raise errors.OpExecError("Instance does not seem to be running at all;"
8448 " in this case it's safer to repair by"
8449 " running 'gnt-instance stop' to ensure disk"
8450 " shutdown, and then restarting it")
8452 if runningon_target:
8453 # the migration has actually succeeded, we need to update the config
8454 self.feedback_fn("* instance running on secondary node (%s),"
8455 " updating config" % target_node)
8456 instance.primary_node = target_node
8457 self.cfg.Update(instance, self.feedback_fn)
8458 demoted_node = source_node
8460 self.feedback_fn("* instance confirmed to be running on its"
8461 " primary node (%s)" % source_node)
8462 demoted_node = target_node
8464 if instance.disk_template in constants.DTS_INT_MIRROR:
8465 self._EnsureSecondary(demoted_node)
8467 self._WaitUntilSync()
8468 except errors.OpExecError:
8469 # we ignore here errors, since if the device is standalone, it
8470 # won't be able to sync
8472 self._GoStandalone()
8473 self._GoReconnect(False)
8474 self._WaitUntilSync()
8476 self.feedback_fn("* done")
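# For illustration, the four situations _ExecCleanup distinguishes above,
# based on where the instance is actually found running:
#
#   on source | on target | action
#   ----------+-----------+-------------------------------------------------
#   yes       | yes       | raise an error; manual intervention required
#   no        | no        | raise an error; stop and restart the instance
#   no        | yes       | migration did succeed: make the target the new
#             |           | primary and demote the source node
#   yes       | no        | config already correct: demote the target node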
8478 def _RevertDiskStatus(self):
8479 """Try to revert the disk status after a failed migration.
8482 target_node = self.target_node
8483 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8487 self._EnsureSecondary(target_node)
8488 self._GoStandalone()
8489 self._GoReconnect(False)
8490 self._WaitUntilSync()
8491 except errors.OpExecError, err:
8492 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8493 " please try to recover the instance manually;"
8494 " error '%s'" % str(err))
8496 def _AbortMigration(self):
8497 """Call the hypervisor code to abort a started migration.
8500 instance = self.instance
8501 target_node = self.target_node
8502 source_node = self.source_node
8503 migration_info = self.migration_info
8505 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8509 abort_msg = abort_result.fail_msg
8511 logging.error("Aborting migration failed on target node %s: %s",
8512 target_node, abort_msg)
8513 # Don't raise an exception here, as we still have to try to revert the
8514 # disk status, even if this step failed.
8516 abort_result = self.rpc.call_instance_finalize_migration_src(
8517 source_node, instance, False, self.live)
8518 abort_msg = abort_result.fail_msg
8520 logging.error("Aborting migration failed on source node %s: %s",
8521 source_node, abort_msg)
8523 def _ExecMigration(self):
8524 """Migrate an instance.
8526 The migrate is done by:
8527 - change the disks into dual-master mode
8528 - wait until disks are fully synchronized again
8529 - migrate the instance
8530 - change disks on the new secondary node (the old primary) to secondary
8531 - wait until disks are fully synchronized
8532 - change disks into single-master mode
8535 instance = self.instance
8536 target_node = self.target_node
8537 source_node = self.source_node
8539 # Check for hypervisor version mismatch and warn the user.
8540 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8541 None, [self.instance.hypervisor])
8542 for ninfo in nodeinfo.values():
8543 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8545 (_, _, (src_info, )) = nodeinfo[source_node].payload
8546 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8548 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8549 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8550 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8551 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8552 if src_version != dst_version:
8553 self.feedback_fn("* warning: hypervisor version mismatch between"
8554 " source (%s) and target (%s) node" %
8555 (src_version, dst_version))
8557 self.feedback_fn("* checking disk consistency between source and target")
8558 for (idx, dev) in enumerate(instance.disks):
8559 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8560 raise errors.OpExecError("Disk %s is degraded or not fully"
8561 " synchronized on target node,"
8562 " aborting migration" % idx)
8564 if self.current_mem > self.tgt_free_mem:
8565 if not self.allow_runtime_changes:
8566 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8567 " free memory to fit instance %s on target"
8568 " node %s (have %dMB, need %dMB)" %
8569 (instance.name, target_node,
8570 self.tgt_free_mem, self.current_mem))
8571 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8572 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8575 rpcres.Raise("Cannot modify instance runtime memory")
8577 # First get the migration information from the remote node
8578 result = self.rpc.call_migration_info(source_node, instance)
8579 msg = result.fail_msg
8581 log_err = ("Failed fetching source migration information from %s: %s" %
8583 logging.error(log_err)
8584 raise errors.OpExecError(log_err)
8586 self.migration_info = migration_info = result.payload
8588 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8589 # Then switch the disks to master/master mode
8590 self._EnsureSecondary(target_node)
8591 self._GoStandalone()
8592 self._GoReconnect(True)
8593 self._WaitUntilSync()
8595 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8596 result = self.rpc.call_accept_instance(target_node,
8599 self.nodes_ip[target_node])
8601 msg = result.fail_msg
8603 logging.error("Instance pre-migration failed, trying to revert"
8604 " disk status: %s", msg)
8605 self.feedback_fn("Pre-migration failed, aborting")
8606 self._AbortMigration()
8607 self._RevertDiskStatus()
8608 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8609 (instance.name, msg))
8611 self.feedback_fn("* migrating instance to %s" % target_node)
8612 result = self.rpc.call_instance_migrate(source_node, instance,
8613 self.nodes_ip[target_node],
8615 msg = result.fail_msg
8617 logging.error("Instance migration failed, trying to revert"
8618 " disk status: %s", msg)
8619 self.feedback_fn("Migration failed, aborting")
8620 self._AbortMigration()
8621 self._RevertDiskStatus()
8622 raise errors.OpExecError("Could not migrate instance %s: %s" %
8623 (instance.name, msg))
8625 self.feedback_fn("* starting memory transfer")
8626 last_feedback = time.time()
8628 result = self.rpc.call_instance_get_migration_status(source_node,
8630 msg = result.fail_msg
8631 ms = result.payload # MigrationStatus instance
8632 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8633 logging.error("Instance migration failed, trying to revert"
8634 " disk status: %s", msg)
8635 self.feedback_fn("Migration failed, aborting")
8636 self._AbortMigration()
8637 self._RevertDiskStatus()
8639 msg = "hypervisor returned failure"
8640 raise errors.OpExecError("Could not migrate instance %s: %s" %
8641 (instance.name, msg))
8643 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8644 self.feedback_fn("* memory transfer complete")
8647 if (utils.TimeoutExpired(last_feedback,
8648 self._MIGRATION_FEEDBACK_INTERVAL) and
8649 ms.transferred_ram is not None):
8650 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8651 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8652 last_feedback = time.time()
8654 time.sleep(self._MIGRATION_POLL_INTERVAL)
8656 result = self.rpc.call_instance_finalize_migration_src(source_node,
8660 msg = result.fail_msg
8662 logging.error("Instance migration succeeded, but finalization failed"
8663 " on the source node: %s", msg)
8664 raise errors.OpExecError("Could not finalize instance migration: %s" %
8667 instance.primary_node = target_node
8669 # distribute new instance config to the other nodes
8670 self.cfg.Update(instance, self.feedback_fn)
8672 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8676 msg = result.fail_msg
8678 logging.error("Instance migration succeeded, but finalization failed"
8679 " on the target node: %s", msg)
8680 raise errors.OpExecError("Could not finalize instance migration: %s" %
8683 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8684 self._EnsureSecondary(source_node)
8685 self._WaitUntilSync()
8686 self._GoStandalone()
8687 self._GoReconnect(False)
8688 self._WaitUntilSync()
8690 # If the instance's disk template is `rbd' and there was a successful
8691 # migration, unmap the device from the source node.
8692 if self.instance.disk_template == constants.DT_RBD:
8693 disks = _ExpandCheckDisks(instance, instance.disks)
8694 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8696 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8697 msg = result.fail_msg
8699 logging.error("Migration was successful, but couldn't unmap the"
8700 " block device %s on source node %s: %s",
8701 disk.iv_name, source_node, msg)
8702 logging.error("You need to unmap the device %s manually on %s",
8703 disk.iv_name, source_node)
8705 self.feedback_fn("* done")
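# For illustration, the shape of the memory-transfer loop above: the
# migration status is polled every _MIGRATION_POLL_INTERVAL seconds, progress
# is reported at most every _MIGRATION_FEEDBACK_INTERVAL seconds, and the
# loop ends once the status is no longer HV_MIGRATION_ACTIVE.  The reported
# percentage is computed as (values hypothetical):
#
#   mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
#   # e.g. transferred_ram=1536, total_ram=2048  ->  75.00 %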
8707 def _ExecFailover(self):
8708 """Failover an instance.
8710 The failover is done by shutting it down on its present node and
8711 starting it on the secondary.
8714 instance = self.instance
8715 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8717 source_node = instance.primary_node
8718 target_node = self.target_node
8720 if instance.admin_state == constants.ADMINST_UP:
8721 self.feedback_fn("* checking disk consistency between source and target")
8722 for (idx, dev) in enumerate(instance.disks):
8723 # for drbd, these are drbd over lvm
8724 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8726 if primary_node.offline:
8727 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8729 (primary_node.name, idx, target_node))
8730 elif not self.ignore_consistency:
8731 raise errors.OpExecError("Disk %s is degraded on target node,"
8732 " aborting failover" % idx)
8734 self.feedback_fn("* not checking disk consistency as instance is not"
8737 self.feedback_fn("* shutting down instance on source node")
8738 logging.info("Shutting down instance %s on node %s",
8739 instance.name, source_node)
8741 result = self.rpc.call_instance_shutdown(source_node, instance,
8742 self.shutdown_timeout)
8743 msg = result.fail_msg
8745 if self.ignore_consistency or primary_node.offline:
8746 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8747 " proceeding anyway; please make sure node"
8748 " %s is down; error details: %s",
8749 instance.name, source_node, source_node, msg)
8751 raise errors.OpExecError("Could not shutdown instance %s on"
8753 (instance.name, source_node, msg))
8755 self.feedback_fn("* deactivating the instance's disks on source node")
8756 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8757 raise errors.OpExecError("Can't shut down the instance's disks")
8759 instance.primary_node = target_node
8760 # distribute new instance config to the other nodes
8761 self.cfg.Update(instance, self.feedback_fn)
8763 # Only start the instance if it's marked as up
8764 if instance.admin_state == constants.ADMINST_UP:
8765 self.feedback_fn("* activating the instance's disks on target node %s" %
8767 logging.info("Starting instance %s on node %s",
8768 instance.name, target_node)
8770 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8771 ignore_secondaries=True)
8773 _ShutdownInstanceDisks(self.lu, instance)
8774 raise errors.OpExecError("Can't activate the instance's disks")
8776 self.feedback_fn("* starting the instance on the target node %s" %
8778 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8780 msg = result.fail_msg
8782 _ShutdownInstanceDisks(self.lu, instance)
8783 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8784 (instance.name, target_node, msg))
8786 def Exec(self, feedback_fn):
8787 """Perform the migration.
8790 self.feedback_fn = feedback_fn
8791 self.source_node = self.instance.primary_node
8793 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8794 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8795 self.target_node = self.instance.secondary_nodes[0]
8796 # Otherwise self.target_node has been populated either
8797 # directly, or through an iallocator.
8799 self.all_nodes = [self.source_node, self.target_node]
8800 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8801 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8804 feedback_fn("Failover instance %s" % self.instance.name)
8805 self._ExecFailover()
8807 feedback_fn("Migrating instance %s" % self.instance.name)
8810 return self._ExecCleanup()
8812 return self._ExecMigration()
8815 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8817 """Wrapper around L{_CreateBlockDevInner}.
8819 This method annotates the root device first.
8822 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8823 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8827 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8829 """Create a tree of block devices on a given node.
8831 If this device type has to be created on secondaries, create it and
8834 If not, just recurse to children keeping the same 'force' value.
8836 @attention: The device has to be annotated already.
8838 @param lu: the lu on whose behalf we execute
8839 @param node: the node on which to create the device
8840 @type instance: L{objects.Instance}
8841 @param instance: the instance which owns the device
8842 @type device: L{objects.Disk}
8843 @param device: the device to create
8844 @type force_create: boolean
8845 @param force_create: whether to force creation of this device; this
8846 will be changed to True whenever we find a device whose
8847 CreateOnSecondary() method returns True
8848 @param info: the extra 'metadata' we should attach to the device
8849 (this will be represented as a LVM tag)
8850 @type force_open: boolean
8851 @param force_open: this parameter will be passed to the
8852 L{backend.BlockdevCreate} function where it specifies
8853 whether we run on primary or not, and it affects both
8854 the child assembly and the device's own Open() execution
8857 if device.CreateOnSecondary():
8861 for child in device.children:
8862 _CreateBlockDevInner(lu, node, instance, child, force_create,
8865 if not force_create:
8868 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
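# A minimal sketch (not the real implementation) of the recursion in
# _CreateBlockDevInner above: 'force_create' turns True as soon as a device
# that must exist on secondaries is seen, is propagated to all children
# first, and only then is the device itself created.
#
#   def _create_tree(dev, force_create):
#     if dev.CreateOnSecondary():
#       force_create = True
#     for child in dev.children:
#       _create_tree(child, force_create)
#     if force_create:
#       _create_single(dev)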
8871 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8872 """Create a single block device on a given node.
8874 This will not recurse over children of the device, so they must be
8877 @param lu: the lu on whose behalf we execute
8878 @param node: the node on which to create the device
8879 @type instance: L{objects.Instance}
8880 @param instance: the instance which owns the device
8881 @type device: L{objects.Disk}
8882 @param device: the device to create
8883 @param info: the extra 'metadata' we should attach to the device
8884 (this will be represented as a LVM tag)
8885 @type force_open: boolean
8886 @param force_open: this parameter will be passed to the
8887 L{backend.BlockdevCreate} function where it specifies
8888 whether we run on primary or not, and it affects both
8889 the child assembly and the device's own Open() execution
8892 lu.cfg.SetDiskID(device, node)
8893 result = lu.rpc.call_blockdev_create(node, device, device.size,
8894 instance.name, force_open, info)
8895 result.Raise("Can't create block device %s on"
8896 " node %s for instance %s" % (device, node, instance.name))
8897 if device.physical_id is None:
8898 device.physical_id = result.payload
8901 def _GenerateUniqueNames(lu, exts):
8902 """Generate a suitable LV name.
8904 This will generate a unique logical volume name for each of the given extensions.
8909 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8910 results.append("%s%s" % (new_id, val))
8914 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8915 iv_name, p_minor, s_minor):
8916 """Generate a drbd8 device complete with its children.
8919 assert len(vgnames) == len(names) == 2
8920 port = lu.cfg.AllocatePort()
8921 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8923 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8924 logical_id=(vgnames[0], names[0]),
8926 dev_meta = objects.Disk(dev_type=constants.LD_LV,
8927 size=constants.DRBD_META_SIZE,
8928 logical_id=(vgnames[1], names[1]),
8930 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8931 logical_id=(primary, secondary, port,
8934 children=[dev_data, dev_meta],
8935 iv_name=iv_name, params={})
8939 _DISK_TEMPLATE_NAME_PREFIX = {
8940 constants.DT_PLAIN: "",
8941 constants.DT_RBD: ".rbd",
8945 _DISK_TEMPLATE_DEVICE_TYPE = {
8946 constants.DT_PLAIN: constants.LD_LV,
8947 constants.DT_FILE: constants.LD_FILE,
8948 constants.DT_SHARED_FILE: constants.LD_FILE,
8949 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8950 constants.DT_RBD: constants.LD_RBD,
8954 def _GenerateDiskTemplate(
8955 lu, template_name, instance_name, primary_node, secondary_nodes,
8956 disk_info, file_storage_dir, file_driver, base_index,
8957 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8958 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8959 """Generate the entire disk layout for a given template type.
8962 #TODO: compute space requirements
8964 vgname = lu.cfg.GetVGName()
8965 disk_count = len(disk_info)
8968 if template_name == constants.DT_DISKLESS:
8970 elif template_name == constants.DT_DRBD8:
8971 if len(secondary_nodes) != 1:
8972 raise errors.ProgrammerError("Wrong template configuration")
8973 remote_node = secondary_nodes[0]
8974 minors = lu.cfg.AllocateDRBDMinor(
8975 [primary_node, remote_node] * len(disk_info), instance_name)
8977 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8979 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8982 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8983 for i in range(disk_count)]):
8984 names.append(lv_prefix + "_data")
8985 names.append(lv_prefix + "_meta")
8986 for idx, disk in enumerate(disk_info):
8987 disk_index = idx + base_index
8988 data_vg = disk.get(constants.IDISK_VG, vgname)
8989 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8990 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8991 disk[constants.IDISK_SIZE],
8993 names[idx * 2:idx * 2 + 2],
8994 "disk/%d" % disk_index,
8995 minors[idx * 2], minors[idx * 2 + 1])
8996 disk_dev.mode = disk[constants.IDISK_MODE]
8997 disks.append(disk_dev)
9000 raise errors.ProgrammerError("Wrong template configuration")
9002 if template_name == constants.DT_FILE:
9004 elif template_name == constants.DT_SHARED_FILE:
9005 _req_shr_file_storage()
9007 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9008 if name_prefix is None:
9011 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9012 (name_prefix, base_index + i)
9013 for i in range(disk_count)])
9015 if template_name == constants.DT_PLAIN:
9016 def logical_id_fn(idx, _, disk):
9017 vg = disk.get(constants.IDISK_VG, vgname)
9018 return (vg, names[idx])
9019 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9021 lambda _, disk_index, disk: (file_driver,
9022 "%s/disk%d" % (file_storage_dir,
9024 elif template_name == constants.DT_BLOCK:
9026 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9027 disk[constants.IDISK_ADOPT])
9028 elif template_name == constants.DT_RBD:
9029 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9031 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9033 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9035 for idx, disk in enumerate(disk_info):
9036 disk_index = idx + base_index
9037 size = disk[constants.IDISK_SIZE]
9038 feedback_fn("* disk %s, size %s" %
9039 (disk_index, utils.FormatUnit(size, "h")))
9040 disks.append(objects.Disk(dev_type=dev_type, size=size,
9041 logical_id=logical_id_fn(idx, disk_index, disk),
9042 iv_name="disk/%d" % disk_index,
9043 mode=disk[constants.IDISK_MODE],
9049 def _GetInstanceInfoText(instance):
9050 """Compute that text that should be added to the disk's metadata.
9053 return "originstname+%s" % instance.name
9056 def _CalcEta(time_taken, written, total_size):
9057 """Calculates the ETA based on size written and total size.
9059 @param time_taken: The time taken so far
9060 @param written: amount written so far
9061 @param total_size: The total size of data to be written
9062 @return: The remaining time in seconds
9065 avg_time = time_taken / float(written)
9066 return (total_size - written) * avg_time
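# For illustration, _CalcEta is a plain linear extrapolation, e.g.:
#
#   _CalcEta(20.0, 1024, 10240)
#   # avg_time = 20.0 / 1024, remaining = (10240 - 1024) * avg_time = 180.0 s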
9069 def _WipeDisks(lu, instance, disks=None):
9070 """Wipes instance disks.
9072 @type lu: L{LogicalUnit}
9073 @param lu: the logical unit on whose behalf we execute
9074 @type instance: L{objects.Instance}
9075 @param instance: the instance whose disks we should wipe
9076 @return: the success of the wipe
9079 node = instance.primary_node
9082 disks = [(idx, disk, 0)
9083 for (idx, disk) in enumerate(instance.disks)]
9085 for (_, device, _) in disks:
9086 lu.cfg.SetDiskID(device, node)
9088 logging.info("Pausing synchronization of disks of instance '%s'",
9090 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9091 (map(compat.snd, disks),
9094 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9096 for idx, success in enumerate(result.payload):
9098 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9099 " failed", idx, instance.name)
9102 for (idx, device, offset) in disks:
9103 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9104 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9106 int(min(constants.MAX_WIPE_CHUNK,
9107 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
9111 start_time = time.time()
9116 info_text = (" (from %s to %s)" %
9117 (utils.FormatUnit(offset, "h"),
9118 utils.FormatUnit(size, "h")))
9120 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9122 logging.info("Wiping disk %d for instance %s on node %s using"
9123 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9125 while offset < size:
9126 wipe_size = min(wipe_chunk_size, size - offset)
9128 logging.debug("Wiping disk %d, offset %s, chunk %s",
9129 idx, offset, wipe_size)
9131 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9133 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9134 (idx, offset, wipe_size))
9138 if now - last_output >= 60:
9139 eta = _CalcEta(now - start_time, offset, size)
9140 lu.LogInfo(" - done: %.1f%% ETA: %s",
9141 offset / float(size) * 100, utils.FormatSeconds(eta))
9144 logging.info("Resuming synchronization of disks for instance '%s'",
9147 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9148 (map(compat.snd, disks),
9153 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9154 node, result.fail_msg)
9156 for idx, success in enumerate(result.payload):
9158 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9159 " failed", idx, instance.name)
9162 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9163 """Create all disks for an instance.
9165 This abstracts away some work from AddInstance.
9167 @type lu: L{LogicalUnit}
9168 @param lu: the logical unit on whose behalf we execute
9169 @type instance: L{objects.Instance}
9170 @param instance: the instance whose disks we should create
9172 @param to_skip: list of indices to skip
9173 @type target_node: string
9174 @param target_node: if passed, overrides the target node for creation
9176 @return: the success of the creation
9179 info = _GetInstanceInfoText(instance)
9180 if target_node is None:
9181 pnode = instance.primary_node
9182 all_nodes = instance.all_nodes
9187 if instance.disk_template in constants.DTS_FILEBASED:
9188 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9189 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9191 result.Raise("Failed to create directory '%s' on"
9192 " node %s" % (file_storage_dir, pnode))
9194 # Note: this needs to be kept in sync with adding of disks in
9195 # LUInstanceSetParams
9196 for idx, device in enumerate(instance.disks):
9197 if to_skip and idx in to_skip:
9199 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9201 for node in all_nodes:
9202 f_create = node == pnode
9203 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9206 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9207 """Remove all disks for an instance.
9209 This abstracts away some work from `AddInstance()` and
9210 `RemoveInstance()`. Note that in case some of the devices couldn't
9211 be removed, the removal will continue with the other ones (compare
9212 with `_CreateDisks()`).
9214 @type lu: L{LogicalUnit}
9215 @param lu: the logical unit on whose behalf we execute
9216 @type instance: L{objects.Instance}
9217 @param instance: the instance whose disks we should remove
9218 @type target_node: string
9219 @param target_node: used to override the node on which to remove the disks
9221 @return: the success of the removal
9224 logging.info("Removing block devices for instance %s", instance.name)
9227 ports_to_release = set()
9228 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9229 for (idx, device) in enumerate(anno_disks):
9231 edata = [(target_node, device)]
9233 edata = device.ComputeNodeTree(instance.primary_node)
9234 for node, disk in edata:
9235 lu.cfg.SetDiskID(disk, node)
9236 result = lu.rpc.call_blockdev_remove(node, disk)
9238 lu.LogWarning("Could not remove disk %s on node %s,"
9239 " continuing anyway: %s", idx, node, result.fail_msg)
9240 if not (result.offline and node != instance.primary_node):
9243 # if this is a DRBD disk, return its port to the pool
9244 if device.dev_type in constants.LDS_DRBD:
9245 ports_to_release.add(device.logical_id[2])
9247 if all_result or ignore_failures:
9248 for port in ports_to_release:
9249 lu.cfg.AddTcpUdpPort(port)
9251 if instance.disk_template in constants.DTS_FILEBASED:
9252 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9256 tgt = instance.primary_node
9257 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9259 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9260 file_storage_dir, instance.primary_node, result.fail_msg)
9266 def _ComputeDiskSizePerVG(disk_template, disks):
9267 """Compute disk size requirements in the volume group
9270 def _compute(disks, payload):
9271 """Universal algorithm.
9276 vgs[disk[constants.IDISK_VG]] = \
9277 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9281 # Required free disk space as a function of disk and swap space
9283 constants.DT_DISKLESS: {},
9284 constants.DT_PLAIN: _compute(disks, 0),
9285 # 128 MB are added for drbd metadata for each disk
9286 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9287 constants.DT_FILE: {},
9288 constants.DT_SHARED_FILE: {},
9291 if disk_template not in req_size_dict:
9292 raise errors.ProgrammerError("Disk template '%s' size requirement"
9293 " is unknown" % disk_template)
9295 return req_size_dict[disk_template]
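# Editorial sketch (not part of Ganeti): the _compute helper above is a plain
# per-key fold -- sizes are summed per volume group, with a fixed per-disk
# overhead (e.g. constants.DRBD_META_SIZE for DRBD metadata).  Simplified disk
# dicts with literal "vg"/"size" keys are used here purely for illustration:
def _ExampleSumSizesPerVg(disks, overhead):
  """Sums disk sizes per volume group, adding a per-disk overhead.

  >>> _ExampleSumSizesPerVg([{"vg": "xenvg", "size": 1024},
  ...                        {"vg": "xenvg", "size": 2048}], 128)
  {'xenvg': 3328}

  """
  totals = {}
  for disk in disks:
    vg = disk["vg"]
    totals[vg] = totals.get(vg, 0) + disk["size"] + overhead
  return totals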
9298 def _FilterVmNodes(lu, nodenames):
9299 """Filters out non-vm_capable nodes from a list.
9301 @type lu: L{LogicalUnit}
9302 @param lu: the logical unit for which we check
9303 @type nodenames: list
9304 @param nodenames: the list of nodes on which we should check
9306 @return: the list of vm-capable nodes
9309 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9310 return [name for name in nodenames if name not in vm_nodes]
9313 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9314 """Hypervisor parameter validation.
9316 This function abstracts the hypervisor parameter validation to be
9317 used in both instance create and instance modify.
9319 @type lu: L{LogicalUnit}
9320 @param lu: the logical unit for which we check
9321 @type nodenames: list
9322 @param nodenames: the list of nodes on which we should check
9323 @type hvname: string
9324 @param hvname: the name of the hypervisor we should use
9325 @type hvparams: dict
9326 @param hvparams: the parameters which we need to check
9327 @raise errors.OpPrereqError: if the parameters are not valid
9330 nodenames = _FilterVmNodes(lu, nodenames)
9332 cluster = lu.cfg.GetClusterInfo()
9333 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9335 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9336 for node in nodenames:
9340 info.Raise("Hypervisor parameter validation failed on node %s" % node)
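# Editorial sketch (not part of Ganeti): objects.FillDict overlays the given
# hvparams on top of the cluster-level defaults, so only explicitly supplied
# values differ from the base dict before the per-node RPC validation runs.
# Roughly equivalent pure-Python behaviour, shown with hypothetical values:
def _ExampleFillDict(defaults, overrides):
  """Returns a copy of defaults with overrides applied on top."""
  filled = dict(defaults)
  filled.update(overrides)
  return filled
# e.g. _ExampleFillDict({"acpi": True, "pae": True}, {"acpi": False})
# yields {"acpi": False, "pae": True} -- defaults survive unless overridden.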
9343 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9344 """OS parameters validation.
9346 @type lu: L{LogicalUnit}
9347 @param lu: the logical unit for which we check
9348 @type required: boolean
9349 @param required: whether the validation should fail if the OS is not found
9351 @type nodenames: list
9352 @param nodenames: the list of nodes on which we should check
9353 @type osname: string
9354 @param osname: the name of the OS we should use
9355 @type osparams: dict
9356 @param osparams: the parameters which we need to check
9357 @raise errors.OpPrereqError: if the parameters are not valid
9360 nodenames = _FilterVmNodes(lu, nodenames)
9361 result = lu.rpc.call_os_validate(nodenames, required, osname,
9362 [constants.OS_VALIDATE_PARAMETERS],
9364 for node, nres in result.items():
9365 # we don't check for offline cases since this should be run only
9366 # against the master node and/or an instance's nodes
9367 nres.Raise("OS Parameters validation failed on node %s" % node)
9368 if not nres.payload:
9369 lu.LogInfo("OS %s not found on node %s, validation skipped",
9373 def _CreateInstanceAllocRequest(op, disks, nics, beparams):
9374 """Wrapper around IAReqInstanceAlloc.
9376 @param op: The instance opcode
9377 @param disks: The computed disks
9378 @param nics: The computed nics
9379 @param beparams: The fully filled beparams
9381 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9384 spindle_use = beparams[constants.BE_SPINDLE_USE]
9385 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9386 disk_template=op.disk_template,
9389 vcpus=beparams[constants.BE_VCPUS],
9390 memory=beparams[constants.BE_MAXMEM],
9391 spindle_use=spindle_use,
9393 nics=[n.ToDict() for n in nics],
9394 hypervisor=op.hypervisor)
9397 def _ComputeNics(op, cluster, default_ip, cfg, proc):
9398 """Computes the nics.
9400 @param op: The instance opcode
9401 @param cluster: Cluster configuration object
9402 @param default_ip: The default ip to assign
9403 @param cfg: An instance of the configuration object
9404 @param proc: The executer instance
9406 @returns: The built NIC objects
9410 for idx, nic in enumerate(op.nics):
9411 nic_mode_req = nic.get(constants.INIC_MODE, None)
9412 nic_mode = nic_mode_req
9413 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9414 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9416 net = nic.get(constants.INIC_NETWORK, None)
9417 link = nic.get(constants.NIC_LINK, None)
9418 ip = nic.get(constants.INIC_IP, None)
9420 if net is None or net.lower() == constants.VALUE_NONE:
9423 if nic_mode_req is not None or link is not None:
9424 raise errors.OpPrereqError("If network is given, no mode or link"
9425 " is allowed to be passed",
9428 # ip validity checks
9429 if ip is None or ip.lower() == constants.VALUE_NONE:
9431 elif ip.lower() == constants.VALUE_AUTO:
9432 if not op.name_check:
9433 raise errors.OpPrereqError("IP address set to auto but name checks"
9434 " have been skipped",
9438 # We defer pool operations until later, so that the iallocator has
9439 # filled in the instance's node(s)
9440 if ip.lower() == constants.NIC_IP_POOL:
9442 raise errors.OpPrereqError("if ip=pool, parameter network"
9443 " must be passed too",
9446 elif not netutils.IPAddress.IsValid(ip):
9447 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9452 # TODO: check the ip address for uniqueness
9453 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9454 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9457 # MAC address verification
9458 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9459 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9460 mac = utils.NormalizeAndValidateMac(mac)
9463 # TODO: We need to factor this out
9464 cfg.ReserveMAC(mac, proc.GetECId())
9465 except errors.ReservationError:
9466 raise errors.OpPrereqError("MAC address %s already in use"
9467 " in cluster" % mac,
9468 errors.ECODE_NOTUNIQUE)
9470 # Build nic parameters
9473 nicparams[constants.NIC_MODE] = nic_mode
9475 nicparams[constants.NIC_LINK] = link
9477 check_params = cluster.SimpleFillNIC(nicparams)
9478 objects.NIC.CheckParameterSyntax(check_params)
9479 nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
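# Editorial sketch (not part of Ganeti): the IP handling above boils down to a
# small decision table.  A simplified standalone version (the real code defers
# pool allocation until the node is known and validates literal addresses with
# netutils.IPAddress.IsValid):
def _ExampleResolveNicIp(ip_spec, default_ip, have_network, name_checked):
  """Returns the IP to use for a NIC, or None, for a given specification."""
  if ip_spec is None or ip_spec.lower() == "none":
    return None                        # explicitly no IP address
  if ip_spec.lower() == "auto":
    if not name_checked:
      raise ValueError("ip=auto requires the instance name check")
    return default_ip                  # IP resolved from the instance name
  if ip_spec.lower() == "pool":
    if not have_network:
      raise ValueError("ip=pool requires a network")
    return "pool"                      # the real allocation happens later
  return ip_spec                       # assume a literal, pre-validated IP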
9484 def _ComputeDisks(op, default_vg):
9485 """Computes the instance disks.
9487 @param op: The instance opcode
9488 @param default_vg: The default_vg to assume
9490 @return: The computed disks
9494 for disk in op.disks:
9495 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9496 if mode not in constants.DISK_ACCESS_SET:
9497 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9498 mode, errors.ECODE_INVAL)
9499 size = disk.get(constants.IDISK_SIZE, None)
9501 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9504 except (TypeError, ValueError):
9505 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9508 data_vg = disk.get(constants.IDISK_VG, default_vg)
9510 constants.IDISK_SIZE: size,
9511 constants.IDISK_MODE: mode,
9512 constants.IDISK_VG: data_vg,
9514 if constants.IDISK_METAVG in disk:
9515 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9516 if constants.IDISK_ADOPT in disk:
9517 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9518 disks.append(new_disk)
9523 def _ComputeFullBeParams(op, cluster):
9524 """Computes the full beparams.
9526 @param op: The instance opcode
9527 @param cluster: The cluster config object
9529 @return: The fully filled beparams
9532 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9533 for param, value in op.beparams.iteritems():
9534 if value == constants.VALUE_AUTO:
9535 op.beparams[param] = default_beparams[param]
9536 objects.UpgradeBeParams(op.beparams)
9537 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
9538 return cluster.SimpleFillBE(op.beparams)
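# Editorial sketch (not part of Ganeti): "auto" backend parameters are replaced
# by the cluster defaults before the dict is upgraded, type-checked and filled.
# The substitution step in isolation, with hypothetical parameter names:
def _ExampleResolveAutoParams(params, defaults):
  """Replaces 'auto' values with the corresponding default values."""
  resolved = dict(params)
  for name, value in resolved.items():
    if value == "auto":
      resolved[name] = defaults[name]
  return resolved
# e.g. _ExampleResolveAutoParams({"vcpus": "auto", "maxmem": 512},
#                                {"vcpus": 1, "maxmem": 128})
# yields {"vcpus": 1, "maxmem": 512}.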
9541 class LUInstanceCreate(LogicalUnit):
9542 """Create an instance.
9545 HPATH = "instance-add"
9546 HTYPE = constants.HTYPE_INSTANCE
9549 def CheckArguments(self):
9553 # do not require name_check to ease forward/backward compatibility
9555 if self.op.no_install and self.op.start:
9556 self.LogInfo("No-installation mode selected, disabling startup")
9557 self.op.start = False
9558 # validate/normalize the instance name
9559 self.op.instance_name = \
9560 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9562 if self.op.ip_check and not self.op.name_check:
9563 # TODO: make the ip check more flexible and not depend on the name check
9564 raise errors.OpPrereqError("Cannot do IP address check without a name"
9565 " check", errors.ECODE_INVAL)
9567 # check nics' parameter names
9568 for nic in self.op.nics:
9569 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9571 # check disks. parameter names and consistent adopt/no-adopt strategy
9572 has_adopt = has_no_adopt = False
9573 for disk in self.op.disks:
9574 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9575 if constants.IDISK_ADOPT in disk:
9579 if has_adopt and has_no_adopt:
9580 raise errors.OpPrereqError("Either all disks are adopted or none is",
9583 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9584 raise errors.OpPrereqError("Disk adoption is not supported for the"
9585 " '%s' disk template" %
9586 self.op.disk_template,
9588 if self.op.iallocator is not None:
9589 raise errors.OpPrereqError("Disk adoption not allowed with an"
9590 " iallocator script", errors.ECODE_INVAL)
9591 if self.op.mode == constants.INSTANCE_IMPORT:
9592 raise errors.OpPrereqError("Disk adoption not allowed for"
9593 " instance import", errors.ECODE_INVAL)
9595 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9596 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9597 " but no 'adopt' parameter given" %
9598 self.op.disk_template,
9601 self.adopt_disks = has_adopt
9603 # instance name verification
9604 if self.op.name_check:
9605 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
9606 self.op.instance_name = self.hostname1.name
9607 # used in CheckPrereq for ip ping check
9608 self.check_ip = self.hostname1.ip
9610 self.check_ip = None
9612 # file storage checks
9613 if (self.op.file_driver and
9614 self.op.file_driver not in constants.FILE_DRIVER):
9615 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9616 self.op.file_driver, errors.ECODE_INVAL)
9618 if self.op.disk_template == constants.DT_FILE:
9619 opcodes.RequireFileStorage()
9620 elif self.op.disk_template == constants.DT_SHARED_FILE:
9621 opcodes.RequireSharedFileStorage()
9623 ### Node/iallocator related checks
9624 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9626 if self.op.pnode is not None:
9627 if self.op.disk_template in constants.DTS_INT_MIRROR:
9628 if self.op.snode is None:
9629 raise errors.OpPrereqError("The networked disk templates need"
9630 " a mirror node", errors.ECODE_INVAL)
9632 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9634 self.op.snode = None
9636 self._cds = _GetClusterDomainSecret()
9638 if self.op.mode == constants.INSTANCE_IMPORT:
9639 # On import force_variant must be True, because if we forced it at
9640 # initial install, our only chance when importing it back is that it
9642 self.op.force_variant = True
9644 if self.op.no_install:
9645 self.LogInfo("No-installation mode has no effect during import")
9647 elif self.op.mode == constants.INSTANCE_CREATE:
9648 if self.op.os_type is None:
9649 raise errors.OpPrereqError("No guest OS specified",
9651 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9652 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9653 " installation" % self.op.os_type,
9655 if self.op.disk_template is None:
9656 raise errors.OpPrereqError("No disk template specified",
9659 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9660 # Check handshake to ensure both clusters have the same domain secret
9661 src_handshake = self.op.source_handshake
9662 if not src_handshake:
9663 raise errors.OpPrereqError("Missing source handshake",
9666 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9669 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9672 # Load and check source CA
9673 self.source_x509_ca_pem = self.op.source_x509_ca
9674 if not self.source_x509_ca_pem:
9675 raise errors.OpPrereqError("Missing source X509 CA",
9679 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9681 except OpenSSL.crypto.Error, err:
9682 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9683 (err, ), errors.ECODE_INVAL)
9685 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9686 if errcode is not None:
9687 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9690 self.source_x509_ca = cert
9692 src_instance_name = self.op.source_instance_name
9693 if not src_instance_name:
9694 raise errors.OpPrereqError("Missing source instance name",
9697 self.source_instance_name = \
9698 netutils.GetHostname(name=src_instance_name).name
9701 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9702 self.op.mode, errors.ECODE_INVAL)
9704 def ExpandNames(self):
9705 """ExpandNames for CreateInstance.
9707 Figure out the right locks for instance creation.
9710 self.needed_locks = {}
9712 instance_name = self.op.instance_name
9713 # this is just a preventive check, but someone might still add this
9714 # instance in the meantime, and creation will fail at lock-add time
9715 if instance_name in self.cfg.GetInstanceList():
9716 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9717 instance_name, errors.ECODE_EXISTS)
9719 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9721 if self.op.iallocator:
9722 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9723 # specifying a group on instance creation and then selecting nodes from
9725 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9726 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9728 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9729 nodelist = [self.op.pnode]
9730 if self.op.snode is not None:
9731 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9732 nodelist.append(self.op.snode)
9733 self.needed_locks[locking.LEVEL_NODE] = nodelist
9734 # Lock resources of instance's primary and secondary nodes (copy to
9735 # prevent accidental modification)
9736 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9738 # in case of import lock the source node too
9739 if self.op.mode == constants.INSTANCE_IMPORT:
9740 src_node = self.op.src_node
9741 src_path = self.op.src_path
9743 if src_path is None:
9744 self.op.src_path = src_path = self.op.instance_name
9746 if src_node is None:
9747 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9748 self.op.src_node = None
9749 if os.path.isabs(src_path):
9750 raise errors.OpPrereqError("Importing an instance from an absolute path"
9751 " requires a source node option",
9754 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9755 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9756 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9757 if not os.path.isabs(src_path):
9758 self.op.src_path = src_path = \
9759 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
9761 def _RunAllocator(self):
9762 """Run the allocator based on input opcode.
9765 #TODO Export network to iallocator so that it chooses a pnode
9766 # in a nodegroup that has the desired network connected to
9767 req = _CreateInstanceAllocRequest(self.op, self.disks,
9768 self.nics, self.be_full)
9769 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9771 ial.Run(self.op.iallocator)
9774 raise errors.OpPrereqError("Can't compute nodes using"
9775 " iallocator '%s': %s" %
9776 (self.op.iallocator, ial.info),
9778 self.op.pnode = ial.result[0]
9779 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9780 self.op.instance_name, self.op.iallocator,
9781 utils.CommaJoin(ial.result))
9783 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9785 if req.RequiredNodes() == 2:
9786 self.op.snode = ial.result[1]
9788 def BuildHooksEnv(self):
9791 This runs on master, primary and secondary nodes of the instance.
9795 "ADD_MODE": self.op.mode,
9797 if self.op.mode == constants.INSTANCE_IMPORT:
9798 env["SRC_NODE"] = self.op.src_node
9799 env["SRC_PATH"] = self.op.src_path
9800 env["SRC_IMAGES"] = self.src_images
9802 env.update(_BuildInstanceHookEnv(
9803 name=self.op.instance_name,
9804 primary_node=self.op.pnode,
9805 secondary_nodes=self.secondaries,
9806 status=self.op.start,
9807 os_type=self.op.os_type,
9808 minmem=self.be_full[constants.BE_MINMEM],
9809 maxmem=self.be_full[constants.BE_MAXMEM],
9810 vcpus=self.be_full[constants.BE_VCPUS],
9811 nics=_NICListToTuple(self, self.nics),
9812 disk_template=self.op.disk_template,
9813 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9814 for d in self.disks],
9817 hypervisor_name=self.op.hypervisor,
9823 def BuildHooksNodes(self):
9824 """Build hooks nodes.
9827 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9830 def _ReadExportInfo(self):
9831 """Reads the export information from disk.
9833 It will override the opcode source node and path with the actual
9834 information, if these two were not specified before.
9836 @return: the export information
9839 assert self.op.mode == constants.INSTANCE_IMPORT
9841 src_node = self.op.src_node
9842 src_path = self.op.src_path
9844 if src_node is None:
9845 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9846 exp_list = self.rpc.call_export_list(locked_nodes)
9848 for node in exp_list:
9849 if exp_list[node].fail_msg:
9851 if src_path in exp_list[node].payload:
9853 self.op.src_node = src_node = node
9854 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
9858 raise errors.OpPrereqError("No export found for relative path %s" %
9859 src_path, errors.ECODE_INVAL)
9861 _CheckNodeOnline(self, src_node)
9862 result = self.rpc.call_export_info(src_node, src_path)
9863 result.Raise("No export or invalid export found in dir %s" % src_path)
9865 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9866 if not export_info.has_section(constants.INISECT_EXP):
9867 raise errors.ProgrammerError("Corrupted export config",
9868 errors.ECODE_ENVIRON)
9870 ei_version = export_info.get(constants.INISECT_EXP, "version")
9871 if (int(ei_version) != constants.EXPORT_VERSION):
9872 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9873 (ei_version, constants.EXPORT_VERSION),
9874 errors.ECODE_ENVIRON)
9877 def _ReadExportParams(self, einfo):
9878 """Use export parameters as defaults.
9880 In case the opcode doesn't specify (as in override) some instance
9881 parameters, then try to use them from the export information, if
9885 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9887 if self.op.disk_template is None:
9888 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9889 self.op.disk_template = einfo.get(constants.INISECT_INS,
9891 if self.op.disk_template not in constants.DISK_TEMPLATES:
9892 raise errors.OpPrereqError("Disk template specified in configuration"
9893 " file is not one of the allowed values:"
9895 " ".join(constants.DISK_TEMPLATES),
9898 raise errors.OpPrereqError("No disk template specified and the export"
9899 " is missing the disk_template information",
9902 if not self.op.disks:
9904 # TODO: import the disk iv_name too
9905 for idx in range(constants.MAX_DISKS):
9906 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9907 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9908 disks.append({constants.IDISK_SIZE: disk_sz})
9909 self.op.disks = disks
9910 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9911 raise errors.OpPrereqError("No disk info specified and the export"
9912 " is missing the disk information",
9915 if not self.op.nics:
9917 for idx in range(constants.MAX_NICS):
9918 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9920 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9921 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9928 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9929 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9931 if (self.op.hypervisor is None and
9932 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9933 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9935 if einfo.has_section(constants.INISECT_HYP):
9936 # use the export parameters but do not override the ones
9937 # specified by the user
9938 for name, value in einfo.items(constants.INISECT_HYP):
9939 if name not in self.op.hvparams:
9940 self.op.hvparams[name] = value
9942 if einfo.has_section(constants.INISECT_BEP):
9943 # use the parameters, without overriding
9944 for name, value in einfo.items(constants.INISECT_BEP):
9945 if name not in self.op.beparams:
9946 self.op.beparams[name] = value
9947 # Compatibility for the old "memory" be param
9948 if name == constants.BE_MEMORY:
9949 if constants.BE_MAXMEM not in self.op.beparams:
9950 self.op.beparams[constants.BE_MAXMEM] = value
9951 if constants.BE_MINMEM not in self.op.beparams:
9952 self.op.beparams[constants.BE_MINMEM] = value
9954 # try to read the parameters old style, from the main section
9955 for name in constants.BES_PARAMETERS:
9956 if (name not in self.op.beparams and
9957 einfo.has_option(constants.INISECT_INS, name)):
9958 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9960 if einfo.has_section(constants.INISECT_OSP):
9961 # use the parameters, without overriding
9962 for name, value in einfo.items(constants.INISECT_OSP):
9963 if name not in self.op.osparams:
9964 self.op.osparams[name] = value
9966 def _RevertToDefaults(self, cluster):
9967 """Revert the instance parameters to the default values.
9971 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9972 for name in self.op.hvparams.keys():
9973 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9974 del self.op.hvparams[name]
9976 be_defs = cluster.SimpleFillBE({})
9977 for name in self.op.beparams.keys():
9978 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9979 del self.op.beparams[name]
9981 nic_defs = cluster.SimpleFillNIC({})
9982 for nic in self.op.nics:
9983 for name in constants.NICS_PARAMETERS:
9984 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9987 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9988 for name in self.op.osparams.keys():
9989 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9990 del self.op.osparams[name]
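# Editorial note (illustration only): _RevertToDefaults strips every parameter
# whose value merely repeats the cluster default, so that only real overrides
# are stored with the instance.  The core pattern, for each parameter dict:
#
#   for name in params.keys():
#     if name in defaults and defaults[name] == params[name]:
#       del params[name]
#
# e.g. with defaults {"acpi": True} and params {"acpi": True, "pae": False},
# only {"pae": False} remains afterwards.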
9992 def _CalculateFileStorageDir(self):
9993 """Calculate final instance file storage dir.
9996 # file storage dir calculation/check
9997 self.instance_file_storage_dir = None
9998 if self.op.disk_template in constants.DTS_FILEBASED:
9999 # build the full file storage dir path
10002 if self.op.disk_template == constants.DT_SHARED_FILE:
10003 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10005 get_fsd_fn = self.cfg.GetFileStorageDir
10007 cfg_storagedir = get_fsd_fn()
10008 if not cfg_storagedir:
10009 raise errors.OpPrereqError("Cluster file storage dir not defined",
10010 errors.ECODE_STATE)
10011 joinargs.append(cfg_storagedir)
10013 if self.op.file_storage_dir is not None:
10014 joinargs.append(self.op.file_storage_dir)
10016 joinargs.append(self.op.instance_name)
10018 # pylint: disable=W0142
10019 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
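# Editorial note (illustration only, paths are hypothetical): the resulting
# directory is simply the cluster storage root, the optional per-instance
# subdirectory from the opcode and the instance name joined together, e.g.
#
#   utils.PathJoin("/srv/ganeti/file-storage", "web", "inst1.example.com")
#   # -> "/srv/ganeti/file-storage/web/inst1.example.com"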
10021 def CheckPrereq(self): # pylint: disable=R0914
10022 """Check prerequisites.
10025 self._CalculateFileStorageDir()
10027 if self.op.mode == constants.INSTANCE_IMPORT:
10028 export_info = self._ReadExportInfo()
10029 self._ReadExportParams(export_info)
10030 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10032 self._old_instance_name = None
10034 if (not self.cfg.GetVGName() and
10035 self.op.disk_template not in constants.DTS_NOT_LVM):
10036 raise errors.OpPrereqError("Cluster does not support lvm-based"
10037 " instances", errors.ECODE_STATE)
10039 if (self.op.hypervisor is None or
10040 self.op.hypervisor == constants.VALUE_AUTO):
10041 self.op.hypervisor = self.cfg.GetHypervisorType()
10043 cluster = self.cfg.GetClusterInfo()
10044 enabled_hvs = cluster.enabled_hypervisors
10045 if self.op.hypervisor not in enabled_hvs:
10046 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10048 (self.op.hypervisor, ",".join(enabled_hvs)),
10049 errors.ECODE_STATE)
10051 # Check tag validity
10052 for tag in self.op.tags:
10053 objects.TaggableObject.ValidateTag(tag)
10055 # check hypervisor parameter syntax (locally)
10056 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10057 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10059 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
10060 hv_type.CheckParameterSyntax(filled_hvp)
10061 self.hv_full = filled_hvp
10062 # check that we don't specify global parameters on an instance
10063 _CheckGlobalHvParams(self.op.hvparams)
10065 # fill and remember the beparams dict
10066 self.be_full = _ComputeFullBeParams(self.op, cluster)
10068 # build os parameters
10069 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10071 # now that hvp/bep are in final format, let's reset to defaults,
10073 if self.op.identify_defaults:
10074 self._RevertToDefaults(cluster)
10077 self.nics = _ComputeNics(self.op, cluster, self.hostname1.ip, self.cfg,
10080 # disk checks/pre-build
10081 default_vg = self.cfg.GetVGName()
10082 self.disks = _ComputeDisks(self.op, default_vg)
10084 if self.op.mode == constants.INSTANCE_IMPORT:
10086 for idx in range(len(self.disks)):
10087 option = "disk%d_dump" % idx
10088 if export_info.has_option(constants.INISECT_INS, option):
10089 # FIXME: are the old os-es, disk sizes, etc. useful?
10090 export_name = export_info.get(constants.INISECT_INS, option)
10091 image = utils.PathJoin(self.op.src_path, export_name)
10092 disk_images.append(image)
10094 disk_images.append(False)
10096 self.src_images = disk_images
10098 if self.op.instance_name == self._old_instance_name:
10099 for idx, nic in enumerate(self.nics):
10100 if nic.mac == constants.VALUE_AUTO:
10101 nic_mac_ini = "nic%d_mac" % idx
10102 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10104 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10106 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10107 if self.op.ip_check:
10108 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10109 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10110 (self.check_ip, self.op.instance_name),
10111 errors.ECODE_NOTUNIQUE)
10113 #### mac address generation
10114 # By generating here the mac address both the allocator and the hooks get
10115 # the real final mac address rather than the 'auto' or 'generate' value.
10116 # There is a race condition between the generation and the instance object
10117 # creation, which means that we know the mac is valid now, but we're not
10118 # sure it will be when we actually add the instance. If things go bad
10119 # adding the instance will abort because of a duplicate mac, and the
10120 # creation job will fail.
10121 for nic in self.nics:
10122 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10123 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
10127 if self.op.iallocator is not None:
10128 self._RunAllocator()
10130 # Release all unneeded node locks
10131 _ReleaseLocks(self, locking.LEVEL_NODE,
10132 keep=filter(None, [self.op.pnode, self.op.snode,
10133 self.op.src_node]))
10134 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
10135 keep=filter(None, [self.op.pnode, self.op.snode,
10136 self.op.src_node]))
10138 #### node related checks
10140 # check primary node
10141 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10142 assert self.pnode is not None, \
10143 "Cannot retrieve locked node %s" % self.op.pnode
10145 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10146 pnode.name, errors.ECODE_STATE)
10148 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10149 pnode.name, errors.ECODE_STATE)
10150 if not pnode.vm_capable:
10151 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10152 " '%s'" % pnode.name, errors.ECODE_STATE)
10154 self.secondaries = []
10156 # Fill in any IPs from IP pools. This must happen here, because we need to
10157 # know the nic's primary node, as specified by the iallocator
10158 for idx, nic in enumerate(self.nics):
10160 if net is not None:
10161 netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10162 if netparams is None:
10163 raise errors.OpPrereqError("No netparams found for network"
10164 " %s. Probably not connected to"
10165 " node's %s nodegroup" %
10166 (net, self.pnode.name),
10167 errors.ECODE_INVAL)
10168 self.LogInfo("NIC/%d inherits netparams %s" %
10169 (idx, netparams.values()))
10170 nic.nicparams = dict(netparams)
10171 if nic.ip is not None:
10172 filled_params = cluster.SimpleFillNIC(nic.nicparams)
10173 if nic.ip.lower() == constants.NIC_IP_POOL:
10175 nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10176 except errors.ReservationError:
10177 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10178 " from the address pool" % idx,
10179 errors.ECODE_STATE)
10180 self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10183 self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10184 except errors.ReservationError:
10185 raise errors.OpPrereqError("IP address %s already in use"
10186 " or does not belong to network %s" %
10188 errors.ECODE_NOTUNIQUE)
10190 # net is None, ip None or given
10191 if self.op.conflicts_check:
10192 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10195 # mirror node verification
10196 if self.op.disk_template in constants.DTS_INT_MIRROR:
10197 if self.op.snode == pnode.name:
10198 raise errors.OpPrereqError("The secondary node cannot be the"
10199 " primary node", errors.ECODE_INVAL)
10200 _CheckNodeOnline(self, self.op.snode)
10201 _CheckNodeNotDrained(self, self.op.snode)
10202 _CheckNodeVmCapable(self, self.op.snode)
10203 self.secondaries.append(self.op.snode)
10205 snode = self.cfg.GetNodeInfo(self.op.snode)
10206 if pnode.group != snode.group:
10207 self.LogWarning("The primary and secondary nodes are in two"
10208 " different node groups; the disk parameters"
10209 " from the first disk's node group will be"
10212 nodenames = [pnode.name] + self.secondaries
10214 # Verify instance specs
10215 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10217 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10218 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10219 constants.ISPEC_DISK_COUNT: len(self.disks),
10220 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10221 constants.ISPEC_NIC_COUNT: len(self.nics),
10222 constants.ISPEC_SPINDLE_USE: spindle_use,
10225 group_info = self.cfg.GetNodeGroup(pnode.group)
10226 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10227 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10228 if not self.op.ignore_ipolicy and res:
10229 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10230 (pnode.group, group_info.name, utils.CommaJoin(res)))
10231 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10233 if not self.adopt_disks:
10234 if self.op.disk_template == constants.DT_RBD:
10235 # _CheckRADOSFreeSpace() is just a placeholder.
10236 # Any function that checks prerequisites can be placed here.
10237 # Check if there is enough space on the RADOS cluster.
10238 _CheckRADOSFreeSpace()
10240 # Check lv size requirements, if not adopting
10241 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10242 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10244 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10245 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10246 disk[constants.IDISK_ADOPT])
10247 for disk in self.disks])
10248 if len(all_lvs) != len(self.disks):
10249 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10250 errors.ECODE_INVAL)
10251 for lv_name in all_lvs:
10253 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
10254 # to ReserveLV use the same syntax
10255 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10256 except errors.ReservationError:
10257 raise errors.OpPrereqError("LV named %s used by another instance" %
10258 lv_name, errors.ECODE_NOTUNIQUE)
10260 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10261 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10263 node_lvs = self.rpc.call_lv_list([pnode.name],
10264 vg_names.payload.keys())[pnode.name]
10265 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10266 node_lvs = node_lvs.payload
10268 delta = all_lvs.difference(node_lvs.keys())
10270 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10271 utils.CommaJoin(delta),
10272 errors.ECODE_INVAL)
10273 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10275 raise errors.OpPrereqError("Online logical volumes found, cannot"
10276 " adopt: %s" % utils.CommaJoin(online_lvs),
10277 errors.ECODE_STATE)
10278 # update the size of disk based on what is found
10279 for dsk in self.disks:
10280 dsk[constants.IDISK_SIZE] = \
10281 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10282 dsk[constants.IDISK_ADOPT])][0]))
10284 elif self.op.disk_template == constants.DT_BLOCK:
10285 # Normalize and de-duplicate device paths
10286 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10287 for disk in self.disks])
10288 if len(all_disks) != len(self.disks):
10289 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10290 errors.ECODE_INVAL)
10291 baddisks = [d for d in all_disks
10292 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10294 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10295 " cannot be adopted" %
10296 (", ".join(baddisks),
10297 constants.ADOPTABLE_BLOCKDEV_ROOT),
10298 errors.ECODE_INVAL)
10300 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10301 list(all_disks))[pnode.name]
10302 node_disks.Raise("Cannot get block device information from node %s" %
10304 node_disks = node_disks.payload
10305 delta = all_disks.difference(node_disks.keys())
10307 raise errors.OpPrereqError("Missing block device(s): %s" %
10308 utils.CommaJoin(delta),
10309 errors.ECODE_INVAL)
10310 for dsk in self.disks:
10311 dsk[constants.IDISK_SIZE] = \
10312 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10314 # Verify instance specs
10315 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10317 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10318 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10319 constants.ISPEC_DISK_COUNT: len(self.disks),
10320 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10321 for disk in self.disks],
10322 constants.ISPEC_NIC_COUNT: len(self.nics),
10323 constants.ISPEC_SPINDLE_USE: spindle_use,
10326 group_info = self.cfg.GetNodeGroup(pnode.group)
10327 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10328 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10329 if not self.op.ignore_ipolicy and res:
10330 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10331 " policy: %s") % (pnode.group,
10332 utils.CommaJoin(res)),
10333 errors.ECODE_INVAL)
10335 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10337 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10338 # check OS parameters (remotely)
10339 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10341 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10343 # memory check on primary node
10344 #TODO(dynmem): use MINMEM for checking
10346 _CheckNodeFreeMemory(self, self.pnode.name,
10347 "creating instance %s" % self.op.instance_name,
10348 self.be_full[constants.BE_MAXMEM],
10349 self.op.hypervisor)
10351 self.dry_run_result = list(nodenames)
10353 def Exec(self, feedback_fn):
10354 """Create and add the instance to the cluster.
10357 instance = self.op.instance_name
10358 pnode_name = self.pnode.name
10360 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10361 self.owned_locks(locking.LEVEL_NODE)), \
10362 "Node locks differ from node resource locks"
10364 ht_kind = self.op.hypervisor
10365 if ht_kind in constants.HTS_REQ_PORT:
10366 network_port = self.cfg.AllocatePort()
10368 network_port = None
10370 # This is ugly, but we have a chicken-and-egg problem here:
10371 # We can only take the group disk parameters, as the instance
10372 # has no disks yet (we are generating them right here).
10373 node = self.cfg.GetNodeInfo(pnode_name)
10374 nodegroup = self.cfg.GetNodeGroup(node.group)
10375 disks = _GenerateDiskTemplate(self,
10376 self.op.disk_template,
10377 instance, pnode_name,
10380 self.instance_file_storage_dir,
10381 self.op.file_driver,
10384 self.cfg.GetGroupDiskParams(nodegroup))
10386 iobj = objects.Instance(name=instance, os=self.op.os_type,
10387 primary_node=pnode_name,
10388 nics=self.nics, disks=disks,
10389 disk_template=self.op.disk_template,
10390 admin_state=constants.ADMINST_DOWN,
10391 network_port=network_port,
10392 beparams=self.op.beparams,
10393 hvparams=self.op.hvparams,
10394 hypervisor=self.op.hypervisor,
10395 osparams=self.op.osparams,
10399 for tag in self.op.tags:
10402 if self.adopt_disks:
10403 if self.op.disk_template == constants.DT_PLAIN:
10404 # rename LVs to the newly-generated names; we need to construct
10405 # 'fake' LV disks with the old data, plus the new unique_id
10406 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10408 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10409 rename_to.append(t_dsk.logical_id)
10410 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10411 self.cfg.SetDiskID(t_dsk, pnode_name)
10412 result = self.rpc.call_blockdev_rename(pnode_name,
10413 zip(tmp_disks, rename_to))
10414 result.Raise("Failed to rename adopted LVs")
10416 feedback_fn("* creating instance disks...")
10418 _CreateDisks(self, iobj)
10419 except errors.OpExecError:
10420 self.LogWarning("Device creation failed, reverting...")
10422 _RemoveDisks(self, iobj)
10424 self.cfg.ReleaseDRBDMinors(instance)
10427 feedback_fn("adding instance %s to cluster config" % instance)
10429 self.cfg.AddInstance(iobj, self.proc.GetECId())
10431 # Declare that we don't want to remove the instance lock anymore, as we've
10432 # added the instance to the config
10433 del self.remove_locks[locking.LEVEL_INSTANCE]
10435 if self.op.mode == constants.INSTANCE_IMPORT:
10436 # Release unused nodes
10437 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10439 # Release all nodes
10440 _ReleaseLocks(self, locking.LEVEL_NODE)
10443 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10444 feedback_fn("* wiping instance disks...")
10446 _WipeDisks(self, iobj)
10447 except errors.OpExecError, err:
10448 logging.exception("Wiping disks failed")
10449 self.LogWarning("Wiping instance disks failed (%s)", err)
10453 # Something is already wrong with the disks, don't do anything else
10455 elif self.op.wait_for_sync:
10456 disk_abort = not _WaitForSync(self, iobj)
10457 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10458 # make sure the disks are not degraded (still sync-ing is ok)
10459 feedback_fn("* checking mirrors status")
10460 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10465 _RemoveDisks(self, iobj)
10466 self.cfg.RemoveInstance(iobj.name)
10467 # Make sure the instance lock gets removed
10468 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10469 raise errors.OpExecError("There are some degraded disks for"
10472 # Release all node resource locks
10473 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10475 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10476 # we need to set the disk IDs to the primary node, since the
10477 # preceding code might or might not have done it, depending on
10478 # disk template and other options
10479 for disk in iobj.disks:
10480 self.cfg.SetDiskID(disk, pnode_name)
10481 if self.op.mode == constants.INSTANCE_CREATE:
10482 if not self.op.no_install:
10483 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10484 not self.op.wait_for_sync)
10486 feedback_fn("* pausing disk sync to install instance OS")
10487 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10490 for idx, success in enumerate(result.payload):
10492 logging.warn("pause-sync of instance %s for disk %d failed",
10495 feedback_fn("* running the instance OS create scripts...")
10496 # FIXME: pass debug option from opcode to backend
10498 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10499 self.op.debug_level)
10501 feedback_fn("* resuming disk sync")
10502 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10505 for idx, success in enumerate(result.payload):
10507 logging.warn("resume-sync of instance %s for disk %d failed",
10510 os_add_result.Raise("Could not add os for instance %s"
10511 " on node %s" % (instance, pnode_name))
10514 if self.op.mode == constants.INSTANCE_IMPORT:
10515 feedback_fn("* running the instance OS import scripts...")
10519 for idx, image in enumerate(self.src_images):
10523 # FIXME: pass debug option from opcode to backend
10524 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10525 constants.IEIO_FILE, (image, ),
10526 constants.IEIO_SCRIPT,
10527 (iobj.disks[idx], idx),
10529 transfers.append(dt)
10532 masterd.instance.TransferInstanceData(self, feedback_fn,
10533 self.op.src_node, pnode_name,
10534 self.pnode.secondary_ip,
10536 if not compat.all(import_result):
10537 self.LogWarning("Some disks for instance %s on node %s were not"
10538 " imported successfully" % (instance, pnode_name))
10540 rename_from = self._old_instance_name
10542 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10543 feedback_fn("* preparing remote import...")
10544 # The source cluster will stop the instance before attempting to make
10545 # a connection. In some cases stopping an instance can take a long
10546 # time, hence the shutdown timeout is added to the connection
10548 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10549 self.op.source_shutdown_timeout)
10550 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10552 assert iobj.primary_node == self.pnode.name
10554 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10555 self.source_x509_ca,
10556 self._cds, timeouts)
10557 if not compat.all(disk_results):
10558 # TODO: Should the instance still be started, even if some disks
10559 # failed to import (valid for local imports, too)?
10560 self.LogWarning("Some disks for instance %s on node %s were not"
10561 " imported successfully" % (instance, pnode_name))
10563 rename_from = self.source_instance_name
10566 # also checked in the prereq part
10567 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10570 # Run rename script on newly imported instance
10571 assert iobj.name == instance
10572 feedback_fn("Running rename script for %s" % instance)
10573 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10575 self.op.debug_level)
10576 if result.fail_msg:
10577 self.LogWarning("Failed to run rename script for %s on node"
10578 " %s: %s" % (instance, pnode_name, result.fail_msg))
10580 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10583 iobj.admin_state = constants.ADMINST_UP
10584 self.cfg.Update(iobj, feedback_fn)
10585 logging.info("Starting instance %s on node %s", instance, pnode_name)
10586 feedback_fn("* starting instance...")
10587 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10589 result.Raise("Could not start instance")
10591 return list(iobj.all_nodes)
10594 class LUInstanceMultiAlloc(NoHooksLU):
10595 """Allocates multiple instances at the same time.
10600 def CheckArguments(self):
10601 """Check arguments.
10605 for inst in self.op.instances:
10606 if inst.iallocator is not None:
10607 raise errors.OpPrereqError("iallocator is not allowed to be set on"
10608 " instance objects", errors.ECODE_INVAL)
10609 nodes.append(bool(inst.pnode))
10610 if inst.disk_template in constants.DTS_INT_MIRROR:
10611 nodes.append(bool(inst.snode))
10613 has_nodes = compat.any(nodes)
10614 if compat.all(nodes) ^ has_nodes:
10615 raise errors.OpPrereqError("There are instance objects providing"
10616 " pnode/snode while others do not",
10617 errors.ECODE_INVAL)
10619 if self.op.iallocator is None:
10620 default_iallocator = self.cfg.GetDefaultIAllocator()
10621 if default_iallocator and has_nodes:
10622 self.op.iallocator = default_iallocator
10624 raise errors.OpPrereqError("No iallocator or nodes on the instances"
10625 " given and no cluster-wide default"
10626 " iallocator found; please specify either"
10627 " an iallocator or nodes on the instances"
10628 " or set a cluster-wide default iallocator",
10629 errors.ECODE_INVAL)
10631 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
10633 raise errors.OpPrereqError("There are duplicate instance names: %s" %
10634 utils.CommaJoin(dups), errors.ECODE_INVAL)
10636 def ExpandNames(self):
10637 """Calculate the locks.
10640 self.share_locks = _ShareAll()
10641 self.needed_locks = {}
10643 if self.op.iallocator:
10644 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10645 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
10648 for inst in self.op.instances:
10649 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
10650 nodeslist.append(inst.pnode)
10651 if inst.snode is not None:
10652 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
10653 nodeslist.append(inst.snode)
10655 self.needed_locks[locking.LEVEL_NODE] = nodeslist
10656 # Lock resources of instance's primary and secondary nodes (copy to
10657 # prevent accidental modification)
10658 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
10660 def CheckPrereq(self):
10661 """Check prerequisite.
10664 cluster = self.cfg.GetClusterInfo()
10665 default_vg = self.cfg.GetVGName()
10666 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
10667 _ComputeNics(op, cluster, None,
10668 self.cfg, self.proc),
10669 _ComputeFullBeParams(op, cluster))
10670 for op in self.op.instances]
10671 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
10672 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10674 ial.Run(self.op.iallocator)
10676 if not ial.success:
10677 raise errors.OpPrereqError("Can't compute nodes using"
10678 " iallocator '%s': %s" %
10679 (self.op.iallocator, ial.info),
10680 errors.ECODE_NORES)
10682 self.ia_result = ial.result
10684 if self.op.dry_run:
10685 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
10686 constants.JOB_IDS_KEY: [],
10689 def _ConstructPartialResult(self):
10690 """Constructs the partial result.
10693 (allocatable, failed) = self.ia_result
10695 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
10696 map(compat.fst, allocatable),
10697 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
10700 def Exec(self, feedback_fn):
10701 """Executes the opcode.
10704 op2inst = dict((op.instance_name, op) for op in self.op.instances)
10705 (allocatable, failed) = self.ia_result
10708 for (name, nodes) in allocatable:
10709 op = op2inst.pop(name)
10712 (op.pnode, op.snode) = nodes
10714 (op.pnode,) = nodes
10718 missing = set(op2inst.keys()) - set(failed)
10719 assert not missing, \
10720 "Iallocator returned an incomplete result: %s" % utils.CommaJoin(missing)
10722 return ResultWithJobs(jobs, **self._ConstructPartialResult())
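# Editorial note (illustration only): the value returned above combines the
# submitted per-instance creation jobs with the partial result dict, so callers
# see both the job IDs and the instances the iallocator could not place.  With
# hypothetical instance names the opcode result ends up looking roughly like:
#
#   {constants.JOB_IDS_KEY: [[1234], [1235]],
#    opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY: ["inst1", "inst2"],
#    opcodes.OpInstanceMultiAlloc.FAILED_KEY: ["inst3"]}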
10725 def _CheckRADOSFreeSpace():
10726 """Compute disk size requirements inside the RADOS cluster.
10729 # For the RADOS cluster we assume there is always enough space.
10733 class LUInstanceConsole(NoHooksLU):
10734 """Connect to an instance's console.
10736 This is somewhat special in that it returns the command line that
10737 you need to run on the master node in order to connect to the
10743 def ExpandNames(self):
10744 self.share_locks = _ShareAll()
10745 self._ExpandAndLockInstance()
10747 def CheckPrereq(self):
10748 """Check prerequisites.
10750 This checks that the instance is in the cluster.
10753 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10754 assert self.instance is not None, \
10755 "Cannot retrieve locked instance %s" % self.op.instance_name
10756 _CheckNodeOnline(self, self.instance.primary_node)
10758 def Exec(self, feedback_fn):
10759 """Connect to the console of an instance
10762 instance = self.instance
10763 node = instance.primary_node
10765 node_insts = self.rpc.call_instance_list([node],
10766 [instance.hypervisor])[node]
10767 node_insts.Raise("Can't get node information from %s" % node)
10769 if instance.name not in node_insts.payload:
10770 if instance.admin_state == constants.ADMINST_UP:
10771 state = constants.INSTST_ERRORDOWN
10772 elif instance.admin_state == constants.ADMINST_DOWN:
10773 state = constants.INSTST_ADMINDOWN
10775 state = constants.INSTST_ADMINOFFLINE
10776 raise errors.OpExecError("Instance %s is not running (state %s)" %
10777 (instance.name, state))
10779 logging.debug("Connecting to console of %s on %s", instance.name, node)
10781 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10784 def _GetInstanceConsole(cluster, instance):
10785 """Returns console information for an instance.
10787 @type cluster: L{objects.Cluster}
10788 @type instance: L{objects.Instance}
10792 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10793 # beparams and hvparams are passed separately, to avoid editing the
10794 # instance and then saving the defaults in the instance itself.
10795 hvparams = cluster.FillHV(instance)
10796 beparams = cluster.FillBE(instance)
10797 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10799 assert console.instance == instance.name
10800 assert console.Validate()
10802 return console.ToDict()
10805 class LUInstanceReplaceDisks(LogicalUnit):
10806 """Replace the disks of an instance.
10809 HPATH = "mirrors-replace"
10810 HTYPE = constants.HTYPE_INSTANCE
10813 def CheckArguments(self):
10814 """Check arguments.
10817 remote_node = self.op.remote_node
10818 ialloc = self.op.iallocator
10819 if self.op.mode == constants.REPLACE_DISK_CHG:
10820 if remote_node is None and ialloc is None:
10821 raise errors.OpPrereqError("When changing the secondary either an"
10822 " iallocator script must be used or the"
10823 " new node given", errors.ECODE_INVAL)
10825 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10827 elif remote_node is not None or ialloc is not None:
10828 # Not replacing the secondary
10829 raise errors.OpPrereqError("The iallocator and new node options can"
10830 " only be used when changing the"
10831 " secondary node", errors.ECODE_INVAL)
10833 def ExpandNames(self):
10834 self._ExpandAndLockInstance()
10836 assert locking.LEVEL_NODE not in self.needed_locks
10837 assert locking.LEVEL_NODE_RES not in self.needed_locks
10838 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10840 assert self.op.iallocator is None or self.op.remote_node is None, \
10841 "Conflicting options"
10843 if self.op.remote_node is not None:
10844 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10846 # Warning: do not remove the locking of the new secondary here
10847 # unless DRBD8.AddChildren is changed to work in parallel;
10848 # currently it doesn't since parallel invocations of
10849 # FindUnusedMinor will conflict
10850 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10851 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10853 self.needed_locks[locking.LEVEL_NODE] = []
10854 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10856 if self.op.iallocator is not None:
10857 # iallocator will select a new node in the same group
10858 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10860 self.needed_locks[locking.LEVEL_NODE_RES] = []
10862 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10863 self.op.iallocator, self.op.remote_node,
10864 self.op.disks, False, self.op.early_release,
10865 self.op.ignore_ipolicy)
10867 self.tasklets = [self.replacer]
10869 def DeclareLocks(self, level):
10870 if level == locking.LEVEL_NODEGROUP:
10871 assert self.op.remote_node is None
10872 assert self.op.iallocator is not None
10873 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10875 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10876 # Lock all groups used by instance optimistically; this requires going
10877 # via the node before it's locked, requiring verification later on
10878 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10879 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10881 elif level == locking.LEVEL_NODE:
10882 if self.op.iallocator is not None:
10883 assert self.op.remote_node is None
10884 assert not self.needed_locks[locking.LEVEL_NODE]
10886 # Lock member nodes of all locked groups
10887 self.needed_locks[locking.LEVEL_NODE] = \
10889 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10890 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10892 self._LockInstancesNodes()
10893 elif level == locking.LEVEL_NODE_RES:
10895 self.needed_locks[locking.LEVEL_NODE_RES] = \
10896 self.needed_locks[locking.LEVEL_NODE]
10898 def BuildHooksEnv(self):
10899 """Build hooks env.
10901 This runs on the master, the primary and all the secondaries.
10904 instance = self.replacer.instance
10906 "MODE": self.op.mode,
10907 "NEW_SECONDARY": self.op.remote_node,
10908 "OLD_SECONDARY": instance.secondary_nodes[0],
10910 env.update(_BuildInstanceHookEnvByObject(self, instance))
10913 def BuildHooksNodes(self):
10914 """Build hooks nodes.
10917 instance = self.replacer.instance
10919 self.cfg.GetMasterNode(),
10920 instance.primary_node,
10922 if self.op.remote_node is not None:
10923 nl.append(self.op.remote_node)
10926 def CheckPrereq(self):
10927 """Check prerequisites.
10930 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10931 self.op.iallocator is None)
10933 # Verify if node group locks are still correct
10934 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10936 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10938 return LogicalUnit.CheckPrereq(self)
10941 class TLReplaceDisks(Tasklet):
10942 """Replaces disks for an instance.
10944 Note: Locking is not within the scope of this class.
10947 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10948 disks, delay_iallocator, early_release, ignore_ipolicy):
10949 """Initializes this class.
10952 Tasklet.__init__(self, lu)
10955 self.instance_name = instance_name
10957 self.iallocator_name = iallocator_name
10958 self.remote_node = remote_node
10960 self.delay_iallocator = delay_iallocator
10961 self.early_release = early_release
10962 self.ignore_ipolicy = ignore_ipolicy
10965 self.instance = None
10966 self.new_node = None
10967 self.target_node = None
10968 self.other_node = None
10969 self.remote_node_info = None
10970 self.node_secondary_ip = None
10973 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10974 """Compute a new secondary node using an IAllocator.
10977 req = iallocator.IAReqRelocate(name=instance_name,
10978 relocate_from=list(relocate_from))
10979 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
10981 ial.Run(iallocator_name)
10983 if not ial.success:
10984 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10985 " %s" % (iallocator_name, ial.info),
10986 errors.ECODE_NORES)
10988 remote_node_name = ial.result[0]
10990 lu.LogInfo("Selected new secondary for instance '%s': %s",
10991 instance_name, remote_node_name)
10993 return remote_node_name
10995 def _FindFaultyDisks(self, node_name):
10996 """Wrapper for L{_FindFaultyInstanceDisks}.
10999 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11002 def _CheckDisksActivated(self, instance):
11003 """Checks if the instance disks are activated.
11005 @param instance: The instance to check disks
11006 @return: True if they are activated, False otherwise
11009 nodes = instance.all_nodes
11011 for idx, dev in enumerate(instance.disks):
11013 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11014 self.cfg.SetDiskID(dev, node)
11016 result = _BlockdevFind(self, node, dev, instance)
11020 elif result.fail_msg or not result.payload:
11025 def CheckPrereq(self):
11026 """Check prerequisites.
11028 This checks that the instance is in the cluster.
11031 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11032 assert instance is not None, \
11033 "Cannot retrieve locked instance %s" % self.instance_name
11035 if instance.disk_template != constants.DT_DRBD8:
11036 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11037 " instances", errors.ECODE_INVAL)
11039 if len(instance.secondary_nodes) != 1:
11040 raise errors.OpPrereqError("The instance has a strange layout,"
11041 " expected one secondary but found %d" %
11042 len(instance.secondary_nodes),
11043 errors.ECODE_FAULT)
11045 if not self.delay_iallocator:
11046 self._CheckPrereq2()
11048 def _CheckPrereq2(self):
11049 """Check prerequisites, second part.
11051 This function should always be part of CheckPrereq. It was separated and is
11052 now called from Exec because during node evacuation iallocator was only
11053     called with an unmodified cluster model, not taking planned changes into
11054     account.
11057 instance = self.instance
11058 secondary_node = instance.secondary_nodes[0]
11060 if self.iallocator_name is None:
11061 remote_node = self.remote_node
11063 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11064 instance.name, instance.secondary_nodes)
11066 if remote_node is None:
11067 self.remote_node_info = None
11069 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11070 "Remote node '%s' is not locked" % remote_node
11072 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11073 assert self.remote_node_info is not None, \
11074 "Cannot retrieve locked node %s" % remote_node
11076 if remote_node == self.instance.primary_node:
11077 raise errors.OpPrereqError("The specified node is the primary node of"
11078 " the instance", errors.ECODE_INVAL)
11080 if remote_node == secondary_node:
11081 raise errors.OpPrereqError("The specified node is already the"
11082 " secondary node of the instance",
11083 errors.ECODE_INVAL)
11085 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11086 constants.REPLACE_DISK_CHG):
11087 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11088 errors.ECODE_INVAL)
11090 if self.mode == constants.REPLACE_DISK_AUTO:
11091 if not self._CheckDisksActivated(instance):
11092 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11093 " first" % self.instance_name,
11094 errors.ECODE_STATE)
11095 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11096 faulty_secondary = self._FindFaultyDisks(secondary_node)
11098 if faulty_primary and faulty_secondary:
11099 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11100 " one node and can not be repaired"
11101 " automatically" % self.instance_name,
11102 errors.ECODE_STATE)
11104       if faulty_primary:
11105         self.disks = faulty_primary
11106 self.target_node = instance.primary_node
11107 self.other_node = secondary_node
11108 check_nodes = [self.target_node, self.other_node]
11109 elif faulty_secondary:
11110 self.disks = faulty_secondary
11111 self.target_node = secondary_node
11112 self.other_node = instance.primary_node
11113 check_nodes = [self.target_node, self.other_node]
11119 # Non-automatic modes
11120 if self.mode == constants.REPLACE_DISK_PRI:
11121 self.target_node = instance.primary_node
11122 self.other_node = secondary_node
11123 check_nodes = [self.target_node, self.other_node]
11125 elif self.mode == constants.REPLACE_DISK_SEC:
11126 self.target_node = secondary_node
11127 self.other_node = instance.primary_node
11128 check_nodes = [self.target_node, self.other_node]
11130 elif self.mode == constants.REPLACE_DISK_CHG:
11131 self.new_node = remote_node
11132 self.other_node = instance.primary_node
11133 self.target_node = secondary_node
11134 check_nodes = [self.new_node, self.other_node]
11136 _CheckNodeNotDrained(self.lu, remote_node)
11137 _CheckNodeVmCapable(self.lu, remote_node)
11139 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11140 assert old_node_info is not None
11141 if old_node_info.offline and not self.early_release:
11142 # doesn't make sense to delay the release
11143 self.early_release = True
11144 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11145 " early-release mode", secondary_node)
11148       raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11149                                    self.mode)
11151 # If not specified all disks should be replaced
11153 self.disks = range(len(self.instance.disks))
11155 # TODO: This is ugly, but right now we can't distinguish between internal
11156 # submitted opcode and external one. We should fix that.
11157 if self.remote_node_info:
11158 # We change the node, lets verify it still meets instance policy
11159 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11160 cluster = self.cfg.GetClusterInfo()
11161 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11163 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11164 ignore=self.ignore_ipolicy)
11166 for node in check_nodes:
11167 _CheckNodeOnline(self.lu, node)
11169 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11172 if node_name is not None)
11174 # Release unneeded node and node resource locks
11175 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11176 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11178 # Release any owned node group
11179 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
11180 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11182 # Check whether disks are valid
11183 for disk_idx in self.disks:
11184 instance.FindDisk(disk_idx)
11186 # Get secondary node IP addresses
11187 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11188 in self.cfg.GetMultiNodeInfo(touched_nodes))
11190 def Exec(self, feedback_fn):
11191 """Execute disk replacement.
11193 This dispatches the disk replacement to the appropriate handler.
11196 if self.delay_iallocator:
11197 self._CheckPrereq2()
11200 # Verify owned locks before starting operation
11201 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11202 assert set(owned_nodes) == set(self.node_secondary_ip), \
11203 ("Incorrect node locks, owning %s, expected %s" %
11204 (owned_nodes, self.node_secondary_ip.keys()))
11205 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11206 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11208 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11209 assert list(owned_instances) == [self.instance_name], \
11210 "Instance '%s' not locked" % self.instance_name
11212 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11213 "Should not own any node group lock at this point"
11216 feedback_fn("No disks need replacement for instance '%s'" %
11217 self.instance.name)
11220 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11221 (utils.CommaJoin(self.disks), self.instance.name))
11222       feedback_fn("Current primary node: %s" % self.instance.primary_node)
11223       feedback_fn("Current secondary node: %s" %
11224 utils.CommaJoin(self.instance.secondary_nodes))
11226 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11228 # Activate the instance disks if we're replacing them on a down instance
11230 _StartInstanceDisks(self.lu, self.instance, True)
11233 # Should we replace the secondary node?
11234 if self.new_node is not None:
11235 fn = self._ExecDrbd8Secondary
11237 fn = self._ExecDrbd8DiskOnly
11239 result = fn(feedback_fn)
11241 # Deactivate the instance disks if we're replacing them on a
11244 _SafeShutdownInstanceDisks(self.lu, self.instance)
11246 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11249 # Verify owned locks
11250 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11251 nodes = frozenset(self.node_secondary_ip)
11252 assert ((self.early_release and not owned_nodes) or
11253 (not self.early_release and not (set(owned_nodes) - nodes))), \
11254 ("Not owning the correct locks, early_release=%s, owned=%r,"
11255 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11259 def _CheckVolumeGroup(self, nodes):
11260 self.lu.LogInfo("Checking volume groups")
11262 vgname = self.cfg.GetVGName()
11264 # Make sure volume group exists on all involved nodes
11265 results = self.rpc.call_vg_list(nodes)
11267 raise errors.OpExecError("Can't list volume groups on the nodes")
11270 res = results[node]
11271 res.Raise("Error checking node %s" % node)
11272 if vgname not in res.payload:
11273           raise errors.OpExecError("Volume group '%s' not found on node %s" %
11274                                    (vgname, node))
11276 def _CheckDisksExistence(self, nodes):
11277 # Check disk existence
11278 for idx, dev in enumerate(self.instance.disks):
11279 if idx not in self.disks:
11283 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
11284 self.cfg.SetDiskID(dev, node)
11286 result = _BlockdevFind(self, node, dev, self.instance)
11288 msg = result.fail_msg
11289 if msg or not result.payload:
11291 msg = "disk not found"
11292           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11293                                    (idx, node, msg))
11295 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11296 for idx, dev in enumerate(self.instance.disks):
11297 if idx not in self.disks:
11300         self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11301                         (idx, node_name))
11303 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11304 on_primary, ldisk=ldisk):
11305 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11306 " replace disks for instance %s" %
11307 (node_name, self.instance.name))
11309 def _CreateNewStorage(self, node_name):
11310 """Create new storage on the primary or secondary node.
11312 This is only used for same-node replaces, not for changing the
11313 secondary node, hence we don't want to modify the existing disk.
11318 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11319 for idx, dev in enumerate(disks):
11320 if idx not in self.disks:
11323 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
11325 self.cfg.SetDiskID(dev, node_name)
11327 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11328 names = _GenerateUniqueNames(self.lu, lv_names)
11330 (data_disk, meta_disk) = dev.children
11331 vg_data = data_disk.logical_id[0]
11332 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11333 logical_id=(vg_data, names[0]),
11334 params=data_disk.params)
11335 vg_meta = meta_disk.logical_id[0]
11336 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11337 size=constants.DRBD_META_SIZE,
11338 logical_id=(vg_meta, names[1]),
11339 params=meta_disk.params)
11341 new_lvs = [lv_data, lv_meta]
11342 old_lvs = [child.Copy() for child in dev.children]
11343 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11345 # we pass force_create=True to force the LVM creation
11346 for new_lv in new_lvs:
11347 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11348 _GetInstanceInfoText(self.instance), False)
11352 def _CheckDevices(self, node_name, iv_names):
11353 for name, (dev, _, _) in iv_names.iteritems():
11354 self.cfg.SetDiskID(dev, node_name)
11356 result = _BlockdevFind(self, node_name, dev, self.instance)
11358 msg = result.fail_msg
11359 if msg or not result.payload:
11361 msg = "disk not found"
11362         raise errors.OpExecError("Can't find DRBD device %s: %s" %
11363                                  (name, msg))
11365 if result.payload.is_degraded:
11366 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11368 def _RemoveOldStorage(self, node_name, iv_names):
11369 for name, (_, old_lvs, _) in iv_names.iteritems():
11370 self.lu.LogInfo("Remove logical volumes for %s" % name)
11373 self.cfg.SetDiskID(lv, node_name)
11375 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11377 self.lu.LogWarning("Can't remove old LV: %s" % msg,
11378 hint="remove unused LVs manually")
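  # Illustrative note (not part of the original module): the "iv_names"
  # mapping that _CreateNewStorage builds and _CheckDevices/_RemoveOldStorage
  # consume has, per instance disk, roughly the shape
  #
  #   iv_names["disk/0"] = (drbd_disk_object,
  #                         [old_data_lv, old_meta_lv],   # LVs removed later
  #                         [new_data_lv, new_meta_lv])   # freshly created LVs
  #
  # The key and the variable names in this example are invented for clarity.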
11380 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11381 """Replace a disk on the primary or secondary for DRBD 8.
11383 The algorithm for replace is quite complicated:
11385 1. for each disk to be replaced:
11387 1. create new LVs on the target node with unique names
11388 1. detach old LVs from the drbd device
11389 1. rename old LVs to name_replaced.<time_t>
11390 1. rename new LVs to old LVs
11391 1. attach the new LVs (with the old names now) to the drbd device
11393 1. wait for sync across all devices
11395 1. for each modified disk:
11397     1. remove old LVs (which have the name name_replaced.<time_t>)
11399 Failures are not very well handled.
11404 # Step: check device activation
11405 self.lu.LogStep(1, steps_total, "Check device existence")
11406 self._CheckDisksExistence([self.other_node, self.target_node])
11407 self._CheckVolumeGroup([self.target_node, self.other_node])
11409 # Step: check other node consistency
11410 self.lu.LogStep(2, steps_total, "Check peer consistency")
11411 self._CheckDisksConsistency(self.other_node,
11412 self.other_node == self.instance.primary_node,
11415 # Step: create new storage
11416 self.lu.LogStep(3, steps_total, "Allocate new storage")
11417 iv_names = self._CreateNewStorage(self.target_node)
11419 # Step: for each lv, detach+rename*2+attach
11420 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11421 for dev, old_lvs, new_lvs in iv_names.itervalues():
11422 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11424 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11426 result.Raise("Can't detach drbd from local storage on node"
11427 " %s for device %s" % (self.target_node, dev.iv_name))
11429 #cfg.Update(instance)
11431 # ok, we created the new LVs, so now we know we have the needed
11432 # storage; as such, we proceed on the target node to rename
11433 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11434 # using the assumption that logical_id == physical_id (which in
11435 # turn is the unique_id on that node)
11437 # FIXME(iustin): use a better name for the replaced LVs
11438 temp_suffix = int(time.time())
11439 ren_fn = lambda d, suff: (d.physical_id[0],
11440 d.physical_id[1] + "_replaced-%s" % suff)
11442 # Build the rename list based on what LVs exist on the node
11443 rename_old_to_new = []
11444 for to_ren in old_lvs:
11445 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11446 if not result.fail_msg and result.payload:
11448 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11450 self.lu.LogInfo("Renaming the old LVs on the target node")
11451       result = self.rpc.call_blockdev_rename(self.target_node,
11452                                              rename_old_to_new)
11453 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11455 # Now we rename the new LVs to the old LVs
11456 self.lu.LogInfo("Renaming the new LVs on the target node")
11457 rename_new_to_old = [(new, old.physical_id)
11458 for old, new in zip(old_lvs, new_lvs)]
11459       result = self.rpc.call_blockdev_rename(self.target_node,
11460                                              rename_new_to_old)
11461 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11463 # Intermediate steps of in memory modifications
11464 for old, new in zip(old_lvs, new_lvs):
11465 new.logical_id = old.logical_id
11466 self.cfg.SetDiskID(new, self.target_node)
11468 # We need to modify old_lvs so that removal later removes the
11469 # right LVs, not the newly added ones; note that old_lvs is a
11471 for disk in old_lvs:
11472 disk.logical_id = ren_fn(disk, temp_suffix)
11473 self.cfg.SetDiskID(disk, self.target_node)
11475 # Now that the new lvs have the old name, we can add them to the device
11476 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11477 result = self.rpc.call_blockdev_addchildren(self.target_node,
11478 (dev, self.instance), new_lvs)
11479 msg = result.fail_msg
11481 for new_lv in new_lvs:
11482           msg2 = self.rpc.call_blockdev_remove(self.target_node,
11483                                                new_lv).fail_msg
11484           if msg2:
11485 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11486                                hint=("cleanup manually the unused logical"
11487                                      " volumes"))
11488 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11490 cstep = itertools.count(5)
11492 if self.early_release:
11493 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11494 self._RemoveOldStorage(self.target_node, iv_names)
11495 # TODO: Check if releasing locks early still makes sense
11496 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11498 # Release all resource locks except those used by the instance
11499 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11500 keep=self.node_secondary_ip.keys())
11502 # Release all node locks while waiting for sync
11503 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11505 # TODO: Can the instance lock be downgraded here? Take the optional disk
11506 # shutdown in the caller into consideration.
11509 # This can fail as the old devices are degraded and _WaitForSync
11510 # does a combined result over all disks, so we don't check its return value
11511 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11512 _WaitForSync(self.lu, self.instance)
11514 # Check all devices manually
11515 self._CheckDevices(self.instance.primary_node, iv_names)
11517 # Step: remove old storage
11518 if not self.early_release:
11519 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11520 self._RemoveOldStorage(self.target_node, iv_names)
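  # Illustrative summary (comment only, nothing here is executed): the
  # per-disk LV swap performed by _ExecDrbd8DiskOnly boils down to
  #
  #   detach old LVs from the DRBD device
  #   rename old LVs  ->  <lv name>_replaced-<timestamp>
  #   rename new LVs  ->  the old LV names
  #   re-attach the renamed new LVs to the DRBD device
  #   wait for resync, then remove the *_replaced-<timestamp> LVs
  #
  # so the DRBD device keeps its name while its backing storage is replaced
  # underneath it.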
11522 def _ExecDrbd8Secondary(self, feedback_fn):
11523 """Replace the secondary node for DRBD 8.
11525 The algorithm for replace is quite complicated:
11526 - for all disks of the instance:
11527 - create new LVs on the new node with same names
11528 - shutdown the drbd device on the old secondary
11529 - disconnect the drbd network on the primary
11530 - create the drbd device on the new secondary
11531 - network attach the drbd on the primary, using an artifice:
11532 the drbd code for Attach() will connect to the network if it
11533 finds a device which is connected to the good local disks but
11534 not network enabled
11535 - wait for sync across all devices
11536 - remove all disks from the old secondary
11538 Failures are not very well handled.
11543 pnode = self.instance.primary_node
11545 # Step: check device activation
11546 self.lu.LogStep(1, steps_total, "Check device existence")
11547 self._CheckDisksExistence([self.instance.primary_node])
11548 self._CheckVolumeGroup([self.instance.primary_node])
11550 # Step: check other node consistency
11551 self.lu.LogStep(2, steps_total, "Check peer consistency")
11552 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11554 # Step: create new storage
11555 self.lu.LogStep(3, steps_total, "Allocate new storage")
11556 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11557 for idx, dev in enumerate(disks):
11558 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11559 (self.new_node, idx))
11560 # we pass force_create=True to force LVM creation
11561 for new_lv in dev.children:
11562 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11563 True, _GetInstanceInfoText(self.instance), False)
11565     # Step 4: drbd minors and drbd setup changes
11566 # after this, we must manually remove the drbd minors on both the
11567 # error and the success paths
11568 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11569 minors = self.cfg.AllocateDRBDMinor([self.new_node
11570 for dev in self.instance.disks],
11571 self.instance.name)
11572 logging.debug("Allocated minors %r", minors)
11575 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11576 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11577 (self.new_node, idx))
11578 # create new devices on new_node; note that we create two IDs:
11579 # one without port, so the drbd will be activated without
11580 # networking information on the new node at this stage, and one
11581 # with network, for the latter activation in step 4
11582 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11583         if self.instance.primary_node == o_node1:
11584           p_minor = o_minor1
11585         else:
11586           assert self.instance.primary_node == o_node2, "Three-node instance?"
11587           p_minor = o_minor2
11589 new_alone_id = (self.instance.primary_node, self.new_node, None,
11590 p_minor, new_minor, o_secret)
11591 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11592 p_minor, new_minor, o_secret)
11594 iv_names[idx] = (dev, dev.children, new_net_id)
11595 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11597 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11598 logical_id=new_alone_id,
11599                                 children=dev.children,
11600                                 size=dev.size,
11601                                 params={})
11602         (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11603                                                self.cfg)
11604         try:
11605           _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11606                                 anno_new_drbd,
11607 _GetInstanceInfoText(self.instance), False)
11608 except errors.GenericError:
11609           self.cfg.ReleaseDRBDMinors(self.instance.name)
11610           raise
11612 # We have new devices, shutdown the drbd on the old secondary
11613 for idx, dev in enumerate(self.instance.disks):
11614 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11615 self.cfg.SetDiskID(dev, self.target_node)
11616 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11617 (dev, self.instance)).fail_msg
11619 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11620                            " node: %s" % (idx, msg),
11621 hint=("Please cleanup this device manually as"
11622 " soon as possible"))
11624 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11625 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11626 self.instance.disks)[pnode]
11628 msg = result.fail_msg
11630 # detaches didn't succeed (unlikely)
11631 self.cfg.ReleaseDRBDMinors(self.instance.name)
11632 raise errors.OpExecError("Can't detach the disks from the network on"
11633 " old node: %s" % (msg,))
11635 # if we managed to detach at least one, we update all the disks of
11636 # the instance to point to the new secondary
11637 self.lu.LogInfo("Updating instance configuration")
11638 for dev, _, new_logical_id in iv_names.itervalues():
11639 dev.logical_id = new_logical_id
11640 self.cfg.SetDiskID(dev, self.instance.primary_node)
11642 self.cfg.Update(self.instance, feedback_fn)
11644 # Release all node locks (the configuration has been updated)
11645 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11647 # and now perform the drbd attach
11648 self.lu.LogInfo("Attaching primary drbds to new secondary"
11649 " (standalone => connected)")
11650 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11652 self.node_secondary_ip,
11653 (self.instance.disks, self.instance),
11654 self.instance.name,
11656 for to_node, to_result in result.items():
11657 msg = to_result.fail_msg
11659 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11661 hint=("please do a gnt-instance info to see the"
11662 " status of disks"))
11664 cstep = itertools.count(5)
11666 if self.early_release:
11667 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11668 self._RemoveOldStorage(self.target_node, iv_names)
11669 # TODO: Check if releasing locks early still makes sense
11670 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11672 # Release all resource locks except those used by the instance
11673 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11674 keep=self.node_secondary_ip.keys())
11676 # TODO: Can the instance lock be downgraded here? Take the optional disk
11677 # shutdown in the caller into consideration.
11680 # This can fail as the old devices are degraded and _WaitForSync
11681 # does a combined result over all disks, so we don't check its return value
11682 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11683 _WaitForSync(self.lu, self.instance)
11685 # Check all devices manually
11686 self._CheckDevices(self.instance.primary_node, iv_names)
11688 # Step: remove old storage
11689 if not self.early_release:
11690 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11691 self._RemoveOldStorage(self.target_node, iv_names)
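# Illustrative sketch only: the owning LogicalUnit is expected to wire the
# tasklet above in roughly the following way (the attribute names below are
# an assumption based on TLReplaceDisks.__init__ and the self.replacer usage
# earlier in this file, not a verbatim copy of that LU):
#
#   self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
#                                  self.op.iallocator, self.op.remote_node,
#                                  self.op.disks, False, self.op.early_release,
#                                  self.op.ignore_ipolicy)
#   self.tasklets = [self.replacer]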
11694 class LURepairNodeStorage(NoHooksLU):
11695 """Repairs the volume group on a node.
11700 def CheckArguments(self):
11701 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11703 storage_type = self.op.storage_type
11705 if (constants.SO_FIX_CONSISTENCY not in
11706 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11707 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11708 " repaired" % storage_type,
11709 errors.ECODE_INVAL)
11711 def ExpandNames(self):
11712 self.needed_locks = {
11713 locking.LEVEL_NODE: [self.op.node_name],
11716 def _CheckFaultyDisks(self, instance, node_name):
11717 """Ensure faulty disks abort the opcode or at least warn."""
11719 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11721 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11722 " node '%s'" % (instance.name, node_name),
11723 errors.ECODE_STATE)
11724 except errors.OpPrereqError, err:
11725 if self.op.ignore_consistency:
11726 self.proc.LogWarning(str(err.args[0]))
11730 def CheckPrereq(self):
11731 """Check prerequisites.
11734 # Check whether any instance on this node has faulty disks
11735 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11736 if inst.admin_state != constants.ADMINST_UP:
11738 check_nodes = set(inst.all_nodes)
11739 check_nodes.discard(self.op.node_name)
11740 for inst_node_name in check_nodes:
11741 self._CheckFaultyDisks(inst, inst_node_name)
11743 def Exec(self, feedback_fn):
11744 feedback_fn("Repairing storage unit '%s' on %s ..." %
11745 (self.op.name, self.op.node_name))
11747 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11748 result = self.rpc.call_storage_execute(self.op.node_name,
11749 self.op.storage_type, st_args,
11751 constants.SO_FIX_CONSISTENCY)
11752 result.Raise("Failed to repair storage unit '%s' on %s" %
11753 (self.op.name, self.op.node_name))
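# Illustrative sketch only (the opcode name and field names are inferred from
# the self.op attributes used above and should be treated as assumptions):
#
#   op = opcodes.OpRepairNodeStorage(node_name="node1.example.com",
#                                    storage_type=constants.ST_LVM_VG,
#                                    name="xenvg",
#                                    ignore_consistency=False)
#
# which ends up running the SO_FIX_CONSISTENCY storage operation on the node.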
11756 class LUNodeEvacuate(NoHooksLU):
11757 """Evacuates instances off a list of nodes.
11762 _MODE2IALLOCATOR = {
11763 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11764 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11765 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11767 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11768 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11769 constants.IALLOCATOR_NEVAC_MODES)
11771 def CheckArguments(self):
11772 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11774 def ExpandNames(self):
11775 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11777 if self.op.remote_node is not None:
11778 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11779 assert self.op.remote_node
11781 if self.op.remote_node == self.op.node_name:
11782 raise errors.OpPrereqError("Can not use evacuated node as a new"
11783 " secondary node", errors.ECODE_INVAL)
11785 if self.op.mode != constants.NODE_EVAC_SEC:
11786 raise errors.OpPrereqError("Without the use of an iallocator only"
11787 " secondary instances can be evacuated",
11788 errors.ECODE_INVAL)
11791 self.share_locks = _ShareAll()
11792 self.needed_locks = {
11793 locking.LEVEL_INSTANCE: [],
11794 locking.LEVEL_NODEGROUP: [],
11795 locking.LEVEL_NODE: [],
11798 # Determine nodes (via group) optimistically, needs verification once locks
11799 # have been acquired
11800 self.lock_nodes = self._DetermineNodes()
11802 def _DetermineNodes(self):
11803 """Gets the list of nodes to operate on.
11806 if self.op.remote_node is None:
11807 # Iallocator will choose any node(s) in the same group
11808 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11810 group_nodes = frozenset([self.op.remote_node])
11812 # Determine nodes to be locked
11813 return set([self.op.node_name]) | group_nodes
11815 def _DetermineInstances(self):
11816 """Builds list of instances to operate on.
11819 assert self.op.mode in constants.NODE_EVAC_MODES
11821 if self.op.mode == constants.NODE_EVAC_PRI:
11822 # Primary instances only
11823 inst_fn = _GetNodePrimaryInstances
11824 assert self.op.remote_node is None, \
11825 "Evacuating primary instances requires iallocator"
11826 elif self.op.mode == constants.NODE_EVAC_SEC:
11827 # Secondary instances only
11828 inst_fn = _GetNodeSecondaryInstances
11831 assert self.op.mode == constants.NODE_EVAC_ALL
11832 inst_fn = _GetNodeInstances
11833 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11835 raise errors.OpPrereqError("Due to an issue with the iallocator"
11836 " interface it is not possible to evacuate"
11837 " all instances at once; specify explicitly"
11838                                  " whether to evacuate primary or secondary"
11839                                  " instances",
11840 errors.ECODE_INVAL)
11842 return inst_fn(self.cfg, self.op.node_name)
11844 def DeclareLocks(self, level):
11845 if level == locking.LEVEL_INSTANCE:
11846 # Lock instances optimistically, needs verification once node and group
11847 # locks have been acquired
11848 self.needed_locks[locking.LEVEL_INSTANCE] = \
11849 set(i.name for i in self._DetermineInstances())
11851 elif level == locking.LEVEL_NODEGROUP:
11852 # Lock node groups for all potential target nodes optimistically, needs
11853 # verification once nodes have been acquired
11854 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11855 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11857 elif level == locking.LEVEL_NODE:
11858 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11860 def CheckPrereq(self):
11862 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11863 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11864 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11866 need_nodes = self._DetermineNodes()
11868 if not owned_nodes.issuperset(need_nodes):
11869 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11870 " locks were acquired, current nodes are"
11871                                  " '%s', used to be '%s'; retry the"
11873 (self.op.node_name,
11874 utils.CommaJoin(need_nodes),
11875 utils.CommaJoin(owned_nodes)),
11876 errors.ECODE_STATE)
11878 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11879 if owned_groups != wanted_groups:
11880 raise errors.OpExecError("Node groups changed since locks were acquired,"
11881 " current groups are '%s', used to be '%s';"
11882 " retry the operation" %
11883 (utils.CommaJoin(wanted_groups),
11884 utils.CommaJoin(owned_groups)))
11886 # Determine affected instances
11887 self.instances = self._DetermineInstances()
11888 self.instance_names = [i.name for i in self.instances]
11890 if set(self.instance_names) != owned_instances:
11891 raise errors.OpExecError("Instances on node '%s' changed since locks"
11892 " were acquired, current instances are '%s',"
11893 " used to be '%s'; retry the operation" %
11894 (self.op.node_name,
11895 utils.CommaJoin(self.instance_names),
11896 utils.CommaJoin(owned_instances)))
11898 if self.instance_names:
11899 self.LogInfo("Evacuating instances from node '%s': %s",
11901 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11903 self.LogInfo("No instances to evacuate from node '%s'",
11906 if self.op.remote_node is not None:
11907 for i in self.instances:
11908 if i.primary_node == self.op.remote_node:
11909 raise errors.OpPrereqError("Node %s is the primary node of"
11910 " instance %s, cannot use it as"
11912 (self.op.remote_node, i.name),
11913 errors.ECODE_INVAL)
11915 def Exec(self, feedback_fn):
11916 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11918 if not self.instance_names:
11919 # No instances to evacuate
11922 elif self.op.iallocator is not None:
11923 # TODO: Implement relocation to other group
11924 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
11925 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
11926 instances=list(self.instance_names))
11927 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11929 ial.Run(self.op.iallocator)
11931 if not ial.success:
11932 raise errors.OpPrereqError("Can't compute node evacuation using"
11933 " iallocator '%s': %s" %
11934 (self.op.iallocator, ial.info),
11935 errors.ECODE_NORES)
11937 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11939 elif self.op.remote_node is not None:
11940 assert self.op.mode == constants.NODE_EVAC_SEC
11941       jobs = [
11942         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11943 remote_node=self.op.remote_node,
11945 mode=constants.REPLACE_DISK_CHG,
11946 early_release=self.op.early_release)]
11947         for instance_name in self.instance_names]
11950     else:
11951       raise errors.ProgrammerError("No iallocator or remote node")
11953 return ResultWithJobs(jobs)
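# Illustrative note: the "jobs" value returned above is a list of job
# definitions, each of which is itself a list of opcodes forming one job,
# for example (instance names invented):
#
#   jobs = [
#     [opcodes.OpInstanceReplaceDisks(instance_name="inst1", ...)],
#     [opcodes.OpInstanceReplaceDisks(instance_name="inst2", ...)],
#   ]
#
# i.e. one single-opcode job per evacuated instance in the remote-node case.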
11956 def _SetOpEarlyRelease(early_release, op):
11957 """Sets C{early_release} flag on opcodes if available.
11961 op.early_release = early_release
11962 except AttributeError:
11963     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11965   return op
11968 def _NodeEvacDest(use_nodes, group, nodes):
11969 """Returns group or nodes depending on caller's choice.
11972   if use_nodes:
11973     return utils.CommaJoin(nodes)
11974   else:
11975     return group
11978 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11979 """Unpacks the result of change-group and node-evacuate iallocator requests.
11981 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11982 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11984 @type lu: L{LogicalUnit}
11985 @param lu: Logical unit instance
11986 @type alloc_result: tuple/list
11987 @param alloc_result: Result from iallocator
11988 @type early_release: bool
11989 @param early_release: Whether to release locks early if possible
11990 @type use_nodes: bool
11991 @param use_nodes: Whether to display node names instead of groups
11994 (moved, failed, jobs) = alloc_result
11997 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11998 for (name, reason) in failed)
11999 lu.LogWarning("Unable to evacuate instances %s", failreason)
12000 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12003 lu.LogInfo("Instances to be moved: %s",
12004 utils.CommaJoin("%s (to %s)" %
12005 (name, _NodeEvacDest(use_nodes, group, nodes))
12006 for (name, group, nodes) in moved))
12008   return [map(compat.partial(_SetOpEarlyRelease, early_release),
12009               map(opcodes.OpCode.LoadOpCode, ops))
12010           for ops in jobs]
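# Illustrative note: the alloc_result tuple unpacked in the function above
# has the following shape (all names and values invented for the example):
#
#   alloc_result = (
#     [("inst1", "target-group", ["node2"])],     # moved: (name, group, nodes)
#     [("inst2", "not enough memory")],           # failed: (name, reason)
#     [[op1_dict, op2_dict]],                     # jobs: serialized opcodes
#   )
#
# Each inner list of serialized opcodes is turned back into opcode objects
# and gets the early_release flag applied where the opcode supports it.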
12013 def _DiskSizeInBytesToMebibytes(lu, size):
12014 """Converts a disk size in bytes to mebibytes.
12016 Warns and rounds up if the size isn't an even multiple of 1 MiB.
12019 (mib, remainder) = divmod(size, 1024 * 1024)
12022 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12023 " to not overwrite existing data (%s bytes will not be"
12024                   " wiped)", (1024 * 1024) - remainder)
12025     mib += 1
12027   return mib
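# Illustrative worked example for _DiskSizeInBytesToMebibytes: for a size of
# 1 GiB plus 512 bytes,
#
#   divmod(1073742336, 1024 * 1024) == (1024, 512)
#
# so the function warns that 1048064 bytes (1048576 - 512) will not be wiped
# and returns 1025 MiB after rounding up.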
12030 class LUInstanceGrowDisk(LogicalUnit):
12031 """Grow a disk of an instance.
12034 HPATH = "disk-grow"
12035 HTYPE = constants.HTYPE_INSTANCE
12038 def ExpandNames(self):
12039 self._ExpandAndLockInstance()
12040 self.needed_locks[locking.LEVEL_NODE] = []
12041 self.needed_locks[locking.LEVEL_NODE_RES] = []
12042 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12043 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12045 def DeclareLocks(self, level):
12046 if level == locking.LEVEL_NODE:
12047 self._LockInstancesNodes()
12048 elif level == locking.LEVEL_NODE_RES:
12050 self.needed_locks[locking.LEVEL_NODE_RES] = \
12051 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12053 def BuildHooksEnv(self):
12054 """Build hooks env.
12056 This runs on the master, the primary and all the secondaries.
12060 "DISK": self.op.disk,
12061 "AMOUNT": self.op.amount,
12062 "ABSOLUTE": self.op.absolute,
12064 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12067 def BuildHooksNodes(self):
12068 """Build hooks nodes.
12071 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12074 def CheckPrereq(self):
12075 """Check prerequisites.
12077 This checks that the instance is in the cluster.
12080 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12081 assert instance is not None, \
12082 "Cannot retrieve locked instance %s" % self.op.instance_name
12083 nodenames = list(instance.all_nodes)
12084 for node in nodenames:
12085 _CheckNodeOnline(self, node)
12087 self.instance = instance
12089 if instance.disk_template not in constants.DTS_GROWABLE:
12090 raise errors.OpPrereqError("Instance's disk layout does not support"
12091 " growing", errors.ECODE_INVAL)
12093 self.disk = instance.FindDisk(self.op.disk)
12095 if self.op.absolute:
12096 self.target = self.op.amount
12097 self.delta = self.target - self.disk.size
12099 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12100 "current disk size (%s)" %
12101 (utils.FormatUnit(self.target, "h"),
12102 utils.FormatUnit(self.disk.size, "h")),
12103 errors.ECODE_STATE)
12105 self.delta = self.op.amount
12106 self.target = self.disk.size + self.delta
12108 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12109 utils.FormatUnit(self.delta, "h"),
12110 errors.ECODE_INVAL)
12112 if instance.disk_template not in (constants.DT_FILE,
12113 constants.DT_SHARED_FILE,
12115 # TODO: check the free disk space for file, when that feature will be
12117 _CheckNodesFreeDiskPerVG(self, nodenames,
12118 self.disk.ComputeGrowth(self.delta))
12120 def Exec(self, feedback_fn):
12121 """Execute disk grow.
12124 instance = self.instance
12127 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12128 assert (self.owned_locks(locking.LEVEL_NODE) ==
12129 self.owned_locks(locking.LEVEL_NODE_RES))
12131 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12133 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12135 raise errors.OpExecError("Cannot activate block device to grow")
12137 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12138 (self.op.disk, instance.name,
12139 utils.FormatUnit(self.delta, "h"),
12140 utils.FormatUnit(self.target, "h")))
12142 # First run all grow ops in dry-run mode
12143 for node in instance.all_nodes:
12144 self.cfg.SetDiskID(disk, node)
12145 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12147 result.Raise("Dry-run grow request failed to node %s" % node)
12150 # Get disk size from primary node for wiping
12151 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12152 result.Raise("Failed to retrieve disk size from node '%s'" %
12153 instance.primary_node)
12155 (disk_size_in_bytes, ) = result.payload
12157 if disk_size_in_bytes is None:
12158 raise errors.OpExecError("Failed to retrieve disk size from primary"
12159 " node '%s'" % instance.primary_node)
12161 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12163 assert old_disk_size >= disk.size, \
12164 ("Retrieved disk size too small (got %s, should be at least %s)" %
12165 (old_disk_size, disk.size))
12167 old_disk_size = None
12169 # We know that (as far as we can test) operations across different
12170 # nodes will succeed, time to run it for real on the backing storage
12171 for node in instance.all_nodes:
12172 self.cfg.SetDiskID(disk, node)
12173 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12175 result.Raise("Grow request failed to node %s" % node)
12177 # And now execute it for logical storage, on the primary node
12178 node = instance.primary_node
12179 self.cfg.SetDiskID(disk, node)
12180 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12182 result.Raise("Grow request failed to node %s" % node)
12184 disk.RecordGrow(self.delta)
12185 self.cfg.Update(instance, feedback_fn)
12187 # Changes have been recorded, release node lock
12188 _ReleaseLocks(self, locking.LEVEL_NODE)
12190 # Downgrade lock while waiting for sync
12191 self.glm.downgrade(locking.LEVEL_INSTANCE)
12193 assert wipe_disks ^ (old_disk_size is None)
12196 assert instance.disks[self.op.disk] == disk
12198 # Wipe newly added disk space
12199 _WipeDisks(self, instance,
12200 disks=[(self.op.disk, disk, old_disk_size)])
12202 if self.op.wait_for_sync:
12203 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12205 self.proc.LogWarning("Disk sync-ing has not returned a good"
12206 " status; please check the instance")
12207 if instance.admin_state != constants.ADMINST_UP:
12208 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12209 elif instance.admin_state != constants.ADMINST_UP:
12210 self.proc.LogWarning("Not shutting down the disk even if the instance is"
12211 " not supposed to be running because no wait for"
12212 " sync mode was requested")
12214 assert self.owned_locks(locking.LEVEL_NODE_RES)
12215 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
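# Illustrative sketch only (field names inferred from the self.op attributes
# used by LUInstanceGrowDisk above; values are invented):
#
#   opcodes.OpInstanceGrowDisk(instance_name="inst1.example.com",
#                              disk=0,          # disk index within the instance
#                              amount=2048,     # MiB; a delta unless absolute
#                              absolute=False,
#                              wait_for_sync=True)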
12218 class LUInstanceQueryData(NoHooksLU):
12219 """Query runtime instance data.
12224 def ExpandNames(self):
12225 self.needed_locks = {}
12227 # Use locking if requested or when non-static information is wanted
12228 if not (self.op.static or self.op.use_locking):
12229 self.LogWarning("Non-static data requested, locks need to be acquired")
12230 self.op.use_locking = True
12232 if self.op.instances or not self.op.use_locking:
12233 # Expand instance names right here
12234 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12236 # Will use acquired locks
12237 self.wanted_names = None
12239 if self.op.use_locking:
12240 self.share_locks = _ShareAll()
12242 if self.wanted_names is None:
12243 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12245 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12247 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12248 self.needed_locks[locking.LEVEL_NODE] = []
12249 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12251 def DeclareLocks(self, level):
12252 if self.op.use_locking:
12253 if level == locking.LEVEL_NODEGROUP:
12254 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12256 # Lock all groups used by instances optimistically; this requires going
12257 # via the node before it's locked, requiring verification later on
12258 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12259 frozenset(group_uuid
12260 for instance_name in owned_instances
12262 self.cfg.GetInstanceNodeGroups(instance_name))
12264 elif level == locking.LEVEL_NODE:
12265 self._LockInstancesNodes()
12267 def CheckPrereq(self):
12268 """Check prerequisites.
12270 This only checks the optional instance list against the existing names.
12273 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12274 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12275 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12277 if self.wanted_names is None:
12278 assert self.op.use_locking, "Locking was not used"
12279 self.wanted_names = owned_instances
12281 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12283 if self.op.use_locking:
12284 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12287 assert not (owned_instances or owned_groups or owned_nodes)
12289 self.wanted_instances = instances.values()
12291 def _ComputeBlockdevStatus(self, node, instance, dev):
12292 """Returns the status of a block device
12295 if self.op.static or not node:
12298 self.cfg.SetDiskID(dev, node)
12300 result = self.rpc.call_blockdev_find(node, dev)
12304 result.Raise("Can't compute disk status for %s" % instance.name)
12306 status = result.payload
12310 return (status.dev_path, status.major, status.minor,
12311 status.sync_percent, status.estimated_time,
12312 status.is_degraded, status.ldisk_status)
12314 def _ComputeDiskStatus(self, instance, snode, dev):
12315 """Compute block device status.
12318 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12320 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12322 def _ComputeDiskStatusInner(self, instance, snode, dev):
12323 """Compute block device status.
12325 @attention: The device has to be annotated already.
12328 if dev.dev_type in constants.LDS_DRBD:
12329 # we change the snode then (otherwise we use the one passed in)
12330 if dev.logical_id[0] == instance.primary_node:
12331 snode = dev.logical_id[1]
12333 snode = dev.logical_id[0]
12335 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12337 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12340 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12347 "iv_name": dev.iv_name,
12348 "dev_type": dev.dev_type,
12349 "logical_id": dev.logical_id,
12350 "physical_id": dev.physical_id,
12351 "pstatus": dev_pstatus,
12352 "sstatus": dev_sstatus,
12353 "children": dev_children,
12358 def Exec(self, feedback_fn):
12359 """Gather and return data"""
12362 cluster = self.cfg.GetClusterInfo()
12364 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12365 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12367 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12368 for node in nodes.values()))
12370 group2name_fn = lambda uuid: groups[uuid].name
12372 for instance in self.wanted_instances:
12373 pnode = nodes[instance.primary_node]
12375 if self.op.static or pnode.offline:
12376 remote_state = None
12378 self.LogWarning("Primary node %s is marked offline, returning static"
12379 " information only for instance %s" %
12380 (pnode.name, instance.name))
12382 remote_info = self.rpc.call_instance_info(instance.primary_node,
12384 instance.hypervisor)
12385 remote_info.Raise("Error checking node %s" % instance.primary_node)
12386 remote_info = remote_info.payload
12387 if remote_info and "state" in remote_info:
12388 remote_state = "up"
12390 if instance.admin_state == constants.ADMINST_UP:
12391 remote_state = "down"
12393 remote_state = instance.admin_state
12395 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12398 snodes_group_uuids = [nodes[snode_name].group
12399 for snode_name in instance.secondary_nodes]
12401 result[instance.name] = {
12402 "name": instance.name,
12403 "config_state": instance.admin_state,
12404 "run_state": remote_state,
12405 "pnode": instance.primary_node,
12406 "pnode_group_uuid": pnode.group,
12407 "pnode_group_name": group2name_fn(pnode.group),
12408 "snodes": instance.secondary_nodes,
12409 "snodes_group_uuids": snodes_group_uuids,
12410 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12412 # this happens to be the same format used for hooks
12413 "nics": _NICListToTuple(self, instance.nics),
12414 "disk_template": instance.disk_template,
12416 "hypervisor": instance.hypervisor,
12417 "network_port": instance.network_port,
12418 "hv_instance": instance.hvparams,
12419 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12420 "be_instance": instance.beparams,
12421 "be_actual": cluster.FillBE(instance),
12422 "os_instance": instance.osparams,
12423 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12424 "serial_no": instance.serial_no,
12425 "mtime": instance.mtime,
12426 "ctime": instance.ctime,
12427 "uuid": instance.uuid,
12433 def PrepareContainerMods(mods, private_fn):
12434 """Prepares a list of container modifications by adding a private data field.
12436 @type mods: list of tuples; (operation, index, parameters)
12437 @param mods: List of modifications
12438 @type private_fn: callable or None
12439 @param private_fn: Callable for constructing a private data field for a
12444 if private_fn is None:
12449 return [(op, idx, params, fn()) for (op, idx, params) in mods]
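# Illustrative usage (hypothetical values): with modifications given in the
# (operation, index, parameters) form used throughout this module,
#
#   mods = [(constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024}),
#           (constants.DDM_REMOVE, 0, {})]
#   PrepareContainerMods(mods, None)
#
# returns the same tuples extended with a private data object as the fourth
# element (None here, since no private_fn was given), ready to be handed to
# ApplyContainerMods.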
12452 #: Type description for changes as returned by L{ApplyContainerMods}'s
12454 _TApplyContModsCbChanges = \
12455 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12456 ht.TNonEmptyString,
12461 def ApplyContainerMods(kind, container, chgdesc, mods,
12462 create_fn, modify_fn, remove_fn):
12463 """Applies descriptions in C{mods} to C{container}.
12466 @param kind: One-word item description
12467 @type container: list
12468 @param container: Container to modify
12469 @type chgdesc: None or list
12470 @param chgdesc: List of applied changes
12472 @param mods: Modifications as returned by L{PrepareContainerMods}
12473 @type create_fn: callable
12474 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12475 receives absolute item index, parameters and private data object as added
12476 by L{PrepareContainerMods}, returns tuple containing new item and changes
12478 @type modify_fn: callable
12479 @param modify_fn: Callback for modifying an existing item
12480 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12481 and private data object as added by L{PrepareContainerMods}, returns
12483 @type remove_fn: callable
12484 @param remove_fn: Callback on removing item; receives absolute item index,
12485 item and private data object as added by L{PrepareContainerMods}
12488 for (op, idx, params, private) in mods:
12491 absidx = len(container) - 1
12493 raise IndexError("Not accepting negative indices other than -1")
12494 elif idx > len(container):
12495 raise IndexError("Got %s index %s, but there are only %s" %
12496 (kind, idx, len(container)))
12502 if op == constants.DDM_ADD:
12503 # Calculate where item will be added
12505 addidx = len(container)
12509 if create_fn is None:
12512 (item, changes) = create_fn(addidx, params, private)
12515 container.append(item)
12518 assert idx <= len(container)
12519 # list.insert does so before the specified index
12520 container.insert(idx, item)
12522 # Retrieve existing item
12524 item = container[absidx]
12526 raise IndexError("Invalid %s index %s" % (kind, idx))
12528 if op == constants.DDM_REMOVE:
12531 if remove_fn is not None:
12532 remove_fn(absidx, item, private)
12534 changes = [("%s/%s" % (kind, absidx), "remove")]
12536 assert container[absidx] == item
12537 del container[absidx]
12538 elif op == constants.DDM_MODIFY:
12539 if modify_fn is not None:
12540 changes = modify_fn(absidx, item, params, private)
12542 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12544 assert _TApplyContModsCbChanges(changes)
12546 if not (chgdesc is None or changes is None):
12547 chgdesc.extend(changes)
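# Illustrative usage (simplified, hypothetical values): with no callbacks the
# parameters themselves become the new items, so
#
#   container = ["a", "b"]
#   chgdesc = []
#   mods = PrepareContainerMods([(constants.DDM_ADD, -1, "c")], None)
#   ApplyContainerMods("item", container, chgdesc, mods, None, None, None)
#
# leaves container as ["a", "b", "c"]; chgdesc would collect any changes
# reported by create/modify/remove callbacks (none in this sketch).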
12550 def _UpdateIvNames(base_index, disks):
12551 """Updates the C{iv_name} attribute of disks.
12553 @type disks: list of L{objects.Disk}
12556 for (idx, disk) in enumerate(disks):
12557 disk.iv_name = "disk/%s" % (base_index + idx, )
12560 class _InstNicModPrivate:
12561 """Data structure for network interface modifications.
12563 Used by L{LUInstanceSetParams}.
12566 def __init__(self):
12571 class LUInstanceSetParams(LogicalUnit):
12572 """Modifies an instances's parameters.
12575 HPATH = "instance-modify"
12576 HTYPE = constants.HTYPE_INSTANCE
12580 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12581 assert ht.TList(mods)
12582 assert not mods or len(mods[0]) in (2, 3)
12584 if mods and len(mods[0]) == 2:
12588 for op, params in mods:
12589 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12590 result.append((op, -1, params))
12594 raise errors.OpPrereqError("Only one %s add or remove operation is"
12595 " supported at a time" % kind,
12596 errors.ECODE_INVAL)
12598 result.append((constants.DDM_MODIFY, op, params))
12600 assert verify_fn(result)
12607 def _CheckMods(kind, mods, key_types, item_fn):
12608 """Ensures requested disk/NIC modifications are valid.
12611 for (op, _, params) in mods:
12612 assert ht.TDict(params)
12614 utils.ForceDictType(params, key_types)
12616 if op == constants.DDM_REMOVE:
12618 raise errors.OpPrereqError("No settings should be passed when"
12619 " removing a %s" % kind,
12620 errors.ECODE_INVAL)
12621 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12622 item_fn(op, params)
12624 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12627 def _VerifyDiskModification(op, params):
12628 """Verifies a disk modification.
12631 if op == constants.DDM_ADD:
12632 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12633 if mode not in constants.DISK_ACCESS_SET:
12634 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12635 errors.ECODE_INVAL)
12637 size = params.get(constants.IDISK_SIZE, None)
12639 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12640 constants.IDISK_SIZE, errors.ECODE_INVAL)
12644 except (TypeError, ValueError), err:
12645 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12646 errors.ECODE_INVAL)
12648 params[constants.IDISK_SIZE] = size
12650 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12651 raise errors.OpPrereqError("Disk size change not possible, use"
12652 " grow-disk", errors.ECODE_INVAL)
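  # Illustrative examples of disk modifications accepted by the check above
  # (values invented):
  #
  #   (constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024,
  #                            constants.IDISK_MODE: constants.DISK_RDWR})
  #   (constants.DDM_MODIFY, 0, {constants.IDISK_MODE: constants.DISK_RDONLY})
  #
  # Adding a disk requires a size; size changes must go through grow-disk.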
12655 def _VerifyNicModification(op, params):
12656 """Verifies a network interface modification.
12659 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12660 ip = params.get(constants.INIC_IP, None)
12663 elif ip.lower() == constants.VALUE_NONE:
12664 params[constants.INIC_IP] = None
12665 elif not netutils.IPAddress.IsValid(ip):
12666 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12667 errors.ECODE_INVAL)
12669 bridge = params.get("bridge", None)
12670 link = params.get(constants.INIC_LINK, None)
12671 if bridge and link:
12672 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12673 " at the same time", errors.ECODE_INVAL)
12674 elif bridge and bridge.lower() == constants.VALUE_NONE:
12675 params["bridge"] = None
12676 elif link and link.lower() == constants.VALUE_NONE:
12677 params[constants.INIC_LINK] = None
12679 if op == constants.DDM_ADD:
12680 macaddr = params.get(constants.INIC_MAC, None)
12681 if macaddr is None:
12682 params[constants.INIC_MAC] = constants.VALUE_AUTO
12684 if constants.INIC_MAC in params:
12685 macaddr = params[constants.INIC_MAC]
12686 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12687 macaddr = utils.NormalizeAndValidateMac(macaddr)
12689 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12690 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12691 " modifying an existing NIC",
12692 errors.ECODE_INVAL)
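  # Illustrative examples of NIC modifications accepted by the check above
  # (values invented):
  #
  #   (constants.DDM_ADD, -1, {constants.INIC_MAC: constants.VALUE_AUTO,
  #                            constants.INIC_LINK: "br0"})
  #   (constants.DDM_MODIFY, 0, {constants.INIC_IP: "none"})   # clears the IP
  #
  # "bridge" is accepted as a legacy alias for the link parameter, and the
  # string "none" is normalised to None for the IP, bridge and link values.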
12694 def CheckArguments(self):
12695 if not (self.op.nics or self.op.disks or self.op.disk_template or
12696 self.op.hvparams or self.op.beparams or self.op.os_name or
12697 self.op.offline is not None or self.op.runtime_mem):
12698 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12700 if self.op.hvparams:
12701 _CheckGlobalHvParams(self.op.hvparams)
12703 self.op.disks = self._UpgradeDiskNicMods(
12704 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12705 self.op.nics = self._UpgradeDiskNicMods(
12706 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12708 # Check disk modifications
12709 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12710 self._VerifyDiskModification)
12712 if self.op.disks and self.op.disk_template is not None:
12713 raise errors.OpPrereqError("Disk template conversion and other disk"
12714 " changes not supported at the same time",
12715 errors.ECODE_INVAL)
12717 if (self.op.disk_template and
12718 self.op.disk_template in constants.DTS_INT_MIRROR and
12719 self.op.remote_node is None):
12720 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12721 " one requires specifying a secondary node",
12722 errors.ECODE_INVAL)
12724 # Check NIC modifications
12725 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12726 self._VerifyNicModification)
12728 def ExpandNames(self):
12729 self._ExpandAndLockInstance()
12730 # Can't even acquire node locks in shared mode as upcoming changes in
12731 # Ganeti 2.6 will start to modify the node object on disk conversion
12732 self.needed_locks[locking.LEVEL_NODE] = []
12733 self.needed_locks[locking.LEVEL_NODE_RES] = []
12734 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12736 def DeclareLocks(self, level):
12737 # TODO: Acquire group lock in shared mode (disk parameters)
12738 if level == locking.LEVEL_NODE:
12739 self._LockInstancesNodes()
12740 if self.op.disk_template and self.op.remote_node:
12741 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12742 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12743 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12745 self.needed_locks[locking.LEVEL_NODE_RES] = \
12746 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12748 def BuildHooksEnv(self):
12749 """Build hooks env.
12751 This runs on the master, primary and secondaries.
12755 if constants.BE_MINMEM in self.be_new:
12756 args["minmem"] = self.be_new[constants.BE_MINMEM]
12757 if constants.BE_MAXMEM in self.be_new:
12758 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12759 if constants.BE_VCPUS in self.be_new:
12760 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12761 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12762 # information at all.
12764 if self._new_nics is not None:
12767 for nic in self._new_nics:
12768 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12769 mode = nicparams[constants.NIC_MODE]
12770 link = nicparams[constants.NIC_LINK]
12771 nics.append((nic.ip, nic.mac, mode, link))
12773 args["nics"] = nics
12775 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12776 if self.op.disk_template:
12777 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12778 if self.op.runtime_mem:
12779 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12783 def BuildHooksNodes(self):
12784 """Build hooks nodes.
12787 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12790 def _PrepareNicModification(self, params, private, old_ip, old_params,
12792 update_params_dict = dict([(key, params[key])
12793 for key in constants.NICS_PARAMETERS
12796 if "bridge" in params:
12797 update_params_dict[constants.NIC_LINK] = params["bridge"]
12799 new_params = _GetUpdatedParams(old_params, update_params_dict)
12800 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12802 new_filled_params = cluster.SimpleFillNIC(new_params)
12803 objects.NIC.CheckParameterSyntax(new_filled_params)
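# With the defaults-filled parameters validated, apply mode-specific checks:
# bridged NICs need the bridge to exist on the primary node, routed NICs
# need an IP address.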
12805 new_mode = new_filled_params[constants.NIC_MODE]
12806 if new_mode == constants.NIC_MODE_BRIDGED:
12807 bridge = new_filled_params[constants.NIC_LINK]
12808 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12810 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12812 self.warn.append(msg)
12814 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12816 elif new_mode == constants.NIC_MODE_ROUTED:
12817 ip = params.get(constants.INIC_IP, old_ip)
12819 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12820 " on a routed NIC", errors.ECODE_INVAL)
12822 if constants.INIC_MAC in params:
12823 mac = params[constants.INIC_MAC]
12825 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12826 errors.ECODE_INVAL)
12827 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12828 # otherwise generate the MAC address
12829 params[constants.INIC_MAC] = \
12830 self.cfg.GenerateMAC(self.proc.GetECId())
12832 # or validate/reserve the current one
12834 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12835 except errors.ReservationError:
12836 raise errors.OpPrereqError("MAC address '%s' already in use"
12837 " in cluster" % mac,
12838 errors.ECODE_NOTUNIQUE)
12840 private.params = new_params
12841 private.filled = new_filled_params
12843 def CheckPrereq(self):
12844 """Check prerequisites.
12846 This checks the requested modifications against the current instance and cluster state.
12849 # checking the new params on the primary/secondary nodes
12851 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12852 cluster = self.cluster = self.cfg.GetClusterInfo()
12853 assert self.instance is not None, \
12854 "Cannot retrieve locked instance %s" % self.op.instance_name
12855 pnode = instance.primary_node
12856 nodelist = list(instance.all_nodes)
12857 pnode_info = self.cfg.GetNodeInfo(pnode)
12858 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12860 # Prepare disk/NIC modifications
12861 self.diskmod = PrepareContainerMods(self.op.disks, None)
12862 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
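# Each NIC modification gets a private _InstNicModPrivate container, used to
# carry the validated/filled parameters from CheckPrereq into Exec.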
12865 if self.op.os_name and not self.op.force:
12866 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12867 self.op.force_variant)
12868 instance_os = self.op.os_name
12870 instance_os = instance.os
12872 assert not (self.op.disk_template and self.op.disks), \
12873 "Can't modify disk template and apply disk changes at the same time"
12875 if self.op.disk_template:
12876 if instance.disk_template == self.op.disk_template:
12877 raise errors.OpPrereqError("Instance already has disk template %s" %
12878 instance.disk_template, errors.ECODE_INVAL)
12880 if (instance.disk_template,
12881 self.op.disk_template) not in self._DISK_CONVERSIONS:
12882 raise errors.OpPrereqError("Unsupported disk template conversion from"
12883 " %s to %s" % (instance.disk_template,
12884 self.op.disk_template),
12885 errors.ECODE_INVAL)
12886 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12887 msg="cannot change disk template")
12888 if self.op.disk_template in constants.DTS_INT_MIRROR:
12889 if self.op.remote_node == pnode:
12890 raise errors.OpPrereqError("Given new secondary node %s is the same"
12891 " as the primary node of the instance" %
12892 self.op.remote_node, errors.ECODE_STATE)
12893 _CheckNodeOnline(self, self.op.remote_node)
12894 _CheckNodeNotDrained(self, self.op.remote_node)
12895 # FIXME: here we assume that the old instance type is DT_PLAIN
12896 assert instance.disk_template == constants.DT_PLAIN
12897 disks = [{constants.IDISK_SIZE: d.size,
12898 constants.IDISK_VG: d.logical_id[0]}
12899 for d in instance.disks]
12900 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12901 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12903 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12904 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12905 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
12907 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12908 ignore=self.op.ignore_ipolicy)
12909 if pnode_info.group != snode_info.group:
12910 self.LogWarning("The primary and secondary nodes are in two"
12911 " different node groups; the disk parameters"
12912 " from the first disk's node group will be"
12915 # hvparams processing
12916 if self.op.hvparams:
12917 hv_type = instance.hypervisor
12918 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12919 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12920 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12923 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12924 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12925 self.hv_proposed = self.hv_new = hv_new # the new actual values
12926 self.hv_inst = i_hvdict # the new dict (without defaults)
12928 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12930 self.hv_new = self.hv_inst = {}
12932 # beparams processing
12933 if self.op.beparams:
12934 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12936 objects.UpgradeBeParams(i_bedict)
12937 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12938 be_new = cluster.SimpleFillBE(i_bedict)
12939 self.be_proposed = self.be_new = be_new # the new actual values
12940 self.be_inst = i_bedict # the new dict (without defaults)
12942 self.be_new = self.be_inst = {}
12943 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12944 be_old = cluster.FillBE(instance)
12946 # CPU param validation -- checking every time a parameter is
12947 # changed to cover all cases where either CPU mask or vcpus have
12949 if (constants.BE_VCPUS in self.be_proposed and
12950 constants.HV_CPU_MASK in self.hv_proposed):
12952 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12953 # Verify mask is consistent with number of vCPUs. Can skip this
12954 # test if only 1 entry in the CPU mask, which means same mask
12955 # is applied to all vCPUs.
12956 if (len(cpu_list) > 1 and
12957 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12958 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12960 (self.be_proposed[constants.BE_VCPUS],
12961 self.hv_proposed[constants.HV_CPU_MASK]),
12962 errors.ECODE_INVAL)
12964 # Only perform this test if a new CPU mask is given
12965 if constants.HV_CPU_MASK in self.hv_new:
12966 # Calculate the largest CPU number requested
12967 max_requested_cpu = max(map(max, cpu_list))
12968 # Check that all of the instance's nodes have enough physical CPUs to
12969 # satisfy the requested CPU mask
12970 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12971 max_requested_cpu + 1, instance.hypervisor)
12973 # osparams processing
12974 if self.op.osparams:
12975 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12976 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12977 self.os_inst = i_osdict # the new dict (without defaults)
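# Memory checks: increasing maxmem must still fit into the free memory of
# the primary node (and of the secondaries when auto_balance is set),
# otherwise the instance could no longer be started or failed over.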
12983 #TODO(dynmem): do the appropriate check involving MINMEM
12984 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12985 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12986 mem_check_list = [pnode]
12987 if be_new[constants.BE_AUTO_BALANCE]:
12988 # either we changed auto_balance to yes or it was from before
12989 mem_check_list.extend(instance.secondary_nodes)
12990 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12991 instance.hypervisor)
12992 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12993 [instance.hypervisor])
12994 pninfo = nodeinfo[pnode]
12995 msg = pninfo.fail_msg
12997 # Assume the primary node is unreachable and go ahead
12998 self.warn.append("Can't get info from primary node %s: %s" %
13001 (_, _, (pnhvinfo, )) = pninfo.payload
13002 if not isinstance(pnhvinfo.get("memory_free", None), int):
13003 self.warn.append("Node data from primary node %s doesn't contain"
13004 " free memory information" % pnode)
13005 elif instance_info.fail_msg:
13006 self.warn.append("Can't get instance runtime information: %s" %
13007 instance_info.fail_msg)
13009 if instance_info.payload:
13010 current_mem = int(instance_info.payload["memory"])
13012 # Assume instance not running
13013 # (there is a slight race condition here, but it's not very
13014 # probable, and we have no other way to check)
13015 # TODO: Describe race condition
13017 #TODO(dynmem): do the appropriate check involving MINMEM
13018 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13019 pnhvinfo["memory_free"])
13021 raise errors.OpPrereqError("This change will prevent the instance"
13022 " from starting, due to %d MB of memory"
13023 " missing on its primary node" %
13024 miss_mem, errors.ECODE_NORES)
13026 if be_new[constants.BE_AUTO_BALANCE]:
13027 for node, nres in nodeinfo.items():
13028 if node not in instance.secondary_nodes:
13030 nres.Raise("Can't get info from secondary node %s" % node,
13031 prereq=True, ecode=errors.ECODE_STATE)
13032 (_, _, (nhvinfo, )) = nres.payload
13033 if not isinstance(nhvinfo.get("memory_free", None), int):
13034 raise errors.OpPrereqError("Secondary node %s didn't return free"
13035 " memory information" % node,
13036 errors.ECODE_STATE)
13037 #TODO(dynmem): do the appropriate check involving MINMEM
13038 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13039 raise errors.OpPrereqError("This change will prevent the instance"
13040 " from failover to its secondary node"
13041 " %s, due to not enough memory" % node,
13042 errors.ECODE_STATE)
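# Runtime memory ballooning: the instance must be running and the requested
# amount must stay within [minmem, maxmem] unless --force is given.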
13044 if self.op.runtime_mem:
13045 remote_info = self.rpc.call_instance_info(instance.primary_node,
13047 instance.hypervisor)
13048 remote_info.Raise("Error checking node %s" % instance.primary_node)
13049 if not remote_info.payload: # not running already
13050 raise errors.OpPrereqError("Instance %s is not running" %
13051 instance.name, errors.ECODE_STATE)
13053 current_memory = remote_info.payload["memory"]
13054 if (not self.op.force and
13055 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13056 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13057 raise errors.OpPrereqError("Instance %s must have memory between %d"
13058 " and %d MB of memory unless --force is"
13061 self.be_proposed[constants.BE_MINMEM],
13062 self.be_proposed[constants.BE_MAXMEM]),
13063 errors.ECODE_INVAL)
13065 delta = self.op.runtime_mem - current_memory
13067 _CheckNodeFreeMemory(self, instance.primary_node,
13068 "ballooning memory for instance %s" %
13069 instance.name, delta, instance.hypervisor)
13071 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13072 raise errors.OpPrereqError("Disk operations not supported for"
13073 " diskless instances", errors.ECODE_INVAL)
13075 def _PrepareNicCreate(_, params, private):
13076 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
13077 return (None, None)
13079 def _PrepareNicMod(_, nic, params, private):
13080 self._PrepareNicModification(params, private, nic.ip,
13081 nic.nicparams, cluster, pnode)
13084 # Verify NIC changes (operating on copy)
13085 nics = instance.nics[:]
13086 ApplyContainerMods("NIC", nics, None, self.nicmod,
13087 _PrepareNicCreate, _PrepareNicMod, None)
13088 if len(nics) > constants.MAX_NICS:
13089 raise errors.OpPrereqError("Instance has too many network interfaces"
13090 " (%d), cannot add more" % constants.MAX_NICS,
13091 errors.ECODE_STATE)
13093 # Verify disk changes (operating on a copy)
13094 disks = instance.disks[:]
13095 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13096 if len(disks) > constants.MAX_DISKS:
13097 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13098 " more" % constants.MAX_DISKS,
13099 errors.ECODE_STATE)
13101 if self.op.offline is not None:
13102 if self.op.offline:
13103 msg = "can't change to offline"
13105 msg = "can't change to online"
13106 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
13108 # Pre-compute NIC changes (necessary to use result in hooks)
13109 self._nic_chgdesc = []
13111 # Operate on copies as this is still in prereq
13112 nics = [nic.Copy() for nic in instance.nics]
13113 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13114 self._CreateNewNic, self._ApplyNicMods, None)
13115 self._new_nics = nics
13117 self._new_nics = None
13119 def _ConvertPlainToDrbd(self, feedback_fn):
13120 """Converts an instance from plain to drbd.
13123 feedback_fn("Converting template to drbd")
13124 instance = self.instance
13125 pnode = instance.primary_node
13126 snode = self.op.remote_node
13128 assert instance.disk_template == constants.DT_PLAIN
13130 # create a fake disk info for _GenerateDiskTemplate
13131 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13132 constants.IDISK_VG: d.logical_id[0]}
13133 for d in instance.disks]
13134 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13135 instance.name, pnode, [snode],
13136 disk_info, None, None, 0, feedback_fn,
13138 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13140 info = _GetInstanceInfoText(instance)
13141 feedback_fn("Creating additional volumes...")
13142 # first, create the missing data and meta devices
13143 for disk in anno_disks:
13144 # unfortunately this is... not too nice
13145 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13147 for child in disk.children:
13148 _CreateSingleBlockDev(self, snode, instance, child, info, True)
13149 # at this stage, all new LVs have been created, we can rename the old ones
13151 feedback_fn("Renaming original volumes...")
13152 rename_list = [(o, n.children[0].logical_id)
13153 for (o, n) in zip(instance.disks, new_disks)]
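# The original LVs become the data children (index 0) of the new DRBD
# disks, so rename them to the logical IDs generated for those children.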
13154 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13155 result.Raise("Failed to rename original LVs")
13157 feedback_fn("Initializing DRBD devices...")
13158 # all child devices are in place, we can now create the DRBD devices
13159 for disk in anno_disks:
13160 for node in [pnode, snode]:
13161 f_create = node == pnode
13162 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
13164 # at this point, the instance has been modified
13165 instance.disk_template = constants.DT_DRBD8
13166 instance.disks = new_disks
13167 self.cfg.Update(instance, feedback_fn)
13169 # Release node locks while waiting for sync
13170 _ReleaseLocks(self, locking.LEVEL_NODE)
13172 # disks are created, waiting for sync
13173 disk_abort = not _WaitForSync(self, instance,
13174 oneshot=not self.op.wait_for_sync)
13176 raise errors.OpExecError("There are some degraded disks for"
13177 " this instance, please cleanup manually")
13179 # Node resource locks will be released by caller
13181 def _ConvertDrbdToPlain(self, feedback_fn):
13182 """Converts an instance from drbd to plain.
13185 instance = self.instance
13187 assert len(instance.secondary_nodes) == 1
13188 assert instance.disk_template == constants.DT_DRBD8
13190 pnode = instance.primary_node
13191 snode = instance.secondary_nodes[0]
13192 feedback_fn("Converting template to plain")
13194 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13195 new_disks = [d.children[0] for d in instance.disks]
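# For DRBD8 the first child of each disk is the local data LV; it is reused
# as the plain disk, while the meta LV (children[1]) is removed below.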
13197 # copy over size and mode
13198 for parent, child in zip(old_disks, new_disks):
13199 child.size = parent.size
13200 child.mode = parent.mode
13202 # this is a DRBD disk, return its port to the pool
13203 # NOTE: this must be done right before the call to cfg.Update!
13204 for disk in old_disks:
13205 tcp_port = disk.logical_id[2]
13206 self.cfg.AddTcpUdpPort(tcp_port)
13208 # update instance structure
13209 instance.disks = new_disks
13210 instance.disk_template = constants.DT_PLAIN
13211 self.cfg.Update(instance, feedback_fn)
13213 # Release locks in case removing disks takes a while
13214 _ReleaseLocks(self, locking.LEVEL_NODE)
13216 feedback_fn("Removing volumes on the secondary node...")
13217 for disk in old_disks:
13218 self.cfg.SetDiskID(disk, snode)
13219 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
13221 self.LogWarning("Could not remove block device %s on node %s,"
13222 " continuing anyway: %s", disk.iv_name, snode, msg)
13224 feedback_fn("Removing unneeded volumes on the primary node...")
13225 for idx, disk in enumerate(old_disks):
13226 meta = disk.children[1]
13227 self.cfg.SetDiskID(meta, pnode)
13228 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
13230 self.LogWarning("Could not remove metadata for disk %d on node %s,"
13231 " continuing anyway: %s", idx, pnode, msg)
13233 def _CreateNewDisk(self, idx, params, _):
13234 """Creates a new disk.
13237 instance = self.instance
13240 if instance.disk_template in constants.DTS_FILEBASED:
13241 (file_driver, file_path) = instance.disks[0].logical_id
13242 file_path = os.path.dirname(file_path)
13244 file_driver = file_path = None
13247 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13248 instance.primary_node, instance.secondary_nodes,
13249 [params], file_path, file_driver, idx,
13250 self.Log, self.diskparams)[0]
13252 info = _GetInstanceInfoText(instance)
13254 logging.info("Creating volume %s for instance %s",
13255 disk.iv_name, instance.name)
13256 # Note: this needs to be kept in sync with _CreateDisks
13258 for node in instance.all_nodes:
13259 f_create = (node == instance.primary_node)
13261 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13262 except errors.OpExecError, err:
13263 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13264 disk.iv_name, disk, node, err)
13267 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13271 def _ModifyDisk(idx, disk, params, _):
13272 """Modifies a disk.
13275 disk.mode = params[constants.IDISK_MODE]
13278 ("disk.mode/%d" % idx, disk.mode),
13281 def _RemoveDisk(self, idx, root, _):
13285 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13286 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13287 self.cfg.SetDiskID(disk, node)
13288 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13290 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13291 " continuing anyway", idx, node, msg)
13293 # if this is a DRBD disk, return its port to the pool
13294 if root.dev_type in constants.LDS_DRBD:
13295 self.cfg.AddTcpUdpPort(root.logical_id[2])
13298 def _CreateNewNic(idx, params, private):
13299 """Creates data structure for a new network interface.
13302 mac = params[constants.INIC_MAC]
13303 ip = params.get(constants.INIC_IP, None)
13304 nicparams = private.params
13306 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
13308 "add:mac=%s,ip=%s,mode=%s,link=%s" %
13309 (mac, ip, private.filled[constants.NIC_MODE],
13310 private.filled[constants.NIC_LINK])),
13314 def _ApplyNicMods(idx, nic, params, private):
13315 """Modifies a network interface.
13320 for key in [constants.INIC_MAC, constants.INIC_IP]:
13322 changes.append(("nic.%s/%d" % (key, idx), params[key]))
13323 setattr(nic, key, params[key])
13326 nic.nicparams = private.params
13328 for (key, val) in params.items():
13329 changes.append(("nic.%s/%d" % (key, idx), val))
13333 def Exec(self, feedback_fn):
13334 """Modifies an instance.
13336 Most parameters take effect only at the next restart of the instance.
13339 # Process here the warnings from CheckPrereq, as we don't have a
13340 # feedback_fn there.
13341 # TODO: Replace with self.LogWarning
13342 for warn in self.warn:
13343 feedback_fn("WARNING: %s" % warn)
13345 assert ((self.op.disk_template is None) ^
13346 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13347 "Not owning any node resource locks"
13350 instance = self.instance
13353 if self.op.runtime_mem:
13354 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13356 self.op.runtime_mem)
13357 rpcres.Raise("Cannot modify instance runtime memory")
13358 result.append(("runtime_memory", self.op.runtime_mem))
13360 # Apply disk changes
13361 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13362 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13363 _UpdateIvNames(0, instance.disks)
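# Renumber the disk/N iv_names so they stay consecutive after removals.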
13365 if self.op.disk_template:
13367 check_nodes = set(instance.all_nodes)
13368 if self.op.remote_node:
13369 check_nodes.add(self.op.remote_node)
13370 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13371 owned = self.owned_locks(level)
13372 assert not (check_nodes - owned), \
13373 ("Not owning the correct locks, owning %r, expected at least %r" %
13374 (owned, check_nodes))
13376 r_shut = _ShutdownInstanceDisks(self, instance)
13378 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13379 " proceed with disk template conversion")
13380 mode = (instance.disk_template, self.op.disk_template)
13382 self._DISK_CONVERSIONS[mode](self, feedback_fn)
13384 self.cfg.ReleaseDRBDMinors(instance.name)
13386 result.append(("disk_template", self.op.disk_template))
13388 assert instance.disk_template == self.op.disk_template, \
13389 ("Expected disk template '%s', found '%s'" %
13390 (self.op.disk_template, instance.disk_template))
13392 # Release node and resource locks if there are any (they might already have
13393 # been released during disk conversion)
13394 _ReleaseLocks(self, locking.LEVEL_NODE)
13395 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13397 # Apply NIC changes
13398 if self._new_nics is not None:
13399 instance.nics = self._new_nics
13400 result.extend(self._nic_chgdesc)
13403 if self.op.hvparams:
13404 instance.hvparams = self.hv_inst
13405 for key, val in self.op.hvparams.iteritems():
13406 result.append(("hv/%s" % key, val))
13409 if self.op.beparams:
13410 instance.beparams = self.be_inst
13411 for key, val in self.op.beparams.iteritems():
13412 result.append(("be/%s" % key, val))
13415 if self.op.os_name:
13416 instance.os = self.op.os_name
13419 if self.op.osparams:
13420 instance.osparams = self.os_inst
13421 for key, val in self.op.osparams.iteritems():
13422 result.append(("os/%s" % key, val))
13424 if self.op.offline is None:
13427 elif self.op.offline:
13428 # Mark instance as offline
13429 self.cfg.MarkInstanceOffline(instance.name)
13430 result.append(("admin_state", constants.ADMINST_OFFLINE))
13432 # Mark instance as online, but stopped
13433 self.cfg.MarkInstanceDown(instance.name)
13434 result.append(("admin_state", constants.ADMINST_DOWN))
13436 self.cfg.Update(instance, feedback_fn)
13438 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13439 self.owned_locks(locking.LEVEL_NODE)), \
13440 "All node locks should have been released by now"
13444 _DISK_CONVERSIONS = {
13445 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13446 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13450 class LUInstanceChangeGroup(LogicalUnit):
13451 HPATH = "instance-change-group"
13452 HTYPE = constants.HTYPE_INSTANCE
13455 def ExpandNames(self):
13456 self.share_locks = _ShareAll()
13457 self.needed_locks = {
13458 locking.LEVEL_NODEGROUP: [],
13459 locking.LEVEL_NODE: [],
13462 self._ExpandAndLockInstance()
13464 if self.op.target_groups:
13465 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13466 self.op.target_groups)
13468 self.req_target_uuids = None
13470 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13472 def DeclareLocks(self, level):
13473 if level == locking.LEVEL_NODEGROUP:
13474 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13476 if self.req_target_uuids:
13477 lock_groups = set(self.req_target_uuids)
13479 # Lock all groups used by instance optimistically; this requires going
13480 # via the node before it's locked, requiring verification later on
13481 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13482 lock_groups.update(instance_groups)
13484 # No target groups, need to lock all of them
13485 lock_groups = locking.ALL_SET
13487 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13489 elif level == locking.LEVEL_NODE:
13490 if self.req_target_uuids:
13491 # Lock all nodes used by instances
13492 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13493 self._LockInstancesNodes()
13495 # Lock all nodes in all potential target groups
13496 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13497 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13498 member_nodes = [node_name
13499 for group in lock_groups
13500 for node_name in self.cfg.GetNodeGroup(group).members]
13501 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13503 # Lock all nodes as all groups are potential targets
13504 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13506 def CheckPrereq(self):
13507 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13508 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13509 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13511 assert (self.req_target_uuids is None or
13512 owned_groups.issuperset(self.req_target_uuids))
13513 assert owned_instances == set([self.op.instance_name])
13515 # Get instance information
13516 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13518 # Check if node groups for locked instance are still correct
13519 assert owned_nodes.issuperset(self.instance.all_nodes), \
13520 ("Instance %s's nodes changed while we kept the lock" %
13521 self.op.instance_name)
13523 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13526 if self.req_target_uuids:
13527 # User requested specific target groups
13528 self.target_uuids = frozenset(self.req_target_uuids)
13530 # All groups except those used by the instance are potential targets
13531 self.target_uuids = owned_groups - inst_groups
13533 conflicting_groups = self.target_uuids & inst_groups
13534 if conflicting_groups:
13535 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13536 " used by the instance '%s'" %
13537 (utils.CommaJoin(conflicting_groups),
13538 self.op.instance_name),
13539 errors.ECODE_INVAL)
13541 if not self.target_uuids:
13542 raise errors.OpPrereqError("There are no possible target groups",
13543 errors.ECODE_INVAL)
13545 def BuildHooksEnv(self):
13546 """Build hooks env.
13549 assert self.target_uuids
13552 "TARGET_GROUPS": " ".join(self.target_uuids),
13555 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13559 def BuildHooksNodes(self):
13560 """Build hooks nodes.
13563 mn = self.cfg.GetMasterNode()
13564 return ([mn], [mn])
13566 def Exec(self, feedback_fn):
13567 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13569 assert instances == [self.op.instance_name], "Instance not locked"
13571 req = iallocator.IAReqGroupChange(instances=instances,
13572 target_groups=list(self.target_uuids))
13573 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
13575 ial.Run(self.op.iallocator)
13577 if not ial.success:
13578 raise errors.OpPrereqError("Can't compute solution for changing group of"
13579 " instance '%s' using iallocator '%s': %s" %
13580 (self.op.instance_name, self.op.iallocator,
13581 ial.info), errors.ECODE_NORES)
13583 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13585 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13586 " instance '%s'", len(jobs), self.op.instance_name)
13588 return ResultWithJobs(jobs)
13591 class LUBackupQuery(NoHooksLU):
13592 """Query the exports list
13597 def CheckArguments(self):
13598 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13599 ["node", "export"], self.op.use_locking)
13601 def ExpandNames(self):
13602 self.expq.ExpandNames(self)
13604 def DeclareLocks(self, level):
13605 self.expq.DeclareLocks(self, level)
13607 def Exec(self, feedback_fn):
13610 for (node, expname) in self.expq.OldStyleQuery(self):
13611 if expname is None:
13612 result[node] = False
13614 result.setdefault(node, []).append(expname)
13619 class _ExportQuery(_QueryBase):
13620 FIELDS = query.EXPORT_FIELDS
13622 #: The node name is not a unique key for this query
13623 SORT_FIELD = "node"
13625 def ExpandNames(self, lu):
13626 lu.needed_locks = {}
13628 # The following variables interact with _QueryBase._GetNames
13630 self.wanted = _GetWantedNodes(lu, self.names)
13632 self.wanted = locking.ALL_SET
13634 self.do_locking = self.use_locking
13636 if self.do_locking:
13637 lu.share_locks = _ShareAll()
13638 lu.needed_locks = {
13639 locking.LEVEL_NODE: self.wanted,
13642 def DeclareLocks(self, lu, level):
13645 def _GetQueryData(self, lu):
13646 """Computes the list of nodes and their attributes.
13649 # Locking is not used
13651 assert not (compat.any(lu.glm.is_owned(level)
13652 for level in locking.LEVELS
13653 if level != locking.LEVEL_CLUSTER) or
13654 self.do_locking or self.use_locking)
13656 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
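# Nodes that fail the export-list RPC are reported with a None entry so the
# caller can distinguish "no exports" from "node unreachable".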
13660 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13662 result.append((node, None))
13664 result.extend((node, expname) for expname in nres.payload)
13669 class LUBackupPrepare(NoHooksLU):
13670 """Prepares an instance for an export and returns useful information.
13675 def ExpandNames(self):
13676 self._ExpandAndLockInstance()
13678 def CheckPrereq(self):
13679 """Check prerequisites.
13682 instance_name = self.op.instance_name
13684 self.instance = self.cfg.GetInstanceInfo(instance_name)
13685 assert self.instance is not None, \
13686 "Cannot retrieve locked instance %s" % self.op.instance_name
13687 _CheckNodeOnline(self, self.instance.primary_node)
13689 self._cds = _GetClusterDomainSecret()
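# The cluster domain secret is used below to HMAC-sign the X509 key name,
# sign the temporary CA and compute the remote-export handshake.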
13691 def Exec(self, feedback_fn):
13692 """Prepares an instance for an export.
13695 instance = self.instance
13697 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13698 salt = utils.GenerateSecret(8)
13700 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13701 result = self.rpc.call_x509_cert_create(instance.primary_node,
13702 constants.RIE_CERT_VALIDITY)
13703 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13705 (name, cert_pem) = result.payload
13707 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13711 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13712 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13714 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13720 class LUBackupExport(LogicalUnit):
13721 """Export an instance to an image in the cluster.
13724 HPATH = "instance-export"
13725 HTYPE = constants.HTYPE_INSTANCE
13728 def CheckArguments(self):
13729 """Check the arguments.
13732 self.x509_key_name = self.op.x509_key_name
13733 self.dest_x509_ca_pem = self.op.destination_x509_ca
13735 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13736 if not self.x509_key_name:
13737 raise errors.OpPrereqError("Missing X509 key name for encryption",
13738 errors.ECODE_INVAL)
13740 if not self.dest_x509_ca_pem:
13741 raise errors.OpPrereqError("Missing destination X509 CA",
13742 errors.ECODE_INVAL)
13744 def ExpandNames(self):
13745 self._ExpandAndLockInstance()
13747 # Lock all nodes for local exports
13748 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13749 # FIXME: lock only instance primary and destination node
13751 # Sad but true, for now we have to lock all nodes, as we don't know where
13752 # the previous export might be, and in this LU we search for it and
13753 # remove it from its current node. In the future we could fix this by:
13754 # - making a tasklet to search (share-lock all), then create the
13755 # new one, then one to remove, after
13756 # - removing the removal operation altogether
13757 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13759 def DeclareLocks(self, level):
13760 """Last minute lock declaration."""
13761 # All nodes are locked anyway, so nothing to do here.
13763 def BuildHooksEnv(self):
13764 """Build hooks env.
13766 This will run on the master, primary node and target node.
13770 "EXPORT_MODE": self.op.mode,
13771 "EXPORT_NODE": self.op.target_node,
13772 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13773 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13774 # TODO: Generic function for boolean env variables
13775 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13778 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13782 def BuildHooksNodes(self):
13783 """Build hooks nodes.
13786 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13788 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13789 nl.append(self.op.target_node)
13793 def CheckPrereq(self):
13794 """Check prerequisites.
13796 This checks that the instance and node names are valid.
13799 instance_name = self.op.instance_name
13801 self.instance = self.cfg.GetInstanceInfo(instance_name)
13802 assert self.instance is not None, \
13803 "Cannot retrieve locked instance %s" % self.op.instance_name
13804 _CheckNodeOnline(self, self.instance.primary_node)
13806 if (self.op.remove_instance and
13807 self.instance.admin_state == constants.ADMINST_UP and
13808 not self.op.shutdown):
13809 raise errors.OpPrereqError("Can not remove instance without shutting it"
13810 " down before", errors.ECODE_STATE)
13812 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13813 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13814 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13815 assert self.dst_node is not None
13817 _CheckNodeOnline(self, self.dst_node.name)
13818 _CheckNodeNotDrained(self, self.dst_node.name)
13821 self.dest_disk_info = None
13822 self.dest_x509_ca = None
13824 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13825 self.dst_node = None
13827 if len(self.op.target_node) != len(self.instance.disks):
13828 raise errors.OpPrereqError(("Received destination information for %s"
13829 " disks, but instance %s has %s disks") %
13830 (len(self.op.target_node), instance_name,
13831 len(self.instance.disks)),
13832 errors.ECODE_INVAL)
13834 cds = _GetClusterDomainSecret()
13836 # Check X509 key name
13838 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13839 except (TypeError, ValueError), err:
13840 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
13841 errors.ECODE_INVAL)
13843 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13844 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13845 errors.ECODE_INVAL)
13847 # Load and verify CA
13849 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13850 except OpenSSL.crypto.Error, err:
13851 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13852 (err, ), errors.ECODE_INVAL)
13854 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13855 if errcode is not None:
13856 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13857 (msg, ), errors.ECODE_INVAL)
13859 self.dest_x509_ca = cert
13861 # Verify target information
13863 for idx, disk_data in enumerate(self.op.target_node):
13865 (host, port, magic) = \
13866 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13867 except errors.GenericError, err:
13868 raise errors.OpPrereqError("Target info for disk %s: %s" %
13869 (idx, err), errors.ECODE_INVAL)
13871 disk_info.append((host, port, magic))
13873 assert len(disk_info) == len(self.op.target_node)
13874 self.dest_disk_info = disk_info
13877 raise errors.ProgrammerError("Unhandled export mode %r" %
13880 # instance disk type verification
13881 # TODO: Implement export support for file-based disks
13882 for disk in self.instance.disks:
13883 if disk.dev_type == constants.LD_FILE:
13884 raise errors.OpPrereqError("Export not supported for instances with"
13885 " file-based disks", errors.ECODE_INVAL)
13887 def _CleanupExports(self, feedback_fn):
13888 """Removes exports of current instance from all other nodes.
13890 If an instance in a cluster with nodes A..D was exported to node C, its
13891 exports will be removed from the nodes A, B and D.
13894 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13896 nodelist = self.cfg.GetNodeList()
13897 nodelist.remove(self.dst_node.name)
13899 # on one-node clusters nodelist will be empty after the removal;
13900 # if we proceeded, the backup would be removed because OpBackupQuery
13901 # substitutes an empty list with the full cluster node list.
13902 iname = self.instance.name
13904 feedback_fn("Removing old exports for instance %s" % iname)
13905 exportlist = self.rpc.call_export_list(nodelist)
13906 for node in exportlist:
13907 if exportlist[node].fail_msg:
13909 if iname in exportlist[node].payload:
13910 msg = self.rpc.call_export_remove(node, iname).fail_msg
13912 self.LogWarning("Could not remove older export for instance %s"
13913 " on node %s: %s", iname, node, msg)
13915 def Exec(self, feedback_fn):
13916 """Export an instance to an image in the cluster.
13919 assert self.op.mode in constants.EXPORT_MODES
13921 instance = self.instance
13922 src_node = instance.primary_node
13924 if self.op.shutdown:
13925 # shutdown the instance, but not the disks
13926 feedback_fn("Shutting down instance %s" % instance.name)
13927 result = self.rpc.call_instance_shutdown(src_node, instance,
13928 self.op.shutdown_timeout)
13929 # TODO: Maybe ignore failures if ignore_remove_failures is set
13930 result.Raise("Could not shutdown instance %s on"
13931 " node %s" % (instance.name, src_node))
13933 # set the disks ID correctly since call_instance_start needs the
13934 # correct drbd minor to create the symlinks
13935 for disk in instance.disks:
13936 self.cfg.SetDiskID(disk, src_node)
13938 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13941 # Activate the instance disks if we're exporting a stopped instance
13942 feedback_fn("Activating disks for %s" % instance.name)
13943 _StartInstanceDisks(self, instance, None)
13946 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13949 helper.CreateSnapshots()
13951 if (self.op.shutdown and
13952 instance.admin_state == constants.ADMINST_UP and
13953 not self.op.remove_instance):
13954 assert not activate_disks
13955 feedback_fn("Starting instance %s" % instance.name)
13956 result = self.rpc.call_instance_start(src_node,
13957 (instance, None, None), False)
13958 msg = result.fail_msg
13960 feedback_fn("Failed to start instance: %s" % msg)
13961 _ShutdownInstanceDisks(self, instance)
13962 raise errors.OpExecError("Could not start instance: %s" % msg)
13964 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13965 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13966 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13967 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13968 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13970 (key_name, _, _) = self.x509_key_name
13973 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13976 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13977 key_name, dest_ca_pem,
13982 # Check for backwards compatibility
13983 assert len(dresults) == len(instance.disks)
13984 assert compat.all(isinstance(i, bool) for i in dresults), \
13985 "Not all results are boolean: %r" % dresults
13989 feedback_fn("Deactivating disks for %s" % instance.name)
13990 _ShutdownInstanceDisks(self, instance)
13992 if not (compat.all(dresults) and fin_resu):
13995 failures.append("export finalization")
13996 if not compat.all(dresults):
13997 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13999 failures.append("disk export: disk(s) %s" % fdsk)
14001 raise errors.OpExecError("Export failed, errors in %s" %
14002 utils.CommaJoin(failures))
14004 # At this point, the export was successful, we can cleanup/finish
14006 # Remove instance if requested
14007 if self.op.remove_instance:
14008 feedback_fn("Removing instance %s" % instance.name)
14009 _RemoveInstance(self, feedback_fn, instance,
14010 self.op.ignore_remove_failures)
14012 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14013 self._CleanupExports(feedback_fn)
14015 return fin_resu, dresults
14018 class LUBackupRemove(NoHooksLU):
14019 """Remove exports related to the named instance.
14024 def ExpandNames(self):
14025 self.needed_locks = {}
14026 # We need all nodes to be locked in order for RemoveExport to work, but we
14027 # don't need to lock the instance itself, as nothing will happen to it (and
14028 # we can remove exports also for a removed instance)
14029 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14031 def Exec(self, feedback_fn):
14032 """Remove any export.
14035 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14036 # If the instance was not found we'll try with the name that was passed in.
14037 # This will only work if it was an FQDN, though.
14039 if not instance_name:
14041 instance_name = self.op.instance_name
14043 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14044 exportlist = self.rpc.call_export_list(locked_nodes)
14046 for node in exportlist:
14047 msg = exportlist[node].fail_msg
14049 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14051 if instance_name in exportlist[node].payload:
14053 result = self.rpc.call_export_remove(node, instance_name)
14054 msg = result.fail_msg
14056 logging.error("Could not remove export for instance %s"
14057 " on node %s: %s", instance_name, node, msg)
14059 if fqdn_warn and not found:
14060 feedback_fn("Export not found. If trying to remove an export belonging"
14061 " to a deleted instance please use its Fully Qualified"
14065 class LUGroupAdd(LogicalUnit):
14066 """Logical unit for creating node groups.
14069 HPATH = "group-add"
14070 HTYPE = constants.HTYPE_GROUP
14073 def ExpandNames(self):
14074 # We need the new group's UUID here so that we can create and acquire the
14075 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14076 # that it should not check whether the UUID exists in the configuration.
14077 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14078 self.needed_locks = {}
14079 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14081 def CheckPrereq(self):
14082 """Check prerequisites.
14084 This checks that the given group name is not an existing node group
14089 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14090 except errors.OpPrereqError:
14093 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14094 " node group (UUID: %s)" %
14095 (self.op.group_name, existing_uuid),
14096 errors.ECODE_EXISTS)
14098 if self.op.ndparams:
14099 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14101 if self.op.hv_state:
14102 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14104 self.new_hv_state = None
14106 if self.op.disk_state:
14107 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14109 self.new_disk_state = None
14111 if self.op.diskparams:
14112 for templ in constants.DISK_TEMPLATES:
14113 if templ in self.op.diskparams:
14114 utils.ForceDictType(self.op.diskparams[templ],
14115 constants.DISK_DT_TYPES)
14116 self.new_diskparams = self.op.diskparams
14118 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14119 except errors.OpPrereqError, err:
14120 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14121 errors.ECODE_INVAL)
14123 self.new_diskparams = {}
14125 if self.op.ipolicy:
14126 cluster = self.cfg.GetClusterInfo()
14127 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14129 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14130 except errors.ConfigurationError, err:
14131 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14132 errors.ECODE_INVAL)
14134 def BuildHooksEnv(self):
14135 """Build hooks env.
14139 "GROUP_NAME": self.op.group_name,
14142 def BuildHooksNodes(self):
14143 """Build hooks nodes.
14146 mn = self.cfg.GetMasterNode()
14147 return ([mn], [mn])
14149 def Exec(self, feedback_fn):
14150 """Add the node group to the cluster.
14153 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14154 uuid=self.group_uuid,
14155 alloc_policy=self.op.alloc_policy,
14156 ndparams=self.op.ndparams,
14157 diskparams=self.new_diskparams,
14158 ipolicy=self.op.ipolicy,
14159 hv_state_static=self.new_hv_state,
14160 disk_state_static=self.new_disk_state)
14162 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14163 del self.remove_locks[locking.LEVEL_NODEGROUP]
14166 class LUGroupAssignNodes(NoHooksLU):
14167 """Logical unit for assigning nodes to groups.
14172 def ExpandNames(self):
14173 # These raise errors.OpPrereqError on their own:
14174 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14175 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14177 # We want to lock all the affected nodes and groups. We have readily
14178 # available the list of nodes, and the *destination* group. To gather the
14179 # list of "source" groups, we need to fetch node information later on.
14180 self.needed_locks = {
14181 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14182 locking.LEVEL_NODE: self.op.nodes,
14185 def DeclareLocks(self, level):
14186 if level == locking.LEVEL_NODEGROUP:
14187 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14189 # Try to get all affected nodes' groups without having the group or node
14190 # lock yet. Needs verification later in the code flow.
14191 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14193 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14195 def CheckPrereq(self):
14196 """Check prerequisites.
14199 assert self.needed_locks[locking.LEVEL_NODEGROUP]
14200 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
14201 frozenset(self.op.nodes))
14203 expected_locks = (set([self.group_uuid]) |
14204 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
14205 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
14206 if actual_locks != expected_locks:
14207 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
14208 " current groups are '%s', used to be '%s'" %
14209 (utils.CommaJoin(expected_locks),
14210 utils.CommaJoin(actual_locks)))
14212 self.node_data = self.cfg.GetAllNodesInfo()
14213 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14214 instance_data = self.cfg.GetAllInstancesInfo()
14216 if self.group is None:
14217 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14218 (self.op.group_name, self.group_uuid))
14220 (new_splits, previous_splits) = \
14221 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
14222 for node in self.op.nodes],
14223 self.node_data, instance_data)
14226 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
14228 if not self.op.force:
14229 raise errors.OpExecError("The following instances get split by this"
14230 " change and --force was not given: %s" %
14233 self.LogWarning("This operation will split the following instances: %s",
14236 if previous_splits:
14237 self.LogWarning("In addition, these already-split instances continue"
14238 " to be split across groups: %s",
14239 utils.CommaJoin(utils.NiceSort(previous_splits)))
14241 def Exec(self, feedback_fn):
14242 """Assign nodes to a new group.
14245 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
14247 self.cfg.AssignGroupNodes(mods)
14250 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
14251 """Check for split instances after a node assignment.
14253 This method considers a series of node assignments as an atomic operation,
14254 and returns information about split instances after applying the set of
14257 In particular, it returns information about newly split instances, and
14258 instances that were already split, and remain so after the change.
14260 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
14263 @type changes: list of (node_name, new_group_uuid) pairs.
14264 @param changes: list of node assignments to consider.
14265 @param node_data: a dict with data for all nodes
14266 @param instance_data: a dict with all instances to consider
14267 @rtype: a two-tuple
14268 @return: a list of instances that were previously okay and end up split as a
14269 consequence of this change, and a list of instances that were previously
14270 split and this change does not fix.
14273 changed_nodes = dict((node, group) for node, group in changes
14274 if node_data[node].group != group)
14276 all_split_instances = set()
14277 previously_split_instances = set()
14279 def InstanceNodes(instance):
14280 return [instance.primary_node] + list(instance.secondary_nodes)
14282 for inst in instance_data.values():
14283 if inst.disk_template not in constants.DTS_INT_MIRROR:
14286 instance_nodes = InstanceNodes(inst)
14288 if len(set(node_data[node].group for node in instance_nodes)) > 1:
14289 previously_split_instances.add(inst.name)
14291 if len(set(changed_nodes.get(node, node_data[node].group)
14292 for node in instance_nodes)) > 1:
14293 all_split_instances.add(inst.name)
14295 return (list(all_split_instances - previously_split_instances),
14296 list(previously_split_instances & all_split_instances))
14299 class _GroupQuery(_QueryBase):
14300 FIELDS = query.GROUP_FIELDS
14302 def ExpandNames(self, lu):
14303 lu.needed_locks = {}
14305 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
14306 self._cluster = lu.cfg.GetClusterInfo()
14307 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
14310 self.wanted = [name_to_uuid[name]
14311 for name in utils.NiceSort(name_to_uuid.keys())]
14313 # Accept names to be either names or UUIDs.
14316 all_uuid = frozenset(self._all_groups.keys())
14318 for name in self.names:
14319 if name in all_uuid:
14320 self.wanted.append(name)
14321 elif name in name_to_uuid:
14322 self.wanted.append(name_to_uuid[name])
14324 missing.append(name)
14327 raise errors.OpPrereqError("Some groups do not exist: %s" %
14328 utils.CommaJoin(missing),
14329 errors.ECODE_NOENT)
14331 def DeclareLocks(self, lu, level):
14334 def _GetQueryData(self, lu):
14335 """Computes the list of node groups and their attributes.
14338 do_nodes = query.GQ_NODE in self.requested_data
14339 do_instances = query.GQ_INST in self.requested_data
14341 group_to_nodes = None
14342 group_to_instances = None
14344 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
14345 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
14346 # latter GetAllInstancesInfo() is not enough, for we have to go through
14347 # instance->node. Hence, we will need to process nodes even if we only need
14348 # instance information.
14349 if do_nodes or do_instances:
14350 all_nodes = lu.cfg.GetAllNodesInfo()
14351 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
14354 for node in all_nodes.values():
14355 if node.group in group_to_nodes:
14356 group_to_nodes[node.group].append(node.name)
14357 node_to_group[node.name] = node.group
14360 all_instances = lu.cfg.GetAllInstancesInfo()
14361 group_to_instances = dict((uuid, []) for uuid in self.wanted)
14363 for instance in all_instances.values():
14364 node = instance.primary_node
14365 if node in node_to_group:
14366 group_to_instances[node_to_group[node]].append(instance.name)
14369 # Do not pass on node information if it was not requested.
14370 group_to_nodes = None
14372 return query.GroupQueryData(self._cluster,
14373 [self._all_groups[uuid]
14374 for uuid in self.wanted],
14375 group_to_nodes, group_to_instances,
14376 query.GQ_DISKPARAMS in self.requested_data)
14379 class LUGroupQuery(NoHooksLU):
14380 """Logical unit for querying node groups.
14385 def CheckArguments(self):
14386 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14387 self.op.output_fields, False)
14389 def ExpandNames(self):
14390 self.gq.ExpandNames(self)
14392 def DeclareLocks(self, level):
14393 self.gq.DeclareLocks(self, level)
14395 def Exec(self, feedback_fn):
14396 return self.gq.OldStyleQuery(self)
14399 class LUGroupSetParams(LogicalUnit):
14400 """Modifies the parameters of a node group.
14403 HPATH = "group-modify"
14404 HTYPE = constants.HTYPE_GROUP
14407 def CheckArguments(self):
14410 self.op.diskparams,
14411 self.op.alloc_policy,
14413 self.op.disk_state,
14417 if all_changes.count(None) == len(all_changes):
14418 raise errors.OpPrereqError("Please pass at least one modification",
14419 errors.ECODE_INVAL)
14421 def ExpandNames(self):
14422 # This raises errors.OpPrereqError on its own:
14423 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14425 self.needed_locks = {
14426 locking.LEVEL_INSTANCE: [],
14427 locking.LEVEL_NODEGROUP: [self.group_uuid],
14430 self.share_locks[locking.LEVEL_INSTANCE] = 1
14432 def DeclareLocks(self, level):
14433 if level == locking.LEVEL_INSTANCE:
14434 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14436 # Lock instances optimistically; this needs verification once the group lock has been acquired
14438 self.needed_locks[locking.LEVEL_INSTANCE] = \
14439 self.cfg.GetNodeGroupInstances(self.group_uuid)
14442 def _UpdateAndVerifyDiskParams(old, new):
14443 """Updates and verifies disk parameters.
14446 new_params = _GetUpdatedParams(old, new)
14447 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14450 def CheckPrereq(self):
14451 """Check prerequisites.
14454 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14456 # Check if locked instances are still correct
14457 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14459 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14460 cluster = self.cfg.GetClusterInfo()
14462 if self.group is None:
14463 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14464 (self.op.group_name, self.group_uuid))
14466 if self.op.ndparams:
14467 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14468 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14469 self.new_ndparams = new_ndparams
14471 if self.op.diskparams:
14472 diskparams = self.group.diskparams
14473 uavdp = self._UpdateAndVerifyDiskParams
14474 # For each disktemplate subdict update and verify the values
14475 new_diskparams = dict((dt,
14476 uavdp(diskparams.get(dt, {}),
14477 self.op.diskparams[dt]))
14478 for dt in constants.DISK_TEMPLATES
14479 if dt in self.op.diskparams)
14480 # Now that all subdicts of diskparams are ready, let's merge the actual
14481 # dict with all updated subdicts
14482 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14484 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14485 except errors.OpPrereqError, err:
14486 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14487 errors.ECODE_INVAL)
14489 if self.op.hv_state:
14490 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14491 self.group.hv_state_static)
14493 if self.op.disk_state:
14494 self.new_disk_state = \
14495 _MergeAndVerifyDiskState(self.op.disk_state,
14496 self.group.disk_state_static)
14498 if self.op.ipolicy:
14499       self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14500                                             self.op.ipolicy,
14501                                             group_policy=True)
14503       new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14504       inst_filter = lambda inst: inst.name in owned_instances
14505       instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14506       gmi = ganeti.masterd.instance
14507       violations = \
14508           _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14509                                                                    self.group),
14510                                         new_ipolicy, instances)
14512       if violations:
14513         self.LogWarning("After the ipolicy change the following instances"
14514 " violate them: %s",
14515 utils.CommaJoin(violations))
14517 def BuildHooksEnv(self):
14518 """Build hooks env.
14522 "GROUP_NAME": self.op.group_name,
14523 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14526 def BuildHooksNodes(self):
14527 """Build hooks nodes.
14530 mn = self.cfg.GetMasterNode()
14531 return ([mn], [mn])
14533 def Exec(self, feedback_fn):
14534 """Modifies the node group.
14539 if self.op.ndparams:
14540 self.group.ndparams = self.new_ndparams
14541 result.append(("ndparams", str(self.group.ndparams)))
14543 if self.op.diskparams:
14544 self.group.diskparams = self.new_diskparams
14545 result.append(("diskparams", str(self.group.diskparams)))
14547 if self.op.alloc_policy:
14548 self.group.alloc_policy = self.op.alloc_policy
14550 if self.op.hv_state:
14551 self.group.hv_state_static = self.new_hv_state
14553 if self.op.disk_state:
14554 self.group.disk_state_static = self.new_disk_state
14556 if self.op.ipolicy:
14557 self.group.ipolicy = self.new_ipolicy
14559     self.cfg.Update(self.group, feedback_fn)
14561     return result
14563 class LUGroupRemove(LogicalUnit):
14564 HPATH = "group-remove"
14565   HTYPE = constants.HTYPE_GROUP
14566   REQ_BGL = False
14568 def ExpandNames(self):
14569     # This will raise errors.OpPrereqError on its own:
14570 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14571 self.needed_locks = {
14572       locking.LEVEL_NODEGROUP: [self.group_uuid],
14573       }
14575 def CheckPrereq(self):
14576 """Check prerequisites.
14578 This checks that the given group name exists as a node group, that is
14579 empty (i.e., contains no nodes), and that is not the last group of the
14583 # Verify that the group is empty.
14584 group_nodes = [node.name
14585 for node in self.cfg.GetAllNodesInfo().values()
14586 if node.group == self.group_uuid]
14589 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14591 (self.op.group_name,
14592 utils.CommaJoin(utils.NiceSort(group_nodes))),
14593 errors.ECODE_STATE)
14595 # Verify the cluster would not be left group-less.
14596 if len(self.cfg.GetNodeGroupList()) == 1:
14597 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
14598 " removed" % self.op.group_name,
14599 errors.ECODE_STATE)
14601 def BuildHooksEnv(self):
14602 """Build hooks env.
14606 "GROUP_NAME": self.op.group_name,
14609 def BuildHooksNodes(self):
14610 """Build hooks nodes.
14613 mn = self.cfg.GetMasterNode()
14614 return ([mn], [mn])
14616 def Exec(self, feedback_fn):
14617 """Remove the node group.
14621 self.cfg.RemoveNodeGroup(self.group_uuid)
14622 except errors.ConfigurationError:
14623 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14624 (self.op.group_name, self.group_uuid))
14626 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14629 class LUGroupRename(LogicalUnit):
14630 HPATH = "group-rename"
14631   HTYPE = constants.HTYPE_GROUP
14632   REQ_BGL = False
14634 def ExpandNames(self):
14635 # This raises errors.OpPrereqError on its own:
14636 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14638 self.needed_locks = {
14639       locking.LEVEL_NODEGROUP: [self.group_uuid],
14640       }
14642 def CheckPrereq(self):
14643 """Check prerequisites.
14645 Ensures requested new name is not yet used.
14649 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14650 except errors.OpPrereqError:
14653 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14654 " node group (UUID: %s)" %
14655 (self.op.new_name, new_name_uuid),
14656 errors.ECODE_EXISTS)
14658 def BuildHooksEnv(self):
14659 """Build hooks env.
14663 "OLD_NAME": self.op.group_name,
14664 "NEW_NAME": self.op.new_name,
14667 def BuildHooksNodes(self):
14668 """Build hooks nodes.
14671 mn = self.cfg.GetMasterNode()
14673 all_nodes = self.cfg.GetAllNodesInfo()
14674 all_nodes.pop(mn, None)
14677 run_nodes.extend(node.name for node in all_nodes.values()
14678 if node.group == self.group_uuid)
14680 return (run_nodes, run_nodes)
14682 def Exec(self, feedback_fn):
14683 """Rename the node group.
14686 group = self.cfg.GetNodeGroup(self.group_uuid)
14689 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14690 (self.op.group_name, self.group_uuid))
14692 group.name = self.op.new_name
14693 self.cfg.Update(group, feedback_fn)
14695 return self.op.new_name
14698 class LUGroupEvacuate(LogicalUnit):
14699 HPATH = "group-evacuate"
14700   HTYPE = constants.HTYPE_GROUP
14701   REQ_BGL = False
14703 def ExpandNames(self):
14704 # This raises errors.OpPrereqError on its own:
14705 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14707 if self.op.target_groups:
14708 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14709 self.op.target_groups)
14710     else:
14711       self.req_target_uuids = []
14713 if self.group_uuid in self.req_target_uuids:
14714 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14715 " as a target group (targets are %s)" %
14716                                  (self.group_uuid,
14717                                   utils.CommaJoin(self.req_target_uuids)),
14718 errors.ECODE_INVAL)
14720 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14722 self.share_locks = _ShareAll()
14723 self.needed_locks = {
14724 locking.LEVEL_INSTANCE: [],
14725 locking.LEVEL_NODEGROUP: [],
14726       locking.LEVEL_NODE: [],
14727       }
14729 def DeclareLocks(self, level):
14730 if level == locking.LEVEL_INSTANCE:
14731 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14733 # Lock instances optimistically, needs verification once node and group
14734 # locks have been acquired
14735 self.needed_locks[locking.LEVEL_INSTANCE] = \
14736 self.cfg.GetNodeGroupInstances(self.group_uuid)
14738 elif level == locking.LEVEL_NODEGROUP:
14739 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14741 if self.req_target_uuids:
14742 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14744 # Lock all groups used by instances optimistically; this requires going
14745 # via the node before it's locked, requiring verification later on
14746 lock_groups.update(group_uuid
14747 for instance_name in
14748 self.owned_locks(locking.LEVEL_INSTANCE)
14749                            for group_uuid in
14750                              self.cfg.GetInstanceNodeGroups(instance_name))
14751       else:
14752         # No target groups, need to lock all of them
14753 lock_groups = locking.ALL_SET
14755 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14757 elif level == locking.LEVEL_NODE:
14758 # This will only lock the nodes in the group to be evacuated which
14759 # contain actual instances
14760 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14761 self._LockInstancesNodes()
14763 # Lock all nodes in group to be evacuated and target groups
14764 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14765 assert self.group_uuid in owned_groups
14766 member_nodes = [node_name
14767 for group in owned_groups
14768 for node_name in self.cfg.GetNodeGroup(group).members]
14769 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14771 def CheckPrereq(self):
14772 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14773 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14774 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14776 assert owned_groups.issuperset(self.req_target_uuids)
14777 assert self.group_uuid in owned_groups
14779 # Check if locked instances are still correct
14780 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14782 # Get instance information
14783 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14785 # Check if node groups for locked instances are still correct
14786 _CheckInstancesNodeGroups(self.cfg, self.instances,
14787 owned_groups, owned_nodes, self.group_uuid)
14789 if self.req_target_uuids:
14790 # User requested specific target groups
14791 self.target_uuids = self.req_target_uuids
14792     else:
14793       # All groups except the one to be evacuated are potential targets
14794 self.target_uuids = [group_uuid for group_uuid in owned_groups
14795 if group_uuid != self.group_uuid]
14797 if not self.target_uuids:
14798 raise errors.OpPrereqError("There are no possible target groups",
14799 errors.ECODE_INVAL)
14801 def BuildHooksEnv(self):
14802 """Build hooks env.
14806 "GROUP_NAME": self.op.group_name,
14807 "TARGET_GROUPS": " ".join(self.target_uuids),
14810 def BuildHooksNodes(self):
14811 """Build hooks nodes.
14814 mn = self.cfg.GetMasterNode()
14816 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14818 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14820 return (run_nodes, run_nodes)
14822 def Exec(self, feedback_fn):
14823 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14825 assert self.group_uuid not in self.target_uuids
14827 req = iallocator.IAReqGroupChange(instances=instances,
14828 target_groups=self.target_uuids)
14829 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14831 ial.Run(self.op.iallocator)
14833 if not ial.success:
14834 raise errors.OpPrereqError("Can't compute group evacuation using"
14835 " iallocator '%s': %s" %
14836 (self.op.iallocator, ial.info),
14837 errors.ECODE_NORES)
14839 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14841 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14842 len(jobs), self.op.group_name)
14844 return ResultWithJobs(jobs)
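# Editor's illustrative note (added): _LoadNodeEvacResult converts the
# iallocator response into a list of jobs, each job being a list of opcodes,
# which is the shape ResultWithJobs expects. Roughly (the opcodes shown are
# examples only, not taken from a real iallocator run):
#
#   jobs = [
#     [opcodes.OpInstanceMigrate(instance_name="inst1.example.com")],
#     [opcodes.OpInstanceMigrate(instance_name="inst2.example.com")],
#     ]
#   return ResultWithJobs(jobs)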
14847 class TagsLU(NoHooksLU): # pylint: disable=W0223
14848 """Generic tags LU.
14850   This is an abstract class which is the parent of all the other tags LUs.
14852   """
14853 def ExpandNames(self):
14854 self.group_uuid = None
14855 self.needed_locks = {}
14857 if self.op.kind == constants.TAG_NODE:
14858 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14859 lock_level = locking.LEVEL_NODE
14860 lock_name = self.op.name
14861 elif self.op.kind == constants.TAG_INSTANCE:
14862 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14863 lock_level = locking.LEVEL_INSTANCE
14864 lock_name = self.op.name
14865 elif self.op.kind == constants.TAG_NODEGROUP:
14866 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14867 lock_level = locking.LEVEL_NODEGROUP
14868       lock_name = self.group_uuid
14869     else:
14870       lock_level = None
14871       lock_name = None
14873     if lock_level and getattr(self.op, "use_locking", True):
14874 self.needed_locks[lock_level] = lock_name
14876 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14877 # not possible to acquire the BGL based on opcode parameters)
14879 def CheckPrereq(self):
14880 """Check prerequisites.
14883 if self.op.kind == constants.TAG_CLUSTER:
14884 self.target = self.cfg.GetClusterInfo()
14885 elif self.op.kind == constants.TAG_NODE:
14886 self.target = self.cfg.GetNodeInfo(self.op.name)
14887 elif self.op.kind == constants.TAG_INSTANCE:
14888 self.target = self.cfg.GetInstanceInfo(self.op.name)
14889 elif self.op.kind == constants.TAG_NODEGROUP:
14890 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14892 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14893 str(self.op.kind), errors.ECODE_INVAL)
14896 class LUTagsGet(TagsLU):
14897 """Returns the tags of a given object.
14902 def ExpandNames(self):
14903 TagsLU.ExpandNames(self)
14905 # Share locks as this is only a read operation
14906 self.share_locks = _ShareAll()
14908 def Exec(self, feedback_fn):
14909 """Returns the tag list.
14912 return list(self.target.GetTags())
14915 class LUTagsSearch(NoHooksLU):
14916 """Searches the tags for a given pattern.
14921 def ExpandNames(self):
14922 self.needed_locks = {}
14924 def CheckPrereq(self):
14925 """Check prerequisites.
14927     This checks the pattern passed for validity by compiling it.
14929     """
14930     try:
14931       self.re = re.compile(self.op.pattern)
14932 except re.error, err:
14933 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14934 (self.op.pattern, err), errors.ECODE_INVAL)
14936 def Exec(self, feedback_fn):
14937 """Returns the tag list.
14941 tgts = [("/cluster", cfg.GetClusterInfo())]
14942 ilist = cfg.GetAllInstancesInfo().values()
14943 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14944 nlist = cfg.GetAllNodesInfo().values()
14945 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14946 tgts.extend(("/nodegroup/%s" % n.name, n)
14947 for n in cfg.GetAllNodeGroupsInfo().values())
14948     results = []
14949     for path, target in tgts:
14950 for tag in target.GetTags():
14951 if self.re.search(tag):
14952           results.append((path, tag))
14954     return results
14956 class LUTagsSet(TagsLU):
14957 """Sets a tag on a given object.
14962 def CheckPrereq(self):
14963 """Check prerequisites.
14965     This checks the type and length of the tag name and value.
14967     """
14968 TagsLU.CheckPrereq(self)
14969 for tag in self.op.tags:
14970 objects.TaggableObject.ValidateTag(tag)
14972   def Exec(self, feedback_fn):
14973     """Sets the tag.
14975     """
14976     try:
14977       for tag in self.op.tags:
14978         self.target.AddTag(tag)
14979 except errors.TagError, err:
14980 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14981 self.cfg.Update(self.target, feedback_fn)
14984 class LUTagsDel(TagsLU):
14985 """Delete a list of tags from a given object.
14990 def CheckPrereq(self):
14991 """Check prerequisites.
14993     This checks that we have the given tag.
14995     """
14996 TagsLU.CheckPrereq(self)
14997 for tag in self.op.tags:
14998 objects.TaggableObject.ValidateTag(tag)
14999 del_tags = frozenset(self.op.tags)
15000 cur_tags = self.target.GetTags()
15002 diff_tags = del_tags - cur_tags
15003     if diff_tags:
15004       diff_names = ("'%s'" % i for i in sorted(diff_tags))
15005 raise errors.OpPrereqError("Tag(s) %s not found" %
15006 (utils.CommaJoin(diff_names), ),
15007 errors.ECODE_NOENT)
15009 def Exec(self, feedback_fn):
15010 """Remove the tag from the object.
15013 for tag in self.op.tags:
15014 self.target.RemoveTag(tag)
15015 self.cfg.Update(self.target, feedback_fn)
15018 class LUTestDelay(NoHooksLU):
15019 """Sleep for a specified amount of time.
15021   This LU sleeps on the master and/or nodes for a specified amount of
15022   time.
15024   """
15025   REQ_BGL = False
15027 def ExpandNames(self):
15028 """Expand names and set required locks.
15030     This expands the node list, if any.
15032     """
15033 self.needed_locks = {}
15034 if self.op.on_nodes:
15035 # _GetWantedNodes can be used here, but is not always appropriate to use
15036 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15037 # more information.
15038 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15039 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15041 def _TestDelay(self):
15042 """Do the actual sleep.
15045 if self.op.on_master:
15046 if not utils.TestDelay(self.op.duration):
15047 raise errors.OpExecError("Error during master delay test")
15048 if self.op.on_nodes:
15049 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15050 for node, node_result in result.items():
15051 node_result.Raise("Failure during rpc call to node %s" % node)
15053 def Exec(self, feedback_fn):
15054 """Execute the test delay opcode, with the wanted repetitions.
15057 if self.op.repeat == 0:
15060 top_value = self.op.repeat - 1
15061 for i in range(self.op.repeat):
15062 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
15066 class LURestrictedCommand(NoHooksLU):
15067 """Logical unit for executing restricted commands.
15072   def ExpandNames(self):
15073     if self.op.nodes:
15074       self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15076     self.needed_locks = {
15077       locking.LEVEL_NODE: self.op.nodes,
15078       }
15079     self.share_locks = {
15080       locking.LEVEL_NODE: not self.op.use_locking,
15081       }
15083 def CheckPrereq(self):
15084 """Check prerequisites.
15088 def Exec(self, feedback_fn):
15089 """Execute restricted command and return output.
15092 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15094 # Check if correct locks are held
15095 assert set(self.op.nodes).issubset(owned_nodes)
15097 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15099     result = []
15101     for node_name in self.op.nodes:
15102 nres = rpcres[node_name]
15103       if nres.fail_msg:
15104         msg = ("Command '%s' on node '%s' failed: %s" %
15105 (self.op.command, node_name, nres.fail_msg))
15106 result.append((False, msg))
15107       else:
15108         result.append((True, nres.payload))
15110     return result
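  # Editor's illustrative note (added): the list returned by Exec above pairs
  # a success flag with either the command output or an error message, one
  # entry per requested node, e.g.:
  #
  #   [(True, "output of the command on node1"),
  #    (False, "Command 'foo' on node 'node2' failed: connection timed out")]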
15113 class LUTestJqueue(NoHooksLU):
15114 """Utility LU to test some aspects of the job queue.
15119 # Must be lower than default timeout for WaitForJobChange to see whether it
15120 # notices changed jobs
15121 _CLIENT_CONNECT_TIMEOUT = 20.0
15122 _CLIENT_CONFIRM_TIMEOUT = 60.0
15124   @classmethod
15125   def _NotifyUsingSocket(cls, cb, errcls):
15126     """Opens a Unix socket and waits for another program to connect.
15128     @type cb: callable
15129     @param cb: Callback to send socket name to client
15130     @type errcls: class
15131     @param errcls: Exception class to use for errors
15133     """
15134     # Using a temporary directory as there's no easy way to create temporary
15135     # sockets without writing a custom loop around tempfile.mktemp and
15136     # socket.bind
15137     tmpdir = tempfile.mkdtemp()
15138     try:
15139       tmpsock = utils.PathJoin(tmpdir, "sock")
15141       logging.debug("Creating temporary socket at %s", tmpsock)
15142       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15143       try:
15144         sock.bind(tmpsock)
15145         sock.listen(1)
15147         # Send details to client
15148         cb(tmpsock)
15150         # Wait for client to connect before continuing
15151         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15152         try:
15153           (conn, _) = sock.accept()
15154         except socket.error, err:
15155           raise errcls("Client didn't connect in time (%s)" % err)
15156       finally:
15157         sock.close()
15158     finally:
15159       # Remove as soon as client is connected
15160       shutil.rmtree(tmpdir)
15162     # Wait for client to close
15163     try:
15164       try:
15165         # pylint: disable=E1101
15166         # Instance of '_socketobject' has no ... member
15167         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15168         conn.recv(1)
15169       except socket.error, err:
15170         raise errcls("Client failed to confirm notification (%s)" % err)
15171     finally:
15172       conn.close()
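  # Editor's illustrative sketch (added, not part of the module): the client
  # that _NotifyUsingSocket waits for only has to connect to the announced
  # path and keep the connection open until it wants to confirm, e.g.:
  #
  #   import socket
  #   s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  #   s.connect(sockname)  # path delivered through the callback/log message
  #   ...                  # let the LU proceed, then
  #   s.close()            # closing the socket confirms the notification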
15174 def _SendNotification(self, test, arg, sockname):
15175 """Sends a notification to the client.
15178 @param test: Test name
15179 @param arg: Test argument (depends on test)
15180 @type sockname: string
15181     @param sockname: Socket path
15183     """
15184 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15186 def _Notify(self, prereq, test, arg):
15187 """Notifies the client of a test.
15190 @param prereq: Whether this is a prereq-phase test
15192 @param test: Test name
15193 @param arg: Test argument (depends on test)
15197 errcls = errors.OpPrereqError
15199 errcls = errors.OpExecError
15201 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
15205 def CheckArguments(self):
15206 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
15207 self.expandnames_calls = 0
15209 def ExpandNames(self):
15210 checkargs_calls = getattr(self, "checkargs_calls", 0)
15211 if checkargs_calls < 1:
15212 raise errors.ProgrammerError("CheckArguments was not called")
15214 self.expandnames_calls += 1
15216 if self.op.notify_waitlock:
15217 self._Notify(True, constants.JQT_EXPANDNAMES, None)
15219 self.LogInfo("Expanding names")
15221 # Get lock on master node (just to get a lock, not for a particular reason)
15222 self.needed_locks = {
15223       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
15224       }
15226 def Exec(self, feedback_fn):
15227 if self.expandnames_calls < 1:
15228 raise errors.ProgrammerError("ExpandNames was not called")
15230 if self.op.notify_exec:
15231 self._Notify(False, constants.JQT_EXEC, None)
15233 self.LogInfo("Executing")
15235 if self.op.log_messages:
15236 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
15237 for idx, msg in enumerate(self.op.log_messages):
15238 self.LogInfo("Sending log message %s", idx + 1)
15239 feedback_fn(constants.JQT_MSGPREFIX + msg)
15240 # Report how many test messages have been sent
15241 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
15243     if self.op.fail:
15244       raise errors.OpExecError("Opcode failure was requested")
15246     return True
15249 class LUTestAllocator(NoHooksLU):
15250 """Run allocator tests.
15252   This LU runs the allocator tests.
15254   """
15255 def CheckPrereq(self):
15256 """Check prerequisites.
15258     This checks the opcode parameters depending on the direction and mode of
15259     the test.
15260     """
15261 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
15262 constants.IALLOCATOR_MODE_MULTI_ALLOC):
15263 for attr in ["memory", "disks", "disk_template",
15264 "os", "tags", "nics", "vcpus"]:
15265 if not hasattr(self.op, attr):
15266 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15267 attr, errors.ECODE_INVAL)
15268 iname = self.cfg.ExpandInstanceName(self.op.name)
15269 if iname is not None:
15270 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15271 iname, errors.ECODE_EXISTS)
15272 if not isinstance(self.op.nics, list):
15273 raise errors.OpPrereqError("Invalid parameter 'nics'",
15274 errors.ECODE_INVAL)
15275 if not isinstance(self.op.disks, list):
15276 raise errors.OpPrereqError("Invalid parameter 'disks'",
15277 errors.ECODE_INVAL)
15278 for row in self.op.disks:
15279 if (not isinstance(row, dict) or
15280 constants.IDISK_SIZE not in row or
15281 not isinstance(row[constants.IDISK_SIZE], int) or
15282 constants.IDISK_MODE not in row or
15283 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15284 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15285 " parameter", errors.ECODE_INVAL)
15286 if self.op.hypervisor is None:
15287 self.op.hypervisor = self.cfg.GetHypervisorType()
15288 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15289 fname = _ExpandInstanceName(self.cfg, self.op.name)
15290 self.op.name = fname
15291 self.relocate_from = \
15292 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15293 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15294 constants.IALLOCATOR_MODE_NODE_EVAC):
15295 if not self.op.instances:
15296 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15297 self.op.instances = _GetWantedInstances(self, self.op.instances)
15299 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15300 self.op.mode, errors.ECODE_INVAL)
15302 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15303 if self.op.allocator is None:
15304 raise errors.OpPrereqError("Missing allocator name",
15305 errors.ECODE_INVAL)
15306 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15307 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15308 self.op.direction, errors.ECODE_INVAL)
15310 def Exec(self, feedback_fn):
15311 """Run the allocator test.
15314 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15315 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
15316 memory=self.op.memory,
15317 disks=self.op.disks,
15318                                           disk_template=self.op.disk_template,
15319                                           os=self.op.os,
15320                                           tags=self.op.tags,
15321                                           nics=self.op.nics,
15322                                           vcpus=self.op.vcpus,
15323 spindle_use=self.op.spindle_use,
15324 hypervisor=self.op.hypervisor)
15325 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15326 req = iallocator.IAReqRelocate(name=self.op.name,
15327 relocate_from=list(self.relocate_from))
15328 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15329 req = iallocator.IAReqGroupChange(instances=self.op.instances,
15330 target_groups=self.op.target_groups)
15331 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15332 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
15333 evac_mode=self.op.evac_mode)
15334 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
15335 disk_template = self.op.disk_template
15336 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
15337 memory=self.op.memory,
15338 disks=self.op.disks,
15339                                              disk_template=disk_template,
15340                                              os=self.op.os,
15341                                              tags=self.op.tags,
15342                                              nics=self.op.nics,
15343                                              vcpus=self.op.vcpus,
15344 spindle_use=self.op.spindle_use,
15345 hypervisor=self.op.hypervisor)
15346 for idx in range(self.op.count)]
15347 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
15349 raise errors.ProgrammerError("Uncatched mode %s in"
15350 " LUTestAllocator.Exec", self.op.mode)
15352 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15353 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15354 result = ial.in_text
15355     else:
15356       ial.Run(self.op.allocator, validate=False)
15357       result = ial.out_text
15358     return result
15361 class LUNetworkAdd(LogicalUnit):
15362 """Logical unit for creating networks.
15365 HPATH = "network-add"
15366   HTYPE = constants.HTYPE_NETWORK
15367   REQ_BGL = False
15369 def BuildHooksNodes(self):
15370 """Build hooks nodes.
15373 mn = self.cfg.GetMasterNode()
15374 return ([mn], [mn])
15376 def ExpandNames(self):
15377 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
15378 self.needed_locks = {}
15379 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
15381 def CheckPrereq(self):
15382 """Check prerequisites.
15384     This checks that the given network name is not yet defined.
15387     """
15388 if self.op.network is None:
15389 raise errors.OpPrereqError("Network must be given",
15390 errors.ECODE_INVAL)
15392 uuid = self.cfg.LookupNetwork(self.op.network_name)
15395 raise errors.OpPrereqError("Network '%s' already defined" %
15396 self.op.network, errors.ECODE_EXISTS)
15399 def BuildHooksEnv(self):
15400 """Build hooks env.
15404 "NETWORK_NAME": self.op.network_name,
15405 "NETWORK_SUBNET": self.op.network,
15406 "NETWORK_GATEWAY": self.op.gateway,
15407 "NETWORK_SUBNET6": self.op.network6,
15408 "NETWORK_GATEWAY6": self.op.gateway6,
15409 "NETWORK_MAC_PREFIX": self.op.mac_prefix,
15410 "NETWORK_TYPE": self.op.network_type,
15414 def Exec(self, feedback_fn):
15415 """Add the ip pool to the cluster.
15418 nobj = objects.Network(name=self.op.network_name,
15419 network=self.op.network,
15420 gateway=self.op.gateway,
15421 network6=self.op.network6,
15422 gateway6=self.op.gateway6,
15423 mac_prefix=self.op.mac_prefix,
15424 network_type=self.op.network_type,
15425                            uuid=self.network_uuid)
15427     # Initialize the associated address pool
15428     try:
15429       pool = network.AddressPool.InitializeNetwork(nobj)
15430 except errors.AddressPoolError, e:
15431 raise errors.OpExecError("Cannot create IP pool for this network. %s" % e)
15433 # Check if we need to reserve the nodes and the cluster master IP
15434 # These may not be allocated to any instances in routed mode, as
15435 # they wouldn't function anyway.
15436 for node in self.cfg.GetAllNodesInfo().values():
15437 for ip in [node.primary_ip, node.secondary_ip]:
15438         try:
15439           pool.Reserve(ip)
15440           self.LogInfo("Reserved node %s's IP (%s)", node.name, ip)
15442         except errors.AddressPoolError:
15443           pass
15445     master_ip = self.cfg.GetClusterInfo().master_ip
15446     try:
15447       pool.Reserve(master_ip)
15448       self.LogInfo("Reserved cluster master IP (%s)", master_ip)
15449     except errors.AddressPoolError:
15450       pass
15452     if self.op.add_reserved_ips:
15453 for ip in self.op.add_reserved_ips:
15454         try:
15455           pool.Reserve(ip, external=True)
15456 except errors.AddressPoolError, e:
15457 raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
15459 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
15460 del self.remove_locks[locking.LEVEL_NETWORK]
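# Editor's illustrative sketch (added): the AddressPool calls used in
# LUNetworkAdd.Exec can be read as follows; 'net' stands for a hypothetical
# objects.Network describing e.g. 192.0.2.0/28:
#
#   pool = network.AddressPool.InitializeNetwork(net)
#   pool.Reserve("192.0.2.10", external=True)  # externally reserved address
#   pool.GetReservedCount()                    # -> number of reserved slots
#   pool.GetMap()                              # -> textual free/reserved map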
15463 class LUNetworkRemove(LogicalUnit):
15464 HPATH = "network-remove"
15465   HTYPE = constants.HTYPE_NETWORK
15466   REQ_BGL = False
15468 def ExpandNames(self):
15469 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15471 self.needed_locks = {
15472       locking.LEVEL_NETWORK: [self.network_uuid],
15473       }
15476 def CheckPrereq(self):
15477 """Check prerequisites.
15479     This checks that the given network name exists as a network and that it
15480     is not connected to (i.e. used by) any node group.
15483     """
15484 if not self.network_uuid:
15485 raise errors.OpPrereqError("Network %s not found" % self.op.network_name,
15486 errors.ECODE_INVAL)
15488     # Verify that the network is not connected.
15489 node_groups = [group.name
15490 for group in self.cfg.GetAllNodeGroupsInfo().values()
15491 for network in group.networks.keys()
15492 if network == self.network_uuid]
15495 self.LogWarning("Nework '%s' is connected to the following"
15496 " node groups: %s" % (self.op.network_name,
15497 utils.CommaJoin(utils.NiceSort(node_groups))))
15498 raise errors.OpPrereqError("Network still connected",
15499 errors.ECODE_STATE)
15501 def BuildHooksEnv(self):
15502 """Build hooks env.
15506 "NETWORK_NAME": self.op.network_name,
15509 def BuildHooksNodes(self):
15510 """Build hooks nodes.
15513 mn = self.cfg.GetMasterNode()
15514 return ([mn], [mn])
15516 def Exec(self, feedback_fn):
15517 """Remove the network.
15521 self.cfg.RemoveNetwork(self.network_uuid)
15522 except errors.ConfigurationError:
15523 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
15524 (self.op.network_name, self.network_uuid))
15527 class LUNetworkSetParams(LogicalUnit):
15528 """Modifies the parameters of a network.
15531 HPATH = "network-modify"
15532   HTYPE = constants.HTYPE_NETWORK
15533   REQ_BGL = False
15535 def CheckArguments(self):
15536 if (self.op.gateway and
15537 (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
15538 raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
15539 " at once", errors.ECODE_INVAL)
15542 def ExpandNames(self):
15543 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15544 self.network = self.cfg.GetNetwork(self.network_uuid)
15545 self.needed_locks = {
15546       locking.LEVEL_NETWORK: [self.network_uuid],
15547       }
15550 if self.network is None:
15551 raise errors.OpPrereqError("Could not retrieve network '%s' (UUID: %s)" %
15552 (self.op.network_name, self.network_uuid),
15553 errors.ECODE_INVAL)
15555 def CheckPrereq(self):
15556 """Check prerequisites.
15559 self.gateway = self.network.gateway
15560 self.network_type = self.network.network_type
15561 self.mac_prefix = self.network.mac_prefix
15562 self.network6 = self.network.network6
15563 self.gateway6 = self.network.gateway6
15565 self.pool = network.AddressPool(self.network)
15567 if self.op.gateway:
15568 if self.op.gateway == constants.VALUE_NONE:
15569 self.gateway = None
15570       else:
15571         self.gateway = self.op.gateway
15572 if self.pool.IsReserved(self.gateway):
15573 raise errors.OpPrereqError("%s is already reserved" %
15574 self.gateway, errors.ECODE_INVAL)
15576 if self.op.network_type:
15577 if self.op.network_type == constants.VALUE_NONE:
15578 self.network_type = None
15579       else:
15580         self.network_type = self.op.network_type
15582 if self.op.mac_prefix:
15583 if self.op.mac_prefix == constants.VALUE_NONE:
15584 self.mac_prefix = None
15585       else:
15586         self.mac_prefix = self.op.mac_prefix
15588 if self.op.gateway6:
15589 if self.op.gateway6 == constants.VALUE_NONE:
15590 self.gateway6 = None
15591       else:
15592         self.gateway6 = self.op.gateway6
15594 if self.op.network6:
15595 if self.op.network6 == constants.VALUE_NONE:
15596 self.network6 = None
15597       else:
15598         self.network6 = self.op.network6
15602 def BuildHooksEnv(self):
15603 """Build hooks env.
15607 "NETWORK_NAME": self.op.network_name,
15608 "NETWORK_SUBNET": self.network.network,
15609 "NETWORK_GATEWAY": self.gateway,
15610 "NETWORK_SUBNET6": self.network6,
15611 "NETWORK_GATEWAY6": self.gateway6,
15612 "NETWORK_MAC_PREFIX": self.mac_prefix,
15613 "NETWORK_TYPE": self.network_type,
15617 def BuildHooksNodes(self):
15618 """Build hooks nodes.
15621 mn = self.cfg.GetMasterNode()
15622 return ([mn], [mn])
15624 def Exec(self, feedback_fn):
15625 """Modifies the network.
15628 #TODO: reserve/release via temporary reservation manager
15629 # extend cfg.ReserveIp/ReleaseIp with the external flag
15630 if self.op.gateway:
15631 if self.gateway == self.network.gateway:
15632 self.LogWarning("Gateway is already %s" % self.gateway)
15634       else:
15635         self.pool.Reserve(self.gateway, external=True)
15636 if self.network.gateway:
15637 self.pool.Release(self.network.gateway, external=True)
15638 self.network.gateway = self.gateway
15640 if self.op.add_reserved_ips:
15641 for ip in self.op.add_reserved_ips:
15643 if self.pool.IsReserved(ip):
15644 self.LogWarning("IP %s is already reserved" % ip)
15645         try:
15646           self.pool.Reserve(ip, external=True)
15647 except errors.AddressPoolError, e:
15648 self.LogWarning("Cannot reserve ip %s. %s" % (ip, e))
15650 if self.op.remove_reserved_ips:
15651 for ip in self.op.remove_reserved_ips:
15652 if ip == self.network.gateway:
15653 self.LogWarning("Cannot unreserve Gateway's IP")
15656 if not self.pool.IsReserved(ip):
15657 self.LogWarning("IP %s is already unreserved" % ip)
15658         try:
15659           self.pool.Release(ip, external=True)
15660 except errors.AddressPoolError, e:
15661 self.LogWarning("Cannot release ip %s. %s" % (ip, e))
15663 if self.op.mac_prefix:
15664 self.network.mac_prefix = self.mac_prefix
15666 if self.op.network6:
15667 self.network.network6 = self.network6
15669 if self.op.gateway6:
15670 self.network.gateway6 = self.gateway6
15672 if self.op.network_type:
15673 self.network.network_type = self.network_type
15675 self.pool.Validate()
15677 self.cfg.Update(self.network, feedback_fn)
15680 class _NetworkQuery(_QueryBase):
15681 FIELDS = query.NETWORK_FIELDS
15683 def ExpandNames(self, lu):
15684 lu.needed_locks = {}
15686 self._all_networks = lu.cfg.GetAllNetworksInfo()
15687 name_to_uuid = dict((n.name, n.uuid) for n in self._all_networks.values())
15689     if not self.names:
15690       self.wanted = [name_to_uuid[name]
15691                      for name in utils.NiceSort(name_to_uuid.keys())]
15692     else:
15693       # Accept names to be either names or UUIDs.
15694       missing = []
15695       self.wanted = []
15696       all_uuid = frozenset(self._all_networks.keys())
15698 for name in self.names:
15699 if name in all_uuid:
15700 self.wanted.append(name)
15701 elif name in name_to_uuid:
15702 self.wanted.append(name_to_uuid[name])
15703         else:
15704           missing.append(name)
15706       if missing:
15707 raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
15708 errors.ECODE_NOENT)
15710   def DeclareLocks(self, lu, level):
15711     pass
15713 def _GetQueryData(self, lu):
15714 """Computes the list of networks and their attributes.
15717 do_instances = query.NETQ_INST in self.requested_data
15718 do_groups = do_instances or (query.NETQ_GROUP in self.requested_data)
15719 do_stats = query.NETQ_STATS in self.requested_data
15720 cluster = lu.cfg.GetClusterInfo()
15722     network_to_groups = None
15723     network_to_instances = None
15724     stats = None
15725     if do_groups:
15726       # For NETQ_GROUP, we need to map network->[groups]
15728       all_groups = lu.cfg.GetAllNodeGroupsInfo()
15729       network_to_groups = dict((uuid, []) for uuid in self.wanted)
15730       default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
15732       if do_instances:
15733         all_instances = lu.cfg.GetAllInstancesInfo()
15734         all_nodes = lu.cfg.GetAllNodesInfo()
15735         network_to_instances = dict((uuid, []) for uuid in self.wanted)
15738       for group in all_groups.values():
15739         if do_instances:
15740           group_nodes = [node.name for node in all_nodes.values() if
15741                          node.group == group.uuid]
15742           group_instances = [instance for instance in all_instances.values()
15743                              if instance.primary_node in group_nodes]
15745         for net_uuid in group.networks.keys():
15746           if net_uuid in network_to_groups:
15747             netparams = group.networks[net_uuid]
15748             mode = netparams[constants.NIC_MODE]
15749             link = netparams[constants.NIC_LINK]
15750             info = group.name + '(' + mode + ', ' + link + ')'
15751             network_to_groups[net_uuid].append(info)
15753             if do_instances:
15754               for instance in group_instances:
15755                 for nic in instance.nics:
15756                   if nic.network == self._all_networks[net_uuid].name:
15757                     network_to_instances[net_uuid].append(instance.name)
15759     if do_stats:
15760       stats = {}
15762       for uuid, net in self._all_networks.items():
15763         if uuid in self.wanted:
15764           pool = network.AddressPool(net)
15765           stats[uuid] = {
15766             "free_count": pool.GetFreeCount(),
15767             "reserved_count": pool.GetReservedCount(),
15768             "map": pool.GetMap(),
15769             "external_reservations": ", ".join(pool.GetExternalReservations()),
15770             }
15772     return query.NetworkQueryData([self._all_networks[uuid]
15773                                    for uuid in self.wanted],
15774                                   network_to_groups,
15775                                   network_to_instances,
15776                                   stats)
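# Editor's illustrative note (added): with NETQ_STATS requested, each wanted
# network contributes a stats entry shaped roughly like this (values invented):
#
#   {"free_count": 12,
#    "reserved_count": 4,
#    "map": "XX..........X..X",
#    "external_reservations": "192.0.2.1, 192.0.2.15"}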
15779 class LUNetworkQuery(NoHooksLU):
15780 """Logical unit for querying networks.
15785 def CheckArguments(self):
15786 self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
15787 self.op.output_fields, False)
15789 def ExpandNames(self):
15790 self.nq.ExpandNames(self)
15792 def Exec(self, feedback_fn):
15793 return self.nq.OldStyleQuery(self)
15797 class LUNetworkConnect(LogicalUnit):
15798 """Connect a network to a nodegroup
15801 HPATH = "network-connect"
15802   HTYPE = constants.HTYPE_NETWORK
15803   REQ_BGL = False
15805 def ExpandNames(self):
15806 self.network_name = self.op.network_name
15807 self.group_name = self.op.group_name
15808 self.network_mode = self.op.network_mode
15809 self.network_link = self.op.network_link
15811 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
15812 self.network = self.cfg.GetNetwork(self.network_uuid)
15813 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
15814 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15816 self.needed_locks = {
15817 locking.LEVEL_INSTANCE: [],
15818       locking.LEVEL_NODEGROUP: [self.group_uuid],
15819       }
15820 self.share_locks[locking.LEVEL_INSTANCE] = 1
15822 def DeclareLocks(self, level):
15823 if level == locking.LEVEL_INSTANCE:
15824 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15826       # Lock instances optimistically, needs verification once group lock has
15827       # been acquired
15828 self.needed_locks[locking.LEVEL_INSTANCE] = \
15829 self.cfg.GetNodeGroupInstances(self.group_uuid)
15831   def BuildHooksEnv(self):
15832     ret = dict()
15833     ret["GROUP_NAME"] = self.group_name
15834 ret["GROUP_NETWORK_NAME"] = self.network_name
15835 ret["GROUP_NETWORK_MODE"] = self.network_mode
15836 ret["GROUP_NETWORK_LINK"] = self.network_link
15839 def BuildHooksNodes(self):
15840 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
15841 return (nodes, nodes)
15844 def CheckPrereq(self):
15845     l = lambda value: ", ".join("%s: %s/%s" % (i[0], i[1], i[2])
15846                                 for i in value)
15848 if self.network is None:
15849 raise errors.OpPrereqError("Network %s does not exist" %
15850 self.network_name, errors.ECODE_INVAL)
15852 self.netparams = dict()
15853 self.netparams[constants.NIC_MODE] = self.network_mode
15854 self.netparams[constants.NIC_LINK] = self.network_link
15855 objects.NIC.CheckParameterSyntax(self.netparams)
15857 #if self.network_mode == constants.NIC_MODE_BRIDGED:
15858 # _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
15859 self.connected = False
15860 if self.network_uuid in self.group.networks:
15861 self.LogWarning("Network '%s' is already mapped to group '%s'" %
15862 (self.network_name, self.group.name))
15863       self.connected = True
15864       return
15866 pool = network.AddressPool(self.network)
15867 if self.op.conflicts_check:
15868 groupinstances = []
15869 for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
15870 groupinstances.append(self.cfg.GetInstanceInfo(n))
15871 instances = [(instance.name, idx, nic.ip)
15872 for instance in groupinstances
15873 for idx, nic in enumerate(instance.nics)
15874 if (not nic.network and pool._Contains(nic.ip))]
15876 self.LogWarning("Following occurences use IPs from network %s"
15877 " that is about to connect to nodegroup %s: %s" %
15878                         (self.network_name, self.group.name,
15879                          l(instances)))
15880 raise errors.OpPrereqError("Conflicting IPs found."
15881 " Please remove/modify"
15882 " corresponding NICs",
15883 errors.ECODE_INVAL)
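  # Editor's illustrative note (added): the conflict warning above formats each
  # offending NIC via the lambda defined at the top of CheckPrereq as
  # "name: index/ip", e.g.:
  #
  #   "inst1.example.com: 0/192.0.2.10, inst2.example.com: 1/192.0.2.11"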
15885   def Exec(self, feedback_fn):
15886     if self.connected:
15887       return
15889 self.group.networks[self.network_uuid] = self.netparams
15890 self.cfg.Update(self.group, feedback_fn)
15893 class LUNetworkDisconnect(LogicalUnit):
15894 """Disconnect a network to a nodegroup
15897 HPATH = "network-disconnect"
15898   HTYPE = constants.HTYPE_NETWORK
15899   REQ_BGL = False
15901 def ExpandNames(self):
15902 self.network_name = self.op.network_name
15903 self.group_name = self.op.group_name
15905 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
15906 self.network = self.cfg.GetNetwork(self.network_uuid)
15907 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
15908 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15910 self.needed_locks = {
15911 locking.LEVEL_INSTANCE: [],
15912       locking.LEVEL_NODEGROUP: [self.group_uuid],
15913       }
15914 self.share_locks[locking.LEVEL_INSTANCE] = 1
15916 def DeclareLocks(self, level):
15917 if level == locking.LEVEL_INSTANCE:
15918 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15920       # Lock instances optimistically, needs verification once group lock has
15921       # been acquired
15922 self.needed_locks[locking.LEVEL_INSTANCE] = \
15923 self.cfg.GetNodeGroupInstances(self.group_uuid)
15925   def BuildHooksEnv(self):
15926     ret = dict()
15927     ret["GROUP_NAME"] = self.group_name
15928 ret["GROUP_NETWORK_NAME"] = self.network_name
15931 def BuildHooksNodes(self):
15932 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
15933 return (nodes, nodes)
15936 def CheckPrereq(self):
15937     l = lambda value: ", ".join("%s: %s/%s" % (i[0], i[1], i[2])
15938                                 for i in value)
15940 self.connected = True
15941 if self.network_uuid not in self.group.networks:
15942 self.LogWarning("Network '%s' is"
15943 " not mapped to group '%s'" %
15944 (self.network_name, self.group.name))
15945       self.connected = False
15946       return
15948 if self.op.conflicts_check:
15949 groupinstances = []
15950 for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
15951 groupinstances.append(self.cfg.GetInstanceInfo(n))
15952 instances = [(instance.name, idx, nic.ip)
15953 for instance in groupinstances
15954 for idx, nic in enumerate(instance.nics)
15955 if nic.network == self.network_name]
15957 self.LogWarning("Following occurences use IPs from network %s"
15958 " that is about to disconnected from the nodegroup"
15960 (self.network_name, self.group.name,
15962 raise errors.OpPrereqError("Conflicting IPs."
15963 " Please remove/modify"
15964 " corresponding NICS",
15965 errors.ECODE_INVAL)
15967 def Exec(self, feedback_fn):
15968     if not self.connected:
15969       return
15971 del self.group.networks[self.network_uuid]
15972 self.cfg.Update(self.group, feedback_fn)
15975 #: Query type implementations
15976 _QUERY_IMPL = {
15977   constants.QR_CLUSTER: _ClusterQuery,
15978 constants.QR_INSTANCE: _InstanceQuery,
15979 constants.QR_NODE: _NodeQuery,
15980 constants.QR_GROUP: _GroupQuery,
15981 constants.QR_NETWORK: _NetworkQuery,
15982 constants.QR_OS: _OsQuery,
15983   constants.QR_EXPORT: _ExportQuery,
15984   }
15986 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15989 def _GetQueryImplementation(name):
15990 """Returns the implemtnation for a query type.
15992 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15996 return _QUERY_IMPL[name]
15998 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15999 errors.ECODE_INVAL)
16001 def _CheckForConflictingIp(lu, ip, node):
16002 """In case of conflicting ip raise error.
16005 @param ip: ip address
16007 @param node: node name
16010 (conf_net, conf_netparams) = lu.cfg.CheckIPInNodeGroup(ip, node)
16011 if conf_net is not None:
16012 raise errors.OpPrereqError("Conflicting IP found:"
16013 " %s <> %s." % (ip, conf_net),
16014 errors.ECODE_INVAL)
16016 return (None, None)