4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
import copy
import logging

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import rpc
from ganeti import runtime
from ganeti import pathutils
from ganeti import vcluster
from ganeti import network
from ganeti.masterd import iallocator

import ganeti.masterd.instance # pylint: disable=W0611
#: Instance administrative states in which the instance is considered down
INSTANCE_DOWN = [constants.ADMINST_DOWN]
#: Instance administrative states in which the instance may be online
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
#: Instance administrative states in which the instance is not running
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))
  # NOTE(review): the enclosing `class ResultWithJobs` header and the body of
  # __init__ (which presumably stores C{jobs} and the keyword arguments on the
  # instance) are not visible in this chunk -- confirm against the full file.
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcode.OpCode}
    @param jobs: A list of lists of opcode objects

    """
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  # NOTE(review): the HPATH/HTYPE/REQ_BGL class attributes and the
  # "self.op = op" assignment are not visible in this chunk of the file,
  # although code below references self.op -- confirm against the full file.

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.cfg = context.cfg
    self.glm = context.glm
    # shortcut to the lock manager's list_owned method
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging shortcuts bound to the processor's methods
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left as as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possible
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values, with the following rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    # NOTE(review): the "if self.REQ_BGL:" / "else:" lines expected around the
    # two statements below are not visible in this chunk -- confirm against
    # the full file.
    self.needed_locks = {} # Exclusive LUs don't need locks.
    raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        # NOTE(review): the "tl.CheckPrereq()" call expected here is not
        # visible in this chunk -- confirm against the full file.

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        # NOTE(review): the "tl.Exec(feedback_fn)" call and the "else:"
        # branch expected here are not visible in this chunk.
    raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None

    @return: the new Exec result, based on the previous result

    """
    # API must be kept, thus we ignore the unused argument and could
    # be a function warnings
    # pylint: disable=W0613,R0201
    # NOTE(review): the final "return lu_result" statement is not visible in
    # this chunk -- confirm against the full file.

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    # NOTE(review): an "else:" guarding this assertion is expected here but
    # is not visible in this chunk.
    assert locking.LEVEL_INSTANCE not in self.needed_locks, \
      "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    If should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we're really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    # NOTE(review): the "wanted_nodes = []" initialization and the
    # "if not primary_only:" guard around the extend() call are expected here
    # but are not visible in this chunk -- confirm against the full file.
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    # NOTE(review): an "else:" is expected before this raise; as written the
    # statement would be unconditional -- confirm against the full file.
    raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  # A C{None} HPATH means the hooks machinery never calls BuildHooksEnv /
  # BuildHooksNodes (see their docstrings in L{LogicalUnit}), hence the
  # methods below may unconditionally raise.
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
  # NOTE(review): the enclosing `class Tasklet` header and the body of
  # __init__ are not visible in this chunk of the file.
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq

  """
  def __init__(self, lu):
    # NOTE(review): the constructor body (presumably storing `lu` on the
    # instance) is not visible in this chunk -- confirm against the full file.

  def CheckPrereq(self):
    """Check prerequisites for this tasklets.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    # NOTE(review): the method body (if any) is not visible in this chunk.

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code.

    """
    raise NotImplementedError
  # NOTE(review): the enclosing `class _QueryBase` header, the FIELDS and
  # SORT_FIELD class attributes, and the "self.wanted" initialization are not
  # visible in this chunk of the file -- confirm against the full file.
  """Base for query utility classes.

  """
  #: Attribute holding field definitions

  def __init__(self, qfilter, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
                             namefield=self.SORT_FIELD)
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    # NOTE(review): the "if self.do_locking:" / "else: names = all_names"
    # lines expected around this statement are not visible in this chunk.
    names = lu.owned_locks(lock_level)

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order

    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    # NOTE(review): an "if missing:" guard is expected before this raise but
    # is not visible in this chunk.
    raise errors.OpExecError("Some items were removed before retrieving"
                             " their data: %s" % missing)

    # Return expanded names

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)
  # NOTE(review): the enclosing `def` header (presumably a no-argument helper)
  # is not visible in this chunk of the file.
  """Returns a dict declaring all lock levels shared.

  """
  return dict.fromkeys(locking.LEVELS, 1)
def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of its children!)
  @param cfg: The config object
  @returns The annotated disk copies
  @see L{rpc.AnnotateDiskParams}

  """
  return rpc.AnnotateDiskParams(instance.disk_template, devs,
                                cfg.GetInstanceDiskParams(instance))
def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
                              cur_group_uuid):
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  """
  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    # Re-derive the instance's groups and verify the locks are still valid
    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)
630 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
632 """Checks if the owned node groups are still correct for an instance.
634 @type cfg: L{config.ConfigWriter}
635 @param cfg: The cluster configuration
636 @type instance_name: string
637 @param instance_name: Instance name
638 @type owned_groups: set or frozenset
639 @param owned_groups: List of currently owned node groups
640 @type primary_only: boolean
641 @param primary_only: Whether to check node groups for only the primary node
644 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
646 if not owned_groups.issuperset(inst_groups):
647 raise errors.OpPrereqError("Instance %s's node groups changed since"
648 " locks were acquired, current groups are"
649 " are '%s', owning groups '%s'; retry the"
652 utils.CommaJoin(inst_groups),
653 utils.CommaJoin(owned_groups)),
659 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
660 """Checks if the instances in a node group are still correct.
662 @type cfg: L{config.ConfigWriter}
663 @param cfg: The cluster configuration
664 @type group_uuid: string
665 @param group_uuid: Node group UUID
666 @type owned_instances: set or frozenset
667 @param owned_instances: List of currently owned instances
670 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
671 if owned_instances != wanted_instances:
672 raise errors.OpPrereqError("Instances in node group '%s' changed since"
673 " locks were acquired, wanted '%s', have '%s';"
674 " retry the operation" %
676 utils.CommaJoin(wanted_instances),
677 utils.CommaJoin(owned_instances)),
680 return wanted_instances
def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
def _CopyLockList(names):
  """Makes a copy of a list of lock names.

  Handles L{locking.ALL_SET} correctly.

  """
  if names == locking.ALL_SET:
    # ALL_SET is a sentinel, not a list; it must be passed through unchanged
    return locking.ALL_SET
  else:
    return names[:]
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  # No names given: all cluster nodes, sorted
  return utils.NiceSort(lu.cfg.GetNodeList())
def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    # No names given: all cluster instances, sorted
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted
746 def _GetUpdatedParams(old_params, update_dict,
747 use_default=True, use_none=False):
748 """Return the new version of a parameter dictionary.
750 @type old_params: dict
751 @param old_params: old parameters
752 @type update_dict: dict
753 @param update_dict: dict containing new parameter values, or
754 constants.VALUE_DEFAULT to reset the parameter to its default
756 @param use_default: boolean
757 @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
758 values as 'to be deleted' values
759 @param use_none: boolean
760 @type use_none: whether to recognise C{None} values as 'to be
763 @return: the new parameter dictionary
766 params_copy = copy.deepcopy(old_params)
767 for key, val in update_dict.iteritems():
768 if ((use_default and val == constants.VALUE_DEFAULT) or
769 (use_none and val is None)):
775 params_copy[key] = val
def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  """
  # Only group policies may remove entries (falling back to the cluster-level
  # policy); at cluster level every key must keep a value
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
                                 errors.ECODE_INVAL)
    if key in constants.IPOLICY_ISPECS:
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_none=use_none,
                                       use_default=use_default)
    elif (not value or value == [constants.VALUE_DEFAULT] or
          value == constants.VALUE_DEFAULT):
      if group_policy:
        del ipolicy[key]
      else:
        raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                   " on the cluster" % key,
                                   errors.ECODE_INVAL)
    elif key in constants.IPOLICY_PARAMETERS:
      # FIXME: we assume all such values are float
      try:
        ipolicy[key] = float(value)
      except (TypeError, ValueError) as err:
        raise errors.OpPrereqError("Invalid value for attribute"
                                   " '%s': '%s', error: %s" %
                                   (key, value, err), errors.ECODE_INVAL)
    else:
      # FIXME: we assume all others are lists; this should be redone
      ipolicy[key] = list(value)
  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
  except errors.ConfigurationError as err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                               errors.ECODE_INVAL)
  return ipolicy
def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values

  """
  def fn(old, value):
    # Merge one sub-dict, then verify the resulting value types
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret
847 def _MergeAndVerifyHvState(op_input, obj_input):
848 """Combines the hv state from an opcode with the one of the object
850 @param op_input: The input dict from the opcode
851 @param obj_input: The input dict from the objects
852 @return: The verified and updated dict
856 invalid_hvs = set(op_input) - constants.HYPER_TYPES
858 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
859 " %s" % utils.CommaJoin(invalid_hvs),
861 if obj_input is None:
863 type_check = constants.HVSTS_PARAMETER_TYPES
864 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
869 def _MergeAndVerifyDiskState(op_input, obj_input):
870 """Combines the disk state from an opcode with the one of the object
872 @param op_input: The input dict from the opcode
873 @param obj_input: The input dict from the objects
874 @return: The verified and updated dict
877 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
879 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
880 utils.CommaJoin(invalid_dst),
882 type_check = constants.DSS_PARAMETER_TYPES
883 if obj_input is None:
885 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
887 for key, value in op_input.items())
892 def _ReleaseLocks(lu, level, names=None, keep=None):
893 """Releases locks owned by an LU.
895 @type lu: L{LogicalUnit}
896 @param level: Lock level
897 @type names: list or None
898 @param names: Names of locks to release
899 @type keep: list or None
900 @param keep: Names of locks to retain
903 assert not (keep is not None and names is not None), \
904 "Only one of the 'names' and the 'keep' parameters can be given"
906 if names is not None:
907 should_release = names.__contains__
909 should_release = lambda name: name not in keep
911 should_release = None
913 owned = lu.owned_locks(level)
915 # Not owning any lock at this level, do nothing
922 # Determine which locks to release
924 if should_release(name):
929 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
931 # Release just some locks
932 lu.glm.release(level, names=release)
934 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
937 lu.glm.release(level)
939 assert not lu.glm.is_owned(level), "No locks should be owned"
942 def _MapInstanceDisksToNodes(instances):
943 """Creates a map from (node, volume) to instance name.
945 @type instances: list of L{objects.Instance}
946 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
949 return dict(((node, vol), inst.name)
950 for inst in instances
951 for (node, vols) in inst.MapLVsByNode().items()
955 def _RunPostHook(lu, node_name):
956 """Runs the post-hook for an opcode on a single node.
959 hm = lu.proc.BuildHooksManager(lu)
961 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
962 except Exception, err: # pylint: disable=W0703
963 lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set
  @param selected: the fields requested by the caller
  @raise errors.OpPrereqError: if any selected field is unknown

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  @raise errors.OpPrereqError: if any global hypervisor parameter is used

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1000 def _CheckNodeOnline(lu, node, msg=None):
1001 """Ensure that a given node is online.
1003 @param lu: the LU on behalf of which we make the check
1004 @param node: the node to check
1005 @param msg: if passed, should be a message to replace the default one
1006 @raise errors.OpPrereqError: if the node is offline
1010 msg = "Can't use offline node"
1011 if lu.cfg.GetNodeInfo(node).offline:
1012 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1015 def _CheckNodeNotDrained(lu, node):
1016 """Ensure that a given node is not drained.
1018 @param lu: the LU on behalf of which we make the check
1019 @param node: the node to check
1020 @raise errors.OpPrereqError: if the node is drained
1023 if lu.cfg.GetNodeInfo(node).drained:
1024 raise errors.OpPrereqError("Can't use drained node %s" % node,
1028 def _CheckNodeVmCapable(lu, node):
1029 """Ensure that a given node is vm capable.
1031 @param lu: the LU on behalf of which we make the check
1032 @param node: the node to check
1033 @raise errors.OpPrereqError: if the node is not vm capable
1036 if not lu.cfg.GetNodeInfo(node).vm_capable:
1037 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)
def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    # The instance must not be running: double-check with the primary node,
    # unless it is offline and thus unreachable
    pnode = instance.primary_node
    if not lu.cfg.GetNodeInfo(pnode).offline:
      ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
      ins_l.Raise("Can't contact node %s for instance information" % pnode,
                  prereq=True, ecode=errors.ECODE_ENVIRON)
      if instance.name in ins_l.payload:
        raise errors.OpPrereqError("Instance %s is running, %s" %
                                   (instance.name, msg), errors.ECODE_STATE)
    else:
      lu.LogWarning("Primary node offline, ignoring check that instance"
                    " is down")
def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
      not just 'disk')
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or an error string describing the violated bound

  """
  # "auto" and unset values are never range-checked
  if value in [None, constants.VALUE_AUTO]:
    return None
  # Missing policy entries default to the value itself, i.e. no constraint
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if min_v <= value <= max_v:
    return None
  if qualifier:
    fqn = "%s/%s" % (name, qualifier)
  else:
    fqn = name
  return ("%s value %s is not in range [%s, %s]" %
          (fqn, value, min_v, max_v))
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list of no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]

  # Use a list comprehension rather than filter(None, ...): filter() returns
  # a lazy iterator on Python 3, while callers (and the docstring) expect an
  # actual list of violation messages.
  results = (_compute_fn(name, qualifier, ipolicy, value)
             for (name, qualifier, value) in test_settings)
  return [res for res in results if res]
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  be = instance.beparams
  disks = instance.disks
  return _compute_fn(ipolicy,
                     be.get(constants.BE_MAXMEM, None),
                     be.get(constants.BE_VCPUS, None),
                     len(disks),
                     len(instance.nics),
                     [disk.size for disk in disks],
                     be.get(constants.BE_SPINDLE_USE, None))
def _ComputeIPolicyInstanceSpecViolation(
  ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  spec = instance_spec.get
  return _compute_fn(ipolicy,
                     spec(constants.ISPEC_MEM_SIZE, None),
                     spec(constants.ISPEC_CPU_COUNT, None),
                     spec(constants.ISPEC_DISK_COUNT, 0),
                     spec(constants.ISPEC_NIC_COUNT, 0),
                     spec(constants.ISPEC_DISK_SIZE, []),
                     spec(constants.ISPEC_SPINDLE_USE, None))
def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  # Moving within the same group can never introduce a policy violation
  if current_group == target_group:
    return []
  return _compute_fn(ipolicy, instance)
def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param lu: the LU on behalf of which we make the check
  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if not res:
    return
  msg = ("Instance does not meet target node group's (%s) instance"
         " policy: %s") % (node.group, utils.CommaJoin(res))
  if ignore:
    # Caller asked to proceed despite the violation; just report it.
    lu.LogWarning(msg)
  else:
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violates the new ipolicy but
      did not violate the old one

  """
  new_violations = _ComputeViolatingInstances(new_ipolicy, instances)
  old_violations = _ComputeViolatingInstances(old_ipolicy, instances)
  return new_violations - old_violations
1292 def _ExpandItemName(fn, name, kind):
1293 """Expand an item name.
1295 @param fn: the function to use for expansion
1296 @param name: requested item name
1297 @param kind: text description ('Node' or 'Instance')
1298 @return: the resolved (full) name
1299 @raise errors.OpPrereqError: if the item is not found
1302 full_name = fn(name)
1303 if full_name is None:
1304 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
def _ExpandNodeName(cfg, name):
  """Expand a short node name into the full node name.

  @see: L{_ExpandItemName}

  """
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
def _ExpandInstanceName(cfg, name):
  """Expand a short instance name into the full instance name.

  @see: L{_ExpandItemName}

  """
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1319 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1320 minmem, maxmem, vcpus, nics, disk_template, disks,
1321 bep, hvp, hypervisor_name, tags):
1322 """Builds instance related env variables for hooks
1324 This builds the hook environment from individual variables.
1327 @param name: the name of the instance
1328 @type primary_node: string
1329 @param primary_node: the name of the instance's primary node
1330 @type secondary_nodes: list
1331 @param secondary_nodes: list of secondary nodes as strings
1332 @type os_type: string
1333 @param os_type: the name of the instance's OS
1334 @type status: string
1335 @param status: the desired status of the instance
1336 @type minmem: string
1337 @param minmem: the minimum memory size of the instance
1338 @type maxmem: string
1339 @param maxmem: the maximum memory size of the instance
1341 @param vcpus: the count of VCPUs the instance has
1343 @param nics: list of tuples (ip, mac, mode, link, network) representing
1344 the NICs the instance has
1345 @type disk_template: string
1346 @param disk_template: the disk template of the instance
1348 @param disks: the list of (size, mode) pairs
1350 @param bep: the backend parameters for the instance
1352 @param hvp: the hypervisor parameters for the instance
1353 @type hypervisor_name: string
1354 @param hypervisor_name: the hypervisor for the instance
1356 @param tags: list of instance tags as strings
1358 @return: the hook environment for this instance
1363 "INSTANCE_NAME": name,
1364 "INSTANCE_PRIMARY": primary_node,
1365 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1366 "INSTANCE_OS_TYPE": os_type,
1367 "INSTANCE_STATUS": status,
1368 "INSTANCE_MINMEM": minmem,
1369 "INSTANCE_MAXMEM": maxmem,
1370 # TODO(2.7) remove deprecated "memory" value
1371 "INSTANCE_MEMORY": maxmem,
1372 "INSTANCE_VCPUS": vcpus,
1373 "INSTANCE_DISK_TEMPLATE": disk_template,
1374 "INSTANCE_HYPERVISOR": hypervisor_name,
1377 nic_count = len(nics)
1378 for idx, (ip, mac, mode, link, network) in enumerate(nics):
1381 env["INSTANCE_NIC%d_IP" % idx] = ip
1382 env["INSTANCE_NIC%d_MAC" % idx] = mac
1383 env["INSTANCE_NIC%d_MODE" % idx] = mode
1384 env["INSTANCE_NIC%d_LINK" % idx] = link
1385 env["INSTANCE_NIC%d_NETWORK" % idx] = network
1386 if mode == constants.NIC_MODE_BRIDGED:
1387 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1391 env["INSTANCE_NIC_COUNT"] = nic_count
1394 disk_count = len(disks)
1395 for idx, (size, mode) in enumerate(disks):
1396 env["INSTANCE_DISK%d_SIZE" % idx] = size
1397 env["INSTANCE_DISK%d_MODE" % idx] = mode
1401 env["INSTANCE_DISK_COUNT"] = disk_count
1406 env["INSTANCE_TAGS"] = " ".join(tags)
1408 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1409 for key, value in source.items():
1410 env["INSTANCE_%s_%s" % (kind, key)] = value
1415 def _NICListToTuple(lu, nics):
1416 """Build a list of nic information tuples.
1418 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1419 value in LUInstanceQueryData.
1421 @type lu: L{LogicalUnit}
1422 @param lu: the logical unit on whose behalf we execute
1423 @type nics: list of L{objects.NIC}
1424 @param nics: list of nics to convert to hooks tuples
1428 cluster = lu.cfg.GetClusterInfo()
1432 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1433 mode = filled_params[constants.NIC_MODE]
1434 link = filled_params[constants.NIC_LINK]
1435 network = nic.network
1436 hooks_nics.append((ip, mac, mode, link, network))
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
    }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1480 def _AdjustCandidatePool(lu, exceptions):
1481 """Adjust the candidate pool after node operations.
1484 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1486 lu.LogInfo("Promoted nodes to master candidate role: %s",
1487 utils.CommaJoin(node.name for node in mod_list))
1488 for name in mod_list:
1489 lu.context.ReaddNode(name)
1490 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1492 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1496 def _DecideSelfPromotion(lu, exceptions=None):
1497 """Decide whether I should promote myself as a master candidate.
1500 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1501 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1502 # the new node will increase mc_max with one, so:
1503 mc_should = min(mc_should + 1, cp_size)
1504 return mc_now < mc_should
def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances who violates given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: iterable of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset(inst.name for inst in instances
                   if _ComputeIPolicyInstanceViolation(ipolicy, inst))
1520 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1521 """Check that the brigdes needed by a list of nics exist.
1524 cluster = lu.cfg.GetClusterInfo()
1525 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1526 brlist = [params[constants.NIC_LINK] for params in paramslist
1527 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1529 result = lu.rpc.call_bridges_exist(target_node, brlist)
1530 result.Raise("Error checking bridges on destination node '%s'" %
1531 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  @param node: if passed, check on this node instead of the instance's
      primary node

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    # OS without variants: passing a variant is an error
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)
  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1567 def _GetNodeInstancesInner(cfg, fn):
1568 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage: storage.FileStorage wants a list of
  # storage directories
  if storage_type == constants.ST_FILE:
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  """Returns the indices of the instance's disks that are faulty on a node.

  @param cfg: the cluster configuration
  @param rpc_runner: the RPC runner used to query the node
  @param instance: the instance whose disks are checked
  @param node_name: the node on which the disk status is queried
  @param prereq: whether failures raise OpPrereqError instead of OpExecError
  @return: list of disk indices whose local disk status is faulty

  """
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
                                                                instance))
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
  then the LU's opcode's iallocator slot is filled with the cluster-wide
  default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  ialloc = getattr(lu.op, iallocator_slot, None)

  if node is not None and ialloc is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  if ((node is None and ialloc is None) or
      ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if not default_iallocator:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)
    setattr(lu.op, iallocator_slot, default_iallocator)
1661 def _GetDefaultIAllocator(cfg, ialloc):
1662 """Decides on which iallocator to use.
1664 @type cfg: L{config.ConfigWriter}
1665 @param cfg: Cluster configuration object
1666 @type ialloc: string or None
1667 @param ialloc: Iallocator specified in opcode
1669 @return: Iallocator name
1673 # Use default iallocator
1674 ialloc = cfg.GetDefaultIAllocator()
1677 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1678 " opcode nor as a cluster-wide default",
def _CheckHostnameSane(lu, name):
  """Ensures that a given hostname resolves to a 'sane' name.

  The given name is required to be a prefix of the resolved hostname,
  to prevent accidental mismatches.

  @param lu: the logical unit on behalf of which we're checking
  @param name: the name we should resolve and check
  @return: the resolved hostname object

  """
  hostname = netutils.GetHostname(name=name)
  if hostname.name != name:
    lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
  if not utils.MatchNameComponent(name, [hostname.name]):
    raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                " same as given hostname '%s'") %
                               (hostname.name, name), errors.ECODE_INVAL)
  return hostname
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Hooks run only on the master node
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name
def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file
  @return: a tuple of (error type or None, message or None)

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  # "except Exception as err" instead of the legacy "except Exception, err":
  # works on Python 2.6+ and keeps the module forward-compatible with
  # Python 3, where the comma form is a syntax error.
  except Exception as err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1827 def _GetAllHypervisorParameters(cluster, instances):
1828 """Compute the set of all hypervisor parameters.
1830 @type cluster: L{objects.Cluster}
1831 @param cluster: the cluster object
1832 @param instances: list of L{objects.Instance}
1833 @param instances: additional instances from which to obtain parameters
1834 @rtype: list of (origin, hypervisor, parameters)
1835 @return: a list with all parameters found, indicating the hypervisor they
1836 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1841 for hv_name in cluster.enabled_hypervisors:
1842 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1844 for os_name, os_hvp in cluster.os_hvp.items():
1845 for hv_name, hv_params in os_hvp.items():
1847 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1848 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1850 # TODO: collapse identical parameter values in a single one
1851 for instance in instances:
1852 if instance.hvparams:
1853 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1854 cluster.FillHV(instance)))
1859 class _VerifyErrors(object):
1860 """Mix-in for cluster/group verify LUs.
1862 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1863 self.op and self._feedback_fn to be available.)
1867 ETYPE_FIELD = "code"
1868 ETYPE_ERROR = "ERROR"
1869 ETYPE_WARNING = "WARNING"
1871 def _Error(self, ecode, item, msg, *args, **kwargs):
1872 """Format an error message.
1874 Based on the opcode's error_codes parameter, either format a
1875 parseable error code, or a simpler error string.
1877 This must be called only from Exec and functions called from Exec.
1880 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1881 itype, etxt, _ = ecode
1882 # first complete the msg
1885 # then format the whole message
1886 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1887 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1893 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1894 # and finally report it via the feedback_fn
1895 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1897 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1898 """Log an error message if the passed condition is True.
1902 or self.op.debug_simulate_errors) # pylint: disable=E1101
1904 # If the error code is in the list of ignored errors, demote the error to a
1906 (_, etxt, _) = ecode
1907 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1908 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1911 self._Error(ecode, *args, **kwargs)
1913 # do not mark the operation as failed for WARN cases only
1914 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1915 self.bad = self.bad or cond
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      # Verify a single group only; no global config job needed
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend(
      [opcodes.OpClusterVerifyGroup(group_name=group,
                                    ignore_errors=self.op.ignore_errors,
                                    depends=depends_fn())]
      for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        # only the group verification opcode supports skip_checks
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = False

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError as err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
    self.share_locks = _ShareAll()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Retrieve all information
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in pathutils.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad
2057 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2058 """Verifies the status of a node group.
2061 HPATH = "cluster-verify"
2062 HTYPE = constants.HTYPE_CLUSTER
2065 _HOOKS_INDENT_RE = re.compile("^", re.M)
2067 class NodeImage(object):
2068 """A class representing the logical and physical status of a node.
2071 @ivar name: the node name to which this object refers
2072 @ivar volumes: a structure as returned from
2073 L{ganeti.backend.GetVolumeList} (runtime)
2074 @ivar instances: a list of running instances (runtime)
2075 @ivar pinst: list of configured primary instances (config)
2076 @ivar sinst: list of configured secondary instances (config)
2077 @ivar sbp: dictionary of {primary-node: list of instances} for all
2078 instances for which this node is secondary (config)
2079 @ivar mfree: free memory, as reported by hypervisor (runtime)
2080 @ivar dfree: free disk, as reported by the node (runtime)
2081 @ivar offline: the offline status (config)
2082 @type rpc_fail: boolean
2083 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2084 not whether the individual keys were correct) (runtime)
2085 @type lvm_fail: boolean
2086 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2087 @type hyp_fail: boolean
2088 @ivar hyp_fail: whether the RPC call didn't return the instance list
2089 @type ghost: boolean
2090 @ivar ghost: whether this is a known node or not (config)
2091 @type os_fail: boolean
2092 @ivar os_fail: whether the RPC call didn't return valid OS data
2094 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2095 @type vm_capable: boolean
2096 @ivar vm_capable: whether the node can host instances
2099 def __init__(self, offline=False, name=None, vm_capable=True):
2108 self.offline = offline
2109 self.vm_capable = vm_capable
2110 self.rpc_fail = False
2111 self.lvm_fail = False
2112 self.hyp_fail = False
2114 self.os_fail = False
2117 def ExpandNames(self):
2118 # This raises errors.OpPrereqError on its own:
2119 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2121 # Get instances in node group; this is unsafe and needs verification later
2123 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2125 self.needed_locks = {
2126 locking.LEVEL_INSTANCE: inst_names,
2127 locking.LEVEL_NODEGROUP: [self.group_uuid],
2128 locking.LEVEL_NODE: [],
2131 self.share_locks = _ShareAll()
2133 def DeclareLocks(self, level):
2134 if level == locking.LEVEL_NODE:
2135 # Get members of node group; this is unsafe and needs verification later
2136 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2138 all_inst_info = self.cfg.GetAllInstancesInfo()
2140 # In Exec(), we warn about mirrored instances that have primary and
2141 # secondary living in separate node groups. To fully verify that
2142 # volumes for these instances are healthy, we will need to do an
2143 # extra call to their secondaries. We ensure here those nodes will
2145 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2146 # Important: access only the instances whose lock is owned
2147 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2148 nodes.update(all_inst_info[inst].secondary_nodes)
2150 self.needed_locks[locking.LEVEL_NODE] = nodes
2152 def CheckPrereq(self):
    # Re-check, with locks held, that every node and instance of the group
    # is actually locked; group membership may have changed between
    # ExpandNames and lock acquisition.
2153 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2154 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2156 group_nodes = set(self.group_info.members)
2158 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2161 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2163 unlocked_instances = \
2164 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2167 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2168 utils.CommaJoin(unlocked_nodes),
2171 if unlocked_instances:
2172 raise errors.OpPrereqError("Missing lock for instances: %s" %
2173 utils.CommaJoin(unlocked_instances),
    # Cache cluster-wide and group-restricted views used throughout Exec().
2176 self.all_node_info = self.cfg.GetAllNodesInfo()
2177 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2179 self.my_node_names = utils.NiceSort(group_nodes)
2180 self.my_inst_names = utils.NiceSort(group_instances)
2182 self.my_node_info = dict((name, self.all_node_info[name])
2183 for name in self.my_node_names)
2185 self.my_inst_info = dict((name, self.all_inst_info[name])
2186 for name in self.my_inst_names)
2188 # We detect here the nodes that will need the extra RPC calls for verifying
2189 # split LV volumes; they should be locked.
2190 extra_lv_nodes = set()
2192 for inst in self.my_inst_info.values():
2193 if inst.disk_template in constants.DTS_INT_MIRROR:
2194 for nname in inst.all_nodes:
2195 if self.all_node_info[nname].group != self.group_uuid:
2196 extra_lv_nodes.add(nname)
2198 unlocked_lv_nodes = \
2199 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2201 if unlocked_lv_nodes:
2202 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2203 utils.CommaJoin(unlocked_lv_nodes),
2205 self.extra_lv_nodes = list(extra_lv_nodes)
2207 def _VerifyNode(self, ninfo, nresult):
2208 """Perform some basic validation on data returned from a node.
2210 - check the result data structure is well formed and has all the
2212 - check ganeti version
2214 @type ninfo: L{objects.Node}
2215 @param ninfo: the node to check
2216 @param nresult: the results from the node
2218 @return: whether overall this call was successful (and we can expect
2219 reasonable values in the response)
2223 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2225 # main result, nresult should be a non-empty dict
2226 test = not nresult or not isinstance(nresult, dict)
2227 _ErrorIf(test, constants.CV_ENODERPC, node,
2228 "unable to verify node: no data returned")
2232 # compares ganeti version
2233 local_version = constants.PROTOCOL_VERSION
2234 remote_version = nresult.get("version", None)
2235 test = not (remote_version and
2236 isinstance(remote_version, (list, tuple)) and
2237 len(remote_version) == 2)
2238 _ErrorIf(test, constants.CV_ENODERPC, node,
2239 "connection to node returned invalid data")
    # The protocol version (element 0) must match exactly; mismatch is fatal
    # for further checks on this node.
2243 test = local_version != remote_version[0]
2244 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2245 "incompatible protocol versions: master %s,"
2246 " node %s", local_version, remote_version[0])
2250 # node seems compatible, we can actually try to look into its results
2252 # full package version
    # A differing release version (element 1) is only a warning, not an error.
2253 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2254 constants.CV_ENODEVERSION, node,
2255 "software version mismatch: master %s, node %s",
2256 constants.RELEASE_VERSION, remote_version[1],
2257 code=self.ETYPE_WARNING)
2259 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2260 if ninfo.vm_capable and isinstance(hyp_result, dict):
2261 for hv_name, hv_result in hyp_result.iteritems():
    # A non-None per-hypervisor result is the error message from the node.
2262 test = hv_result is not None
2263 _ErrorIf(test, constants.CV_ENODEHV, node,
2264 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2266 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2267 if ninfo.vm_capable and isinstance(hvp_result, list):
2268 for item, hv_name, hv_result in hvp_result:
2269 _ErrorIf(True, constants.CV_ENODEHV, node,
2270 "hypervisor %s parameter verify failure (source %s): %s",
2271 hv_name, item, hv_result)
2273 test = nresult.get(constants.NV_NODESETUP,
2274 ["Missing NODESETUP results"])
2275 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2280 def _VerifyNodeTime(self, ninfo, nresult,
2281 nvinfo_starttime, nvinfo_endtime):
2282 """Check the node time.
2284 @type ninfo: L{objects.Node}
2285 @param ninfo: the node to check
2286 @param nresult: the remote results for the node
2287 @param nvinfo_starttime: the start time of the RPC call
2288 @param nvinfo_endtime: the end time of the RPC call
2292 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2294 ntime = nresult.get(constants.NV_TIME, None)
2296 ntime_merged = utils.MergeTime(ntime)
2297 except (ValueError, TypeError):
2298 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
    # The node clock is allowed to drift within [start - skew, end + skew];
    # the RPC start/end window accounts for the call's own latency.
2301 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2302 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2303 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2304 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2308 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2309 "Node time diverges by at least %s from master node time",
2312 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2313 """Check the node LVM results.
2315 @type ninfo: L{objects.Node}
2316 @param ninfo: the node to check
2317 @param nresult: the remote results for the node
2318 @param vg_name: the configured VG name
2325 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2327 # checks vg existence and size > 20G
2328 vglist = nresult.get(constants.NV_VGLIST, None)
2330 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
    # CheckVolumeGroupSize returns an error string (or None) describing why
    # the VG is unusable; any non-empty value is reported verbatim.
2332 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2333 constants.MIN_VG_SIZE)
2334 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2337 pvlist = nresult.get(constants.NV_PVLIST, None)
2338 test = pvlist is None
2339 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2341 # check that ':' is not present in PV names, since it's a
2342 # special character for lvcreate (denotes the range of PEs to
2344 for _, pvname, owner_vg in pvlist:
2345 test = ":" in pvname
2346 _ErrorIf(test, constants.CV_ENODELVM, node,
2347 "Invalid character ':' in PV '%s' of VG '%s'",
2350 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2351 """Check the node bridges.
2353 @type ninfo: L{objects.Node}
2354 @param ninfo: the node to check
2355 @param nresult: the remote results for the node
2356 @param bridges: the expected list of bridges
2363 _ErrorIf = self._ErrorIf # pylint: disable=C0103
    # The node reports back the subset of requested bridges it is MISSING;
    # an empty list means all expected bridges exist.
2365 missing = nresult.get(constants.NV_BRIDGES, None)
2366 test = not isinstance(missing, list)
2367 _ErrorIf(test, constants.CV_ENODENET, node,
2368 "did not return valid bridge information")
2370 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2371 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2373 def _VerifyNodeUserScripts(self, ninfo, nresult):
2374 """Check the results of user scripts presence and executability on the node
2376 @type ninfo: L{objects.Node}
2377 @param ninfo: the node to check
2378 @param nresult: the remote results for the node
2383 test = not constants.NV_USERSCRIPTS in nresult
2384 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2385 "did not return user scripts information")
    # The node returns the list of scripts that are missing or not
    # executable; any non-empty list is an error.
2387 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2389 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2390 "user scripts not present or not executable: %s" %
2391 utils.CommaJoin(sorted(broken_scripts)))
2393 def _VerifyNodeNetwork(self, ninfo, nresult):
2394 """Check the node network connectivity results.
2396 @type ninfo: L{objects.Node}
2397 @param ninfo: the node to check
2398 @param nresult: the remote results for the node
2402 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2404 test = constants.NV_NODELIST not in nresult
2405 _ErrorIf(test, constants.CV_ENODESSH, node,
2406 "node hasn't returned node ssh connectivity data")
    # NV_NODELIST maps peer node name -> failure message; a non-empty dict
    # means at least one ssh check failed.
2408 if nresult[constants.NV_NODELIST]:
2409 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2410 _ErrorIf(True, constants.CV_ENODESSH, node,
2411 "ssh communication with node '%s': %s", a_node, a_msg)
2413 test = constants.NV_NODENETTEST not in nresult
2414 _ErrorIf(test, constants.CV_ENODENET, node,
2415 "node hasn't returned node tcp connectivity data")
2417 if nresult[constants.NV_NODENETTEST]:
2418 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2420 _ErrorIf(True, constants.CV_ENODENET, node,
2421 "tcp communication with node '%s': %s",
2422 anode, nresult[constants.NV_NODENETTEST][anode])
2424 test = constants.NV_MASTERIP not in nresult
2425 _ErrorIf(test, constants.CV_ENODENET, node,
2426 "node hasn't returned node master IP reachability data")
    # Unreachable master IP is phrased differently on the master node itself,
    # where it usually means the IP was never configured.
2428 if not nresult[constants.NV_MASTERIP]:
2429 if node == self.master_node:
2430 msg = "the master node cannot reach the master IP (not configured?)"
2432 msg = "cannot reach the master IP"
2433 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2435 def _VerifyInstance(self, instance, instanceconfig, node_image,
2437 """Verify an instance.
2439 This function checks to see if the required block devices are
2440 available on the instance's node.
2443 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2444 node_current = instanceconfig.primary_node
2446 node_vol_should = {}
2447 instanceconfig.MapLVsByNode(node_vol_should)
    # Check the instance against the (group-specific) instance policy.
2449 cluster = self.cfg.GetClusterInfo()
2450 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2452 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2453 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2455 for node in node_vol_should:
2456 n_img = node_image[node]
2457 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2458 # ignore missing volumes on offline or broken nodes
2460 for volume in node_vol_should[node]:
2461 test = volume not in n_img.volumes
2462 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2463 "volume %s missing on node %s", volume, node)
2465 if instanceconfig.admin_state == constants.ADMINST_UP:
2466 pri_img = node_image[node_current]
2467 test = instance not in pri_img.instances and not pri_img.offline
2468 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2469 "instance not running on its primary node %s",
    # Flatten the per-node disk status into (node, success, status, idx)
    # tuples for uniform iteration below.
2472 diskdata = [(nname, success, status, idx)
2473 for (nname, disks) in diskstatus.items()
2474 for idx, (success, status) in enumerate(disks)]
2476 for nname, success, bdev_status, idx in diskdata:
2477 # the 'ghost node' construction in Exec() ensures that we have a
2479 snode = node_image[nname]
2480 bad_snode = snode.ghost or snode.offline
2481 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2482 not success and not bad_snode,
2483 constants.CV_EINSTANCEFAULTYDISK, instance,
2484 "couldn't retrieve status for disk/%s on %s: %s",
2485 idx, nname, bdev_status)
2486 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2487 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2488 constants.CV_EINSTANCEFAULTYDISK, instance,
2489 "disk/%s on %s is faulty", idx, nname)
2491 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2492 """Verify if there are any unknown volumes in the cluster.
2494 The .os, .swap and backup volumes are ignored. All other volumes are
2495 reported as unknown.
2497 @type reserved: L{ganeti.utils.FieldSet}
2498 @param reserved: a FieldSet of reserved volume names
2501 for node, n_img in node_image.items():
2502 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2503 self.all_node_info[node].group != self.group_uuid):
2504 # skip non-healthy nodes
    # A volume is orphaned if no instance maps it to this node and its name
    # does not match any reserved pattern.
2506 for volume in n_img.volumes:
2507 test = ((node not in node_vol_should or
2508 volume not in node_vol_should[node]) and
2509 not reserved.Matches(volume))
2510 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2511 "volume %s is unknown", volume)
2513 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2514 """Verify N+1 Memory Resilience.
2516 Check that if one single node dies we can still start all the
2517 instances it was primary for.
2520 cluster_info = self.cfg.GetClusterInfo()
2521 for node, n_img in node_image.items():
2522 # This code checks that every node which is now listed as
2523 # secondary has enough memory to host all instances it is
2524 # supposed to should a single other node in the cluster fail.
2525 # FIXME: not ready for failover to an arbitrary node
2526 # FIXME: does not support file-backed instances
2527 # WARNING: we currently take into account down instances as well
2528 # as up ones, considering that even if they're down someone
2529 # might want to start them even in the event of a node failure.
2530 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2531 # we're skipping nodes marked offline and nodes in other groups from
2532 # the N+1 warning, since most likely we don't have good memory
2533 # information from them; we already list instances living on such
2534 # nodes, and that's enough warning
2536 #TODO(dynmem): also consider ballooning out other instances
    # n_img.sbp maps primary node -> instances having this node as secondary;
    # sum the minimum memory of auto-balanced instances per primary.
2537 for prinode, instances in n_img.sbp.items():
2539 for instance in instances:
2540 bep = cluster_info.FillBE(instance_cfg[instance])
2541 if bep[constants.BE_AUTO_BALANCE]:
2542 needed_mem += bep[constants.BE_MINMEM]
2543 test = n_img.mfree < needed_mem
2544 self._ErrorIf(test, constants.CV_ENODEN1, node,
2545 "not enough memory to accomodate instance failovers"
2546 " should node %s fail (%dMiB needed, %dMiB available)",
2547 prinode, needed_mem, n_img.mfree)
2550 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2551 (files_all, files_opt, files_mc, files_vm)):
2552 """Verifies file checksums collected from all nodes.
2554 @param errorif: Callback for reporting errors
2555 @param nodeinfo: List of L{objects.Node} objects
2556 @param master_node: Name of master node
2557 @param all_nvinfo: RPC results
2560 # Define functions determining which nodes to consider for a file
    # Each file category pairs with a predicate selecting the nodes that
    # must hold files of that category (all / master candidates / vm-capable).
2563 (files_mc, lambda node: (node.master_candidate or
2564 node.name == master_node)),
2565 (files_vm, lambda node: node.vm_capable),
2568 # Build mapping from filename to list of nodes which should have the file
2570 for (files, fn) in files2nodefn:
2572 filenodes = nodeinfo
2574 filenodes = filter(fn, nodeinfo)
2575 nodefiles.update((filename,
2576 frozenset(map(operator.attrgetter("name"), filenodes)))
2577 for filename in files)
2579 assert set(nodefiles) == (files_all | files_mc | files_vm)
2581 fileinfo = dict((filename, {}) for filename in nodefiles)
2582 ignore_nodes = set()
2584 for node in nodeinfo:
2586 ignore_nodes.add(node.name)
2589 nresult = all_nvinfo[node.name]
2591 if nresult.fail_msg or not nresult.payload:
    # Node paths come back virtualized; localize them before comparing
    # against the expected (local) filenames.
2594 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2595 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2596 for (key, value) in fingerprints.items())
2599 test = not (node_files and isinstance(node_files, dict))
2600 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2601 "Node did not return file checksum data")
2603 ignore_nodes.add(node.name)
2606 # Build per-checksum mapping from filename to nodes having it
2607 for (filename, checksum) in node_files.items():
2608 assert filename in nodefiles
2609 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2611 for (filename, checksums) in fileinfo.items():
2612 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2614 # Nodes having the file
2615 with_file = frozenset(node_name
2616 for nodes in fileinfo[filename].values()
2617 for node_name in nodes) - ignore_nodes
2619 expected_nodes = nodefiles[filename] - ignore_nodes
2621 # Nodes missing file
2622 missing_file = expected_nodes - with_file
    # Optional files must be consistent: present everywhere or nowhere.
2624 if filename in files_opt:
2626 errorif(missing_file and missing_file != expected_nodes,
2627 constants.CV_ECLUSTERFILECHECK, None,
2628 "File %s is optional, but it must exist on all or no"
2629 " nodes (not found on %s)",
2630 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2632 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2633 "File %s is missing from node(s) %s", filename,
2634 utils.CommaJoin(utils.NiceSort(missing_file)))
2636 # Warn if a node has a file it shouldn't
2637 unexpected = with_file - expected_nodes
2639 constants.CV_ECLUSTERFILECHECK, None,
2640 "File %s should not exist on node(s) %s",
2641 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2643 # See if there are multiple versions of the file
2644 test = len(checksums) > 1
2646 variants = ["variant %s on %s" %
2647 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2648 for (idx, (checksum, nodes)) in
2649 enumerate(sorted(checksums.items()))]
2653 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2654 "File %s found with %s different checksums (%s)",
2655 filename, len(checksums), "; ".join(variants))
2657 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2659 """Verifies the node DRBD status.
2661 @type ninfo: L{objects.Node}
2662 @param ninfo: the node to check
2663 @param nresult: the remote results for the node
2664 @param instanceinfo: the dict of instances
2665 @param drbd_helper: the configured DRBD usermode helper
2666 @param drbd_map: the DRBD map as returned by
2667 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2671 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2674 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2675 test = (helper_result is None)
2676 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2677 "no drbd usermode helper returned")
2679 status, payload = helper_result
2681 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2682 "drbd usermode helper check unsuccessful: %s", payload)
2683 test = status and (payload != drbd_helper)
2684 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2685 "wrong drbd usermode helper: %s", payload)
2687 # compute the DRBD minors
    # Build minor -> (instance name, should-be-active) from the config map,
    # flagging minors that reference instances no longer in the config.
2689 for minor, instance in drbd_map[node].items():
2690 test = instance not in instanceinfo
2691 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2692 "ghost instance '%s' in temporary DRBD map", instance)
2693 # ghost instance should not be running, but otherwise we
2694 # don't give double warnings (both ghost instance and
2695 # unallocated minor in use)
2697 node_drbd[minor] = (instance, False)
2699 instance = instanceinfo[instance]
2700 node_drbd[minor] = (instance.name,
2701 instance.admin_state == constants.ADMINST_UP)
2703 # and now check them
2704 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2705 test = not isinstance(used_minors, (tuple, list))
2706 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2707 "cannot parse drbd status file: %s", str(used_minors))
2709 # we cannot check drbd status
    # Cross-check both directions: expected-but-inactive minors and
    # in-use-but-unallocated minors.
2712 for minor, (iname, must_exist) in node_drbd.items():
2713 test = minor not in used_minors and must_exist
2714 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2715 "drbd minor %d of instance %s is not active", minor, iname)
2716 for minor in used_minors:
2717 test = minor not in node_drbd
2718 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2719 "unallocated drbd minor %d is in use", minor)
2721 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2722 """Builds the node OS structures.
2724 @type ninfo: L{objects.Node}
2725 @param ninfo: the node to check
2726 @param nresult: the remote results for the node
2727 @param nimg: the node image object
2731 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2733 remote_os = nresult.get(constants.NV_OSLIST, None)
    # Each OS entry must be a 7-element list:
    # (name, path, status, diagnose, variants, parameters, api_versions).
2734 test = (not isinstance(remote_os, list) or
2735 not compat.all(isinstance(v, list) and len(v) == 7
2736 for v in remote_os))
2738 _ErrorIf(test, constants.CV_ENODEOS, node,
2739 "node hasn't returned valid OS data")
2748 for (name, os_path, status, diagnose,
2749 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2751 if name not in os_dict:
2754 # parameters is a list of lists instead of list of tuples due to
2755 # JSON lacking a real tuple type, fix it:
2756 parameters = [tuple(v) for v in parameters]
2757 os_dict[name].append((os_path, status, diagnose,
2758 set(variants), set(parameters), set(api_ver)))
2760 nimg.oslist = os_dict
2762 def _VerifyNodeOS(self, ninfo, nimg, base):
2763 """Verifies the node OS list.
2765 @type ninfo: L{objects.Node}
2766 @param ninfo: the node to check
2767 @param nimg: the node image object
2768 @param base: the 'template' node we match against (e.g. from the master)
2772 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2774 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2776 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2777 for os_name, os_data in nimg.oslist.items():
2778 assert os_data, "Empty OS status for OS %s?!" % os_name
    # Only the first entry counts; duplicates shadow it and are reported.
2779 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2780 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2781 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2782 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2783 "OS '%s' has multiple entries (first one shadows the rest): %s",
2784 os_name, utils.CommaJoin([v[0] for v in os_data]))
2785 # comparisons with the 'base' image
2786 test = os_name not in base.oslist
2787 _ErrorIf(test, constants.CV_ENODEOS, node,
2788 "Extra OS %s not present on reference node (%s)",
2792 assert base.oslist[os_name], "Base node has empty OS status?"
2793 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2795 # base OS is invalid, skipping
    # Compare API versions, variants and parameters against the reference
    # node; any difference is an error for this OS.
2797 for kind, a, b in [("API version", f_api, b_api),
2798 ("variants list", f_var, b_var),
2799 ("parameters", beautify_params(f_param),
2800 beautify_params(b_param))]:
2801 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2802 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2803 kind, os_name, base.name,
2804 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2806 # check any missing OSes
2807 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2808 _ErrorIf(missing, constants.CV_ENODEOS, node,
2809 "OSes present on reference node %s but missing on this node: %s",
2810 base.name, utils.CommaJoin(missing))
2812 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
2813 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2815 @type ninfo: L{objects.Node}
2816 @param ninfo: the node to check
2817 @param nresult: the remote results for the node
2818 @type is_master: bool
2819 @param is_master: Whether node is the master node
    # Only the master node is asked for (and may report) forbidden file
    # storage paths; any other node returning them is itself an error.
2825 (constants.ENABLE_FILE_STORAGE or
2826 constants.ENABLE_SHARED_FILE_STORAGE)):
2828 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
2830 # This should never happen
2831 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
2832 "Node did not return forbidden file storage paths")
2834 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
2835 "Found forbidden file storage paths: %s",
2836 utils.CommaJoin(fspaths))
2838 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
2839 constants.CV_ENODEFILESTORAGEPATHS, node,
2840 "Node should not have returned forbidden file storage"
2843 def _VerifyOob(self, ninfo, nresult):
2844 """Verifies out of band functionality of a node.
2846 @type ninfo: L{objects.Node}
2847 @param ninfo: the node to check
2848 @param nresult: the remote results for the node
2852 # We just have to verify the paths on master and/or master candidates
2853 # as the oob helper is invoked on the master
2854 if ((ninfo.master_candidate or ninfo.master_capable) and
2855 constants.NV_OOB_PATHS in nresult):
    # Each path result is either None (ok) or an error string to report.
2856 for path_result in nresult[constants.NV_OOB_PATHS]:
2857 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2859 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2860 """Verifies and updates the node volume data.
2862 This function will update a L{NodeImage}'s internal structures
2863 with data from the remote call.
2865 @type ninfo: L{objects.Node}
2866 @param ninfo: the node to check
2867 @param nresult: the remote results for the node
2868 @param nimg: the node image object
2869 @param vg_name: the configured VG name
2873 _ErrorIf = self._ErrorIf # pylint: disable=C0103
    # Assume failure until a valid LV dict is received; a string payload is
    # an LVM error message, anything else non-dict is an RPC failure.
2875 nimg.lvm_fail = True
2876 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2879 elif isinstance(lvdata, basestring):
2880 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2881 utils.SafeEncode(lvdata))
2882 elif not isinstance(lvdata, dict):
2883 _ErrorIf(True, constants.CV_ENODELVM, node,
2884 "rpc call to node failed (lvlist)")
2886 nimg.volumes = lvdata
2887 nimg.lvm_fail = False
2889 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2890 """Verifies and updates the node instance list.
2892 If the listing was successful, then updates this node's instance
2893 list. Otherwise, it marks the RPC call as failed for the instance
2896 @type ninfo: L{objects.Node}
2897 @param ninfo: the node to check
2898 @param nresult: the remote results for the node
2899 @param nimg: the node image object
2902 idata = nresult.get(constants.NV_INSTANCELIST, None)
2903 test = not isinstance(idata, list)
2904 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2905 "rpc call to node failed (instancelist): %s",
2906 utils.SafeEncode(str(idata)))
    # hyp_fail marks that the hypervisor could not be queried on this node.
2908 nimg.hyp_fail = True
2910 nimg.instances = idata
2912 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2913 """Verifies and computes a node information map
2915 @type ninfo: L{objects.Node}
2916 @param ninfo: the node to check
2917 @param nresult: the remote results for the node
2918 @param nimg: the node image object
2919 @param vg_name: the configured VG name
2923 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2925 # try to read free memory (from the hypervisor)
2926 hv_info = nresult.get(constants.NV_HVINFO, None)
2927 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2928 _ErrorIf(test, constants.CV_ENODEHV, node,
2929 "rpc call to node failed (hvinfo)")
    # mfree: free memory in MiB as reported by the hypervisor.
2932 nimg.mfree = int(hv_info["memory_free"])
2933 except (ValueError, TypeError):
2934 _ErrorIf(True, constants.CV_ENODERPC, node,
2935 "node returned invalid nodeinfo, check hypervisor")
2937 # FIXME: devise a free space model for file based instances as well
2938 if vg_name is not None:
2939 test = (constants.NV_VGLIST not in nresult or
2940 vg_name not in nresult[constants.NV_VGLIST])
2941 _ErrorIf(test, constants.CV_ENODELVM, node,
2942 "node didn't return data for the volume group '%s'"
2943 " - it is either missing or broken", vg_name)
    # dfree: free disk space in the configured volume group.
2946 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2947 except (ValueError, TypeError):
2948 _ErrorIf(True, constants.CV_ENODERPC, node,
2949 "node returned invalid LVM info, check LVM status")
2951 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2952 """Gets per-disk status information for all instances.
2954 @type nodelist: list of strings
2955 @param nodelist: Node names
2956 @type node_image: dict of (name, L{objects.Node})
2957 @param node_image: Node objects
2958 @type instanceinfo: dict of (name, L{objects.Instance})
2959 @param instanceinfo: Instance objects
2960 @rtype: {instance: {node: [(success, payload)]}}
2961 @return: a dictionary of per-instance dictionaries with nodes as
2962 keys and disk information as values; the disk information is a
2963 list of tuples (success, payload)
2966 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2969 node_disks_devonly = {}
2970 diskless_instances = set()
2971 diskless = constants.DT_DISKLESS
    # Gather, per node, the (instance, disk) pairs of all primary and
    # secondary instances; diskless instances get empty entries at the end.
2973 for nname in nodelist:
2974 node_instances = list(itertools.chain(node_image[nname].pinst,
2975 node_image[nname].sinst))
2976 diskless_instances.update(inst for inst in node_instances
2977 if instanceinfo[inst].disk_template == diskless)
2978 disks = [(inst, disk)
2979 for inst in node_instances
2980 for disk in instanceinfo[inst].disks]
2983 # No need to collect data
2986 node_disks[nname] = disks
2988 # _AnnotateDiskParams makes already copies of the disks
2990 for (inst, dev) in disks:
2991 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2992 self.cfg.SetDiskID(anno_disk, nname)
2993 devonly.append(anno_disk)
2995 node_disks_devonly[nname] = devonly
2997 assert len(node_disks) == len(node_disks_devonly)
2999 # Collect data from all nodes with disks
3000 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3003 assert len(result) == len(node_disks)
3007 for (nname, nres) in result.items():
3008 disks = node_disks[nname]
3011 # No data from this node
3012 data = len(disks) * [(False, "node offline")]
3015 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3016 "while getting disk information: %s", msg)
3018 # No data from this node
3019 data = len(disks) * [(False, msg)]
    # Sanitize the payload: each entry must be a (success, status) pair;
    # anything else is replaced by a failure marker.
3022 for idx, i in enumerate(nres.payload):
3023 if isinstance(i, (tuple, list)) and len(i) == 2:
3026 logging.warning("Invalid result from node %s, entry %d: %s",
3028 data.append((False, "Invalid result from the remote node"))
3030 for ((inst, _), status) in zip(disks, data):
3031 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3033 # Add empty entries for diskless instances.
3034 for inst in diskless_instances:
3035 assert inst not in instdisk
3038 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3039 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3040 compat.all(isinstance(s, (tuple, list)) and
3041 len(s) == 2 for s in statuses)
3042 for inst, nnames in instdisk.items()
3043 for nname, statuses in nnames.items())
3044 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
3049 def _SshNodeSelector(group_uuid, all_nodes):
3050 """Create endless iterators for all potential SSH check hosts.
    # Candidate nodes are those outside the given group; they are grouped by
    # node group and each group yields an endless (cycled) name iterator.
3053 nodes = [node for node in all_nodes
3054 if (node.group != group_uuid and
3056 keyfunc = operator.attrgetter("group")
3058 return map(itertools.cycle,
3059 [sorted(map(operator.attrgetter("name"), names))
3060 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3064 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3065 """Choose which nodes should talk to which other nodes.
3067 We will make nodes contact all nodes in their group, and one node from
3070 @warning: This algorithm has a known issue if one node group is much
3071 smaller than others (e.g. just one node). In such a case all other
3072 nodes will talk to the single node.
3075 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3076 sel = cls._SshNodeSelector(group_uuid, all_nodes)
    # For each online node, take the next name from every per-group cycle,
    # yielding one ssh target per foreign group.
3078 return (online_nodes,
3079 dict((name, sorted([i.next() for i in sel]))
3080 for name in online_nodes))
3082 def BuildHooksEnv(self):
    # Export cluster tags plus one NODE_TAGS_<name> entry per group node.
3085 Cluster-Verify hooks just ran in the post phase and their failure makes
3086 the output be logged in the verify output and the verification to fail.
3090 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3093 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3094 for node in self.my_node_info.values())
3098 def BuildHooksNodes(self):
3099 """Build hooks nodes.
    # Hooks run in the post phase only, on all nodes of the verified group.
3102 return ([], self.my_node_names)
3104   def Exec(self, feedback_fn):
3105     """Verify integrity of the node group, performing various test on nodes.
3108     # This method has too many local variables. pylint: disable=R0914
3109     feedback_fn("* Verifying group '%s'" % self.group_info.name)
3111     if not self.my_node_names:
3113       feedback_fn("* Empty node group, skipping verification")
3117     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3118     verbose = self.op.verbose
3119     self._feedback_fn = feedback_fn
3121     vg_name = self.cfg.GetVGName()
3122     drbd_helper = self.cfg.GetDRBDHelper()
3123     cluster = self.cfg.GetClusterInfo()
3124     groupinfo = self.cfg.GetAllNodeGroupsInfo()
3125     hypervisors = cluster.enabled_hypervisors
3126     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3128     i_non_redundant = [] # Non redundant instances
3129     i_non_a_balanced = [] # Non auto-balanced instances
3130     i_offline = 0 # Count of offline instances
3131     n_offline = 0 # Count of offline nodes
3132     n_drained = 0 # Count of nodes being drained
3133     node_vol_should = {}
3135     # FIXME: verify OS list
3138     filemap = _ComputeAncillaryFiles(cluster, False)
3140     # do local checksums
3141     master_node = self.master_node = self.cfg.GetMasterNode()
3142     master_ip = self.cfg.GetMasterIP()
3144     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3147     if self.cfg.GetUseExternalMipScript():
3148       user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
         # Checks requested from every node via the node-verify RPC below;
         # each NV_* key selects one verification payload on the node side.
3150     node_verify_param = {
3151       constants.NV_FILELIST:
3152         map(vcluster.MakeVirtualPath,
3153             utils.UniqueSequence(filename
3154                                  for files in filemap
3155                                  for filename in files)),
3156       constants.NV_NODELIST:
3157         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3158                                   self.all_node_info.values()),
3159       constants.NV_HYPERVISOR: hypervisors,
3160       constants.NV_HVPARAMS:
3161         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3162       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3163                                  for node in node_data_list
3164                                  if not node.offline],
3165       constants.NV_INSTANCELIST: hypervisors,
3166       constants.NV_VERSION: None,
3167       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3168       constants.NV_NODESETUP: None,
3169       constants.NV_TIME: None,
3170       constants.NV_MASTERIP: (master_node, master_ip),
3171       constants.NV_OSLIST: None,
3172       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3173       constants.NV_USERSCRIPTS: user_scripts,
         # LVM-specific checks only apply when a volume group is configured.
3176     if vg_name is not None:
3177       node_verify_param[constants.NV_VGLIST] = None
3178       node_verify_param[constants.NV_LVLIST] = vg_name
3179       node_verify_param[constants.NV_PVLIST] = [vg_name]
3182       node_verify_param[constants.NV_DRBDLIST] = None
3183       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3185     if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3186       # Load file storage paths only from master node
3187       node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3190     # FIXME: this needs to be changed per node-group, not cluster-wide
         # Collect every bridge referenced by the default NIC params or by any
         # instance NIC, so nodes can verify those bridges exist.
3192     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3193     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3194       bridges.add(default_nicpp[constants.NIC_LINK])
3195     for instance in self.my_inst_info.values():
3196       for nic in instance.nics:
3197         full_nic = cluster.SimpleFillNIC(nic.nicparams)
3198         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3199           bridges.add(full_nic[constants.NIC_LINK])
3202       node_verify_param[constants.NV_BRIDGES] = list(bridges)
3204     # Build our expected cluster state
3205     node_image = dict((node.name, self.NodeImage(offline=node.offline,
3207                                                  vm_capable=node.vm_capable))
3208                       for node in node_data_list)
3212     for node in self.all_node_info.values():
3213       path = _SupportsOob(self.cfg, node)
3214       if path and path not in oob_paths:
3215         oob_paths.append(path)
3218       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
         # Seed the per-node images with the expected instance placement
         # (primary/secondary lists and secondaries-by-primary map).
3220     for instance in self.my_inst_names:
3221       inst_config = self.my_inst_info[instance]
3222       if inst_config.admin_state == constants.ADMINST_OFFLINE:
3225       for nname in inst_config.all_nodes:
3226         if nname not in node_image:
               # Node referenced by an instance but not in this group: a
               # "ghost" image is created for it (ghost=True when the node is
               # unknown to the cluster config entirely).
3227           gnode = self.NodeImage(name=nname)
3228           gnode.ghost = (nname not in self.all_node_info)
3229           node_image[nname] = gnode
3231       inst_config.MapLVsByNode(node_vol_should)
3233       pnode = inst_config.primary_node
3234       node_image[pnode].pinst.append(instance)
3236       for snode in inst_config.secondary_nodes:
3237         nimg = node_image[snode]
3238         nimg.sinst.append(instance)
3239         if pnode not in nimg.sbp:
3240           nimg.sbp[pnode] = []
3241         nimg.sbp[pnode].append(instance)
3243     # At this point, we have the in-memory data structures complete,
3244     # except for the runtime information, which we'll gather next
3246     # Due to the way our RPC system works, exact response times cannot be
3247     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3248     # time before and after executing the request, we can at least have a time
3250     nvinfo_starttime = time.time()
3251     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3253                                            self.cfg.GetClusterName())
3254     nvinfo_endtime = time.time()
         # Nodes outside the group that hold LVs of our instances only need an
         # LV listing, not the full verification payload.
3256     if self.extra_lv_nodes and vg_name is not None:
3258         self.rpc.call_node_verify(self.extra_lv_nodes,
3259                                   {constants.NV_LVLIST: vg_name},
3260                                   self.cfg.GetClusterName())
3262       extra_lv_nvinfo = {}
3264     all_drbd_map = self.cfg.ComputeDRBDMap()
3266     feedback_fn("* Gathering disk information (%s nodes)" %
3267                 len(self.my_node_names))
3268     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3271     feedback_fn("* Verifying configuration file consistency")
3273     # If not all nodes are being checked, we need to make sure the master node
3274     # and a non-checked vm_capable node are in the list.
3275     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3277       vf_nvinfo = all_nvinfo.copy()
3278       vf_node_info = list(self.my_node_info.values())
3279       additional_nodes = []
3280       if master_node not in self.my_node_info:
3281         additional_nodes.append(master_node)
3282         vf_node_info.append(self.all_node_info[master_node])
3283       # Add the first vm_capable node we find which is not included,
3284       # excluding the master node (which we already have)
3285       for node in absent_nodes:
3286         nodeinfo = self.all_node_info[node]
3287         if (nodeinfo.vm_capable and not nodeinfo.offline and
3288             node != master_node):
3289           additional_nodes.append(node)
3290           vf_node_info.append(self.all_node_info[node])
3292       key = constants.NV_FILELIST
3293       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3294                                                  {key: node_verify_param[key]},
3295                                                  self.cfg.GetClusterName()))
3297       vf_nvinfo = all_nvinfo
3298       vf_node_info = self.my_node_info.values()
3300     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3302     feedback_fn("* Verifying node status")
         # Per-node verification: check each node's RPC answer and update its
         # node_image with the gathered runtime data.
3306     for node_i in node_data_list:
3308       nimg = node_image[node]
3312           feedback_fn("* Skipping offline node %s" % (node,))
3316       if node == master_node:
3318       elif node_i.master_candidate:
3319         ntype = "master candidate"
3320       elif node_i.drained:
3326         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3328       msg = all_nvinfo[node].fail_msg
3329       _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3332         nimg.rpc_fail = True
3335       nresult = all_nvinfo[node].payload
3337       nimg.call_ok = self._VerifyNode(node_i, nresult)
3338       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3339       self._VerifyNodeNetwork(node_i, nresult)
3340       self._VerifyNodeUserScripts(node_i, nresult)
3341       self._VerifyOob(node_i, nresult)
3342       self._VerifyFileStoragePaths(node_i, nresult,
3343                                    node == master_node)
3346         self._VerifyNodeLVM(node_i, nresult, vg_name)
3347         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3350       self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3351       self._UpdateNodeInstances(node_i, nresult, nimg)
3352       self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3353       self._UpdateNodeOS(node_i, nresult, nimg)
3355       if not nimg.os_fail:
3356         if refos_img is None:
3358         self._VerifyNodeOS(node_i, nimg, refos_img)
3359       self._VerifyNodeBridges(node_i, nresult, bridges)
3361       # Check whether all running instancies are primary for the node. (This
3362       # can no longer be done from _VerifyInstance below, since some of the
3363       # wrong instances could be from other node groups.)
3364       non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3366       for inst in non_primary_inst:
3367         test = inst in self.all_inst_info
3368         _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3369                  "instance should not run on node %s", node_i.name)
3370         _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3371                  "node is running unknown instance %s", inst)
         # Fold the LV listings gathered from out-of-group nodes into the
         # corresponding node images as well.
3373     for node, result in extra_lv_nvinfo.items():
3374       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3375                               node_image[node], vg_name)
3377     feedback_fn("* Verifying instance status")
3378     for instance in self.my_inst_names:
3380         feedback_fn("* Verifying instance %s" % instance)
3381       inst_config = self.my_inst_info[instance]
3382       self._VerifyInstance(instance, inst_config, node_image,
3384       inst_nodes_offline = []
3386       pnode = inst_config.primary_node
3387       pnode_img = node_image[pnode]
3388       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3389                constants.CV_ENODERPC, pnode, "instance %s, connection to"
3390                " primary node failed", instance)
3392       _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3394                constants.CV_EINSTANCEBADNODE, instance,
3395                "instance is marked as running and lives on offline node %s",
3396                inst_config.primary_node)
3398       # If the instance is non-redundant we cannot survive losing its primary
3399       # node, so we are not N+1 compliant.
3400       if inst_config.disk_template not in constants.DTS_MIRRORED:
3401         i_non_redundant.append(instance)
3403       _ErrorIf(len(inst_config.secondary_nodes) > 1,
3404                constants.CV_EINSTANCELAYOUT,
3405                instance, "instance has multiple secondary nodes: %s",
3406                utils.CommaJoin(inst_config.secondary_nodes),
3407                code=self.ETYPE_WARNING)
3409       if inst_config.disk_template in constants.DTS_INT_MIRROR:
3410         pnode = inst_config.primary_node
3411         instance_nodes = utils.NiceSort(inst_config.all_nodes)
3412         instance_groups = {}
3414         for node in instance_nodes:
3415           instance_groups.setdefault(self.all_node_info[node].group,
3419           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3420           # Sort so that we always list the primary node first.
3421           for group, nodes in sorted(instance_groups.items(),
3422                                      key=lambda (_, nodes): pnode in nodes,
3425         self._ErrorIf(len(instance_groups) > 1,
3426                       constants.CV_EINSTANCESPLITGROUPS,
3427                       instance, "instance has primary and secondary nodes in"
3428                       " different groups: %s", utils.CommaJoin(pretty_list),
3429                       code=self.ETYPE_WARNING)
3431       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3432         i_non_a_balanced.append(instance)
3434       for snode in inst_config.secondary_nodes:
3435         s_img = node_image[snode]
3436         _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3437                  snode, "instance %s, connection to secondary node failed",
3441           inst_nodes_offline.append(snode)
3443       # warn that the instance lives on offline nodes
3444       _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3445                "instance has offline secondary node(s) %s",
3446                utils.CommaJoin(inst_nodes_offline))
3447       # ... or ghost/non-vm_capable nodes
3448       for node in inst_config.all_nodes:
3449         _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3450                  instance, "instance lives on ghost node %s", node)
3451         _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3452                  instance, "instance lives on non-vm_capable node %s", node)
3454     feedback_fn("* Verifying orphan volumes")
3455     reserved = utils.FieldSet(*cluster.reserved_lvs)
3457     # We will get spurious "unknown volume" warnings if any node of this group
3458     # is secondary for an instance whose primary is in another group. To avoid
3459     # them, we find these instances and add their volumes to node_vol_should.
3460     for inst in self.all_inst_info.values():
3461       for secondary in inst.secondary_nodes:
3462         if (secondary in self.my_node_info
3463             and inst.name not in self.my_inst_info):
3464           inst.MapLVsByNode(node_vol_should)
3467     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3469     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3470       feedback_fn("* Verifying N+1 Memory redundancy")
3471       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
         # Summary counters collected above are reported as notices only.
3473     feedback_fn("* Other Notes")
3475       feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3476                   % len(i_non_redundant))
3478     if i_non_a_balanced:
3479       feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3480                   % len(i_non_a_balanced))
3483       feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3486       feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3489       feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3493   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3494     """Analyze the post-hooks' result
3496     This method analyses the hook result, handles it, and sends some
3497     nicely-formatted feedback back to the user.
3499     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3500         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3501     @param hooks_results: the results of the multi-node hooks rpc call
3502     @param feedback_fn: function used send feedback back to the caller
3503     @param lu_result: previous Exec result
3504     @return: the new Exec result, based on the previous result
3508     # We only really run POST phase hooks, only for non-empty groups,
3509     # and are only interested in their results
3510     if not self.my_node_names:
3513     elif phase == constants.HOOKS_PHASE_POST:
3514       # Used to change hooks' output to proper indentation
3515       feedback_fn("* Hooks Results")
3516       assert hooks_results, "invalid result from hooks"
         # Each entry is a per-node RPC result; payload holds the per-script
         # (script, status, output) tuples unpacked below.
3518       for node_name in hooks_results:
3519         res = hooks_results[node_name]
3521         test = msg and not res.offline
3522         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3523                       "Communication failure in hooks execution: %s", msg)
3524         if res.offline or msg:
3525           # No need to investigate payload if node is offline or gave
3528         for script, hkr, output in res.payload:
3529           test = hkr == constants.HKR_FAIL
3530           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3531                         "Script %s failed, output:", script)
               # Re-indent the hook's raw output before feeding it back.
3533             output = self._HOOKS_INDENT_RE.sub(" ", output)
3534             feedback_fn("%s" % output)
3540 class LUClusterVerifyDisks(NoHooksLU):
3541   """Verifies the cluster disks status.
       # Takes all node-group locks in shared mode; the real work is delegated.
3546   def ExpandNames(self):
3547     self.share_locks = _ShareAll()
3548     self.needed_locks = {
3549       locking.LEVEL_NODEGROUP: locking.ALL_SET,
       # Fans out: one OpGroupVerifyDisks job per node group (see below).
3552   def Exec(self, feedback_fn):
3553     group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3555     # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3556     return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3557                            for group in group_names])
3560 class LUGroupVerifyDisks(NoHooksLU):
3561   """Verifies the status of all disks in a node group.
3566   def ExpandNames(self):
3567     # Raises errors.OpPrereqError on its own if group can't be found
3568     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3570     self.share_locks = _ShareAll()
3571     self.needed_locks = {
3572       locking.LEVEL_INSTANCE: [],
3573       locking.LEVEL_NODEGROUP: [],
3574       locking.LEVEL_NODE: [],
3577   def DeclareLocks(self, level):
3578     if level == locking.LEVEL_INSTANCE:
3579       assert not self.needed_locks[locking.LEVEL_INSTANCE]
3581       # Lock instances optimistically, needs verification once node and group
3582       # locks have been acquired
3583       self.needed_locks[locking.LEVEL_INSTANCE] = \
3584         self.cfg.GetNodeGroupInstances(self.group_uuid)
3586     elif level == locking.LEVEL_NODEGROUP:
3587       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3589       self.needed_locks[locking.LEVEL_NODEGROUP] = \
3590         set([self.group_uuid] +
3591             # Lock all groups used by instances optimistically; this requires
3592             # going via the node before it's locked, requiring verification
3595             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3596             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3598     elif level == locking.LEVEL_NODE:
3599       # This will only lock the nodes in the group to be verified which contain
3601       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3602       self._LockInstancesNodes()
3604       # Lock all nodes in group to be verified
3605       assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3606       member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3607       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
       # Re-validates the optimistically acquired locks against the current
       # configuration (instances may have moved between lock acquisitions).
3609   def CheckPrereq(self):
3610     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3611     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3612     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3614     assert self.group_uuid in owned_groups
3616     # Check if locked instances are still correct
3617     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3619     # Get instance information
3620     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3622     # Check if node groups for locked instances are still correct
3623     _CheckInstancesNodeGroups(self.cfg, self.instances,
3624                               owned_groups, owned_nodes, self.group_uuid)
3626   def Exec(self, feedback_fn):
3627     """Verify integrity of cluster disks.
3629     @rtype: tuple of three items
3630     @return: a tuple of (dict of node-to-node_error, list of instances
3631         which need activate-disks, dict of instance: (node, volume) for
3636     res_instances = set()
         # Only admin-up instances are expected to have their LVs online.
3639     nv_dict = _MapInstanceDisksToNodes(
3640       [inst for inst in self.instances.values()
3641        if inst.admin_state == constants.ADMINST_UP])
3644     nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3645                            set(self.cfg.GetVmCapableNodeList()))
           # NOTE(review): the empty list passed to call_lv_list presumably
           # means "all volume groups" — confirm against the RPC definition.
3647       node_lvs = self.rpc.call_lv_list(nodes, [])
3649       for (node, node_res) in node_lvs.items():
3650         if node_res.offline:
3653         msg = node_res.fail_msg
3655           logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3656           res_nodes[node] = msg
3659         for lv_name, (_, _, lv_online) in node_res.payload.items():
3660           inst = nv_dict.pop((node, lv_name), None)
3661           if not (lv_online or inst is None):
3662             res_instances.add(inst)
3664     # any leftover items in nv_dict are missing LVs, let's arrange the data
3666     for key, inst in nv_dict.iteritems():
3667       res_missing.setdefault(inst, []).append(list(key))
3669     return (res_nodes, list(res_instances), res_missing)
3672 class LUClusterRepairDiskSizes(NoHooksLU):
3673   """Verifies the cluster disks sizes.
3678   def ExpandNames(self):
       # With an explicit instance list only those instances (and their
       # primary nodes) are locked; otherwise everything is locked.
3679     if self.op.instances:
3680       self.wanted_names = _GetWantedInstances(self, self.op.instances)
3681       self.needed_locks = {
3682         locking.LEVEL_NODE_RES: [],
3683         locking.LEVEL_INSTANCE: self.wanted_names,
3685       self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3687       self.wanted_names = None
3688       self.needed_locks = {
3689         locking.LEVEL_NODE_RES: locking.ALL_SET,
3690         locking.LEVEL_INSTANCE: locking.ALL_SET,
3692     self.share_locks = {
3693       locking.LEVEL_NODE_RES: 1,
3694       locking.LEVEL_INSTANCE: 0,
3697   def DeclareLocks(self, level):
3698     if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3699       self._LockInstancesNodes(primary_only=True, level=level)
3701   def CheckPrereq(self):
3702     """Check prerequisites.
3704     This only checks the optional instance list against the existing names.
3707     if self.wanted_names is None:
3708       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3710     self.wanted_instances = \
3711       map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3713   def _EnsureChildSizes(self, disk):
3714     """Ensure children of the disk have the needed disk size.
3716     This is valid mainly for DRBD8 and fixes an issue where the
3717     children have smaller disk size.
3719     @param disk: an L{ganeti.objects.Disk} object
3722     if disk.dev_type == constants.LD_DRBD8:
3723       assert disk.children, "Empty children for DRBD8?"
3724       fchild = disk.children[0]
3725       mismatch = fchild.size < disk.size
3727         self.LogInfo("Child disk has size %d, parent %d, fixing",
3728                      fchild.size, disk.size)
3729         fchild.size = disk.size
3731       # and we recurse on this child only, not on the metadev
3732       return self._EnsureChildSizes(fchild) or mismatch
3736   def Exec(self, feedback_fn):
3737     """Verify the size of cluster disks.
3740     # TODO: check child disks too
3741     # TODO: check differences in size between primary/secondary nodes
         # Group each instance's disks by primary node for per-node RPC calls.
3743     for instance in self.wanted_instances:
3744       pnode = instance.primary_node
3745       if pnode not in per_node_disks:
3746         per_node_disks[pnode] = []
3747       for idx, disk in enumerate(instance.disks):
3748         per_node_disks[pnode].append((instance, idx, disk))
3750     assert not (frozenset(per_node_disks.keys()) -
3751                 self.owned_locks(locking.LEVEL_NODE_RES)), \
3752       "Not owning correct locks"
3753     assert not self.owned_locks(locking.LEVEL_NODE)
         # Query each primary node for actual block device sizes and fix the
         # recorded configuration where it disagrees.
3756     for node, dskl in per_node_disks.items():
3757       newl = [v[2].Copy() for v in dskl]
3759         self.cfg.SetDiskID(dsk, node)
3760       result = self.rpc.call_blockdev_getsize(node, newl)
3762         self.LogWarning("Failure in blockdev_getsize call to node"
3763                         " %s, ignoring", node)
3765       if len(result.payload) != len(dskl):
3766         logging.warning("Invalid result from node %s: len(dksl)=%d,"
3767                         " result.payload=%s", node, len(dskl), result.payload)
3768         self.LogWarning("Invalid result from node %s, ignoring node results",
3771       for ((instance, idx, disk), size) in zip(dskl, result.payload):
3773           self.LogWarning("Disk %d of instance %s did not return size"
3774                           " information, ignoring", idx, instance.name)
3776         if not isinstance(size, (int, long)):
3777           self.LogWarning("Disk %d of instance %s did not return valid"
3778                           " size information, ignoring", idx, instance.name)
3781         if size != disk.size:
3782           self.LogInfo("Disk %d of instance %s has mismatched size,"
3783                        " correcting: recorded %d, actual %d", idx,
3784                        instance.name, disk.size, size)
3786           self.cfg.Update(instance, feedback_fn)
3787           changed.append((instance.name, idx, size))
3788         if self._EnsureChildSizes(disk):
3789           self.cfg.Update(instance, feedback_fn)
3790           changed.append((instance.name, idx, disk.size))
3794 class LUClusterRename(LogicalUnit):
3795   """Rename the cluster.
3798   HPATH = "cluster-rename"
3799   HTYPE = constants.HTYPE_CLUSTER
3801   def BuildHooksEnv(self):
3806       "OP_TARGET": self.cfg.GetClusterName(),
3807       "NEW_NAME": self.op.name,
3810   def BuildHooksNodes(self):
3811     """Build hooks nodes.
3814     return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3816   def CheckPrereq(self):
3817     """Verify that the passed name is a valid one.
         # Resolve the requested name in the cluster's primary IP family.
3820     hostname = netutils.GetHostname(name=self.op.name,
3821                                     family=self.cfg.GetPrimaryIPFamily())
3823     new_name = hostname.name
3824     self.ip = new_ip = hostname.ip
3825     old_name = self.cfg.GetClusterName()
3826     old_ip = self.cfg.GetMasterIP()
3827     if new_name == old_name and new_ip == old_ip:
3828       raise errors.OpPrereqError("Neither the name nor the IP address of the"
3829                                  " cluster has changed",
3831     if new_ip != old_ip:
           # Refuse to rename onto an IP that already answers on the noded port.
3832       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3833         raise errors.OpPrereqError("The given cluster IP address (%s) is"
3834                                    " reachable on the network" %
3835                                    new_ip, errors.ECODE_NOTUNIQUE)
3837     self.op.name = new_name
3839   def Exec(self, feedback_fn):
3840     """Rename the cluster.
3843     clustername = self.op.name
3846     # shutdown the master IP
3847     master_params = self.cfg.GetMasterNetworkParameters()
3848     ems = self.cfg.GetUseExternalMipScript()
3849     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3851     result.Raise("Could not disable the master role")
3854       cluster = self.cfg.GetClusterInfo()
3855       cluster.cluster_name = clustername
3856       cluster.master_ip = new_ip
3857       self.cfg.Update(cluster, feedback_fn)
3859       # update the known hosts file
3860       ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
3861       node_list = self.cfg.GetOnlineNodeList()
3863         node_list.remove(master_params.name)
3866       _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
           # Re-activation failure is only warned about, not fatal: the rename
           # itself has already been committed to the configuration above.
3868       master_params.ip = new_ip
3869       result = self.rpc.call_node_activate_master_ip(master_params.name,
3871       msg = result.fail_msg
3873         self.LogWarning("Could not re-enable the master role on"
3874                         " the master, please restart manually: %s", msg)
3879 def _ValidateNetmask(cfg, netmask):
3880   """Checks if a netmask is valid.
3882   @type cfg: L{config.ConfigWriter}
3883   @param cfg: The cluster configuration
3885   @param netmask: the netmask to be verified
3886   @raise errors.OpPrereqError: if the validation fails
       # Map the cluster's primary IP family to the matching IPAddress class,
       # translating an unknown family into a user-facing prereq error.
3889   ip_family = cfg.GetPrimaryIPFamily()
3891     ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3892   except errors.ProgrammerError:
3893     raise errors.OpPrereqError("Invalid primary ip family: %s." %
3894                                ip_family, errors.ECODE_INVAL)
3895   if not ipcls.ValidateNetmask(netmask):
3896     raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3897                                (netmask), errors.ECODE_INVAL)
3900 class LUClusterSetParams(LogicalUnit):
3901 """Change the parameters of the cluster.
3904 HPATH = "cluster-modify"
3905 HTYPE = constants.HTYPE_CLUSTER
3908 def CheckArguments(self):
3912 if self.op.uid_pool:
3913 uidpool.CheckUidPool(self.op.uid_pool)
3915 if self.op.add_uids:
3916 uidpool.CheckUidPool(self.op.add_uids)
3918 if self.op.remove_uids:
3919 uidpool.CheckUidPool(self.op.remove_uids)
3921 if self.op.master_netmask is not None:
3922 _ValidateNetmask(self.cfg, self.op.master_netmask)
3924 if self.op.diskparams:
3925 for dt_params in self.op.diskparams.values():
3926 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3928 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3929 except errors.OpPrereqError, err:
3930 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
3933 def ExpandNames(self):
3934 # FIXME: in the future maybe other cluster params won't require checking on
3935 # all nodes to be modified.
3936 self.needed_locks = {
3937 locking.LEVEL_NODE: locking.ALL_SET,
3938 locking.LEVEL_INSTANCE: locking.ALL_SET,
3939 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3941 self.share_locks = {
3942 locking.LEVEL_NODE: 1,
3943 locking.LEVEL_INSTANCE: 1,
3944 locking.LEVEL_NODEGROUP: 1,
3947 def BuildHooksEnv(self):
3952 "OP_TARGET": self.cfg.GetClusterName(),
3953 "NEW_VG_NAME": self.op.vg_name,
3956 def BuildHooksNodes(self):
3957 """Build hooks nodes.
3960 mn = self.cfg.GetMasterNode()
3963 def CheckPrereq(self):
3964 """Check prerequisites.
3966 This checks whether the given params don't conflict and
3967 if the given volume group is valid.
3970 if self.op.vg_name is not None and not self.op.vg_name:
3971 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3972 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3973 " instances exist", errors.ECODE_INVAL)
3975 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3976 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3977 raise errors.OpPrereqError("Cannot disable drbd helper while"
3978 " drbd-based instances exist",
3981 node_list = self.owned_locks(locking.LEVEL_NODE)
3983 # if vg_name not None, checks given volume group on all nodes
3985 vglist = self.rpc.call_vg_list(node_list)
3986 for node in node_list:
3987 msg = vglist[node].fail_msg
3989 # ignoring down node
3990 self.LogWarning("Error while gathering data on node %s"
3991 " (ignoring node): %s", node, msg)
3993 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3995 constants.MIN_VG_SIZE)
3997 raise errors.OpPrereqError("Error on node '%s': %s" %
3998 (node, vgstatus), errors.ECODE_ENVIRON)
4000 if self.op.drbd_helper:
4001 # checks given drbd helper on all nodes
4002 helpers = self.rpc.call_drbd_helper(node_list)
4003 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4005 self.LogInfo("Not checking drbd helper on offline node %s", node)
4007 msg = helpers[node].fail_msg
4009 raise errors.OpPrereqError("Error checking drbd helper on node"
4010 " '%s': %s" % (node, msg),
4011 errors.ECODE_ENVIRON)
4012 node_helper = helpers[node].payload
4013 if node_helper != self.op.drbd_helper:
4014 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4015 (node, node_helper), errors.ECODE_ENVIRON)
4017 self.cluster = cluster = self.cfg.GetClusterInfo()
4018 # validate params changes
4019 if self.op.beparams:
4020 objects.UpgradeBeParams(self.op.beparams)
4021 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4022 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4024 if self.op.ndparams:
4025 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4026 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4028 # TODO: we need a more general way to handle resetting
4029 # cluster-level parameters to default values
4030 if self.new_ndparams["oob_program"] == "":
4031 self.new_ndparams["oob_program"] = \
4032 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4034 if self.op.hv_state:
4035 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4036 self.cluster.hv_state_static)
4037 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4038 for hv, values in new_hv_state.items())
4040 if self.op.disk_state:
4041 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4042 self.cluster.disk_state_static)
4043 self.new_disk_state = \
4044 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4045 for name, values in svalues.items()))
4046 for storage, svalues in new_disk_state.items())
4049 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4052 all_instances = self.cfg.GetAllInstancesInfo().values()
4054 for group in self.cfg.GetAllNodeGroupsInfo().values():
4055 instances = frozenset([inst for inst in all_instances
4056 if compat.any(node in group.members
4057 for node in inst.all_nodes)])
4058 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4059 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4060 new = _ComputeNewInstanceViolations(ipol,
4061 new_ipolicy, instances)
4063 violations.update(new)
4066 self.LogWarning("After the ipolicy change the following instances"
4067 " violate them: %s",
4068 utils.CommaJoin(utils.NiceSort(violations)))
4070 if self.op.nicparams:
4071 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4072 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4073 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4076 # check all instances for consistency
4077 for instance in self.cfg.GetAllInstancesInfo().values():
4078 for nic_idx, nic in enumerate(instance.nics):
4079 params_copy = copy.deepcopy(nic.nicparams)
4080 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4082 # check parameter syntax
4084 objects.NIC.CheckParameterSyntax(params_filled)
4085 except errors.ConfigurationError, err:
4086 nic_errors.append("Instance %s, nic/%d: %s" %
4087 (instance.name, nic_idx, err))
4089 # if we're moving instances to routed, check that they have an ip
4090 target_mode = params_filled[constants.NIC_MODE]
4091 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4092 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4093 " address" % (instance.name, nic_idx))
4095 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4096 "\n".join(nic_errors), errors.ECODE_INVAL)
4098 # hypervisor list/parameters
4099 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4100 if self.op.hvparams:
4101 for hv_name, hv_dict in self.op.hvparams.items():
4102 if hv_name not in self.new_hvparams:
4103 self.new_hvparams[hv_name] = hv_dict
4105 self.new_hvparams[hv_name].update(hv_dict)
4107 # disk template parameters
4108 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4109 if self.op.diskparams:
4110 for dt_name, dt_params in self.op.diskparams.items():
4111 if dt_name not in self.op.diskparams:
4112 self.new_diskparams[dt_name] = dt_params
4114 self.new_diskparams[dt_name].update(dt_params)
4116 # os hypervisor parameters
4117 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4119 for os_name, hvs in self.op.os_hvp.items():
4120 if os_name not in self.new_os_hvp:
4121 self.new_os_hvp[os_name] = hvs
4123 for hv_name, hv_dict in hvs.items():
4124 if hv_name not in self.new_os_hvp[os_name]:
4125 self.new_os_hvp[os_name][hv_name] = hv_dict
4127 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4130 self.new_osp = objects.FillDict(cluster.osparams, {})
4131 if self.op.osparams:
4132 for os_name, osp in self.op.osparams.items():
4133 if os_name not in self.new_osp:
4134 self.new_osp[os_name] = {}
4136 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4139 if not self.new_osp[os_name]:
4140 # we removed all parameters
4141 del self.new_osp[os_name]
4143 # check the parameter validity (remote check)
4144 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4145 os_name, self.new_osp[os_name])
4147 # changes to the hypervisor list
4148 if self.op.enabled_hypervisors is not None:
4149 self.hv_list = self.op.enabled_hypervisors
4150 for hv in self.hv_list:
4151 # if the hypervisor doesn't already exist in the cluster
4152 # hvparams, we initialize it to empty, and then (in both
4153 # cases) we make sure to fill the defaults, as we might not
4154 # have a complete defaults list if the hypervisor wasn't
4156 if hv not in new_hvp:
4158 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4159 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4161 self.hv_list = cluster.enabled_hypervisors
4163 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4164 # either the enabled list has changed, or the parameters have, validate
4165 for hv_name, hv_params in self.new_hvparams.items():
4166 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4167 (self.op.enabled_hypervisors and
4168 hv_name in self.op.enabled_hypervisors)):
4169 # either this is a new hypervisor, or its parameters have changed
4170 hv_class = hypervisor.GetHypervisor(hv_name)
4171 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4172 hv_class.CheckParameterSyntax(hv_params)
4173 _CheckHVParams(self, node_list, hv_name, hv_params)
4176 # no need to check any newly-enabled hypervisors, since the
4177 # defaults have already been checked in the above code-block
4178 for os_name, os_hvp in self.new_os_hvp.items():
4179 for hv_name, hv_params in os_hvp.items():
4180 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4181 # we need to fill in the new os_hvp on top of the actual hv_p
4182 cluster_defaults = self.new_hvparams.get(hv_name, {})
4183 new_osp = objects.FillDict(cluster_defaults, hv_params)
4184 hv_class = hypervisor.GetHypervisor(hv_name)
4185 hv_class.CheckParameterSyntax(new_osp)
4186 _CheckHVParams(self, node_list, hv_name, new_osp)
4188 if self.op.default_iallocator:
4189 alloc_script = utils.FindFile(self.op.default_iallocator,
4190 constants.IALLOCATOR_SEARCH_PATH,
4192 if alloc_script is None:
4193 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4194 " specified" % self.op.default_iallocator,
4197   def Exec(self, feedback_fn):
4198     """Change the parameters of the cluster.

    Applies every parameter carried by the opcode (self.op) to the live
    cluster object, then saves the configuration.  Values not set on the
    opcode are left untouched.
    NOTE(review): this listing has gaps (several original lines, e.g. the
    "else:" branches, are missing from view); comments describe only what
    the visible code shows.
    """
    # VG name: only touch the config if the value actually changes.
4201     if self.op.vg_name is not None:
4202       new_volume = self.op.vg_name
4205       if new_volume != self.cfg.GetVGName():
4206         self.cfg.SetVGName(new_volume)
4208         feedback_fn("Cluster LVM configuration already in desired"
4209                     " state, not changing")
    # DRBD usermode helper: same change-only-if-different pattern.
4210     if self.op.drbd_helper is not None:
4211       new_helper = self.op.drbd_helper
4214       if new_helper != self.cfg.GetDRBDHelper():
4215         self.cfg.SetDRBDHelper(new_helper)
4217         feedback_fn("Cluster DRBD helper already in desired state,"
    # Parameter dictionaries pre-computed in CheckPrereq (self.new_*)
    # are committed to the cluster object here.
4219     if self.op.hvparams:
4220       self.cluster.hvparams = self.new_hvparams
4222       self.cluster.os_hvp = self.new_os_hvp
4223     if self.op.enabled_hypervisors is not None:
4224       self.cluster.hvparams = self.new_hvparams
4225       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4226     if self.op.beparams:
4227       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4228     if self.op.nicparams:
4229       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4231       self.cluster.ipolicy = self.new_ipolicy
4232     if self.op.osparams:
4233       self.cluster.osparams = self.new_osp
4234     if self.op.ndparams:
4235       self.cluster.ndparams = self.new_ndparams
4236     if self.op.diskparams:
4237       self.cluster.diskparams = self.new_diskparams
4238     if self.op.hv_state:
4239       self.cluster.hv_state_static = self.new_hv_state
4240     if self.op.disk_state:
4241       self.cluster.disk_state_static = self.new_disk_state
4243     if self.op.candidate_pool_size is not None:
4244       self.cluster.candidate_pool_size = self.op.candidate_pool_size
4245       # we need to update the pool size here, otherwise the save will fail
4246       _AdjustCandidatePool(self, [])
4248     if self.op.maintain_node_health is not None:
4249       if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4250         feedback_fn("Note: CONFD was disabled at build time, node health"
4251                     " maintenance is not useful (still enabling it)")
4252       self.cluster.maintain_node_health = self.op.maintain_node_health
4254     if self.op.prealloc_wipe_disks is not None:
4255       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4257     if self.op.add_uids is not None:
4258       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4260     if self.op.remove_uids is not None:
4261       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4263     if self.op.uid_pool is not None:
4264       self.cluster.uid_pool = self.op.uid_pool
4266     if self.op.default_iallocator is not None:
4267       self.cluster.default_iallocator = self.op.default_iallocator
4269     if self.op.reserved_lvs is not None:
4270       self.cluster.reserved_lvs = self.op.reserved_lvs
4272     if self.op.use_external_mip_script is not None:
4273       self.cluster.use_external_mip_script = self.op.use_external_mip_script
    # Local helper to apply add/remove modifications to an OS-name list
    # attribute of the cluster ("hidden_os"/"blacklisted_os").
4275     def helper_os(aname, mods, desc):
4277       lst = getattr(self.cluster, aname)
4278       for key, val in mods:
4279         if key == constants.DDM_ADD:
4281             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4284         elif key == constants.DDM_REMOVE:
4288             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4290           raise errors.ProgrammerError("Invalid modification '%s'" % key)
4292     if self.op.hidden_os:
4293       helper_os("hidden_os", self.op.hidden_os, "hidden")
4295     if self.op.blacklisted_os:
4296       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
    # Changing the master netdev: stop the master IP on the old device
    # first; it is re-started further below after cfg.Update.
4298     if self.op.master_netdev:
4299       master_params = self.cfg.GetMasterNetworkParameters()
4300       ems = self.cfg.GetUseExternalMipScript()
4301       feedback_fn("Shutting down master ip on the current netdev (%s)" %
4302                   self.cluster.master_netdev)
4303       result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4305       result.Raise("Could not disable the master ip")
4306       feedback_fn("Changing master_netdev from %s to %s" %
4307                   (master_params.netdev, self.op.master_netdev))
4308       self.cluster.master_netdev = self.op.master_netdev
4310     if self.op.master_netmask:
4311       master_params = self.cfg.GetMasterNetworkParameters()
4312       feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4313       result = self.rpc.call_node_change_master_netmask(master_params.name,
4314                                                         master_params.netmask,
4315                                                         self.op.master_netmask,
4317                                                         master_params.netdev)
4319         msg = "Could not change the master IP netmask: %s" % result.fail_msg
4322       self.cluster.master_netmask = self.op.master_netmask
    # Persist all accumulated cluster changes.
4324     self.cfg.Update(self.cluster, feedback_fn)
    # Re-start the master IP on the new netdev; failure here is only a
    # warning since the configuration is already saved.
4326     if self.op.master_netdev:
4327       master_params = self.cfg.GetMasterNetworkParameters()
4328       feedback_fn("Starting the master ip on the new master netdev (%s)" %
4329                   self.op.master_netdev)
4330       ems = self.cfg.GetUseExternalMipScript()
4331       result = self.rpc.call_node_activate_master_ip(master_params.name,
4334         self.LogWarning("Could not re-enable the master ip on"
4335                         " the master, please restart manually: %s",
4339 def _UploadHelper(lu, nodes, fname):
4340   """Helper for uploading a file and showing warnings.

  Uploads C{fname} (if it exists locally) to all given nodes via the
  upload_file RPC; per-node failures are logged as warnings, never raised.
  """
  # NOTE(review): the guard ("if msg:") before building the warning is
  # missing from this listing — verify against the full source.
4343   if os.path.exists(fname):
4344     result = lu.rpc.call_upload_file(nodes, fname)
4345     for to_node, to_result in result.items():
4346       msg = to_result.fail_msg
4348         msg = ("Copy of file %s to node %s failed: %s" %
4349                (fname, to_node, msg))
4350         lu.proc.LogWarning(msg)
4353 def _ComputeAncillaryFiles(cluster, redist):
4354   """Compute files external to Ganeti which need to be consistent.

4356   @type redist: boolean
4357   @param redist: Whether to include files which need to be redistributed

  @return: tuple (files_all, files_opt, files_mc, files_vm) of file sets
  """
  # NOTE(review): the initial construction of files_all/files_opt/files_mc
  # is partially missing from this listing.
4360   # Compute files for all nodes
4362     pathutils.SSH_KNOWN_HOSTS_FILE,
4363     pathutils.CONFD_HMAC_KEY,
4364     pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4365     pathutils.SPICE_CERT_FILE,
4366     pathutils.SPICE_CACERT_FILE,
4367     pathutils.RAPI_USERS_FILE,
4371     # we need to ship at least the RAPI certificate
4372     files_all.add(pathutils.RAPI_CERT_FILE)
4374     files_all.update(pathutils.ALL_CERT_FILES)
4375     files_all.update(ssconf.SimpleStore().GetFileList())
4377   if cluster.modify_etc_hosts:
4378     files_all.add(pathutils.ETC_HOSTS)
4380   if cluster.use_external_mip_script:
4381     files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4383   # Files which are optional, these must:
4384   # - be present in one other category as well
4385   # - either exist or not exist on all nodes of that category (mc, vm all)
4387     pathutils.RAPI_USERS_FILE,
4390   # Files which should only be on master candidates
4394     files_mc.add(pathutils.CLUSTER_CONF_FILE)
4398       (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4399     files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4400     files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4402   # Files which should only be on VM-capable nodes
4405                   for hv_name in cluster.enabled_hypervisors
4406                   for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4410                   for hv_name in cluster.enabled_hypervisors
4411                   for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4413   # Filenames in each category must be unique
4414   all_files_set = files_all | files_mc | files_vm
4415   assert (len(all_files_set) ==
4416           sum(map(len, [files_all, files_mc, files_vm]))), \
4417     "Found file listed in more than one file list"
4419   # Optional files must be present in one other category
4420   assert all_files_set.issuperset(files_opt), \
4421     "Optional file not in a different required list"
4423   # This one file should never ever be re-distributed via RPC
4424   assert not (redist and
4425               pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4427   return (files_all, files_opt, files_mc, files_vm)
4430 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4431   """Distribute additional files which are part of the cluster configuration.

4433   ConfigWriter takes care of distributing the config and ssconf files, but
4434   there are more files which should be distributed to all nodes. This function
4435   makes sure those are copied.

4437   @param lu: calling logical unit
4438   @param additional_nodes: list of nodes not in the config to distribute to
4439   @type additional_vm: boolean
4440   @param additional_vm: whether the additional nodes are vm-capable or not

  """
4443   # Gather target nodes
4444   cluster = lu.cfg.GetClusterInfo()
4445   master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4447   online_nodes = lu.cfg.GetOnlineNodeList()
4448   online_set = frozenset(online_nodes)
  # Only online AND vm-capable nodes get the VM-specific files.
4449   vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4451   if additional_nodes is not None:
4452     online_nodes.extend(additional_nodes)
4454       vm_nodes.extend(additional_nodes)
4456   # Never distribute to master node
4457   for nodelist in [online_nodes, vm_nodes]:
4458     if master_info.name in nodelist:
4459       nodelist.remove(master_info.name)
  # redist=True: include files that need redistribution.
4462   (files_all, _, files_mc, files_vm) = \
4463     _ComputeAncillaryFiles(cluster, True)
4465   # Never re-distribute configuration file from here
4466   assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4467               pathutils.CLUSTER_CONF_FILE in files_vm)
4468   assert not files_mc, "Master candidates not handled in this function"
  # NOTE(review): the assignment building "filemap" from these pairs is
  # missing from this listing.
4471     (online_nodes, files_all),
4472     (vm_nodes, files_vm),
4476   for (node_list, files) in filemap:
4478       _UploadHelper(lu, node_list, fname)
4481 class LUClusterRedistConf(NoHooksLU):
4482   """Force the redistribution of cluster configuration.

4484   This is a very simple LU.

  """
4489   def ExpandNames(self):
    # Shared lock on all nodes: we only read state and push files out.
4490     self.needed_locks = {
4491       locking.LEVEL_NODE: locking.ALL_SET,
4493     self.share_locks[locking.LEVEL_NODE] = 1

4495   def Exec(self, feedback_fn):
4496     """Redistribute the configuration.

    Touching the config via cfg.Update triggers the normal config/ssconf
    distribution; ancillary files are pushed explicitly afterwards.
    """
4499     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4500     _RedistributeAncillaryFiles(self)
4503 class LUClusterActivateMasterIp(NoHooksLU):
4504   """Activate the master IP on the master node.

  """
4507   def Exec(self, feedback_fn):
4508     """Activate the master IP.

    """
4511     master_params = self.cfg.GetMasterNetworkParameters()
    # ems: whether an external master-IP setup script should be used.
4512     ems = self.cfg.GetUseExternalMipScript()
    # NOTE(review): the continuation line with the remaining RPC arguments
    # is missing from this listing.
4513     result = self.rpc.call_node_activate_master_ip(master_params.name,
4515     result.Raise("Could not activate the master IP")
4518 class LUClusterDeactivateMasterIp(NoHooksLU):
4519   """Deactivate the master IP on the master node.

  """
4522   def Exec(self, feedback_fn):
4523     """Deactivate the master IP.

    """
4526     master_params = self.cfg.GetMasterNetworkParameters()
    # ems: whether an external master-IP setup script should be used.
4527     ems = self.cfg.GetUseExternalMipScript()
    # NOTE(review): the continuation line with the remaining RPC arguments
    # is missing from this listing.
4528     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4530     result.Raise("Could not deactivate the master IP")
4533 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4534   """Sleep and poll for an instance's disk to sync.

  Polls the primary node for mirror status of the given disks and logs
  progress until the disks report in-sync (or, with C{oneshot}, after a
  single pass).  Returns True when not degraded at the end.
  NOTE(review): the surrounding loop header and the setup of "done"/
  "max_time" are missing from this listing.
  """
  # Nothing to do for diskless instances / an explicitly empty disk list.
4537   if not instance.disks or disks is not None and not disks:
4540   disks = _ExpandCheckDisks(instance, disks)
4543     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
  # All status queries go to the primary node.
4545   node = instance.primary_node
4548     lu.cfg.SetDiskID(dev, node)
4550   # TODO: Convert to utils.Retry
4553   degr_retries = 10 # in seconds, as we sleep 1 second each time
4557     cumul_degraded = False
4558     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4559     msg = rstats.fail_msg
4561       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4564       raise errors.RemoteError("Can't contact node %s for mirror data,"
4565                                " aborting." % node)
4568     rstats = rstats.payload
4570     for i, mstat in enumerate(rstats):
4572         lu.LogWarning("Can't compute data for node %s/%s",
4573                       node, disks[i].iv_name)
    # Degraded with no sync percentage means no recovery in progress.
4576       cumul_degraded = (cumul_degraded or
4577                         (mstat.is_degraded and mstat.sync_percent is None))
4578       if mstat.sync_percent is not None:
4580         if mstat.estimated_time is not None:
4581           rem_time = ("%s remaining (estimated)" %
4582                       utils.FormatSeconds(mstat.estimated_time))
4583           max_time = mstat.estimated_time
4585           rem_time = "no time estimate"
4586         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4587                         (disks[i].iv_name, mstat.sync_percent, rem_time))
4589     # if we're done but degraded, let's do a few small retries, to
4590     # make sure we see a stable and not transient situation; therefore
4591     # we force restart of the loop
4592     if (done or oneshot) and cumul_degraded and degr_retries > 0:
4593       logging.info("Degraded disks found, %d retries left", degr_retries)
    # Sleep is capped at 60s regardless of the reported estimate.
4601       time.sleep(min(60, max_time))
4604     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4605   return not cumul_degraded
def _BlockdevFind(lu, node, dev, instance):
  """Wrapper around call_blockdev_find to annotate diskparams.

  @param lu: A reference to the lu object
  @param node: The node to call out
  @param dev: The device to find
  @param instance: The instance object the device belongs to
  @returns The result of the rpc call

  """
  # Annotate the single device with the instance's disk parameters
  # before querying the node for it.
  annotated = _AnnotateDiskParams(instance, [dev], lu.cfg)
  (annotated_dev,) = annotated
  return lu.rpc.call_blockdev_find(node, annotated_dev)
def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
  """Wrapper around L{_CheckDiskConsistencyInner}.

  Annotates the disk parameters of C{dev} from the configuration before
  delegating the actual consistency check to the inner function.

  @param lu: the calling logical unit
  @param instance: the instance the device belongs to
  @param dev: the (un-annotated) disk object to check
  @param node: the node to check the disk on
  @param on_primary: whether the disk is on the primary node
  @param ldisk: if True, check local-storage status instead of the
      overall degradation status (see inner function)
  @return: the boolean result of the inner consistency check

  """
  (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
  # Fix: the original call was truncated — forward the ldisk flag and
  # close the call properly.
  return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
                                    ldisk=ldisk)
4631 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4633   """Check that mirrors are not degraded.

4635   @attention: The device has to be annotated already.

4637   The ldisk parameter, if True, will change the test from the
4638   is_degraded attribute (which represents overall non-ok status for
4639   the device(s)) to the ldisk (representing the local storage status).

  """
  # NOTE(review): the signature continuation (ldisk parameter) and the
  # initialization of "result" are missing from this listing.
4642   lu.cfg.SetDiskID(dev, node)
  # Only query the node if the device is expected to be assembled there.
4646   if on_primary or dev.AssembleOnSecondary():
4647     rstats = lu.rpc.call_blockdev_find(node, dev)
4648     msg = rstats.fail_msg
4650       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4652     elif not rstats.payload:
4653       lu.LogWarning("Can't find disk on node %s", node)
4657       result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4659       result = result and not rstats.payload.is_degraded
  # Recurse into children so the whole device tree is checked.
4662   for child in dev.children:
4663     result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4669 class LUOobCommand(NoHooksLU):
4670   """Logical unit for OOB handling.

  Runs an out-of-band command (power on/off/cycle/status, health) on one
  or more nodes via each node's OOB helper program.
  NOTE(review): this listing is gapped; several guard lines ("else:",
  "if msg:", "try:") are missing from view.
  """
  # Commands that must not be run against the master node itself.
4674   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

4676   def ExpandNames(self):
4677     """Gather locks we need.

    Locks the named nodes, or all nodes when none were given.
    """
4680     if self.op.node_names:
4681       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4682       lock_names = self.op.node_names
4684       lock_names = locking.ALL_SET
4686     self.needed_locks = {
4687       locking.LEVEL_NODE: lock_names,

4690   def CheckPrereq(self):
4691     """Check prerequisites.

4694     - the node exists in the configuration
4697     Any errors are signaled by raising errors.OpPrereqError.

    """
4701     self.master_node = self.cfg.GetMasterNode()
4703     assert self.op.power_delay >= 0.0
    # Refuse destructive power commands on the master; hint how to work
    # on it anyway when it has an OOB handler of its own.
4705     if self.op.node_names:
4706       if (self.op.command in self._SKIP_MASTER and
4707           self.master_node in self.op.node_names):
4708         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4709         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4711         if master_oob_handler:
4712           additional_text = ("run '%s %s %s' if you want to operate on the"
4713                              " master regardless") % (master_oob_handler,
4717           additional_text = "it does not support out-of-band operations"
4719         raise errors.OpPrereqError(("Operating on the master node %s is not"
4720                                     " allowed for %s; %s") %
4721                                    (self.master_node, self.op.command,
4722                                     additional_text), errors.ECODE_INVAL)
4724       self.op.node_names = self.cfg.GetNodeList()
4725       if self.op.command in self._SKIP_MASTER:
4726         self.op.node_names.remove(self.master_node)
4728     if self.op.command in self._SKIP_MASTER:
4729       assert self.master_node not in self.op.node_names
4731     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4733         raise errors.OpPrereqError("Node %s not found" % node_name,
4736         self.nodes.append(node)
    # Powering off an online node is refused unless explicitly forced.
4738       if (not self.op.ignore_status and
4739           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4740         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4741                                     " not marked offline") % node_name,

4744   def Exec(self, feedback_fn):
4745     """Execute OOB and return result if we expect any.

    Runs the OOB program (via the master node) for each target node in
    stable name order; returns a per-node list of (status, data) tuples.
    """
4748     master_node = self.master_node
4751     for idx, node in enumerate(utils.NiceSort(self.nodes,
4752                                               key=lambda node: node.name)):
4753       node_entry = [(constants.RS_NORMAL, node.name)]
4754       ret.append(node_entry)
4756       oob_program = _SupportsOob(self.cfg, node)
4759         node_entry.append((constants.RS_UNAVAIL, None))
4762         logging.info("Executing out-of-band command '%s' using '%s' on %s",
4763                      self.op.command, oob_program, node.name)
4764         result = self.rpc.call_run_oob(master_node, oob_program,
4765                                        self.op.command, node.name,
4769           self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4770                           node.name, result.fail_msg)
4771           node_entry.append((constants.RS_NODATA, None))
4774             self._CheckPayload(result)
4775           except errors.OpExecError, err:
4776             self.LogWarning("Payload returned by node '%s' is not valid: %s",
4778             node_entry.append((constants.RS_NODATA, None))
4780             if self.op.command == constants.OOB_HEALTH:
4781               # For health we should log important events
4782               for item, status in result.payload:
4783                 if status in [constants.OOB_STATUS_WARNING,
4784                               constants.OOB_STATUS_CRITICAL]:
4785                   self.LogWarning("Item '%s' on node '%s' has status '%s'",
4786                                   item, node.name, status)
4788             if self.op.command == constants.OOB_POWER_ON:
4790             elif self.op.command == constants.OOB_POWER_OFF:
4791               node.powered = False
4792             elif self.op.command == constants.OOB_POWER_STATUS:
4793               powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4794               if powered != node.powered:
4795                 logging.warning(("Recorded power state (%s) of node '%s' does not"
4796                                  " match actual power state (%s)"), node.powered,
4799             # For configuration changing commands we should update the node
4800             if self.op.command in (constants.OOB_POWER_ON,
4801                                    constants.OOB_POWER_OFF):
4802               self.cfg.Update(node, feedback_fn)
4804             node_entry.append((constants.RS_NORMAL, result.payload))
    # Stagger power-on of successive nodes by the configured delay.
4806             if (self.op.command == constants.OOB_POWER_ON and
4807                 idx < len(self.nodes) - 1):
4808               time.sleep(self.op.power_delay)

4812   def _CheckPayload(self, result):
4813     """Checks if the payload is valid.

4815     @param result: RPC result
4816     @raises errors.OpExecError: If payload is not valid

    """
4820     if self.op.command == constants.OOB_HEALTH:
4821       if not isinstance(result.payload, list):
4822         errs.append("command 'health' is expected to return a list but got %s" %
4823                     type(result.payload))
4825         for item, status in result.payload:
4826           if status not in constants.OOB_STATUSES:
4827             errs.append("health item '%s' has invalid status '%s'" %
4830     if self.op.command == constants.OOB_POWER_STATUS:
4831       if not isinstance(result.payload, dict):
4832         errs.append("power-status is expected to return a dict but got %s" %
4833                     type(result.payload))
    # Power on/off/cycle are expected to return no payload at all.
4835     if self.op.command in [
4836         constants.OOB_POWER_ON,
4837         constants.OOB_POWER_OFF,
4838         constants.OOB_POWER_CYCLE,
4840       if result.payload is not None:
4841         errs.append("%s is expected to not return payload but got '%s'" %
4842                     (self.op.command, result.payload))
4845       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4846                                utils.CommaJoin(errs))
4849 class _OsQuery(_QueryBase):
  # Query backend for OS information; see query.OS_FIELDS for the
  # supported fields.
4850   FIELDS = query.OS_FIELDS

4852   def ExpandNames(self, lu):
4853     # Lock all nodes in shared mode
4854     # Temporary removal of locks, should be reverted later
4855     # TODO: reintroduce locks when they are lighter-weight
4856     lu.needed_locks = {}
4857     #self.share_locks[locking.LEVEL_NODE] = 1
4858     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

4860     # The following variables interact with _QueryBase._GetNames
4862       self.wanted = self.names
4864       self.wanted = locking.ALL_SET

4866     self.do_locking = self.use_locking

4868   def DeclareLocks(self, lu, level):
    # No per-level locks are declared for OS queries.

4872   def _DiagnoseByOS(rlist):
4873     """Remaps a per-node return list into an a per-os per-node dictionary

4875     @param rlist: a map with node names as keys and OS objects as values

4878     @return: a dictionary with osnames as keys and as value another
4879         map, with nodes as keys and tuples of (path, status, diagnose,
4880         variants, parameters, api_versions) as values, eg::

4882           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4883                                      (/srv/..., False, "invalid api")],
4884                            "node2": [(/srv/..., True, "", [], [])]}

    """
    # NOTE(review): "@staticmethod" and the "all_os = {}" initialization
    # are missing from this listing.
4889     # we build here the list of nodes that didn't fail the RPC (at RPC
4890     # level), so that nodes with a non-responding node daemon don't
4891     # make all OSes invalid
4892     good_nodes = [node_name for node_name in rlist
4893                   if not rlist[node_name].fail_msg]
4894     for node_name, nr in rlist.items():
4895       if nr.fail_msg or not nr.payload:
4897       for (name, path, status, diagnose, variants,
4898            params, api_versions) in nr.payload:
4899         if name not in all_os:
4900           # build a list of nodes for this os containing empty lists
4901           # for each node in node_list
4903           for nname in good_nodes:
4904             all_os[name][nname] = []
4905         # convert params from [name, help] to (name, help)
4906         params = [tuple(v) for v in params]
4907         all_os[name][node_name].append((path, status, diagnose,
4908                                         variants, params, api_versions))

4911   def _GetQueryData(self, lu):
4912     """Computes the list of nodes and their attributes.

    """
4915     # Locking is not used
4916     assert not (compat.any(lu.glm.is_owned(level)
4917                            for level in locking.LEVELS
4918                            if level != locking.LEVEL_CLUSTER) or
4919                 self.do_locking or self.use_locking)

    # Only online, vm-capable nodes are queried for their OS list.
4921     valid_nodes = [node.name
4922                    for node in lu.cfg.GetAllNodesInfo().values()
4923                    if not node.offline and node.vm_capable]
4924     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4925     cluster = lu.cfg.GetClusterInfo()

4929     for (os_name, os_data) in pol.items():
4930       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4931                           hidden=(os_name in cluster.hidden_os),
4932                           blacklisted=(os_name in cluster.blacklisted_os))

4936       api_versions = set()

      # The OS is only valid if it is valid on every node reporting it.
4938       for idx, osl in enumerate(os_data.values()):
4939         info.valid = bool(info.valid and osl and osl[0][1])

4943         (node_variants, node_params, node_api) = osl[0][3:6]
4946           variants.update(node_variants)
4947           parameters.update(node_params)
4948           api_versions.update(node_api)
4950           # Filter out inconsistent values
4951           variants.intersection_update(node_variants)
4952           parameters.intersection_update(node_params)
4953           api_versions.intersection_update(node_api)

4955       info.variants = list(variants)
4956       info.parameters = list(parameters)
4957       info.api_versions = list(api_versions)

4959       data[os_name] = info

4961     # Prepare data in requested order
4962     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4966 class LUOsDiagnose(NoHooksLU):
4967   """Logical unit for OS diagnose/query.

  """
  # NOTE(review): a "@staticmethod" decorator before _BuildFilter is
  # missing from this listing (the method takes no self/cls).
4973   def _BuildFilter(fields, names):
4974     """Builds a filter for querying OSes.

    Combines an optional name filter with a legacy status filter.
    """
4977     name_filter = qlang.MakeSimpleFilter("name", names)

4979     # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4980     # respective field is not requested
4981     status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4982                      for fname in ["hidden", "blacklisted"]
4983                      if fname not in fields]
4984     if "valid" not in fields:
4985       status_filter.append([qlang.OP_TRUE, "valid"])
4988       status_filter.insert(0, qlang.OP_AND)
4990       status_filter = None

    # Combine both filters when present, otherwise fall back to
    # whichever one exists (or None).
4992     if name_filter and status_filter:
4993       return [qlang.OP_AND, name_filter, status_filter]
4997       return status_filter

4999   def CheckArguments(self):
    # use_locking=False: OS queries run without node locks.
5000     self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5001                        self.op.output_fields, False)

5003   def ExpandNames(self):
5004     self.oq.ExpandNames(self)

5006   def Exec(self, feedback_fn):
5007     return self.oq.OldStyleQuery(self)
5010 class LUNodeRemove(LogicalUnit):
5011   """Logical unit for removing a node.

  """
5014   HPATH = "node-remove"
5015   HTYPE = constants.HTYPE_NODE

5017   def BuildHooksEnv(self):
    # Hook environment: only the target node's name is exported.
5022       "OP_TARGET": self.op.node_name,
5023       "NODE_NAME": self.op.node_name,

5026   def BuildHooksNodes(self):
5027     """Build hooks nodes.

5029     This doesn't run on the target node in the pre phase as a failed
5030     node would then be impossible to remove.

    """
5033     all_nodes = self.cfg.GetNodeList()
5035       all_nodes.remove(self.op.node_name)
5038     return (all_nodes, all_nodes)

5040   def CheckPrereq(self):
5041     """Check prerequisites.

5044     - the node exists in the configuration
5045     - it does not have primary or secondary instances
5046     - it's not the master

5048     Any errors are signaled by raising errors.OpPrereqError.

    """
5051     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5052     node = self.cfg.GetNodeInfo(self.op.node_name)
5053     assert node is not None

5055     masternode = self.cfg.GetMasterNode()
5056     if node.name == masternode:
5057       raise errors.OpPrereqError("Node is the master node, failover to another"
5058                                  " node is required", errors.ECODE_INVAL)

5060     for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5061       if node.name in instance.all_nodes:
5062         raise errors.OpPrereqError("Instance %s is still running on the node,"
5063                                    " please remove first" % instance_name,
5065     self.op.node_name = node.name

5068   def Exec(self, feedback_fn):
5069     """Removes the node from the cluster.

    """
    # NOTE(review): the line binding "node" (the node object saved in
    # CheckPrereq) is missing from this listing.
5073     logging.info("Stopping the node daemon and removing configs from node %s",

5076     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

5078     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \

5081     # Promote nodes to master candidate as needed
5082     _AdjustCandidatePool(self, exceptions=[node.name])
5083     self.context.RemoveNode(node.name)

5085     # Run post hooks on the node before it's removed
5086     _RunPostHook(self, node.name)

    # Leaving the cluster is best-effort: failures only produce warnings.
5088     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5089     msg = result.fail_msg
5091       self.LogWarning("Errors encountered on the remote node while leaving"
5092                       " the cluster: %s", msg)

5094     # Remove node from our /etc/hosts
5095     if self.cfg.GetClusterInfo().modify_etc_hosts:
5096       master_node = self.cfg.GetMasterNode()
5097       result = self.rpc.call_etc_hosts_modify(master_node,
5098                                               constants.ETC_HOSTS_REMOVE,
5100       result.Raise("Can't update hosts file with new host data")
5101       _RedistributeAncillaryFiles(self)
5104 class _NodeQuery(_QueryBase):
  # Query backend for node information; see query.NODE_FIELDS.
5105   FIELDS = query.NODE_FIELDS

5107   def ExpandNames(self, lu):
5108     lu.needed_locks = {}
5109     lu.share_locks = _ShareAll()
5112       self.wanted = _GetWantedNodes(lu, self.names)
5114       self.wanted = locking.ALL_SET

    # Locking is only needed when live (NQ_LIVE) data was requested.
5116     self.do_locking = (self.use_locking and
5117                        query.NQ_LIVE in self.requested_data)
5120       # If any non-static field is requested we need to lock the nodes
5121       lu.needed_locks[locking.LEVEL_NODE] = self.wanted

5123   def DeclareLocks(self, lu, level):
    # No additional per-level locks.

5126   def _GetQueryData(self, lu):
5127     """Computes the list of nodes and their attributes.

    """
5130     all_info = lu.cfg.GetAllNodesInfo()

5132     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

5134     # Gather data as requested
5135     if query.NQ_LIVE in self.requested_data:
5136       # filter out non-vm_capable nodes
5137       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

5139       node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5140                                         [lu.cfg.GetHypervisorType()])
      # Failed/empty per-node results are silently dropped from live_data.
5141       live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5142                        for (name, nresult) in node_data.items()
5143                        if not nresult.fail_msg and nresult.payload)

5147     if query.NQ_INST in self.requested_data:
5148       node_to_primary = dict([(name, set()) for name in nodenames])
5149       node_to_secondary = dict([(name, set()) for name in nodenames])

5151       inst_data = lu.cfg.GetAllInstancesInfo()

5153       for inst in inst_data.values():
5154         if inst.primary_node in node_to_primary:
5155           node_to_primary[inst.primary_node].add(inst.name)
5156         for secnode in inst.secondary_nodes:
5157           if secnode in node_to_secondary:
5158             node_to_secondary[secnode].add(inst.name)
5160       node_to_primary = None
5161       node_to_secondary = None

5163     if query.NQ_OOB in self.requested_data:
5164       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5165                          for name, node in all_info.iteritems())

5169     if query.NQ_GROUP in self.requested_data:
5170       groups = lu.cfg.GetAllNodeGroupsInfo()

5174     return query.NodeQueryData([all_info[name] for name in nodenames],
5175                                live_data, lu.cfg.GetMasterNode(),
5176                                node_to_primary, node_to_secondary, groups,
5177                                oob_support, lu.cfg.GetClusterInfo())
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  Thin delegation layer: all real work happens in L{_NodeQuery}.

  """
  # pylint: disable=W0142

  def CheckArguments(self):
    """Build the node query object from the opcode parameters."""
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.nq = _NodeQuery(name_filter, self.op.output_fields,
                         self.op.use_locking)

  def ExpandNames(self):
    """Delegate name/lock expansion to the query object."""
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Delegate per-level lock declaration to the query object."""
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query and return the old-style result list."""
    return self.nq.OldStyleQuery(self)
5201 class LUNodeQueryvols(NoHooksLU):
5202   """Logical unit for getting volumes on node(s).

  """
5206   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5207   _FIELDS_STATIC = utils.FieldSet("node")

5209   def CheckArguments(self):
5210     _CheckOutputFields(static=self._FIELDS_STATIC,
5211                        dynamic=self._FIELDS_DYNAMIC,
5212                        selected=self.op.output_fields)

5214   def ExpandNames(self):
    # Shared locks: this LU only reads node state.
5215     self.share_locks = _ShareAll()
5216     self.needed_locks = {}

5218     if not self.op.nodes:
5219       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5221       self.needed_locks[locking.LEVEL_NODE] = \
5222         _GetWantedNodes(self, self.op.nodes)

5224   def Exec(self, feedback_fn):
5225     """Computes the list of nodes and their attributes.

    """
    # NOTE(review): several lines are missing from this listing (e.g. the
    # "if msg:" guard and the per-volume field branches for "node"/"vg").
5228     nodenames = self.owned_locks(locking.LEVEL_NODE)
5229     volumes = self.rpc.call_node_volumes(nodenames)

5231     ilist = self.cfg.GetAllInstancesInfo()
5232     vol2inst = _MapInstanceDisksToNodes(ilist.values())

5235     for node in nodenames:
5236       nresult = volumes[node]
5239       msg = nresult.fail_msg
5241         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)

      # Sort volumes by physical device for stable output.
5244       node_vols = sorted(nresult.payload,
5245                          key=operator.itemgetter("dev"))

5247       for vol in node_vols:
5249         for field in self.op.output_fields:
5252           elif field == "phys":
5256           elif field == "name":
5258           elif field == "size":
5259             val = int(float(vol["size"]))
5260           elif field == "instance":
            # "-" when no instance owns this LV.
5261             val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5263             raise errors.ParameterError(field)
5264           node_output.append(str(val))

5266         output.append(node_output)
5271 class LUNodeQueryStorage(NoHooksLU):
5272   """Logical unit for getting information on storage units on node(s).

  """
5275   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)

5278   def CheckArguments(self):
5279     _CheckOutputFields(static=self._FIELDS_STATIC,
5280                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5281                        selected=self.op.output_fields)

5283   def ExpandNames(self):
    # Shared locks: this LU only reads storage state.
5284     self.share_locks = _ShareAll()
5285     self.needed_locks = {}

5288       self.needed_locks[locking.LEVEL_NODE] = \
5289         _GetWantedNodes(self, self.op.nodes)
5291       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

5293   def Exec(self, feedback_fn):
5294     """Computes the list of nodes and their attributes.

    """
    # NOTE(review): this listing is gapped (result accumulation and some
    # branch lines are missing from view).
5297     self.nodes = self.owned_locks(locking.LEVEL_NODE)

5299     # Always get name to sort by
5300     if constants.SF_NAME in self.op.output_fields:
5301       fields = self.op.output_fields[:]
5303       fields = [constants.SF_NAME] + self.op.output_fields

5305     # Never ask for node or type as it's only known to the LU
5306     for extra in [constants.SF_NODE, constants.SF_TYPE]:
5307       while extra in fields:
5308         fields.remove(extra)

    # Map field name -> index in the RPC result rows.
5310     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5311     name_idx = field_idx[constants.SF_NAME]

5313     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5314     data = self.rpc.call_storage_list(self.nodes,
5315                                       self.op.storage_type, st_args,
5316                                       self.op.name, fields)

5320     for node in utils.NiceSort(self.nodes):
5321       nresult = data[node]
5325       msg = nresult.fail_msg
5327         self.LogWarning("Can't get storage data from node %s: %s", node, msg)

      # Index rows by unit name so output can be sorted by name.
5330       rows = dict([(row[name_idx], row) for row in nresult.payload])

5332       for name in utils.NiceSort(rows.keys()):
5337         for field in self.op.output_fields:
5338           if field == constants.SF_NODE:
5340           elif field == constants.SF_TYPE:
5341             val = self.op.storage_type
5342           elif field in field_idx:
5343             val = row[field_idx[field]]
5345             raise errors.ParameterError(field)
# Query implementation for instances: gathers config, live-state,
# disk-usage, console and node data as requested by the query fields.
5354 class _InstanceQuery(_QueryBase):
5355 FIELDS = query.INSTANCE_FIELDS
# Declare instance locks (plus nodegroup/node locks when live data or
# node data is requested).
5357 def ExpandNames(self, lu):
5358 lu.needed_locks = {}
5359 lu.share_locks = _ShareAll()
5362 self.wanted = _GetWantedInstances(lu, self.names)
5364 self.wanted = locking.ALL_SET
# Locking is only needed when live data is requested.
5366 self.do_locking = (self.use_locking and
5367 query.IQ_LIVE in self.requested_data)
5369 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5370 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5371 lu.needed_locks[locking.LEVEL_NODE] = []
5372 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5374 self.do_grouplocks = (self.do_locking and
5375 query.IQ_NODES in self.requested_data)
5377 def DeclareLocks(self, lu, level):
5379 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5380 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5382 # Lock all groups used by instances optimistically; this requires going
5383 # via the node before it's locked, requiring verification later on
5384 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5386 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5387 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5388 elif level == locking.LEVEL_NODE:
5389 lu._LockInstancesNodes() # pylint: disable=W0212
# Verify that the optimistically-acquired group locks still match the
# groups the locked instances actually belong to.
5392 def _CheckGroupLocks(lu):
5393 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5394 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5396 # Check if node groups for locked instances are still correct
5397 for instance_name in owned_instances:
5398 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5400 def _GetQueryData(self, lu):
5401 """Computes the list of instances and their attributes.
5404 if self.do_grouplocks:
5405 self._CheckGroupLocks(lu)
5407 cluster = lu.cfg.GetClusterInfo()
5408 all_info = lu.cfg.GetAllInstancesInfo()
5410 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5412 instance_list = [all_info[name] for name in instance_names]
# All nodes touched by the selected instances and their hypervisors,
# needed for the live-data RPC below.
5413 nodes = frozenset(itertools.chain(*(inst.all_nodes
5414 for inst in instance_list)))
5415 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5418 wrongnode_inst = set()
5420 # Gather data as requested
5421 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5423 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5425 result = node_data[name]
5427 # offline nodes will be in both lists
5428 assert result.fail_msg
5429 offline_nodes.append(name)
5431 bad_nodes.append(name)
5432 elif result.payload:
5433 for inst in result.payload:
5434 if inst in all_info:
5435 if all_info[inst].primary_node == name:
5436 live_data.update(result.payload)
# Instance reported alive on a node that is not its primary.
5438 wrongnode_inst.add(inst)
5440 # orphan instance; we don't list it here as we don't
5441 # handle this case yet in the output of instance listing
5442 logging.warning("Orphan instance '%s' found on node %s",
5444 # else no instance is alive
# Disk usage is computed from configured disk sizes, not live data.
5448 if query.IQ_DISKUSAGE in self.requested_data:
5449 gmi = ganeti.masterd.instance
5450 disk_usage = dict((inst.name,
5451 gmi.ComputeDiskSize(inst.disk_template,
5452 [{constants.IDISK_SIZE: disk.size}
5453 for disk in inst.disks]))
5454 for inst in instance_list)
# Console info only exists for instances reported as running.
5458 if query.IQ_CONSOLE in self.requested_data:
5460 for inst in instance_list:
5461 if inst.name in live_data:
5462 # Instance is running
5463 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5465 consinfo[inst.name] = None
5466 assert set(consinfo.keys()) == set(instance_names)
5470 if query.IQ_NODES in self.requested_data:
5471 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5473 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5474 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5475 for uuid in set(map(operator.attrgetter("group"),
5481 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5482 disk_usage, offline_nodes, bad_nodes,
5483 live_data, wrongnode_inst, consinfo,
# Generic query LU: dispatches to the query implementation matching the
# requested resource kind (op.what) and runs a new-style query.
5487 class LUQuery(NoHooksLU):
5488 """Query for resources/items of a certain kind.
5491 # pylint: disable=W0142
# Resolve the query implementation class and instantiate it with the
# filter, field list and locking choice from the opcode.
5494 def CheckArguments(self):
5495 qcls = _GetQueryImplementation(self.op.what)
5497 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5499 def ExpandNames(self):
5500 self.impl.ExpandNames(self)
5502 def DeclareLocks(self, level):
5503 self.impl.DeclareLocks(self, level)
5505 def Exec(self, feedback_fn):
5506 return self.impl.NewStyleQuery(self)
# LU that returns the field definitions of a query kind; needs no locks
# since it only inspects static field declarations.
5509 class LUQueryFields(NoHooksLU):
5510 """Query for resources/items of a certain kind.
5513 # pylint: disable=W0142
5516 def CheckArguments(self):
5517 self.qcls = _GetQueryImplementation(self.op.what)
5519 def ExpandNames(self):
5520 self.needed_locks = {}
5522 def Exec(self, feedback_fn):
5523 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
# LU that changes parameters of a single storage unit on one node via
# the storage_modify RPC.
5526 class LUNodeModifyStorage(NoHooksLU):
5527 """Logical unit for modifying a storage volume on a node.
# Validate that the storage type supports modification and that only
# modifiable fields are being changed.
5532 def CheckArguments(self):
5533 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5535 storage_type = self.op.storage_type
5538 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5540 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5541 " modified" % storage_type,
# Reject any requested change outside the modifiable field set.
5544 diff = set(self.op.changes.keys()) - modifiable
5546 raise errors.OpPrereqError("The following fields can not be modified for"
5547 " storage units of type '%s': %r" %
5548 (storage_type, list(diff)),
# Only the target node needs to be locked.
5551 def ExpandNames(self):
5552 self.needed_locks = {
5553 locking.LEVEL_NODE: self.op.node_name,
5556 def Exec(self, feedback_fn):
5557 """Computes the list of nodes and their attributes.
5560 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5561 result = self.rpc.call_storage_modify(self.op.node_name,
5562 self.op.storage_type, st_args,
5563 self.op.name, self.op.changes)
# An RPC failure here aborts the job with an OpExecError.
5564 result.Raise("Failed to modify storage unit '%s' on %s" %
5565 (self.op.name, self.op.node_name))
# LU that adds a new node to the cluster (or re-adds an existing one
# with op.readd): validates addressing, reachability and version, then
# registers the node and redistributes ancillary files.
5568 class LUNodeAdd(LogicalUnit):
5569 """Logical unit for adding node to the cluster.
5573 HTYPE = constants.HTYPE_NODE
# Capability flags copied between opcode and node object.
5574 _NFLAGS = ["master_capable", "vm_capable"]
# Normalize the node name and reject invalid readd combinations.
5576 def CheckArguments(self):
5577 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5578 # validate/normalize the node name
5579 self.hostname = netutils.GetHostname(name=self.op.node_name,
5580 family=self.primary_ip_family)
5581 self.op.node_name = self.hostname.name
5583 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5584 raise errors.OpPrereqError("Cannot readd the master node",
5587 if self.op.readd and self.op.group:
5588 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5589 " being readded", errors.ECODE_INVAL)
# Hook environment describing the node being added.
5591 def BuildHooksEnv(self):
5594 This will run on all nodes before, and on all nodes + the new node after.
5598 "OP_TARGET": self.op.node_name,
5599 "NODE_NAME": self.op.node_name,
5600 "NODE_PIP": self.op.primary_ip,
5601 "NODE_SIP": self.op.secondary_ip,
5602 "MASTER_CAPABLE": str(self.op.master_capable),
5603 "VM_CAPABLE": str(self.op.vm_capable),
5606 def BuildHooksNodes(self):
5607 """Build hooks nodes.
5610 # Exclude added node
5611 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5612 post_nodes = pre_nodes + [self.op.node_name, ]
5614 return (pre_nodes, post_nodes)
5616 def CheckPrereq(self):
5617 """Check prerequisites.
5620 - the new node is not already in the config
5622 - its parameters (single/dual homed) matches the cluster
5624 Any errors are signaled by raising errors.OpPrereqError.
5628 hostname = self.hostname
5629 node = hostname.name
5630 primary_ip = self.op.primary_ip = hostname.ip
# Default the secondary IP to the primary; an IPv6 primary requires an
# explicit IPv4 secondary.
5631 if self.op.secondary_ip is None:
5632 if self.primary_ip_family == netutils.IP6Address.family:
5633 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5634 " IPv4 address must be given as secondary",
5636 self.op.secondary_ip = primary_ip
5638 secondary_ip = self.op.secondary_ip
5639 if not netutils.IP4Address.IsValid(secondary_ip):
5640 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5641 " address" % secondary_ip, errors.ECODE_INVAL)
# Membership checks: add requires absence, readd requires presence.
5643 node_list = cfg.GetNodeList()
5644 if not self.op.readd and node in node_list:
5645 raise errors.OpPrereqError("Node %s is already in the configuration" %
5646 node, errors.ECODE_EXISTS)
5647 elif self.op.readd and node not in node_list:
5648 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5651 self.changed_primary_ip = False
# Check IP uniqueness against every existing node; a readded node must
# keep its secondary IP, while a changed primary IP is remembered.
5653 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5654 if self.op.readd and node == existing_node_name:
5655 if existing_node.secondary_ip != secondary_ip:
5656 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5657 " address configuration as before",
5659 if existing_node.primary_ip != primary_ip:
5660 self.changed_primary_ip = True
5664 if (existing_node.primary_ip == primary_ip or
5665 existing_node.secondary_ip == primary_ip or
5666 existing_node.primary_ip == secondary_ip or
5667 existing_node.secondary_ip == secondary_ip):
5668 raise errors.OpPrereqError("New node ip address(es) conflict with"
5669 " existing node %s" % existing_node.name,
5670 errors.ECODE_NOTUNIQUE)
5672 # After this 'if' block, None is no longer a valid value for the
5673 # _capable op attributes
# On readd, unspecified capability flags are inherited from the old
# node object; otherwise they default to True.
5675 old_node = self.cfg.GetNodeInfo(node)
5676 assert old_node is not None, "Can't retrieve locked node %s" % node
5677 for attr in self._NFLAGS:
5678 if getattr(self.op, attr) is None:
5679 setattr(self.op, attr, getattr(old_node, attr))
5681 for attr in self._NFLAGS:
5682 if getattr(self.op, attr) is None:
5683 setattr(self.op, attr, True)
5685 if self.op.readd and not self.op.vm_capable:
5686 pri, sec = cfg.GetNodeInstances(node)
5688 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5689 " flag set to false, but it already holds"
5690 " instances" % node,
5693 # check that the type of the node (single versus dual homed) is the
5694 # same as for the master
5695 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5696 master_singlehomed = myself.secondary_ip == myself.primary_ip
5697 newbie_singlehomed = secondary_ip == primary_ip
5698 if master_singlehomed != newbie_singlehomed:
5699 if master_singlehomed:
5700 raise errors.OpPrereqError("The master has no secondary ip but the"
5701 " new node has one",
5704 raise errors.OpPrereqError("The master has a secondary ip but the"
5705 " new node doesn't have one",
5708 # checks reachability
5709 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5710 raise errors.OpPrereqError("Node not reachable by ping",
5711 errors.ECODE_ENVIRON)
5713 if not newbie_singlehomed:
5714 # check reachability from my secondary ip to newbie's secondary ip
5715 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5716 source=myself.secondary_ip):
5717 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5718 " based ping to node daemon port",
5719 errors.ECODE_ENVIRON)
# Decide master-candidate status; only master-capable nodes may be
# self-promoted.
5726 if self.op.master_capable:
5727 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5729 self.master_candidate = False
# On readd reuse the existing node object; otherwise build a fresh
# Node in the requested (or looked-up) node group.
5732 self.new_node = old_node
5734 node_group = cfg.LookupNodeGroup(self.op.group)
5735 self.new_node = objects.Node(name=node,
5736 primary_ip=primary_ip,
5737 secondary_ip=secondary_ip,
5738 master_candidate=self.master_candidate,
5739 offline=False, drained=False,
5742 if self.op.ndparams:
5743 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5745 if self.op.hv_state:
5746 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5748 if self.op.disk_state:
5749 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5751 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5752 # it a property on the base class.
# Verify the remote daemon speaks exactly our protocol version.
5753 result = rpc.DnsOnlyRunner().call_version([node])[node]
5754 result.Raise("Can't get version information from node %s" % node)
5755 if constants.PROTOCOL_VERSION == result.payload:
5756 logging.info("Communication to node %s fine, sw version %s match",
5757 node, result.payload)
5759 raise errors.OpPrereqError("Version mismatch master version %s,"
5760 " node version %s" %
5761 (constants.PROTOCOL_VERSION, result.payload),
5762 errors.ECODE_ENVIRON)
5764 def Exec(self, feedback_fn):
5765 """Adds the new node to the cluster.
5768 new_node = self.new_node
5769 node = new_node.name
5771 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5774 # We adding a new node so we assume it's powered
5775 new_node.powered = True
5777 # for re-adds, reset the offline/drained/master-candidate flags;
5778 # we need to reset here, otherwise offline would prevent RPC calls
5779 # later in the procedure; this also means that if the re-add
5780 # fails, we are left with a non-offlined, broken node
5782 new_node.drained = new_node.offline = False # pylint: disable=W0201
5783 self.LogInfo("Readding a node, the offline/drained flags were reset")
5784 # if we demote the node, we do cleanup later in the procedure
5785 new_node.master_candidate = self.master_candidate
5786 if self.changed_primary_ip:
5787 new_node.primary_ip = self.op.primary_ip
5789 # copy the master/vm_capable flags
5790 for attr in self._NFLAGS:
5791 setattr(new_node, attr, getattr(self.op, attr))
5793 # notify the user about any possible mc promotion
5794 if new_node.master_candidate:
5795 self.LogInfo("Node will be a master candidate")
5797 if self.op.ndparams:
5798 new_node.ndparams = self.op.ndparams
5800 new_node.ndparams = {}
5802 if self.op.hv_state:
5803 new_node.hv_state_static = self.new_hv_state
5805 if self.op.disk_state:
5806 new_node.disk_state_static = self.new_disk_state
5808 # Add node to our /etc/hosts, and add key to known_hosts
5809 if self.cfg.GetClusterInfo().modify_etc_hosts:
5810 master_node = self.cfg.GetMasterNode()
5811 result = self.rpc.call_etc_hosts_modify(master_node,
5812 constants.ETC_HOSTS_ADD,
5815 result.Raise("Can't update hosts file with new host data")
5817 if new_node.secondary_ip != new_node.primary_ip:
5818 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
# Run node verification (ssh/hostname) from the master towards the
# new node before committing it to the configuration.
5821 node_verify_list = [self.cfg.GetMasterNode()]
5822 node_verify_param = {
5823 constants.NV_NODELIST: ([node], {}),
5824 # TODO: do a node-net-test as well?
5827 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5828 self.cfg.GetClusterName())
5829 for verifier in node_verify_list:
5830 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5831 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5833 for failed in nl_payload:
5834 feedback_fn("ssh/hostname verification failed"
5835 " (checking from %s): %s" %
5836 (verifier, nl_payload[failed]))
5837 raise errors.OpExecError("ssh/hostname verification failed")
# Readd path: refresh context/config, and demote from MC if needed so
# stale master files are cleaned up on the node.
5840 _RedistributeAncillaryFiles(self)
5841 self.context.ReaddNode(new_node)
5842 # make sure we redistribute the config
5843 self.cfg.Update(new_node, feedback_fn)
5844 # and make sure the new node will not have old files around
5845 if not new_node.master_candidate:
5846 result = self.rpc.call_node_demote_from_mc(new_node.name)
5847 msg = result.fail_msg
5849 self.LogWarning("Node failed to demote itself from master"
5850 " candidate status: %s" % msg)
5852 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5853 additional_vm=self.op.vm_capable)
5854 self.context.AddNode(new_node, self.proc.GetECId())
# LU that modifies node parameters: role flags (master candidate,
# drained, offline), capability flags, secondary IP, ndparams and
# hv/disk state.
5857 class LUNodeSetParams(LogicalUnit):
5858 """Modifies the parameters of a node.
5860 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5861 to the node role (as _ROLE_*)
5862 @cvar _R2F: a dictionary from node role to tuples of flags
5863 @cvar _FLAGS: a list of attribute names corresponding to the flags
5866 HPATH = "node-modify"
5867 HTYPE = constants.HTYPE_NODE
5869 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5871 (True, False, False): _ROLE_CANDIDATE,
5872 (False, True, False): _ROLE_DRAINED,
5873 (False, False, True): _ROLE_OFFLINE,
5874 (False, False, False): _ROLE_REGULAR,
5876 _R2F = dict((v, k) for k, v in _F2R.items())
5877 _FLAGS = ["master_candidate", "drained", "offline"]
# Require at least one modification and at most one True role flag.
5879 def CheckArguments(self):
5880 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5881 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5882 self.op.master_capable, self.op.vm_capable,
5883 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5885 if all_mods.count(None) == len(all_mods):
5886 raise errors.OpPrereqError("Please pass at least one modification",
5888 if all_mods.count(True) > 1:
5889 raise errors.OpPrereqError("Can't set the node into more than one"
5890 " state at the same time",
5893 # Boolean value that tells us whether we might be demoting from MC
5894 self.might_demote = (self.op.master_candidate is False or
5895 self.op.offline is True or
5896 self.op.drained is True or
5897 self.op.master_capable is False)
5899 if self.op.secondary_ip:
5900 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5901 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5902 " address" % self.op.secondary_ip,
# Lock everything when auto-promotion may be required; instances only
# when the secondary IP changes.
5905 self.lock_all = self.op.auto_promote and self.might_demote
5906 self.lock_instances = self.op.secondary_ip is not None
5908 def _InstanceFilter(self, instance):
5909 """Filter for getting affected instances.
# Only internally-mirrored instances that use this node are affected.
5912 return (instance.disk_template in constants.DTS_INT_MIRROR and
5913 self.op.node_name in instance.all_nodes)
5915 def ExpandNames(self):
5917 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5919 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5921 # Since modifying a node can have severe effects on currently running
5922 # operations the resource lock is at least acquired in shared mode
5923 self.needed_locks[locking.LEVEL_NODE_RES] = \
5924 self.needed_locks[locking.LEVEL_NODE]
5926 # Get node resource and instance locks in shared mode; they are not used
5927 # for anything but read-only access
5928 self.share_locks[locking.LEVEL_NODE_RES] = 1
5929 self.share_locks[locking.LEVEL_INSTANCE] = 1
5931 if self.lock_instances:
5932 self.needed_locks[locking.LEVEL_INSTANCE] = \
5933 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5935 def BuildHooksEnv(self):
5938 This runs on the master node.
5942 "OP_TARGET": self.op.node_name,
5943 "MASTER_CANDIDATE": str(self.op.master_candidate),
5944 "OFFLINE": str(self.op.offline),
5945 "DRAINED": str(self.op.drained),
5946 "MASTER_CAPABLE": str(self.op.master_capable),
5947 "VM_CAPABLE": str(self.op.vm_capable),
5950 def BuildHooksNodes(self):
5951 """Build hooks nodes.
5954 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5957 def CheckPrereq(self):
5958 """Check prerequisites.
5960 This only checks the instance list against the existing names.
5963 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5965 if self.lock_instances:
5966 affected_instances = \
5967 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5969 # Verify instance locks
# The set of affected instances may have changed between ExpandNames
# and here; bail out and let the caller retry.
5970 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5971 wanted_instances = frozenset(affected_instances.keys())
5972 if wanted_instances - owned_instances:
5973 raise errors.OpPrereqError("Instances affected by changing node %s's"
5974 " secondary IP address have changed since"
5975 " locks were acquired, wanted '%s', have"
5976 " '%s'; retry the operation" %
5978 utils.CommaJoin(wanted_instances),
5979 utils.CommaJoin(owned_instances)),
5982 affected_instances = None
5984 if (self.op.master_candidate is not None or
5985 self.op.drained is not None or
5986 self.op.offline is not None):
5987 # we can't change the master's node flags
5988 if self.op.node_name == self.cfg.GetMasterNode():
5989 raise errors.OpPrereqError("The master role can be changed"
5990 " only via master-failover",
5993 if self.op.master_candidate and not node.master_capable:
5994 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5995 " it a master candidate" % node.name,
5998 if self.op.vm_capable is False:
5999 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6001 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6002 " the vm_capable flag" % node.name,
# Without lock_all we cannot auto-promote, so demotions must not drop
# below the required number of master candidates.
6005 if node.master_candidate and self.might_demote and not self.lock_all:
6006 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6007 # check if after removing the current node, we're missing master
6009 (mc_remaining, mc_should, _) = \
6010 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6011 if mc_remaining < mc_should:
6012 raise errors.OpPrereqError("Not enough master candidates, please"
6013 " pass auto promote option to allow"
6014 " promotion (--auto-promote or RAPI"
6015 " auto_promote=True)", errors.ECODE_STATE)
6017 self.old_flags = old_flags = (node.master_candidate,
6018 node.drained, node.offline)
6019 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6020 self.old_role = old_role = self._F2R[old_flags]
6022 # Check for ineffective changes
6023 for attr in self._FLAGS:
6024 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6025 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6026 setattr(self.op, attr, None)
6028 # Past this point, any flag change to False means a transition
6029 # away from the respective state, as only real changes are kept
6031 # TODO: We might query the real power state if it supports OOB
6032 if _SupportsOob(self.cfg, node):
6033 if self.op.offline is False and not (node.powered or
6034 self.op.powered is True):
6035 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6036 " offline status can be reset") %
6037 self.op.node_name, errors.ECODE_STATE)
6038 elif self.op.powered is not None:
6039 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6040 " as it does not support out-of-band"
6041 " handling") % self.op.node_name,
6044 # If we're being deofflined/drained, we'll MC ourself if needed
6045 if (self.op.drained is False or self.op.offline is False or
6046 (self.op.master_capable and not node.master_capable)):
6047 if _DecideSelfPromotion(self):
6048 self.op.master_candidate = True
6049 self.LogInfo("Auto-promoting node to master candidate")
6051 # If we're no longer master capable, we'll demote ourselves from MC
6052 if self.op.master_capable is False and node.master_candidate:
6053 self.LogInfo("Demoting from master candidate")
6054 self.op.master_candidate = False
# Compute the new role from the (at most one) True flag, or from the
# presence of a False flag (un-setting), else keep the old role.
6057 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6058 if self.op.master_candidate:
6059 new_role = self._ROLE_CANDIDATE
6060 elif self.op.drained:
6061 new_role = self._ROLE_DRAINED
6062 elif self.op.offline:
6063 new_role = self._ROLE_OFFLINE
6064 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6065 # False is still in new flags, which means we're un-setting (the
6067 new_role = self._ROLE_REGULAR
6068 else: # no new flags, nothing, keep old role
6071 self.new_role = new_role
6073 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6074 # Trying to transition out of offline status
6075 result = self.rpc.call_version([node.name])[node.name]
6077 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6078 " to report its version: %s" %
6079 (node.name, result.fail_msg),
6082 self.LogWarning("Transitioning node from offline to online state"
6083 " without using re-add. Please make sure the node"
6086 # When changing the secondary ip, verify if this is a single-homed to
6087 # multi-homed transition or vice versa, and apply the relevant
6089 if self.op.secondary_ip:
6090 # Ok even without locking, because this can't be changed by any LU
6091 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6092 master_singlehomed = master.secondary_ip == master.primary_ip
6093 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6094 if self.op.force and node.name == master.name:
6095 self.LogWarning("Transitioning from single-homed to multi-homed"
6096 " cluster. All nodes will require a secondary ip.")
6098 raise errors.OpPrereqError("Changing the secondary ip on a"
6099 " single-homed cluster requires the"
6100 " --force option to be passed, and the"
6101 " target node to be the master",
6103 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6104 if self.op.force and node.name == master.name:
6105 self.LogWarning("Transitioning from multi-homed to single-homed"
6106 " cluster. Secondary IPs will have to be removed.")
6108 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6109 " same as the primary IP on a multi-homed"
6110 " cluster, unless the --force option is"
6111 " passed, and the target node is the"
6112 " master", errors.ECODE_INVAL)
6114 assert not (frozenset(affected_instances) -
6115 self.owned_locks(locking.LEVEL_INSTANCE))
# Offline nodes must not have instances configured to use the old IP.
6118 if affected_instances:
6119 msg = ("Cannot change secondary IP address: offline node has"
6120 " instances (%s) configured to use it" %
6121 utils.CommaJoin(affected_instances.keys()))
6122 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6124 # On online nodes, check that no instances are running, and that
6125 # the node has the new ip and we can reach it.
6126 for instance in affected_instances.values():
6127 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6128 msg="cannot change secondary ip")
6130 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6131 if master.name != node.name:
6132 # check reachability from master secondary ip to new secondary ip
6133 if not netutils.TcpPing(self.op.secondary_ip,
6134 constants.DEFAULT_NODED_PORT,
6135 source=master.secondary_ip):
6136 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6137 " based ping to node daemon port",
6138 errors.ECODE_ENVIRON)
6140 if self.op.ndparams:
6141 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6142 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6143 self.new_ndparams = new_ndparams
6145 if self.op.hv_state:
6146 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6147 self.node.hv_state_static)
6149 if self.op.disk_state:
6150 self.new_disk_state = \
6151 _MergeAndVerifyDiskState(self.op.disk_state,
6152 self.node.disk_state_static)
# Apply the validated changes to the node object and write the config.
6154 def Exec(self, feedback_fn):
6159 old_role = self.old_role
6160 new_role = self.new_role
6164 if self.op.ndparams:
6165 node.ndparams = self.new_ndparams
6167 if self.op.powered is not None:
6168 node.powered = self.op.powered
6170 if self.op.hv_state:
6171 node.hv_state_static = self.new_hv_state
6173 if self.op.disk_state:
6174 node.disk_state_static = self.new_disk_state
6176 for attr in ["master_capable", "vm_capable"]:
6177 val = getattr(self.op, attr)
6179 setattr(node, attr, val)
6180 result.append((attr, str(val)))
6182 if new_role != old_role:
6183 # Tell the node to demote itself, if no longer MC and not offline
6184 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6185 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6187 self.LogWarning("Node failed to demote itself: %s", msg)
6189 new_flags = self._R2F[new_role]
6190 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6192 result.append((desc, str(nf)))
6193 (node.master_candidate, node.drained, node.offline) = new_flags
6195 # we locked all nodes, we adjust the CP before updating this node
6197 _AdjustCandidatePool(self, [node.name])
6199 if self.op.secondary_ip:
6200 node.secondary_ip = self.op.secondary_ip
6201 result.append(("secondary_ip", self.op.secondary_ip))
6203 # this will trigger configuration file update, if needed
6204 self.cfg.Update(node, feedback_fn)
6206 # this will trigger job queue propagation or cleanup if the mc
6208 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6209 self.context.ReaddNode(node)
# LU that asks a node daemon to powercycle (reboot) its own machine.
6214 class LUNodePowercycle(NoHooksLU):
6215 """Powercycles a node.
# Refuse to powercycle the master unless --force is given.
6220 def CheckArguments(self):
6221 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6222 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6223 raise errors.OpPrereqError("The node is the master and the force"
6224 " parameter was not set",
6227 def ExpandNames(self):
6228 """Locking for PowercycleNode.
6230 This is a last-resort option and shouldn't block on other
6231 jobs. Therefore, we grab no locks.
6234 self.needed_locks = {}
6236 def Exec(self, feedback_fn):
# Fire the powercycle RPC; a failure to schedule raises OpExecError.
6240 result = self.rpc.call_node_powercycle(self.op.node_name,
6241 self.cfg.GetHypervisorType())
6242 result.Raise("Failed to schedule the reboot")
6243 return result.payload
# LU that returns a dictionary describing the cluster configuration;
# purely read-only, so it takes no locks.
6246 class LUClusterQuery(NoHooksLU):
6247 """Query cluster configuration.
6252 def ExpandNames(self):
6253 self.needed_locks = {}
6255 def Exec(self, feedback_fn):
6256 """Return cluster config.
6259 cluster = self.cfg.GetClusterInfo()
6262 # Filter just for enabled hypervisors
6263 for os_name, hv_dict in cluster.os_hvp.items():
6264 os_hvp[os_name] = {}
6265 for hv_name, hv_params in hv_dict.items():
6266 if hv_name in cluster.enabled_hypervisors:
6267 os_hvp[os_name][hv_name] = hv_params
6269 # Convert ip_family to ip_version
6270 primary_ip_version = constants.IP4_VERSION
6271 if cluster.primary_ip_family == netutils.IP6Address.family:
6272 primary_ip_version = constants.IP6_VERSION
# Assemble the result dictionary from constants and cluster fields.
6275 "software_version": constants.RELEASE_VERSION,
6276 "protocol_version": constants.PROTOCOL_VERSION,
6277 "config_version": constants.CONFIG_VERSION,
6278 "os_api_version": max(constants.OS_API_VERSIONS),
6279 "export_version": constants.EXPORT_VERSION,
6280 "architecture": runtime.GetArchInfo(),
6281 "name": cluster.cluster_name,
6282 "master": cluster.master_node,
6283 "default_hypervisor": cluster.primary_hypervisor,
6284 "enabled_hypervisors": cluster.enabled_hypervisors,
# Only parameters of enabled hypervisors are exposed.
6285 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6286 for hypervisor_name in cluster.enabled_hypervisors]),
6288 "beparams": cluster.beparams,
6289 "osparams": cluster.osparams,
6290 "ipolicy": cluster.ipolicy,
6291 "nicparams": cluster.nicparams,
6292 "ndparams": cluster.ndparams,
6293 "diskparams": cluster.diskparams,
6294 "candidate_pool_size": cluster.candidate_pool_size,
6295 "master_netdev": cluster.master_netdev,
6296 "master_netmask": cluster.master_netmask,
6297 "use_external_mip_script": cluster.use_external_mip_script,
6298 "volume_group_name": cluster.volume_group_name,
6299 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6300 "file_storage_dir": cluster.file_storage_dir,
6301 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6302 "maintain_node_health": cluster.maintain_node_health,
6303 "ctime": cluster.ctime,
6304 "mtime": cluster.mtime,
6305 "uuid": cluster.uuid,
6306 "tags": list(cluster.GetTags()),
6307 "uid_pool": cluster.uid_pool,
6308 "default_iallocator": cluster.default_iallocator,
6309 "reserved_lvs": cluster.reserved_lvs,
6310 "primary_ip_version": primary_ip_version,
6311 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6312 "hidden_os": cluster.hidden_os,
6313 "blacklisted_os": cluster.blacklisted_os,
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False

  def CheckArguments(self):
    # Delegate all work to a cluster query helper object
    self.cq = _ClusterQuery(None, self.op.output_fields, False)

  def ExpandNames(self):
    self.cq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.cq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    result = self.cq.OldStyleQuery(self)

    # There is exactly one (pseudo-)item in a cluster query
    assert len(result) == 1

    return result[0]
class _ClusterQuery(_QueryBase):
  FIELDS = query.CLUSTER_FIELDS

  #: Do not sort (there is only one item)
  SORT_FIELD = None

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    self.wanted = locking.ALL_SET
    self.do_locking = self.use_locking

    if self.do_locking:
      raise errors.OpPrereqError("Can not use locking for cluster queries",
                                 errors.ECODE_INVAL)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    if query.CQ_CONFIG in self.requested_data:
      cluster = lu.cfg.GetClusterInfo()
    else:
      cluster = NotImplemented

    if query.CQ_QUEUE_DRAINED in self.requested_data:
      drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
    else:
      drain_flag = NotImplemented

    if query.CQ_WATCHER_PAUSE in self.requested_data:
      watcher_pause = utils.ReadWatcherPauseFile(pathutils.WATCHER_PAUSEFILE)
    else:
      watcher_pause = NotImplemented

    return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    if self.op.wait_for_sync:
      if not _WaitForSync(self, self.instance):
        raise errors.OpExecError("Some disks of the instance are degraded!")

    return disks_info
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occured, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
                                             False, idx)
      msg = result.fail_msg
      if msg:
        is_offline_secondary = (node in instance.secondary_nodes and
                                result.offline)
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not (ignore_secondaries or is_offline_secondary):
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
                                             True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  Raises OpExecError (after shutting the disks down again) if the
  disks cannot be assembled.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  if not disks_ok:
    # roll back any partially-assembled devices before failing
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")
class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      # unconditional shutdown, even if the instance appears to be running
      _ShutdownInstanceDisks(self, instance)
    else:
      # only shut down if the instance is known to be down
      _SafeShutdownInstanceDisks(self, instance)
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  # Raises if the instance is not marked as down (safety check)
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)
6587 def _ExpandCheckDisks(instance, disks):
6588 """Return the instance disks selected by the disks list
6590 @type disks: list of L{objects.Disk} or None
6591 @param disks: selected disks
6592 @rtype: list of L{objects.Disk}
6593 @return: selected instance disks to act on
6597 return instance.disks
6599 if not set(disks).issubset(instance.disks):
6600 raise errors.ProgrammerError("Can only act on disks belonging to the"
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If the ignore_primary is false, errors on the primary node are
  ignored.

  @rtype: boolean
  @return: True if all devices were shut down cleanly (subject to the
      ignore_primary/offline-secondary exceptions), False otherwise

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        # failures on an offline secondary or (if requested) on the
        # primary node are tolerated
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function check if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @rtype: C{int}
  @return: node current free memory
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  (_, _, (hv_info, )) = nodeinfo[node].payload

  free_mem = hv_info.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
  return free_mem
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in the all VGs.

  This function check if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB required for it
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  # Delegate the per-VG check; each call raises on failure
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function check if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    (_, (vg_info, ), _) = info.payload
    vg_free = vg_info.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Checks if nodes have enough physical CPUs

  This function checks if all given nodes have the needed number of
  physical CPUs. In case any node has less CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for CPU stats
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    (_, _, (hv_info, )) = info.payload
    num_cpus = hv_info.get("cpu_total", None)
    if not isinstance(num_cpus, int):
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
                                 " on node %s, result was '%s'" %
                                 (node, num_cpus), errors.ECODE_ENVIRON)
    if requested > num_cpus:
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
                                 "required" % (node, num_cpus, requested),
                                 errors.ECODE_NORES)
class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      objects.UpgradeBeParams(self.op.beparams)
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE_RES:
      self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)
      bep.update(self.op.beparams)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MINMEM], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = \
        self.rpc.call_instance_start(node_current,
                                     (instance, self.op.hvparams,
                                      self.op.beparams),
                                     self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        # clean up the disks we assembled before propagating the failure
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)
class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      # soft/hard reboot is handled by the hypervisor on the node itself
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      # full reboot: shut the instance down (if running) and start it again
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)
class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        # best-effort: log the failure but still try to release the disks
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)
class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node,
                                             (inst, self.os_inst), True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      # always bring the disks back down, even if the OS install failed
      _ShutdownInstanceDisks(self, inst)
7144 class LUInstanceRecreateDisks(LogicalUnit):
7145 """Recreate an instance's missing disks.
7148 HPATH = "instance-recreate-disks"
7149 HTYPE = constants.HTYPE_INSTANCE
7152 _MODIFYABLE = frozenset([
7153 constants.IDISK_SIZE,
7154 constants.IDISK_MODE,
7157 # New or changed disk parameters may have different semantics
7158 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7159 constants.IDISK_ADOPT,
7161 # TODO: Implement support changing VG while recreating
7163 constants.IDISK_METAVG,
7166 def _RunAllocator(self):
7167 """Run the allocator based on input opcode.
7170 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7173 # The allocator should actually run in "relocate" mode, but current
7174 # allocators don't support relocating all the nodes of an instance at
7175 # the same time. As a workaround we use "allocate" mode, but this is
7176 # suboptimal for two reasons:
7177 # - The instance name passed to the allocator is present in the list of
7178 # existing instances, so there could be a conflict within the
7179 # internal structures of the allocator. This doesn't happen with the
7180 # current allocators, but it's a liability.
7181 # - The allocator counts the resources used by the instance twice: once
7182 # because the instance exists already, and once because it tries to
7183 # allocate a new instance.
7184 # The allocator could choose some of the nodes on which the instance is
7185 # running, but that's not a problem. If the instance nodes are broken,
7186 # they should be already be marked as drained or offline, and hence
7187 # skipped by the allocator. If instance disks have been lost for other
7188 # reasons, then recreating the disks on the same nodes should be fine.
7189 disk_template = self.instance.disk_template
7190 spindle_use = be_full[constants.BE_SPINDLE_USE]
7191 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7192 disk_template=disk_template,
7193 tags=list(self.instance.GetTags()),
7194 os=self.instance.os,
7196 vcpus=be_full[constants.BE_VCPUS],
7197 memory=be_full[constants.BE_MAXMEM],
7198 spindle_use=spindle_use,
7199 disks=[{constants.IDISK_SIZE: d.size,
7200 constants.IDISK_MODE: d.mode}
7201 for d in self.instance.disks],
7202 hypervisor=self.instance.hypervisor)
7203 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7205 ial.Run(self.op.iallocator)
7207 assert req.RequiredNodes() == len(self.instance.all_nodes)
7210 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7211 " %s" % (self.op.iallocator, ial.info),
7214 self.op.nodes = ial.result
7215 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7216 self.op.instance_name, self.op.iallocator,
7217 utils.CommaJoin(ial.result))
7219 def CheckArguments(self):
7220 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7221 # Normalize and convert deprecated list of disk indices
7222 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7224 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7226 raise errors.OpPrereqError("Some disks have been specified more than"
7227 " once: %s" % utils.CommaJoin(duplicates),
7230 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7231 # when neither iallocator nor nodes are specified
7232 if self.op.iallocator or self.op.nodes:
7233 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7235 for (idx, params) in self.op.disks:
7236 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7237 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7239 raise errors.OpPrereqError("Parameters for disk %s try to change"
7240 " unmodifyable parameter(s): %s" %
7241 (idx, utils.CommaJoin(unsupported)),
7244 def ExpandNames(self):
7245 self._ExpandAndLockInstance()
7246 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7248 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7249 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7251 self.needed_locks[locking.LEVEL_NODE] = []
7252 if self.op.iallocator:
7253 # iallocator will select a new node in the same group
7254 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7255 self.needed_locks[locking.LEVEL_NODE_RES] = []
  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.iallocator is not None
      assert not self.op.nodes
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock the primary group used by the instance optimistically; this
      # requires going via the node before it's locked, requiring
      # verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)

    elif level == locking.LEVEL_NODE:
      # If an allocator is used, then we lock all the nodes in the current
      # instance group, as we don't know yet which ones will be selected;
      # if we replace the nodes without using an allocator, locks are
      # already declared in ExpandNames; otherwise, we need to lock all the
      # instance nodes for disk re-creation
      if self.op.iallocator:
        assert not self.op.nodes
        assert not self.needed_locks[locking.LEVEL_NODE]
        assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1

        # Lock member nodes of the group of the primary node
        for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
          self.needed_locks[locking.LEVEL_NODE].extend(
            self.cfg.GetNodeGroup(group_uuid).members)
      elif not self.op.nodes:
        self._LockInstancesNodes(primary_only=False)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)
7299 def BuildHooksNodes(self):
7300 """Build hooks nodes.
7303 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
# Prerequisite checks for recreating an instance's disks:
#  - the instance exists and (unless its old primary is offline and nodes are
#    being replaced) is not running;
#  - if explicit nodes are given, their count matches the disk template
#    (2 for DRBD8, 1 for plain);
#  - the instance is not diskless;
#  - disk indices in self.op.disks are valid, and partial recreation is not
#    combined with a node change.
# Finally runs the iallocator (if requested) and drops node locks that are
# no longer needed.
# NOTE(review): several lines are missing from this listing (e.g. `else:`
# branches around 7327/7351/7353 and errors.ECODE_* trailer arguments) —
# the visible control flow is incomplete; compare with upstream.
7306 def CheckPrereq(self):
7307 """Check prerequisites.
7309 This checks that the instance is in the cluster and is not running.
7312 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7313 assert instance is not None, \
7314 "Cannot retrieve locked instance %s" % self.op.instance_name
# Replacement node count must match the instance's current node count.
7316 if len(self.op.nodes) != len(instance.all_nodes):
7317 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7318 " %d replacement nodes were specified" %
7319 (instance.name, len(instance.all_nodes),
7320 len(self.op.nodes)),
# Node count must be consistent with the disk template.
7322 assert instance.disk_template != constants.DT_DRBD8 or \
7323 len(self.op.nodes) == 2
7324 assert instance.disk_template != constants.DT_PLAIN or \
7325 len(self.op.nodes) == 1
7326 primary_node = self.op.nodes[0]
7328 primary_node = instance.primary_node
7329 if not self.op.iallocator:
7330 _CheckNodeOnline(self, primary_node)
7332 if instance.disk_template == constants.DT_DISKLESS:
7333 raise errors.OpPrereqError("Instance '%s' has no disks" %
7334 self.op.instance_name, errors.ECODE_INVAL)
7336 # Verify if node group locks are still correct
7337 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7339 # Node group locks are acquired only for the primary node (and only
7340 # when the allocator is used)
7341 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7344 # if we replace nodes *and* the old primary is offline, we don't
7345 # check the instance state
7346 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7347 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7348 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7349 msg="cannot recreate disks")
# self.disks maps disk index -> dict of parameter overrides; an empty
# self.op.disks means "all disks, no overrides".
7352 self.disks = dict(self.op.disks)
7354 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7356 maxidx = max(self.disks.keys())
7357 if maxidx >= len(instance.disks):
7358 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
# Changing nodes requires recreating *all* disks, not a subset.
7361 if ((self.op.nodes or self.op.iallocator) and
7362 sorted(self.disks.keys()) != range(len(instance.disks))):
7363 raise errors.OpPrereqError("Can't recreate disks partially and"
7364 " change the nodes at the same time",
7367 self.instance = instance
7369 if self.op.iallocator:
7370 self._RunAllocator()
7371 # Release unneeded node and node resource locks
7372 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7373 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
# Recreate the instance's disks. First collect all required changes in
# `mods` (so asserts run before anything is touched), then apply them in one
# pass, update the config, and finally create the physical disks.
# NOTE(review): lines are missing from this listing (e.g. the branch around
# 7388-7394 deciding which disks to skip, and the `to_skip` initialization)
# — compare with upstream before relying on the visible flow.
7375 def Exec(self, feedback_fn):
7376 """Recreate the disks.
7379 instance = self.instance
# Node and node-resource lock sets must coincide at this point.
7381 assert (self.owned_locks(locking.LEVEL_NODE) ==
7382 self.owned_locks(locking.LEVEL_NODE_RES))
7385 mods = [] # keeps track of needed changes
7387 for idx, disk in enumerate(instance.disks):
7389 changes = self.disks[idx]
7391 # Disk should not be recreated
7395 # update secondaries for disks, if needed
7396 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7397 # need to update the nodes and minors
7398 assert len(self.op.nodes) == 2
7399 assert len(disk.logical_id) == 6 # otherwise disk internals
# Keep the old DRBD port and secret; only nodes and minors change.
7401 (_, _, old_port, _, _, old_secret) = disk.logical_id
7402 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7403 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7404 new_minors[0], new_minors[1], old_secret)
7405 assert len(disk.logical_id) == len(new_id)
7409 mods.append((idx, new_id, changes))
7411 # now that we have passed all asserts above, we can apply the mods
7412 # in a single run (to avoid partial changes)
7413 for idx, new_id, changes in mods:
7414 disk = instance.disks[idx]
7415 if new_id is not None:
7416 assert disk.dev_type == constants.LD_DRBD8
7417 disk.logical_id = new_id
# Apply per-disk size/mode overrides, if any were requested.
7419 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7420 mode=changes.get(constants.IDISK_MODE, None))
7422 # change primary node, if needed
7424 instance.primary_node = self.op.nodes[0]
7425 self.LogWarning("Changing the instance's nodes, you will have to"
7426 " remove any disks left on the older nodes manually")
7429 self.cfg.Update(instance, feedback_fn)
7431 # All touched nodes must be locked
7432 mylocks = self.owned_locks(locking.LEVEL_NODE)
7433 assert mylocks.issuperset(frozenset(instance.all_nodes))
7434 _CreateDisks(self, instance, to_skip=to_skip)
# Logical unit renaming an existing (stopped) instance: config rename,
# optional file-storage directory rename, disk info update, and the OS
# rename script on the primary node.
# NOTE(review): this listing has dropped lines throughout (docstring
# closers, `else:`/`errors.ECODE_*` lines, `return` statements, `if
# result.fail_msg:` guards) — compare with upstream before use.
7437 class LUInstanceRename(LogicalUnit):
7438 """Rename an instance.
7441 HPATH = "instance-rename"
7442 HTYPE = constants.HTYPE_INSTANCE
# Argument sanity: the IP uniqueness check relies on the name check having
# resolved the new name, so the former requires the latter.
7444 def CheckArguments(self):
7448 if self.op.ip_check and not self.op.name_check:
7449 # TODO: make the ip check more flexible and not depend on the name check
7450 raise errors.OpPrereqError("IP address check requires a name check",
# Standard instance hook env, extended with the new name.
7453 def BuildHooksEnv(self):
7456 This runs on master, primary and secondary nodes of the instance.
7459 env = _BuildInstanceHookEnvByObject(self, self.instance)
7460 env["INSTANCE_NEW_NAME"] = self.op.new_name
7463 def BuildHooksNodes(self):
7464 """Build hooks nodes.
7467 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
# Prerequisites: instance exists, primary node online, instance stopped,
# and the new name is sane, not already used, and (optionally) its IP free.
7470 def CheckPrereq(self):
7471 """Check prerequisites.
7473 This checks that the instance is in the cluster and is not running.
7476 self.op.instance_name = _ExpandInstanceName(self.cfg,
7477 self.op.instance_name)
7478 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7479 assert instance is not None
7480 _CheckNodeOnline(self, instance.primary_node)
7481 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7482 msg="cannot rename")
7483 self.instance = instance
7485 new_name = self.op.new_name
7486 if self.op.name_check:
7487 hostname = _CheckHostnameSane(self, new_name)
7488 new_name = self.op.new_name = hostname.name
# A reachable host at the new IP means the address is already taken.
7489 if (self.op.ip_check and
7490 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7491 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7492 (hostname.ip, new_name),
7493 errors.ECODE_NOTUNIQUE)
7495 instance_list = self.cfg.GetInstanceList()
7496 if new_name in instance_list and new_name != instance.name:
7497 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7498 new_name, errors.ECODE_EXISTS)
# Perform the rename: config first, then lock swap, then on-disk changes.
7500 def Exec(self, feedback_fn):
7501 """Rename the instance.
7504 inst = self.instance
7505 old_name = inst.name
# File-based templates keep data under a name-derived directory which must
# be renamed on the node as well.
7507 rename_file_storage = False
7508 if (inst.disk_template in constants.DTS_FILEBASED and
7509 self.op.new_name != inst.name):
7510 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7511 rename_file_storage = True
7513 self.cfg.RenameInstance(inst.name, self.op.new_name)
7514 # Change the instance lock. This is definitely safe while we hold the BGL.
7515 # Otherwise the new lock would have to be added in acquired mode.
7517 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7518 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7520 # re-read the instance from the configuration after rename
7521 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7523 if rename_file_storage:
7524 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7525 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7526 old_file_storage_dir,
7527 new_file_storage_dir)
7528 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7529 " (but the instance has been renamed in Ganeti)" %
7530 (inst.primary_node, old_file_storage_dir,
7531 new_file_storage_dir))
# Disks must be active so their metadata text can be refreshed and the OS
# rename script can run.
7533 _StartInstanceDisks(self, inst, None)
7534 # update info on disks
7535 info = _GetInstanceInfoText(inst)
7536 for (idx, disk) in enumerate(inst.disks):
7537 for node in inst.all_nodes:
7538 self.cfg.SetDiskID(disk, node)
7539 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7541 self.LogWarning("Error setting info on node %s for disk %s: %s",
7542 node, idx, result.fail_msg)
# A failing OS rename script is only a warning: the cluster-side rename has
# already happened and is not rolled back.
7544 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7545 old_name, self.op.debug_level)
7546 msg = result.fail_msg
7548 msg = ("Could not run OS rename script for instance %s on node %s"
7549 " (but the instance has been renamed in Ganeti): %s" %
7550 (inst.name, inst.primary_node, msg))
7551 self.proc.LogWarning(msg)
7553 _ShutdownInstanceDisks(self, inst)
# Logical unit removing an instance: shut it down on its primary node, then
# delegate disk and config removal to _RemoveInstance.
# NOTE(review): this listing has dropped lines (docstring closers, some
# `else:`/guard lines such as 7619/7622) — compare with upstream.
7558 class LUInstanceRemove(LogicalUnit):
7559 """Remove an instance.
7562 HPATH = "instance-remove"
7563 HTYPE = constants.HTYPE_INSTANCE
# Lock the instance; node locks are computed later in DeclareLocks.
7566 def ExpandNames(self):
7567 self._ExpandAndLockInstance()
7568 self.needed_locks[locking.LEVEL_NODE] = []
7569 self.needed_locks[locking.LEVEL_NODE_RES] = []
7570 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7572 def DeclareLocks(self, level):
7573 if level == locking.LEVEL_NODE:
7574 self._LockInstancesNodes()
7575 elif level == locking.LEVEL_NODE_RES:
# Node-resource locks mirror the node locks.
7577 self.needed_locks[locking.LEVEL_NODE_RES] = \
7578 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7580 def BuildHooksEnv(self):
7583 This runs on master, primary and secondary nodes of the instance.
7586 env = _BuildInstanceHookEnvByObject(self, self.instance)
7587 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
# Pre-hooks run only on the master; post-hooks also on the instance nodes.
7590 def BuildHooksNodes(self):
7591 """Build hooks nodes.
7594 nl = [self.cfg.GetMasterNode()]
7595 nl_post = list(self.instance.all_nodes) + nl
7596 return (nl, nl_post)
7598 def CheckPrereq(self):
7599 """Check prerequisites.
7601 This checks that the instance is in the cluster.
7604 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7605 assert self.instance is not None, \
7606 "Cannot retrieve locked instance %s" % self.op.instance_name
7608 def Exec(self, feedback_fn):
7609 """Remove the instance.
7612 instance = self.instance
7613 logging.info("Shutting down instance %s on node %s",
7614 instance.name, instance.primary_node)
7616 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7617 self.op.shutdown_timeout)
7618 msg = result.fail_msg
# Shutdown failure is fatal unless the caller asked to ignore failures.
7620 if self.op.ignore_failures:
7621 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7623 raise errors.OpExecError("Could not shutdown instance %s on"
7625 (instance.name, instance.primary_node, msg))
7627 assert (self.owned_locks(locking.LEVEL_NODE) ==
7628 self.owned_locks(locking.LEVEL_NODE_RES))
7629 assert not (set(instance.all_nodes) -
7630 self.owned_locks(locking.LEVEL_NODE)), \
7631 "Not owning correct locks"
7633 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
# Shared helper removing an instance: delete its disks (fatal unless
# ignore_failures), drop it from the cluster config, and schedule removal of
# its instance lock.
7636 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7637 """Utility function to remove an instance.
7640 logging.info("Removing block devices for instance %s", instance.name)
7642 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7643 if not ignore_failures:
7644 raise errors.OpExecError("Can't remove instance's disks")
7645 feedback_fn("Warning: can't remove instance's disks")
7647 logging.info("Removing instance %s out of cluster config", instance.name)
7649 lu.cfg.RemoveInstance(instance.name)
7651 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7652 "Instance lock removal conflict"
7654 # Remove lock for the instance
7655 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
# Thin logical unit that delegates all instance-query work to the
# _InstanceQuery helper (old-style query interface).
7658 class LUInstanceQuery(NoHooksLU):
7659 """Logical unit for querying instances.
7662 # pylint: disable=W0142
7665 def CheckArguments(self):
# Build the query object from a simple name filter plus the requested
# output fields and locking mode.
7666 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7667 self.op.output_fields, self.op.use_locking)
7669 def ExpandNames(self):
7670 self.iq.ExpandNames(self)
7672 def DeclareLocks(self, level):
7673 self.iq.DeclareLocks(self, level)
7675 def Exec(self, feedback_fn):
7676 return self.iq.OldStyleQuery(self)
# Logical unit failing over an instance; the actual work is done by the
# TLMigrateInstance tasklet created in ExpandNames.
# NOTE(review): lines are dropped in this listing (e.g. the positional
# tasklet args around 7709-7710 and `env = {` / `return env` lines) —
# compare with upstream.
7679 class LUInstanceFailover(LogicalUnit):
7680 """Failover an instance.
7683 HPATH = "instance-failover"
7684 HTYPE = constants.HTYPE_INSTANCE
7687 def CheckArguments(self):
7688 """Check the arguments.
# Cache optional opcode fields; getattr defaults to None when absent.
7691 self.iallocator = getattr(self.op, "iallocator", None)
7692 self.target_node = getattr(self.op, "target_node", None)
7694 def ExpandNames(self):
7695 self._ExpandAndLockInstance()
7697 if self.op.target_node is not None:
7698 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7700 self.needed_locks[locking.LEVEL_NODE] = []
7701 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7703 self.needed_locks[locking.LEVEL_NODE_RES] = []
7704 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7706 ignore_consistency = self.op.ignore_consistency
7707 shutdown_timeout = self.op.shutdown_timeout
# The tasklet performs the failover; this LU only sets up locks and hooks.
7708 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7711 ignore_consistency=ignore_consistency,
7712 shutdown_timeout=shutdown_timeout,
7713 ignore_ipolicy=self.op.ignore_ipolicy)
7714 self.tasklets = [self._migrater]
7716 def DeclareLocks(self, level):
7717 if level == locking.LEVEL_NODE:
7718 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
# Externally mirrored templates can fail over to any node, so either all
# nodes (iallocator) or exactly primary + target are locked.
7719 if instance.disk_template in constants.DTS_EXT_MIRROR:
7720 if self.op.target_node is None:
7721 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7723 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7724 self.op.target_node]
7725 del self.recalculate_locks[locking.LEVEL_NODE]
7727 self._LockInstancesNodes()
7728 elif level == locking.LEVEL_NODE_RES:
7730 self.needed_locks[locking.LEVEL_NODE_RES] = \
7731 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7733 def BuildHooksEnv(self):
7736 This runs on master, primary and secondary nodes of the instance.
7739 instance = self._migrater.instance
7740 source_node = instance.primary_node
7741 target_node = self.op.target_node
7743 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7744 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7745 "OLD_PRIMARY": source_node,
7746 "NEW_PRIMARY": target_node,
# For internally mirrored (DRBD) instances the roles swap: the old
# secondary becomes primary and vice versa.
7749 if instance.disk_template in constants.DTS_INT_MIRROR:
7750 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7751 env["NEW_SECONDARY"] = source_node
7753 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7755 env.update(_BuildInstanceHookEnvByObject(self, instance))
7759 def BuildHooksNodes(self):
7760 """Build hooks nodes.
7763 instance = self._migrater.instance
7764 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7765 return (nl, nl + [instance.primary_node])
# Logical unit migrating an instance without shutdown (as opposed to
# failover). Work is delegated to a TLMigrateInstance tasklet.
# NOTE(review): lines dropped from this listing (e.g. `self._migrater =`
# around 7791, `env.update({` / `})` / `return env` lines) — compare with
# upstream.
7768 class LUInstanceMigrate(LogicalUnit):
7769 """Migrate an instance.
7771 This is migration without shutting down, compared to the failover,
7772 which is done with shutdown.
7775 HPATH = "instance-migrate"
7776 HTYPE = constants.HTYPE_INSTANCE
7779 def ExpandNames(self):
7780 self._ExpandAndLockInstance()
7782 if self.op.target_node is not None:
7783 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7785 self.needed_locks[locking.LEVEL_NODE] = []
7786 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7788 self.needed_locks[locking.LEVEL_NODE] = []
7789 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
# Migration tasklet; `cleanup` recovers from a previously failed migration.
7792 TLMigrateInstance(self, self.op.instance_name,
7793 cleanup=self.op.cleanup,
7795 fallback=self.op.allow_failover,
7796 allow_runtime_changes=self.op.allow_runtime_changes,
7797 ignore_ipolicy=self.op.ignore_ipolicy)
7798 self.tasklets = [self._migrater]
7800 def DeclareLocks(self, level):
7801 if level == locking.LEVEL_NODE:
7802 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
# Same locking strategy as LUInstanceFailover.DeclareLocks.
7803 if instance.disk_template in constants.DTS_EXT_MIRROR:
7804 if self.op.target_node is None:
7805 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7807 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7808 self.op.target_node]
7809 del self.recalculate_locks[locking.LEVEL_NODE]
7811 self._LockInstancesNodes()
7812 elif level == locking.LEVEL_NODE_RES:
7814 self.needed_locks[locking.LEVEL_NODE_RES] = \
7815 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7817 def BuildHooksEnv(self):
7820 This runs on master, primary and secondary nodes of the instance.
7823 instance = self._migrater.instance
7824 source_node = instance.primary_node
7825 target_node = self.op.target_node
7826 env = _BuildInstanceHookEnvByObject(self, instance)
7828 "MIGRATE_LIVE": self._migrater.live,
7829 "MIGRATE_CLEANUP": self.op.cleanup,
7830 "OLD_PRIMARY": source_node,
7831 "NEW_PRIMARY": target_node,
7832 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
# For DRBD the old target becomes secondary of the new primary.
7835 if instance.disk_template in constants.DTS_INT_MIRROR:
7836 env["OLD_SECONDARY"] = target_node
7837 env["NEW_SECONDARY"] = source_node
7839 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7843 def BuildHooksNodes(self):
7844 """Build hooks nodes.
7847 instance = self._migrater.instance
7848 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7849 return (nl, nl + [instance.primary_node])
# Logical unit moving an instance to another node by copying its disk data:
# shut down on the source, create disks on the target, export/copy each
# disk, update the config, remove the old disks, restart if it was up.
# NOTE(review): this listing drops many lines (docstring closers, `else:`
# and `try:` lines, `errs = []` initialization, `errors.ECODE_*` trailers)
# — compare with upstream before relying on the visible control flow.
7852 class LUInstanceMove(LogicalUnit):
7853 """Move an instance by data-copying.
7856 HPATH = "instance-move"
7857 HTYPE = constants.HTYPE_INSTANCE
7860 def ExpandNames(self):
7861 self._ExpandAndLockInstance()
7862 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7863 self.op.target_node = target_node
# Target node is locked up-front; the primary is added in DeclareLocks.
7864 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7865 self.needed_locks[locking.LEVEL_NODE_RES] = []
7866 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7868 def DeclareLocks(self, level):
7869 if level == locking.LEVEL_NODE:
7870 self._LockInstancesNodes(primary_only=True)
7871 elif level == locking.LEVEL_NODE_RES:
7873 self.needed_locks[locking.LEVEL_NODE_RES] = \
7874 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7876 def BuildHooksEnv(self):
7879 This runs on master, primary and secondary nodes of the instance.
7883 "TARGET_NODE": self.op.target_node,
7884 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7886 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7889 def BuildHooksNodes(self):
7890 """Build hooks nodes.
7894 self.cfg.GetMasterNode(),
7895 self.instance.primary_node,
7896 self.op.target_node,
7900 def CheckPrereq(self):
7901 """Check prerequisites.
7903 This checks that the instance is in the cluster.
7906 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7907 assert self.instance is not None, \
7908 "Cannot retrieve locked instance %s" % self.op.instance_name
7910 node = self.cfg.GetNodeInfo(self.op.target_node)
7911 assert node is not None, \
7912 "Cannot retrieve locked node %s" % self.op.target_node
7914 self.target_node = target_node = node.name
7916 if target_node == instance.primary_node:
7917 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7918 (instance.name, target_node),
7921 bep = self.cfg.GetClusterInfo().FillBE(instance)
# Only simple (LV or file based) disks can be moved by data copy.
7923 for idx, dsk in enumerate(instance.disks):
7924 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7925 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7926 " cannot copy" % idx, errors.ECODE_STATE)
7928 _CheckNodeOnline(self, target_node)
7929 _CheckNodeNotDrained(self, target_node)
7930 _CheckNodeVmCapable(self, target_node)
# Validate target against its node group's instance policy.
7931 cluster = self.cfg.GetClusterInfo()
7932 group_info = self.cfg.GetNodeGroup(node.group)
7933 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
7934 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7935 ignore=self.op.ignore_ipolicy)
7937 if instance.admin_state == constants.ADMINST_UP:
7938 # check memory requirements on the secondary node
7939 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7940 instance.name, bep[constants.BE_MAXMEM],
7941 instance.hypervisor)
7943 self.LogInfo("Not checking memory on the secondary node as"
7944 " instance will not be started")
7946 # check bridge existance
7947 _CheckInstanceBridgesExist(self, instance, node=target_node)
7949 def Exec(self, feedback_fn):
7950 """Move an instance.
7952 The move is done by shutting it down on its present node, copying
7953 the data over (slow) and starting it on the new node.
7956 instance = self.instance
7958 source_node = instance.primary_node
7959 target_node = self.target_node
7961 self.LogInfo("Shutting down instance %s on source node %s",
7962 instance.name, source_node)
7964 assert (self.owned_locks(locking.LEVEL_NODE) ==
7965 self.owned_locks(locking.LEVEL_NODE_RES))
7967 result = self.rpc.call_instance_shutdown(source_node, instance,
7968 self.op.shutdown_timeout)
7969 msg = result.fail_msg
# A failed shutdown is fatal unless consistency checks were waived.
7971 if self.op.ignore_consistency:
7972 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7973 " Proceeding anyway. Please make sure node"
7974 " %s is down. Error details: %s",
7975 instance.name, source_node, source_node, msg)
7977 raise errors.OpExecError("Could not shutdown instance %s on"
7979 (instance.name, source_node, msg))
7981 # create the target disks
7983 _CreateDisks(self, instance, target_node=target_node)
7984 except errors.OpExecError:
# Roll back partially created target disks before re-raising.
7985 self.LogWarning("Device creation failed, reverting...")
7987 _RemoveDisks(self, instance, target_node=target_node)
7989 self.cfg.ReleaseDRBDMinors(instance.name)
7992 cluster_name = self.cfg.GetClusterInfo().cluster_name
7995 # activate, get path, copy the data over
7996 for idx, disk in enumerate(instance.disks):
7997 self.LogInfo("Copying data for disk %d", idx)
7998 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7999 instance.name, True, idx)
8001 self.LogWarning("Can't assemble newly created disk %d: %s",
8002 idx, result.fail_msg)
8003 errs.append(result.fail_msg)
8005 dev_path = result.payload
8006 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8007 target_node, dev_path,
8010 self.LogWarning("Can't copy data over for disk %d: %s",
8011 idx, result.fail_msg)
8012 errs.append(result.fail_msg)
# Any copy error aborts the move: target disks are removed and the
# operation fails; source disks are left untouched.
8016 self.LogWarning("Some disks failed to copy, aborting")
8018 _RemoveDisks(self, instance, target_node=target_node)
8020 self.cfg.ReleaseDRBDMinors(instance.name)
8021 raise errors.OpExecError("Errors during disk copy: %s" %
8024 instance.primary_node = target_node
8025 self.cfg.Update(instance, feedback_fn)
8027 self.LogInfo("Removing the disks on the original node")
8028 _RemoveDisks(self, instance, target_node=source_node)
8030 # Only start the instance if it's marked as up
8031 if instance.admin_state == constants.ADMINST_UP:
8032 self.LogInfo("Starting instance %s on node %s",
8033 instance.name, target_node)
8035 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8036 ignore_secondaries=True)
8038 _ShutdownInstanceDisks(self, instance)
8039 raise errors.OpExecError("Can't activate the instance's disks")
8041 result = self.rpc.call_instance_start(target_node,
8042 (instance, None, None), False)
8043 msg = result.fail_msg
8045 _ShutdownInstanceDisks(self, instance)
8046 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8047 (instance.name, target_node, msg))
# Logical unit migrating all primary instances off a node by submitting one
# OpInstanceMigrate job per instance (the heavy lifting happens in those
# jobs, not here).
8050 class LUNodeMigrate(LogicalUnit):
8051 """Migrate all instances from a node.
8054 HPATH = "node-migrate"
8055 HTYPE = constants.HTYPE_NODE
8058 def CheckArguments(self):
8061 def ExpandNames(self):
8062 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
# All locks are shared: this LU only reads state and submits jobs.
8064 self.share_locks = _ShareAll()
8065 self.needed_locks = {
8066 locking.LEVEL_NODE: [self.op.node_name],
8069 def BuildHooksEnv(self):
8072 This runs on the master, the primary and all the secondaries.
8076 "NODE_NAME": self.op.node_name,
8077 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8080 def BuildHooksNodes(self):
8081 """Build hooks nodes.
8084 nl = [self.cfg.GetMasterNode()]
8087 def CheckPrereq(self):
8090 def Exec(self, feedback_fn):
8091 # Prepare jobs for migration instances
8092 allow_runtime_changes = self.op.allow_runtime_changes
# One single-opcode job per primary instance on the node.
8094 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8097 iallocator=self.op.iallocator,
8098 target_node=self.op.target_node,
8099 allow_runtime_changes=allow_runtime_changes,
8100 ignore_ipolicy=self.op.ignore_ipolicy)]
8101 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
8104 # TODO: Run iallocator in this opcode and pass correct placement options to
8105 # OpInstanceMigrate. Since other jobs can modify the cluster between
8106 # running the iallocator and the actual migration, a good consistency model
8107 # will have to be found.
8109 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8110 frozenset([self.op.node_name]))
8112 return ResultWithJobs(jobs)
8115 class TLMigrateInstance(Tasklet):
8116 """Tasklet class for instance migration.
8119 @ivar live: whether the migration will be done live or non-live;
8120 this variable is initalized only after CheckPrereq has run
8121 @type cleanup: boolean
8122 @ivar cleanup: Wheater we cleanup from a failed migration
8123 @type iallocator: string
8124 @ivar iallocator: The iallocator used to determine target_node
8125 @type target_node: string
8126 @ivar target_node: If given, the target_node to reallocate the instance to
8127 @type failover: boolean
8128 @ivar failover: Whether operation results in failover or migration
8129 @type fallback: boolean
8130 @ivar fallback: Whether fallback to failover is allowed if migration not
8132 @type ignore_consistency: boolean
8133 @ivar ignore_consistency: Wheter we should ignore consistency between source
8135 @type shutdown_timeout: int
8136 @ivar shutdown_timeout: In case of failover timeout of the shutdown
8137 @type ignore_ipolicy: bool
8138 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8143 _MIGRATION_POLL_INTERVAL = 1 # seconds
8144 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
# Store the migration/failover parameters on the tasklet; no validation is
# done here — CheckPrereq performs all checks.
8146 def __init__(self, lu, instance_name, cleanup=False,
8147 failover=False, fallback=False,
8148 ignore_consistency=False,
8149 allow_runtime_changes=True,
8150 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
8151 ignore_ipolicy=False):
8152 """Initializes this class.
8155 Tasklet.__init__(self, lu)
8158 self.instance_name = instance_name
8159 self.cleanup = cleanup
# `live` is decided in CheckPrereq from opcode params / hypervisor default.
8160 self.live = False # will be overridden later
8161 self.failover = failover
8162 self.fallback = fallback
8163 self.ignore_consistency = ignore_consistency
8164 self.shutdown_timeout = shutdown_timeout
8165 self.ignore_ipolicy = ignore_ipolicy
8166 self.allow_runtime_changes = allow_runtime_changes
# Prerequisite checks for migration/failover: resolve the instance, decide
# the target node (iallocator / explicit / DRBD secondary), validate it
# against the node group's instance policy, check memory and bridges, and
# decide between live migration, non-live migration and failover.
# NOTE(review): this listing is missing lines throughout (e.g. `else:`
# branches, `errors.ECODE_*` trailers, parts of the failover-text branch
# around 8188-8191 and 8238-8241) — compare with upstream before use.
8168 def CheckPrereq(self):
8169 """Check prerequisites.
8171 This checks that the instance is in the cluster.
8174 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8175 instance = self.cfg.GetInstanceInfo(instance_name)
8176 assert instance is not None
8177 self.instance = instance
8178 cluster = self.cfg.GetClusterInfo()
# A stopped instance cannot be live-migrated; silently switch to failover
# when the caller allowed falling back.
8180 if (not self.cleanup and
8181 not instance.admin_state == constants.ADMINST_UP and
8182 not self.failover and self.fallback):
8183 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8184 " switching to failover")
8185 self.failover = True
8187 if instance.disk_template not in constants.DTS_MIRRORED:
8192 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8193 " %s" % (instance.disk_template, text),
# Externally mirrored templates: target comes from the iallocator or an
# explicit node; exactly one of the two must be given.
8196 if instance.disk_template in constants.DTS_EXT_MIRROR:
8197 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8199 if self.lu.op.iallocator:
8200 self._RunAllocator()
8202 # We set set self.target_node as it is required by
8204 self.target_node = self.lu.op.target_node
8206 # Check that the target node is correct in terms of instance policy
8207 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8208 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8209 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8211 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8212 ignore=self.ignore_ipolicy)
8214 # self.target_node is already populated, either directly or by the
8216 target_node = self.target_node
8217 if self.target_node == instance.primary_node:
8218 raise errors.OpPrereqError("Cannot migrate instance %s"
8219 " to its primary (%s)" %
8220 (instance.name, instance.primary_node),
8223 if len(self.lu.tasklets) == 1:
8224 # It is safe to release locks only when we're the only tasklet
8226 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8227 keep=[instance.primary_node, self.target_node])
# Internally mirrored (DRBD): target is fixed to the existing secondary.
8230 secondary_nodes = instance.secondary_nodes
8231 if not secondary_nodes:
8232 raise errors.ConfigurationError("No secondary node but using"
8233 " %s disk template" %
8234 instance.disk_template)
8235 target_node = secondary_nodes[0]
8236 if self.lu.op.iallocator or (self.lu.op.target_node and
8237 self.lu.op.target_node != target_node):
8239 text = "failed over"
8242 raise errors.OpPrereqError("Instances with disk template %s cannot"
8243 " be %s to arbitrary nodes"
8244 " (neither an iallocator nor a target"
8245 " node can be passed)" %
8246 (instance.disk_template, text),
8248 nodeinfo = self.cfg.GetNodeInfo(target_node)
8249 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8250 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8252 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8253 ignore=self.ignore_ipolicy)
8255 i_be = cluster.FillBE(instance)
8257 # check memory requirements on the secondary node
8258 if (not self.cleanup and
8259 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8260 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8261 "migrating instance %s" %
8263 i_be[constants.BE_MINMEM],
8264 instance.hypervisor)
8266 self.lu.LogInfo("Not checking memory on the secondary node as"
8267 " instance will not be started")
8269 # check if failover must be forced instead of migration
8270 if (not self.cleanup and not self.failover and
8271 i_be[constants.BE_ALWAYS_FAILOVER]):
8272 self.lu.LogInfo("Instance configured to always failover; fallback"
8274 self.failover = True
8276 # check bridge existance
8277 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8279 if not self.cleanup:
8280 _CheckNodeNotDrained(self.lu, target_node)
8281 if not self.failover:
8282 result = self.rpc.call_instance_migratable(instance.primary_node,
# Non-migratable instance: fall back to failover if allowed, else abort.
8284 if result.fail_msg and self.fallback:
8285 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8287 self.failover = True
8289 result.Raise("Can't migrate, please use failover",
8290 prereq=True, ecode=errors.ECODE_STATE)
8292 assert not (self.failover and self.cleanup)
# Decide live vs non-live: 'live' and 'mode' opcode params are mutually
# exclusive; with neither, the hypervisor's default migration mode wins.
8294 if not self.failover:
8295 if self.lu.op.live is not None and self.lu.op.mode is not None:
8296 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8297 " parameters are accepted",
8299 if self.lu.op.live is not None:
8301 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8303 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8304 # reset the 'live' parameter to None so that repeated
8305 # invocations of CheckPrereq do not raise an exception
8306 self.lu.op.live = None
8307 elif self.lu.op.mode is None:
8308 # read the default value from the hypervisor
8309 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8310 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8312 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8314 # Failover is never live
# Record current runtime memory for possible runtime adjustments.
8317 if not (self.failover or self.cleanup):
8318 remote_info = self.rpc.call_instance_info(instance.primary_node,
8320 instance.hypervisor)
8321 remote_info.Raise("Error checking instance on node %s" %
8322 instance.primary_node)
8323 instance_running = bool(remote_info.payload)
8324 if instance_running:
8325 self.current_mem = int(remote_info.payload["memory"])
# Ask the iallocator for a relocation target; on success the first result
# node becomes self.target_node.
8327 def _RunAllocator(self):
8328 """Run the allocator based on input opcode.
8331 # FIXME: add a self.ignore_ipolicy option
8332 req = iallocator.IAReqRelocate(name=self.instance_name,
8333 relocate_from=[self.instance.primary_node])
8334 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8336 ial.Run(self.lu.op.iallocator)
8339 raise errors.OpPrereqError("Can't compute nodes using"
8340 " iallocator '%s': %s" %
8341 (self.lu.op.iallocator, ial.info),
8343 self.target_node = ial.result[0]
8344 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8345 self.instance_name, self.lu.op.iallocator,
8346 utils.CommaJoin(ial.result))
# Poll all involved nodes until their DRBD disks report fully synced,
# emitting the minimum progress percentage seen across nodes.
# NOTE(review): the surrounding loop/initialization lines (e.g. around
# 8355-8357, 8372-8373) are missing from this listing.
8348 def _WaitUntilSync(self):
8349 """Poll with custom rpc for disk sync.
8351 This uses our own step-based rpc call.
8354 self.feedback_fn("* wait until resync is done")
8358 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8360 (self.instance.disks,
8363 for node, nres in result.items():
8364 nres.Raise("Cannot resync disks on node %s" % node)
8365 node_done, node_percent = nres.payload
8366 all_done = all_done and node_done
8367 if node_percent is not None:
8368 min_percent = min(min_percent, node_percent)
8370 if min_percent < 100:
8371 self.feedback_fn(" - progress: %.1f%%" % min_percent)
# Demote a node's DRBD devices to the secondary role by closing the block
# devices there.
8374 def _EnsureSecondary(self, node):
8375 """Demote a node to secondary.
8378 self.feedback_fn("* switching node %s to secondary mode" % node)
8380 for dev in self.instance.disks:
8381 self.cfg.SetDiskID(dev, node)
8383 result = self.rpc.call_blockdev_close(node, self.instance.name,
8384 self.instance.disks)
8385 result.Raise("Cannot change disk to secondary on node %s" % node)
# Disconnect the instance's DRBD devices from the network on all nodes
# (standalone mode), as a step of the migration/cleanup dance.
8387 def _GoStandalone(self):
8388 """Disconnect from the network.
8391 self.feedback_fn("* changing into standalone mode")
8392 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8393 self.instance.disks)
8394 for node, nres in result.items():
8395 nres.Raise("Cannot disconnect disks node %s" % node)
# Reattach the DRBD devices to the network, either in multi-master mode
# (during live migration) or single-master mode.
# NOTE(review): the docstring body and the multimaster branch lines
# (~8399-8403) are missing from this listing.
8397 def _GoReconnect(self, multimaster):
8398 """Reconnect to the network.
8404 msg = "single-master"
8405 self.feedback_fn("* changing disks into %s mode" % msg)
8406 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8407 (self.instance.disks, self.instance),
8408 self.instance.name, multimaster)
8409 for node, nres in result.items():
8410 nres.Raise("Cannot change disks config on node %s" % node)
def _ExecCleanup(self):
  """Try to cleanup after a failed migration.

  The cleanup is done by:
    - check that the instance is running only on one node
      (and update the config if needed)
    - change disks on its secondary node to secondary
    - wait until disks are fully synchronized
    - disconnect from the network
    - change disks into single-master mode
    - wait again until disks are fully synchronized

  """
  # NOTE(review): this copy has lost several structural lines -- the
  # "else:" of the runningon_target branch, the "try:" around the first
  # _WaitUntilSync() and its "pass" handler. Compare with upstream
  # before relying on the flow below.
  instance = self.instance
  target_node = self.target_node
  source_node = self.source_node

  # check running on only one node
  self.feedback_fn("* checking where the instance actually runs"
                   " (if this hangs, the hypervisor might be in"
  ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
  for node, result in ins_l.items():
    result.Raise("Can't contact node %s" % node)
  # The instance list payload tells us which node(s) actually host it.
  runningon_source = instance.name in ins_l[source_node].payload
  runningon_target = instance.name in ins_l[target_node].payload

  if runningon_source and runningon_target:
    raise errors.OpExecError("Instance seems to be running on two nodes,"
                             " or the hypervisor is confused; you will have"
                             " to ensure manually that it runs only on one"
                             " and restart this operation")
  if not (runningon_source or runningon_target):
    raise errors.OpExecError("Instance does not seem to be running at all;"
                             " in this case it's safer to repair by"
                             " running 'gnt-instance stop' to ensure disk"
                             " shutdown, and then restarting it")
  if runningon_target:
    # the migration has actually succeeded, we need to update the config
    self.feedback_fn("* instance running on secondary node (%s),"
                     " updating config" % target_node)
    instance.primary_node = target_node
    self.cfg.Update(instance, self.feedback_fn)
    # The old primary is the one to demote.
    demoted_node = source_node
    # (this belongs in the lost "else:" branch)
    self.feedback_fn("* instance confirmed to be running on its"
                     " primary node (%s)" % source_node)
    demoted_node = target_node
  # Only internally-mirrored (DRBD) templates need the demote/resync dance.
  if instance.disk_template in constants.DTS_INT_MIRROR:
    self._EnsureSecondary(demoted_node)
    self._WaitUntilSync()
  # (orphaned handler: the matching "try:" line is missing above)
  except errors.OpExecError:
    # we ignore here errors, since if the device is standalone, it
    # won't be able to sync
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

  self.feedback_fn("* done")
def _RevertDiskStatus(self):
  """Try to revert the disk status after a failed migration.

  Best-effort: on failure we only warn, since at this point the
  migration itself has already failed and manual recovery is needed.

  """
  target_node = self.target_node
  # Externally-mirrored templates have no DRBD state to revert.
  if self.instance.disk_template in constants.DTS_EXT_MIRROR:
    return

  try:
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()
  # "except E, err" (old comma form) replaced by the "as" form, valid
  # since Python 2.6 and forward-compatible with Python 3.
  except errors.OpExecError as err:
    self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
                       " please try to recover the instance manually;"
                       " error '%s'" % str(err))
8496 def _AbortMigration(self):
8497 """Call the hypervisor code to abort a started migration.
8500 instance = self.instance
8501 target_node = self.target_node
8502 source_node = self.source_node
8503 migration_info = self.migration_info
8505 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8509 abort_msg = abort_result.fail_msg
8511 logging.error("Aborting migration failed on target node %s: %s",
8512 target_node, abort_msg)
8513 # Don't raise an exception here, as we stil have to try to revert the
8514 # disk status, even if this step failed.
8516 abort_result = self.rpc.call_instance_finalize_migration_src(
8517 source_node, instance, False, self.live)
8518 abort_msg = abort_result.fail_msg
8520 logging.error("Aborting migration failed on source node %s: %s",
8521 source_node, abort_msg)
def _ExecMigration(self):
  """Migrate an instance.

  The migrate is done by:
    - change the disks into dual-master mode
    - wait until disks are fully synchronized again
    - migrate the instance
    - change disks on the new secondary node (the old primary) to secondary
    - wait until disks are fully synchronized
    - change disks into single-master mode

  """
  # NOTE(review): this copy has lost a large number of lines -- several
  # "if msg:" guards, the "while True:" polling loop, a "break", and
  # many RPC argument continuations.  As written, the error paths below
  # run unconditionally; compare against upstream before relying on it.
  instance = self.instance
  target_node = self.target_node
  source_node = self.source_node

  # Check for hypervisor version mismatch and warn the user.
  nodeinfo = self.rpc.call_node_info([source_node, target_node],
                                     None, [self.instance.hypervisor])
  for ninfo in nodeinfo.values():
    # (the format argument for the node name is truncated here)
    ninfo.Raise("Unable to retrieve node information from node '%s'" %
  # node_info payload: (bootid, vg_info, hv_info-per-hypervisor)
  (_, _, (src_info, )) = nodeinfo[source_node].payload
  (_, _, (dst_info, )) = nodeinfo[target_node].payload

  # A version mismatch is only worth a warning, not an abort.
  if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
      (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
    src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
    dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
    if src_version != dst_version:
      self.feedback_fn("* warning: hypervisor version mismatch between"
                       " source (%s) and target (%s) node" %
                       (src_version, dst_version))

  self.feedback_fn("* checking disk consistency between source and target")
  for (idx, dev) in enumerate(instance.disks):
    if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
      raise errors.OpExecError("Disk %s is degraded or not fully"
                               " synchronized on target node,"
                               " aborting migration" % idx)

  # If the instance does not fit in the target node's free memory,
  # balloon it down (when runtime changes are allowed).
  if self.current_mem > self.tgt_free_mem:
    if not self.allow_runtime_changes:
      raise errors.OpExecError("Memory ballooning not allowed and not enough"
                               " free memory to fit instance %s on target"
                               " node %s (have %dMB, need %dMB)" %
                               (instance.name, target_node,
                                self.tgt_free_mem, self.current_mem))
    self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
    rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
    rpcres.Raise("Cannot modify instance runtime memory")

  # First get the migration information from the remote node
  result = self.rpc.call_migration_info(source_node, instance)
  msg = result.fail_msg
  # (error path -- the enclosing "if msg:" guard is missing here)
  log_err = ("Failed fetching source migration information from %s: %s" %
  logging.error(log_err)
  raise errors.OpExecError(log_err)

  self.migration_info = migration_info = result.payload

  if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

  self.feedback_fn("* preparing %s to accept the instance" % target_node)
  result = self.rpc.call_accept_instance(target_node,
                                         self.nodes_ip[target_node])
  msg = result.fail_msg
  # Pre-migration failed: abort on the hypervisor and revert disks.
  logging.error("Instance pre-migration failed, trying to revert"
                " disk status: %s", msg)
  self.feedback_fn("Pre-migration failed, aborting")
  self._AbortMigration()
  self._RevertDiskStatus()
  raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                           (instance.name, msg))

  self.feedback_fn("* migrating instance to %s" % target_node)
  result = self.rpc.call_instance_migrate(source_node, instance,
                                          self.nodes_ip[target_node],
  msg = result.fail_msg
  logging.error("Instance migration failed, trying to revert"
                " disk status: %s", msg)
  self.feedback_fn("Migration failed, aborting")
  self._AbortMigration()
  self._RevertDiskStatus()
  raise errors.OpExecError("Could not migrate instance %s: %s" %
                           (instance.name, msg))

  self.feedback_fn("* starting memory transfer")
  last_feedback = time.time()
  # (the "while True:" polling loop header is missing here)
  result = self.rpc.call_instance_get_migration_status(source_node,
  msg = result.fail_msg
  ms = result.payload # MigrationStatus instance
  if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
    logging.error("Instance migration failed, trying to revert"
                  " disk status: %s", msg)
    self.feedback_fn("Migration failed, aborting")
    self._AbortMigration()
    self._RevertDiskStatus()
    msg = "hypervisor returned failure"
    raise errors.OpExecError("Could not migrate instance %s: %s" %
                             (instance.name, msg))

  # Transfer done once the hypervisor no longer reports it active.
  if result.payload.status != constants.HV_MIGRATION_ACTIVE:
    self.feedback_fn("* memory transfer complete")

  # Throttle progress feedback to one message per interval.
  if (utils.TimeoutExpired(last_feedback,
                           self._MIGRATION_FEEDBACK_INTERVAL) and
      ms.transferred_ram is not None):
    mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
    self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
    last_feedback = time.time()

  time.sleep(self._MIGRATION_POLL_INTERVAL)

  result = self.rpc.call_instance_finalize_migration_src(source_node,
  msg = result.fail_msg
  logging.error("Instance migration succeeded, but finalization failed"
                " on the source node: %s", msg)
  raise errors.OpExecError("Could not finalize instance migration: %s" %

  instance.primary_node = target_node

  # distribute new instance config to the other nodes
  self.cfg.Update(instance, self.feedback_fn)

  result = self.rpc.call_instance_finalize_migration_dst(target_node,
  msg = result.fail_msg
  logging.error("Instance migration succeeded, but finalization failed"
                " on the target node: %s", msg)
  raise errors.OpExecError("Could not finalize instance migration: %s" %

  # For internally-mirrored disks, demote the old primary and go back
  # to single-master mode.
  if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

  # If the instance's disk template is `rbd' and there was a successful
  # migration, unmap the device from the source node.
  if self.instance.disk_template == constants.DT_RBD:
    disks = _ExpandCheckDisks(instance, instance.disks)
    self.feedback_fn("* unmapping instance's disks from %s" % source_node)
    # (the "for disk in disks:" loop header is missing here)
    result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
    msg = result.fail_msg
    # Unmap failure is not fatal; just tell the admin what to do.
    logging.error("Migration was successful, but couldn't unmap the"
                  " block device %s on source node %s: %s",
                  disk.iv_name, source_node, msg)
    logging.error("You need to unmap the device %s manually on %s",
                  disk.iv_name, source_node)

  self.feedback_fn("* done")
def _ExecFailover(self):
  """Failover an instance.

  The failover is done by shutting it down on its present node and
  starting it on the secondary.

  """
  # NOTE(review): several guard lines are missing in this copy (the
  # closing of the _CheckDiskConsistency condition, the "else:" before
  # the "not checking" message, "if msg:" / "if not disks_ok:" guards).
  # Compare with upstream before relying on the flow below.
  instance = self.instance
  # Full node object, needed to check the offline flag.
  primary_node = self.cfg.GetNodeInfo(instance.primary_node)

  source_node = instance.primary_node
  target_node = self.target_node

  # Disk checks only make sense if the instance is (admin-)up.
  if instance.admin_state == constants.ADMINST_UP:
    self.feedback_fn("* checking disk consistency between source and target")
    for (idx, dev) in enumerate(instance.disks):
      # for drbd, these are drbd over lvm
      if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
        if primary_node.offline:
          # Offline primary: a degraded disk is expected, don't abort.
          self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                           (primary_node.name, idx, target_node))
        elif not self.ignore_consistency:
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover" % idx)
    self.feedback_fn("* not checking disk consistency as instance is not"

  self.feedback_fn("* shutting down instance on source node")
  logging.info("Shutting down instance %s on node %s",
               instance.name, source_node)

  result = self.rpc.call_instance_shutdown(source_node, instance,
                                           self.shutdown_timeout)
  msg = result.fail_msg
  # Shutdown failure is tolerated only when we ignore consistency or
  # the primary is known offline; otherwise it aborts the failover.
  if self.ignore_consistency or primary_node.offline:
    self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                       " proceeding anyway; please make sure node"
                       " %s is down; error details: %s",
                       instance.name, source_node, source_node, msg)
    raise errors.OpExecError("Could not shutdown instance %s on"
                             (instance.name, source_node, msg))

  self.feedback_fn("* deactivating the instance's disks on source node")
  if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
    raise errors.OpExecError("Can't shut down the instance's disks")

  instance.primary_node = target_node
  # distribute new instance config to the other nodes
  self.cfg.Update(instance, self.feedback_fn)

  # Only start the instance if it's marked as up
  if instance.admin_state == constants.ADMINST_UP:
    self.feedback_fn("* activating the instance's disks on target node %s" %
    logging.info("Starting instance %s on node %s",
                 instance.name, target_node)

    disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
                                         ignore_secondaries=True)
      # (failure path; its "if not disks_ok:" guard is missing)
      _ShutdownInstanceDisks(self.lu, instance)
      raise errors.OpExecError("Can't activate the instance's disks")

    self.feedback_fn("* starting the instance on the target node %s" %
    result = self.rpc.call_instance_start(target_node, (instance, None, None),
    msg = result.fail_msg
      # On start failure, tear the disks back down before raising.
      _ShutdownInstanceDisks(self.lu, instance)
      raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                               (instance.name, target_node, msg))
def Exec(self, feedback_fn):
  """Perform the migration.

  Dispatches to failover, migration-cleanup or live migration
  depending on the opcode flags.

  @param feedback_fn: callable used to report progress to the client

  """
  self.feedback_fn = feedback_fn
  self.source_node = self.instance.primary_node

  # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
  if self.instance.disk_template in constants.DTS_INT_MIRROR:
    self.target_node = self.instance.secondary_nodes[0]
    # Otherwise self.target_node has been populated either
    # directly, or through an iallocator.

  self.all_nodes = [self.source_node, self.target_node]
  self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
                       in self.cfg.GetMultiNodeInfo(self.all_nodes))

  # The dispatch branches below had been lost in this copy; restore
  # the failover/cleanup/migrate selection.
  if self.failover:
    feedback_fn("Failover instance %s" % self.instance.name)
    self._ExecFailover()
  else:
    feedback_fn("Migrating instance %s" % self.instance.name)

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()
def _CreateBlockDev(lu, node, instance, device, force_create, info,
  """Wrapper around L{_CreateBlockDevInner}.

  This method annotates the root device first.

  """
  # NOTE(review): the signature continuation and the trailing arguments
  # of the _CreateBlockDevInner call are truncated in this copy.
  (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
  return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
def _CreateBlockDevInner(lu, node, instance, device, force_create,
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @attention: The device has to be annotated already.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  # NOTE(review): the signature continuation, the body of the
  # CreateOnSecondary() branch, the "if device.children:" guard and the
  # early return for non-forced creation are missing in this copy.
  if device.CreateOnSecondary():
  # Recurse into children first, propagating the force flag.
  for child in device.children:
    _CreateBlockDevInner(lu, node, instance, child, force_create,
  if not force_create:
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8871 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8872 """Create a single block device on a given node.
8874 This will not recurse over children of the device, so they must be
8877 @param lu: the lu on whose behalf we execute
8878 @param node: the node on which to create the device
8879 @type instance: L{objects.Instance}
8880 @param instance: the instance which owns the device
8881 @type device: L{objects.Disk}
8882 @param device: the device to create
8883 @param info: the extra 'metadata' we should attach to the device
8884 (this will be represented as a LVM tag)
8885 @type force_open: boolean
8886 @param force_open: this parameter will be passes to the
8887 L{backend.BlockdevCreate} function where it specifies
8888 whether we run on primary or not, and it affects both
8889 the child assembly and the device own Open() execution
8892 lu.cfg.SetDiskID(device, node)
8893 result = lu.rpc.call_blockdev_create(node, device, device.size,
8894 instance.name, force_open, info)
8895 result.Raise("Can't create block device %s on"
8896 " node %s for instance %s" % (device, node, instance.name))
8897 if device.physical_id is None:
8898 device.physical_id = result.payload
8901 def _GenerateUniqueNames(lu, exts):
8902 """Generate a suitable LV name.
8904 This will generate a logical volume name for the given instance.
8909 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8910 results.append("%s%s" % (new_id, val))
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  @param lu: the lu on whose behalf we execute
  @param primary: name of the primary node
  @param secondary: name of the secondary node
  @param size: size of the data device, in MiB
  @param vgnames: (data_vg, meta_vg) pair for the two LVs
  @param names: (data_lv_name, meta_lv_name) pair
  @param iv_name: the "disk/N" name visible inside the instance
  @param p_minor: DRBD minor on the primary node
  @param s_minor: DRBD minor on the secondary node
  @return: the L{objects.Disk} for the DRBD8 device

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  # The params={} kwargs, the minors/secret members of the logical id
  # and the return statement had been lost in this copy; restored.
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]),
                          params={})
  dev_meta = objects.Disk(dev_type=constants.LD_LV,
                          size=constants.DRBD_META_SIZE,
                          logical_id=(vgnames[1], names[1]),
                          params={})
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name, params={})
  return drbd_dev
#: Maps disk template to the prefix used when generating unique disk
#: names (DRBD is handled separately and has no entry here).
# NOTE(review): both literals were missing their closing braces in this
# copy (a syntax error); restored. Further entries (e.g. for DT_EXT)
# may also have been lost -- confirm against upstream.
_DISK_TEMPLATE_NAME_PREFIX = {
  constants.DT_PLAIN: "",
  constants.DT_RBD: ".rbd",
  }

#: Maps disk template to the logical-device type of its disks.
_DISK_TEMPLATE_DEVICE_TYPE = {
  constants.DT_PLAIN: constants.LD_LV,
  constants.DT_FILE: constants.LD_FILE,
  constants.DT_SHARED_FILE: constants.LD_FILE,
  constants.DT_BLOCK: constants.LD_BLOCKDEV,
  constants.DT_RBD: constants.LD_RBD,
  }
def _GenerateDiskTemplate(
  lu, template_name, instance_name, primary_node, secondary_nodes,
  disk_info, file_storage_dir, file_driver, base_index,
  feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
  _req_shr_file_storage=opcodes.RequireSharedFileStorage):
  """Generate the entire disk layout for a given template type.

  @param lu: the lu on whose behalf we execute
  @param template_name: the disk template (one of the constants.DT_* values)
  @param disk_info: list of dicts describing the requested disks
  @param base_index: offset added to the per-instance disk indices
  @return: list of L{objects.Disk} objects

  """
  # NOTE(review): this copy has lost many structural lines -- the
  # "disks = []"/"names = []" initialisers, the DISKLESS branch body,
  # the "else:" separating the DRBD branch from the generic one, the
  # "_req_file_storage()" call and several argument continuations.
  # Compare with upstream before relying on the flow below.
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  if template_name == constants.DT_DISKLESS:
  elif template_name == constants.DT_DRBD8:
    # DRBD needs exactly one secondary to mirror to.
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # Two minors per disk: one on each node.
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
    drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
    # Each disk needs a data LV and a metadata LV.
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1])
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
    # (belongs to a lost guard; see note above)
    raise errors.ProgrammerError("Wrong template configuration")
    # File-based templates must be enabled cluster-wide.
    if template_name == constants.DT_FILE:
    elif template_name == constants.DT_SHARED_FILE:
      _req_shr_file_storage()

    name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
    if name_prefix is None:
      names = _GenerateUniqueNames(lu, ["%s.disk%s" %
                                        (name_prefix, base_index + i)
                                        for i in range(disk_count)])

    # logical_id_fn maps (idx, disk_index, disk) to the template-specific
    # logical id tuple of the new disk.
    if template_name == constants.DT_PLAIN:
      def logical_id_fn(idx, _, disk):
        vg = disk.get(constants.IDISK_VG, vgname)
        return (vg, names[idx])
    elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
        lambda _, disk_index, disk: (file_driver,
                                     "%s/disk%d" % (file_storage_dir,
    elif template_name == constants.DT_BLOCK:
        lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
                                       disk[constants.IDISK_ADOPT])
    elif template_name == constants.DT_RBD:
      logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
      raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)

    dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      size = disk[constants.IDISK_SIZE]
      feedback_fn("* disk %s, size %s" %
                  (disk_index, utils.FormatUnit(size, "h")))
      disks.append(objects.Disk(dev_type=dev_type, size=size,
                                logical_id=logical_id_fn(idx, disk_index, disk),
                                iv_name="disk/%d" % disk_index,
                                mode=disk[constants.IDISK_MODE],
9049 def _GetInstanceInfoText(instance):
9050 """Compute that text that should be added to the disk's metadata.
9053 return "originstname+%s" % instance.name
9056 def _CalcEta(time_taken, written, total_size):
9057 """Calculates the ETA based on size written and total size.
9059 @param time_taken: The time taken so far
9060 @param written: amount written so far
9061 @param total_size: The total size of data to be written
9062 @return: The remaining time in seconds
9065 avg_time = time_taken / float(written)
9066 return (total_size - written) * avg_time
def _WipeDisks(lu, instance, disks=None):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @return: the success of the wipe

  """
  # NOTE(review): this copy has lost several lines -- the
  # "if disks is None:" guard before the default disks list, the
  # "if not success:" guards around the warnings, the try/finally
  # around the wipe loop, and argument continuations of both
  # call_blockdev_pause_resume_sync calls. Compare with upstream.
  node = instance.primary_node

  # Default: wipe all disks from offset 0.
  disks = [(idx, disk, 0)
           for (idx, disk) in enumerate(instance.disks)]

  for (_, device, _) in disks:
    lu.cfg.SetDiskID(device, node)

  # Pause DRBD resync while wiping to avoid competing I/O.
  logging.info("Pausing synchronization of disks of instance '%s'",
  result = lu.rpc.call_blockdev_pause_resume_sync(node,
                                                  (map(compat.snd, disks),
  result.Raise("Failed to pause disk synchronization on node '%s'" % node)

  for idx, success in enumerate(result.payload):
    logging.warn("Pausing synchronization of disk %s of instance '%s'"
                 " failed", idx, instance.name)

  for (idx, device, offset) in disks:
    # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
    # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
    int(min(constants.MAX_WIPE_CHUNK,
            device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))

    start_time = time.time()
    # Mention the wipe range when not starting from offset zero.
    info_text = (" (from %s to %s)" %
                 (utils.FormatUnit(offset, "h"),
                  utils.FormatUnit(size, "h")))

    lu.LogInfo("* Wiping disk %s%s", idx, info_text)

    logging.info("Wiping disk %d for instance %s on node %s using"
                 " chunk size %s", idx, instance.name, node, wipe_chunk_size)

    while offset < size:
      # Never wipe past the end of the disk.
      wipe_size = min(wipe_chunk_size, size - offset)

      logging.debug("Wiping disk %d, offset %s, chunk %s",
                    idx, offset, wipe_size)

      result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
      result.Raise("Could not wipe disk %d at offset %d for size %d" %
                   (idx, offset, wipe_size))

      # Emit a progress line at most once a minute.
      if now - last_output >= 60:
        eta = _CalcEta(now - start_time, offset, size)
        lu.LogInfo(" - done: %.1f%% ETA: %s",
                   offset / float(size) * 100, utils.FormatSeconds(eta))

  logging.info("Resuming synchronization of disks for instance '%s'",

  result = lu.rpc.call_blockdev_pause_resume_sync(node,
                                                  (map(compat.snd, disks),
  # Resume failure is only warned about, never fatal.
  lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
                node, result.fail_msg)

  for idx, success in enumerate(result.payload):
    lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
                  " failed", idx, instance.name)
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @return: the success of the creation

  """
  # NOTE(review): the "else:" branch that handles an explicit
  # target_node (pnode = target_node; all_nodes = [pnode]), the
  # "result.fail_msg" guard and the "continue" under the to_skip check
  # appear to be missing in this copy.
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes

  # File-based templates need their storage directory first.
  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
    logging.info("Creating disk %s for instance '%s'", idx, instance.name)
    # Only the primary gets force_create/force_open set.
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @return: the success of the removal

  """
  # NOTE(review): this copy has lost the "all_result = True"
  # initialiser, the "if target_node:"/"else:" choosing between edata
  # variants, the "if result.fail_msg:" guards, the "all_result = False"
  # updates and the final "return all_result". Compare with upstream.
  logging.info("Removing block devices for instance %s", instance.name)

  ports_to_release = set()
  anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
  for (idx, device) in enumerate(anno_disks):
    # Either remove on the explicit target node only...
    edata = [(target_node, device)]
    # ...or on every node of the device tree.
    edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      result = lu.rpc.call_blockdev_remove(node, disk)
      lu.LogWarning("Could not remove disk %s on node %s,"
                    " continuing anyway: %s", idx, node, result.fail_msg)
      # An offline non-primary node does not make the removal a failure.
      if not (result.offline and node != instance.primary_node):

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      ports_to_release.add(device.logical_id[2])

  # Only give the TCP/UDP ports back once the disks are really gone
  # (or we were told to ignore failures).
  if all_result or ignore_failures:
    for port in ports_to_release:
      lu.cfg.AddTcpUdpPort(port)

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    # Directory removal failure is a warning, not fatal.
    lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                  file_storage_dir, instance.primary_node, result.fail_msg)
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  @param disk_template: the disk template (constants.DT_* value)
  @param disks: list of disk dicts (with IDISK_VG / IDISK_SIZE keys)
  @return: dict mapping VG name to required size in MiB
  @raise errors.ProgrammerError: for an unknown disk template

  """
  def _compute(disks, payload):
    """Universal algorithm.

    Sums disk sizes per volume group, adding C{payload} MiB of
    overhead for each disk.

    """
    vgs = {}
    for disk in disks:
      # BUG FIX: this used vgs.get(constants.IDISK_VG, 0) -- i.e. the
      # literal key name instead of the disk's VG -- so sizes never
      # accumulated for several disks in the same volume group.
      vg = disk[constants.IDISK_VG]
      vgs[vg] = vgs.get(vg, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
    }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
9298 def _FilterVmNodes(lu, nodenames):
9299 """Filters out non-vm_capable nodes from a list.
9301 @type lu: L{LogicalUnit}
9302 @param lu: the logical unit for which we check
9303 @type nodenames: list
9304 @param nodenames: the list of nodes on which we should check
9306 @return: the list of vm-capable nodes
9309 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9310 return [name for name in nodenames if name not in vm_nodes]
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  # NOTE(review): the per-node "info = hvinfo[node]" assignment (and
  # possibly an offline-node skip) is missing in this copy, so "info"
  # below is unbound. Compare with upstream.
  nodenames = _FilterVmNodes(lu, nodenames)

  cluster = lu.cfg.GetClusterInfo()
  # Merge the cluster-level defaults with the requested overrides.
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)

  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
  for node in nodenames:
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the hypervisor we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  # NOTE(review): the trailing "osparams" argument of call_os_validate
  # and the format arguments of the final LogInfo call are truncated in
  # this copy.
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    # An empty payload means the OS was not found on that node.
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
def _CreateInstanceAllocRequest(op, disks, nics, beparams):
  """Wrapper around IAReqInstanceAlloc.

  @param op: The instance opcode
  @param disks: The computed disks
  @param nics: The computed nics
  @param beparams: The full filled beparams

  @returns: A filled L{iallocator.IAReqInstanceAlloc}

  """
  # NOTE(review): several keyword arguments of the request (at least
  # those between disk_template and vcpus, and the disks argument) are
  # missing in this copy. Compare with upstream.
  spindle_use = beparams[constants.BE_SPINDLE_USE]
  return iallocator.IAReqInstanceAlloc(name=op.instance_name,
                                       disk_template=op.disk_template,
                                       vcpus=beparams[constants.BE_VCPUS],
                                       memory=beparams[constants.BE_MAXMEM],
                                       spindle_use=spindle_use,
                                       nics=[n.ToDict() for n in nics],
                                       hypervisor=op.hypervisor)
# Turns the raw NIC dicts from the opcode into objects.NIC instances,
# validating mode/network/ip/mac combinations along the way.
# NOTE(review): elided dump — several branch bodies (e.g. after 9420, 9429,
# 9440) are missing their continuation lines from view; nic_ip and nicparams
# initialization lines are among the elided ones.
9397 def _ComputeNics(op, cluster, default_ip, cfg, proc):
9398 """Computes the nics.
9400 @param op: The instance opcode
9401 @param cluster: Cluster configuration object
9402 @param default_ip: The default ip to assign
9403 @param cfg: An instance of the configuration object
9404 @param proc: The executer instance
9406 @returns: The built up nics
9410 for idx, nic in enumerate(op.nics):
9411 nic_mode_req = nic.get(constants.INIC_MODE, None)
9412 nic_mode = nic_mode_req
# Fall back to the cluster-wide default NIC mode when unset or "auto".
9413 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9414 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9416 net = nic.get(constants.INIC_NETWORK, None)
9417 link = nic.get(constants.NIC_LINK, None)
9418 ip = nic.get(constants.INIC_IP, None)
9420 if net is None or net.lower() == constants.VALUE_NONE:
# A named network carries its own mode/link; explicit ones conflict.
9423 if nic_mode_req is not None or link is not None:
9424 raise errors.OpPrereqError("If network is given, no mode or link"
9425 " is allowed to be passed",
9428 # ip validity checks
9429 if ip is None or ip.lower() == constants.VALUE_NONE:
9431 elif ip.lower() == constants.VALUE_AUTO:
# ip=auto is resolved from the name check, so that check must be enabled.
9432 if not op.name_check:
9433 raise errors.OpPrereqError("IP address set to auto but name checks"
9434 " have been skipped",
9438 # We defer pool operations until later, so that the iallocator has
9439 # filled in the instance's node(s)
9440 if ip.lower() == constants.NIC_IP_POOL:
9442 raise errors.OpPrereqError("if ip=pool, parameter network"
9443 " must be passed too",
9446 elif not netutils.IPAddress.IsValid(ip):
9447 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9452 # TODO: check the ip address for uniqueness
9453 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9454 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9457 # MAC address verification
9458 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9459 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9460 mac = utils.NormalizeAndValidateMac(mac)
9463 # TODO: We need to factor this out
# Reserve the explicit MAC under this execution context id; a clash with
# an existing reservation is a user error, not a programming error.
9464 cfg.ReserveMAC(mac, proc.GetECId())
9465 except errors.ReservationError:
9466 raise errors.OpPrereqError("MAC address %s already in use"
9467 " in cluster" % mac,
9468 errors.ECODE_NOTUNIQUE)
9470 # Build nic parameters
9473 nicparams[constants.NIC_MODE] = nic_mode
9475 nicparams[constants.NIC_LINK] = link
# Syntax-check the filled params before constructing the NIC object.
9477 check_params = cluster.SimpleFillNIC(nicparams)
9478 objects.NIC.CheckParameterSyntax(check_params)
9479 nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
# Normalizes the opcode's raw disk dicts into the internal disk-spec dicts
# (size/mode/vg plus optional metavg/adopt keys).
# NOTE(review): elided dump — the size-conversion line between 9501 and 9504
# (presumably "size = int(size)" in a try) and the new_disk dict opener
# before 9510 are missing from view.
9484 def _ComputeDisks(op, default_vg):
9485 """Computes the instance disks.
9487 @param op: The instance opcode
9488 @param default_vg: The default_vg to assume
9490 @return: The computed disks
9494 for disk in op.disks:
9495 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9496 if mode not in constants.DISK_ACCESS_SET:
9497 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9498 mode, errors.ECODE_INVAL)
# Size is mandatory and must convert cleanly to an integer.
9499 size = disk.get(constants.IDISK_SIZE, None)
9501 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9504 except (TypeError, ValueError):
9505 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9508 data_vg = disk.get(constants.IDISK_VG, default_vg)
9510 constants.IDISK_SIZE: size,
9511 constants.IDISK_MODE: mode,
9512 constants.IDISK_VG: data_vg,
# Optional keys are copied through only when present in the input.
9514 if constants.IDISK_METAVG in disk:
9515 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9516 if constants.IDISK_ADOPT in disk:
9517 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9518 disks.append(new_disk)
# Resolves "auto" backend parameters against cluster defaults, upgrades the
# legacy param names, type-checks, and returns the fully filled dict.
# Note: mutates op.beparams in place before filling (Python 2 iteritems).
9523 def _ComputeFullBeParams(op, cluster):
9524 """Computes the full beparams.
9526 @param op: The instance opcode
9527 @param cluster: The cluster config object
9529 @return: The fully filled beparams
9532 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9533 for param, value in op.beparams.iteritems():
9534 if value == constants.VALUE_AUTO:
# Replace "auto" with the cluster-default value for that parameter.
9535 op.beparams[param] = default_beparams[param]
# Convert old-style params (e.g. "memory") to the current minmem/maxmem form.
9536 objects.UpgradeBeParams(op.beparams)
9537 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
9538 return cluster.SimpleFillBE(op.beparams)
9541 class LUInstanceCreate(LogicalUnit):
9542 """Create an instance.
9545 HPATH = "instance-add"
9546 HTYPE = constants.HTYPE_INSTANCE
# Early, locking-free validation of the opcode arguments for instance
# creation; also normalizes names and derives adopt/check_ip attributes.
# NOTE(review): elided dump — original line numbers jump in many places
# (e.g. 9576-9578 adopt counting, 9631/9633 else-branches, 9664-9668
# handshake args), so several branch bodies are missing from view.
9549 def CheckArguments(self):
9553 # do not require name_check to ease forward/backward compatibility
9555 if self.op.no_install and self.op.start:
9556 self.LogInfo("No-installation mode selected, disabling startup")
9557 self.op.start = False
9558 # validate/normalize the instance name
9559 self.op.instance_name = \
9560 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9562 if self.op.ip_check and not self.op.name_check:
9563 # TODO: make the ip check more flexible and not depend on the name check
9564 raise errors.OpPrereqError("Cannot do IP address check without a name"
9565 " check", errors.ECODE_INVAL)
9567 # check nics' parameter names
9568 for nic in self.op.nics:
9569 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9571 # check disks. parameter names and consistent adopt/no-adopt strategy
9572 has_adopt = has_no_adopt = False
9573 for disk in self.op.disks:
9574 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9575 if constants.IDISK_ADOPT in disk:
# Mixing adopted and non-adopted disks in one instance is not supported.
9579 if has_adopt and has_no_adopt:
9580 raise errors.OpPrereqError("Either all disks are adopted or none is",
9583 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9584 raise errors.OpPrereqError("Disk adoption is not supported for the"
9585 " '%s' disk template" %
9586 self.op.disk_template,
# Adoption pins disks to concrete volumes, so neither iallocator placement
# nor import makes sense with it.
9588 if self.op.iallocator is not None:
9589 raise errors.OpPrereqError("Disk adoption not allowed with an"
9590 " iallocator script", errors.ECODE_INVAL)
9591 if self.op.mode == constants.INSTANCE_IMPORT:
9592 raise errors.OpPrereqError("Disk adoption not allowed for"
9593 " instance import", errors.ECODE_INVAL)
9595 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9596 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9597 " but no 'adopt' parameter given" %
9598 self.op.disk_template,
# Remembered for CheckPrereq, which branches on adoption vs. creation.
9601 self.adopt_disks = has_adopt
9603 # instance name verification
9604 if self.op.name_check:
9605 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
9606 self.op.instance_name = self.hostname1.name
9607 # used in CheckPrereq for ip ping check
9608 self.check_ip = self.hostname1.ip
9610 self.check_ip = None
9612 # file storage checks
9613 if (self.op.file_driver and
9614 not self.op.file_driver in constants.FILE_DRIVER):
9615 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9616 self.op.file_driver, errors.ECODE_INVAL)
9618 if self.op.disk_template == constants.DT_FILE:
9619 opcodes.RequireFileStorage()
9620 elif self.op.disk_template == constants.DT_SHARED_FILE:
9621 opcodes.RequireSharedFileStorage()
9623 ### Node/iallocator related checks
9624 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9626 if self.op.pnode is not None:
9627 if self.op.disk_template in constants.DTS_INT_MIRROR:
# Internally mirrored templates (e.g. DRBD) need an explicit secondary.
9628 if self.op.snode is None:
9629 raise errors.OpPrereqError("The networked disk templates need"
9630 " a mirror node", errors.ECODE_INVAL)
9632 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9634 self.op.snode = None
9636 self._cds = _GetClusterDomainSecret()
9638 if self.op.mode == constants.INSTANCE_IMPORT:
9639 # On import force_variant must be True, because if we forced it at
9640 # initial install, our only chance when importing it back is that it
9642 self.op.force_variant = True
9644 if self.op.no_install:
9645 self.LogInfo("No-installation mode has no effect during import")
9647 elif self.op.mode == constants.INSTANCE_CREATE:
9648 if self.op.os_type is None:
9649 raise errors.OpPrereqError("No guest OS specified",
9651 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9652 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9653 " installation" % self.op.os_type,
9655 if self.op.disk_template is None:
9656 raise errors.OpPrereqError("No disk template specified",
9659 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9660 # Check handshake to ensure both clusters have the same domain secret
9661 src_handshake = self.op.source_handshake
9662 if not src_handshake:
9663 raise errors.OpPrereqError("Missing source handshake",
9666 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9669 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9672 # Load and check source CA
9673 self.source_x509_ca_pem = self.op.source_x509_ca
9674 if not self.source_x509_ca_pem:
9675 raise errors.OpPrereqError("Missing source X509 CA",
9679 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
# Python 2 except syntax; loading errors become prereq failures.
9681 except OpenSSL.crypto.Error, err:
9682 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9683 (err, ), errors.ECODE_INVAL)
9685 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9686 if errcode is not None:
9687 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9690 self.source_x509_ca = cert
9692 src_instance_name = self.op.source_instance_name
9693 if not src_instance_name:
9694 raise errors.OpPrereqError("Missing source instance name",
9697 self.source_instance_name = \
9698 netutils.GetHostname(name=src_instance_name).name
# Any other mode value is a caller bug surfaced as a prereq error.
9701 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9702 self.op.mode, errors.ECODE_INVAL)
# Computes the lock set for instance creation: the new instance name is an
# add-lock; node locks are either ALL_SET (iallocator) or the explicit
# pnode/snode list, plus the import source node when applicable.
# NOTE(review): elided dump — e.g. the else-branch line between 9726 and
# 9728 is missing from view.
9704 def ExpandNames(self):
9705 """ExpandNames for CreateInstance.
9707 Figure out the right locks for instance creation.
9710 self.needed_locks = {}
9712 instance_name = self.op.instance_name
9713 # this is just a preventive check, but someone might still add this
9714 # instance in the meantime, and creation will fail at lock-add time
9715 if instance_name in self.cfg.GetInstanceList():
9716 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9717 instance_name, errors.ECODE_EXISTS)
9719 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9721 if self.op.iallocator:
9722 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9723 # specifying a group on instance creation and then selecting nodes from
9725 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9726 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9728 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9729 nodelist = [self.op.pnode]
9730 if self.op.snode is not None:
9731 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9732 nodelist.append(self.op.snode)
9733 self.needed_locks[locking.LEVEL_NODE] = nodelist
9734 # Lock resources of instance's primary and secondary nodes (copy to
9735 # prevent accidental modification)
9736 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9738 # in case of import lock the source node too
9739 if self.op.mode == constants.INSTANCE_IMPORT:
9740 src_node = self.op.src_node
9741 src_path = self.op.src_path
# A missing path defaults to the instance name (relative to EXPORT_DIR).
9743 if src_path is None:
9744 self.op.src_path = src_path = self.op.instance_name
# Without a source node we must search every node, hence ALL_SET; an
# absolute path is meaningless in that case.
9746 if src_node is None:
9747 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9748 self.op.src_node = None
9749 if os.path.isabs(src_path):
9750 raise errors.OpPrereqError("Importing an instance from a path"
9751 " requires a source node option",
9754 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
# Identity check against the ALL_SET sentinel, not an equality check.
9755 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9756 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9757 if not os.path.isabs(src_path):
9758 self.op.src_path = src_path = \
9759 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
# Runs the configured iallocator and writes the chosen pnode (and snode,
# for two-node templates) back into the opcode.
# NOTE(review): elided dump — the success check between 9771 and 9774
# (presumably "if not ial.success:") is missing from view.
9761 def _RunAllocator(self):
9762 """Run the allocator based on input opcode.
9765 #TODO Export network to iallocator so that it chooses a pnode
9766 # in a nodegroup that has the desired network connected to
9767 req = _CreateInstanceAllocRequest(self.op, self.disks,
9768                                   self.nics, self.be_full)
9769 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9771 ial.Run(self.op.iallocator)
9774 raise errors.OpPrereqError("Can't compute nodes using"
9775 " iallocator '%s': %s" %
9776 (self.op.iallocator, ial.info),
# First result entry is the primary node.
9778 self.op.pnode = ial.result[0]
9779 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9780 self.op.instance_name, self.op.iallocator,
9781 utils.CommaJoin(ial.result))
9783 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9785 if req.RequiredNodes() == 2:
9786 self.op.snode = ial.result[1]
# Assembles the hooks environment for the create-instance hook scripts.
# NOTE(review): elided dump — docstring body, the env dict opener before
# 9795, and trailing kwargs/return after 9817 are missing from view.
9788 def BuildHooksEnv(self):
9791 This runs on master, primary and secondary nodes of the instance.
9795 "ADD_MODE": self.op.mode,
# Import-specific variables are only exported in import mode.
9797 if self.op.mode == constants.INSTANCE_IMPORT:
9798 env["SRC_NODE"] = self.op.src_node
9799 env["SRC_PATH"] = self.op.src_path
9800 env["SRC_IMAGES"] = self.src_images
9802 env.update(_BuildInstanceHookEnv(
9803   name=self.op.instance_name,
9804   primary_node=self.op.pnode,
9805   secondary_nodes=self.secondaries,
9806   status=self.op.start,
9807   os_type=self.op.os_type,
9808   minmem=self.be_full[constants.BE_MINMEM],
9809   maxmem=self.be_full[constants.BE_MAXMEM],
9810   vcpus=self.be_full[constants.BE_VCPUS],
9811   nics=_NICListToTuple(self, self.nics),
9812   disk_template=self.op.disk_template,
9813   disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9814          for d in self.disks],
9817   hypervisor_name=self.op.hypervisor,
# Hook nodes: master plus the instance's primary and secondary nodes.
# NOTE(review): elided dump — the return statement after 9827 is missing
# from view.
9823 def BuildHooksNodes(self):
9824 """Build hooks nodes.
9827 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
# Locates and parses the export metadata for an import, fixing up
# op.src_node/op.src_path when they were left unspecified.
# NOTE(review): elided dump — loop-control lines (continue/break between
# 9849 and 9853) and the "else" of the search loop are missing from view.
9830 def _ReadExportInfo(self):
9831 """Reads the export information from disk.
9833 It will override the opcode source node and path with the actual
9834 information, if these two were not specified before.
9836 @return: the export information
9839 assert self.op.mode == constants.INSTANCE_IMPORT
9841 src_node = self.op.src_node
9842 src_path = self.op.src_path
9844 if src_node is None:
# Search all locked nodes for an export matching the relative path.
9845 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9846 exp_list = self.rpc.call_export_list(locked_nodes)
9848 for node in exp_list:
9849 if exp_list[node].fail_msg:
9851 if src_path in exp_list[node].payload:
9853 self.op.src_node = src_node = node
9854 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
9858 raise errors.OpPrereqError("No export found for relative path %s" %
9859 src_path, errors.ECODE_INVAL)
9861 _CheckNodeOnline(self, src_node)
9862 result = self.rpc.call_export_info(src_node, src_path)
9863 result.Raise("No export or invalid export found in dir %s" % src_path)
# The payload is an INI-style config; the export section must exist.
9865 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9866 if not export_info.has_section(constants.INISECT_EXP):
9867 raise errors.ProgrammerError("Corrupted export config",
9868 errors.ECODE_ENVIRON)
# Only exports of the current EXPORT_VERSION can be imported.
9870 ei_version = export_info.get(constants.INISECT_EXP, "version")
9871 if (int(ei_version) != constants.EXPORT_VERSION):
9872 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9873 (ei_version, constants.EXPORT_VERSION),
9874 errors.ECODE_ENVIRON)
9877 def _ReadExportParams(self, einfo):
9878 """Use export parameters as defaults.
9880 In case the opcode doesn't specify (as in override) some instance
9881 parameters, then try to use them from the export information, if
9885 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9887 if self.op.disk_template is None:
9888 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9889 self.op.disk_template = einfo.get(constants.INISECT_INS,
9891 if self.op.disk_template not in constants.DISK_TEMPLATES:
9892 raise errors.OpPrereqError("Disk template specified in configuration"
9893 " file is not one of the allowed values:"
9895 " ".join(constants.DISK_TEMPLATES),
9898 raise errors.OpPrereqError("No disk template specified and the export"
9899 " is missing the disk_template information",
9902 if not self.op.disks:
9904 # TODO: import the disk iv_name too
9905 for idx in range(constants.MAX_DISKS):
9906 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9907 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9908 disks.append({constants.IDISK_SIZE: disk_sz})
9909 self.op.disks = disks
9910 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9911 raise errors.OpPrereqError("No disk info specified and the export"
9912 " is missing the disk information",
9915 if not self.op.nics:
9917 for idx in range(constants.MAX_NICS):
9918 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9920 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9921 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9928 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9929 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9931 if (self.op.hypervisor is None and
9932 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9933 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9935 if einfo.has_section(constants.INISECT_HYP):
9936 # use the export parameters but do not override the ones
9937 # specified by the user
9938 for name, value in einfo.items(constants.INISECT_HYP):
9939 if name not in self.op.hvparams:
9940 self.op.hvparams[name] = value
9942 if einfo.has_section(constants.INISECT_BEP):
9943 # use the parameters, without overriding
9944 for name, value in einfo.items(constants.INISECT_BEP):
9945 if name not in self.op.beparams:
9946 self.op.beparams[name] = value
9947 # Compatibility for the old "memory" be param
9948 if name == constants.BE_MEMORY:
9949 if constants.BE_MAXMEM not in self.op.beparams:
9950 self.op.beparams[constants.BE_MAXMEM] = value
9951 if constants.BE_MINMEM not in self.op.beparams:
9952 self.op.beparams[constants.BE_MINMEM] = value
9954 # try to read the parameters old style, from the main section
9955 for name in constants.BES_PARAMETERS:
9956 if (name not in self.op.beparams and
9957 einfo.has_option(constants.INISECT_INS, name)):
9958 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9960 if einfo.has_section(constants.INISECT_OSP):
9961 # use the parameters, without overriding
9962 for name, value in einfo.items(constants.INISECT_OSP):
9963 if name not in self.op.osparams:
9964 self.op.osparams[name] = value
9966 def _RevertToDefaults(self, cluster):
9967 """Revert the instance parameters to the default values.
9971 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9972 for name in self.op.hvparams.keys():
9973 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9974 del self.op.hvparams[name]
9976 be_defs = cluster.SimpleFillBE({})
9977 for name in self.op.beparams.keys():
9978 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9979 del self.op.beparams[name]
9981 nic_defs = cluster.SimpleFillNIC({})
9982 for nic in self.op.nics:
9983 for name in constants.NICS_PARAMETERS:
9984 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9987 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9988 for name in self.op.osparams.keys():
9989 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9990 del self.op.osparams[name]
# Computes self.instance_file_storage_dir for file-based disk templates:
# <cluster storage dir>/<optional op.file_storage_dir>/<instance name>.
# Stays None for non-file templates.
# NOTE(review): elided dump — the joinargs list initializer (before 10002)
# is missing from view.
9992 def _CalculateFileStorageDir(self):
9993 """Calculate final instance file storage dir.
9996 # file storage dir calculation/check
9997 self.instance_file_storage_dir = None
9998 if self.op.disk_template in constants.DTS_FILEBASED:
9999 # build the full file storage dir path
# Shared-file templates use the shared storage dir, plain file templates
# the regular one.
10002 if self.op.disk_template == constants.DT_SHARED_FILE:
10003 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10005 get_fsd_fn = self.cfg.GetFileStorageDir
10007 cfg_storagedir = get_fsd_fn()
10008 if not cfg_storagedir:
10009 raise errors.OpPrereqError("Cluster file storage dir not defined",
10010 errors.ECODE_STATE)
10011 joinargs.append(cfg_storagedir)
10013 if self.op.file_storage_dir is not None:
10014 joinargs.append(self.op.file_storage_dir)
10016 joinargs.append(self.op.instance_name)
10018 # pylint: disable=W0142
10019 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
# Full prerequisite validation: resolves import metadata, fills hv/be/os
# params, computes nics/disks, runs the iallocator, trims node locks, checks
# primary/secondary nodes, IP pools, instance policy, adoption data, free
# disk/memory, and finally OS/hypervisor params on the target nodes.
# NOTE(review): elided dump — original line numbers jump throughout (e.g.
# 10143->10145 pnode.offline check, 10173->10175 try-blocks, 10252 try,
# 10269/10274/10293/10306 "if delta/online_lvs/baddisks" openers), so many
# guard lines are missing from view.
10021 def CheckPrereq(self): # pylint: disable=R0914
10022 """Check prerequisites.
10025 self._CalculateFileStorageDir()
10027 if self.op.mode == constants.INSTANCE_IMPORT:
# Import mode: pull defaults from the export and remember the old name
# (needed below for MAC reuse when re-importing under the same name).
10028 export_info = self._ReadExportInfo()
10029 self._ReadExportParams(export_info)
10030 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10032 self._old_instance_name = None
10034 if (not self.cfg.GetVGName() and
10035     self.op.disk_template not in constants.DTS_NOT_LVM):
10036 raise errors.OpPrereqError("Cluster does not support lvm-based"
10037 " instances", errors.ECODE_STATE)
10039 if (self.op.hypervisor is None or
10040     self.op.hypervisor == constants.VALUE_AUTO):
10041 self.op.hypervisor = self.cfg.GetHypervisorType()
10043 cluster = self.cfg.GetClusterInfo()
10044 enabled_hvs = cluster.enabled_hypervisors
10045 if self.op.hypervisor not in enabled_hvs:
10046 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10048 (self.op.hypervisor, ",".join(enabled_hvs)),
10049 errors.ECODE_STATE)
10051 # Check tag validity
10052 for tag in self.op.tags:
10053 objects.TaggableObject.ValidateTag(tag)
10055 # check hypervisor parameter syntax (locally)
10056 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10057 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10059 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
10060 hv_type.CheckParameterSyntax(filled_hvp)
10061 self.hv_full = filled_hvp
10062 # check that we don't specify global parameters on an instance
10063 _CheckGlobalHvParams(self.op.hvparams)
10065 # fill and remember the beparams dict
10066 self.be_full = _ComputeFullBeParams(self.op, cluster)
10068 # build os parameters
10069 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10071 # now that hvp/bep are in final format, let's reset to defaults,
10073 if self.op.identify_defaults:
10074 self._RevertToDefaults(cluster)
# NIC buildup (self.hostname1 only set when name_check was requested —
# presumably guarded by an elided line; TODO confirm).
10077 self.nics = _ComputeNics(self.op, cluster, self.hostname1.ip, self.cfg,
10080 # disk checks/pre-build
10081 default_vg = self.cfg.GetVGName()
10082 self.disks = _ComputeDisks(self.op, default_vg)
10084 if self.op.mode == constants.INSTANCE_IMPORT:
# Collect per-disk dump images from the export; False marks a disk that
# has no dump and will be created empty.
10086 for idx in range(len(self.disks)):
10087 option = "disk%d_dump" % idx
10088 if export_info.has_option(constants.INISECT_INS, option):
10089 # FIXME: are the old os-es, disk sizes, etc. useful?
10090 export_name = export_info.get(constants.INISECT_INS, option)
10091 image = utils.PathJoin(self.op.src_path, export_name)
10092 disk_images.append(image)
10094 disk_images.append(False)
10096 self.src_images = disk_images
# Reuse the exported MACs only when importing under the same name.
10098 if self.op.instance_name == self._old_instance_name:
10099 for idx, nic in enumerate(self.nics):
10100 if nic.mac == constants.VALUE_AUTO:
10101 nic_mac_ini = "nic%d_mac" % idx
10102 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10104 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10106 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10107 if self.op.ip_check:
10108 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10109 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10110 (self.check_ip, self.op.instance_name),
10111 errors.ECODE_NOTUNIQUE)
10113 #### mac address generation
10114 # By generating here the mac address both the allocator and the hooks get
10115 # the real final mac address rather than the 'auto' or 'generate' value.
10116 # There is a race condition between the generation and the instance object
10117 # creation, which means that we know the mac is valid now, but we're not
10118 # sure it will be when we actually add the instance. If things go bad
10119 # adding the instance will abort because of a duplicate mac, and the
10120 # creation job will fail.
10121 for nic in self.nics:
10122 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10123 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
10127 if self.op.iallocator is not None:
10128 self._RunAllocator()
10130 # Release all unneeded node locks
# Keep only the nodes we actually ended up using (None entries filtered).
10131 _ReleaseLocks(self, locking.LEVEL_NODE,
10132               keep=filter(None, [self.op.pnode, self.op.snode,
10133                                  self.op.src_node]))
10134 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
10135               keep=filter(None, [self.op.pnode, self.op.snode,
10136                                  self.op.src_node]))
10138 #### node related checks
10140 # check primary node
10141 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10142 assert self.pnode is not None, \
10143   "Cannot retrieve locked node %s" % self.op.pnode
# The offline/drained guards for these raises are elided from this view.
10145 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10146 pnode.name, errors.ECODE_STATE)
10148 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10149 pnode.name, errors.ECODE_STATE)
10150 if not pnode.vm_capable:
10151 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10152 " '%s'" % pnode.name, errors.ECODE_STATE)
10154 self.secondaries = []
10156 # Fill in any IPs from IP pools. This must happen here, because we need to
10157 # know the nic's primary node, as specified by the iallocator
10158 for idx, nic in enumerate(self.nics):
10160 if net is not None:
10161 netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10162 if netparams is None:
10163 raise errors.OpPrereqError("No netparams found for network"
10164 " %s. Propably not connected to"
10165 " node's %s nodegroup" %
10166 (net, self.pnode.name),
10167 errors.ECODE_INVAL)
10168 self.LogInfo("NIC/%d inherits netparams %s" %
10169 (idx, netparams.values()))
10170 nic.nicparams = dict(netparams)
10171 if nic.ip is not None:
10172 filled_params = cluster.SimpleFillNIC(nic.nicparams)
# ip=pool: draw a free address now; otherwise reserve the given one.
10173 if nic.ip.lower() == constants.NIC_IP_POOL:
10175 nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10176 except errors.ReservationError:
10177 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10178 " from the address pool" % idx,
10179 errors.ECODE_STATE)
10180 self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10183 self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10184 except errors.ReservationError:
10185 raise errors.OpPrereqError("IP address %s already in use"
10186 " or does not belong to network %s" %
10188 errors.ECODE_NOTUNIQUE)
10190 # net is None, ip None or given
10191 if self.op.conflicts_check:
10192 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10195 # mirror node verification
10196 if self.op.disk_template in constants.DTS_INT_MIRROR:
10197 if self.op.snode == pnode.name:
10198 raise errors.OpPrereqError("The secondary node cannot be the"
10199 " primary node", errors.ECODE_INVAL)
10200 _CheckNodeOnline(self, self.op.snode)
10201 _CheckNodeNotDrained(self, self.op.snode)
10202 _CheckNodeVmCapable(self, self.op.snode)
10203 self.secondaries.append(self.op.snode)
# Cross-group mirrors are allowed but disk params come from pnode's group.
10205 snode = self.cfg.GetNodeInfo(self.op.snode)
10206 if pnode.group != snode.group:
10207 self.LogWarning("The primary and secondary nodes are in two"
10208 " different node groups; the disk parameters"
10209 " from the first disk's node group will be"
10212 nodenames = [pnode.name] + self.secondaries
10214 # Verify instance specs
10215 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10217 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10218 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10219 constants.ISPEC_DISK_COUNT: len(self.disks),
10220 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10221 constants.ISPEC_NIC_COUNT: len(self.nics),
10222 constants.ISPEC_SPINDLE_USE: spindle_use,
10225 group_info = self.cfg.GetNodeGroup(pnode.group)
10226 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10227 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10228 if not self.op.ignore_ipolicy and res:
10229 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10230        (pnode.group, group_info.name, utils.CommaJoin(res)))
10231 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10233 if not self.adopt_disks:
# Normal (non-adoption) creation: verify storage capacity up front.
10234 if self.op.disk_template == constants.DT_RBD:
10235 # _CheckRADOSFreeSpace() is just a placeholder.
10236 # Any function that checks prerequisites can be placed here.
10237 # Check if there is enough space on the RADOS cluster.
10238 _CheckRADOSFreeSpace()
10240 # Check lv size requirements, if not adopting
10241 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10242 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10244 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
# Adopting existing LVs: names must be unique, reservable, present on the
# node, and the volumes must not be online (in use).
10245 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10246                           disk[constants.IDISK_ADOPT])
10247                for disk in self.disks])
10248 if len(all_lvs) != len(self.disks):
10249 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10250 errors.ECODE_INVAL)
10251 for lv_name in all_lvs:
10253 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
10254 # to ReserveLV uses the same syntax
10255 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10256 except errors.ReservationError:
10257 raise errors.OpPrereqError("LV named %s used by another instance" %
10258 lv_name, errors.ECODE_NOTUNIQUE)
10260 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10261 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10263 node_lvs = self.rpc.call_lv_list([pnode.name],
10264                                  vg_names.payload.keys())[pnode.name]
10265 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10266 node_lvs = node_lvs.payload
10268 delta = all_lvs.difference(node_lvs.keys())
10270 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10271 utils.CommaJoin(delta),
10272 errors.ECODE_INVAL)
# node_lvs values: index 2 is the "online" flag, index 0 the size.
10273 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10275 raise errors.OpPrereqError("Online logical volumes found, cannot"
10276 " adopt: %s" % utils.CommaJoin(online_lvs),
10277 errors.ECODE_STATE)
10278 # update the size of disk based on what is found
10279 for dsk in self.disks:
10280 dsk[constants.IDISK_SIZE] = \
10281   int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10282                                 dsk[constants.IDISK_ADOPT])][0]))
10284 elif self.op.disk_template == constants.DT_BLOCK:
10285 # Normalize and de-duplicate device paths
10286 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10287                  for disk in self.disks])
10288 if len(all_disks) != len(self.disks):
10289 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10290 errors.ECODE_INVAL)
# Only devices under the adoptable root (e.g. /dev/disk/) may be adopted.
10291 baddisks = [d for d in all_disks
10292             if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10294 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10295 " cannot be adopted" %
10296 (", ".join(baddisks),
10297 constants.ADOPTABLE_BLOCKDEV_ROOT),
10298 errors.ECODE_INVAL)
10300 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10301                                       list(all_disks))[pnode.name]
10302 node_disks.Raise("Cannot get block device information from node %s" %
10304 node_disks = node_disks.payload
10305 delta = all_disks.difference(node_disks.keys())
10307 raise errors.OpPrereqError("Missing block device(s): %s" %
10308 utils.CommaJoin(delta),
10309 errors.ECODE_INVAL)
10310 for dsk in self.disks:
10311 dsk[constants.IDISK_SIZE] = \
10312   int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10314 # Verify instance specs
# Re-done after adoption may have updated the real disk sizes above.
10315 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10317 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10318 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10319 constants.ISPEC_DISK_COUNT: len(self.disks),
10320 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10321                             for disk in self.disks],
10322 constants.ISPEC_NIC_COUNT: len(self.nics),
10323 constants.ISPEC_SPINDLE_USE: spindle_use,
10326 group_info = self.cfg.GetNodeGroup(pnode.group)
10327 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10328 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10329 if not self.op.ignore_ipolicy and res:
10330 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10331 " policy: %s") % (pnode.group,
10332 utils.CommaJoin(res)),
10333 errors.ECODE_INVAL)
# Remote checks: hypervisor params, OS availability, OS params, bridges.
10335 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10337 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10338 # check OS parameters (remotely)
10339 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10341 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10343 # memory check on primary node
10344 #TODO(dynmem): use MINMEM for checking
10346 _CheckNodeFreeMemory(self, self.pnode.name,
10347                      "creating instance %s" % self.op.instance_name,
10348                      self.be_full[constants.BE_MAXMEM],
10349                      self.op.hypervisor)
10351 self.dry_run_result = list(nodenames)
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    Generates (or adopts) the disks, registers the instance in the
    configuration, installs or imports the OS and finally starts the
    instance when requested.

    @param feedback_fn: callable used to report progress to the job queue

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    # Node locks and node-resource locks must cover the same node set here
    assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Node locks differ from node resource locks"

    # Some hypervisors need a cluster-wide unique TCP port (e.g. for
    # remote console access)
    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
      network_port = None

    # This is ugly but we got a chicken-egg problem here
    # We can only take the group disk parameters, as the instance
    # has no disks yet (we are generating them right here).
    node = self.cfg.GetNodeInfo(pnode_name)
    nodegroup = self.cfg.GetNodeGroup(node.group)
    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  self.cfg.GetGroupDiskParams(nodegroup))

    # The instance is created administratively down; it is only started
    # at the end of this method, if requested
    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_state=constants.ADMINST_DOWN,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,

    for tag in self.op.tags:

    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        # TODO(review): "adoped" is a typo ("adopted") in a user-visible
        # error message
        result.Raise("Failed to rename adoped LVs")
      feedback_fn("* creating instance disks...")
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        # Roll back partially-created devices before propagating the error
        self.LogWarning("Device creation failed, reverting...")
          _RemoveDisks(self, iobj)
          self.cfg.ReleaseDRBDMinors(instance)

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        # Best-effort: a failed wipe aborts the remaining disk handling
        # below but does not undo the instance creation
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)

      # Something is already wrong with the disks, don't do anything else
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)

      # Degraded disks: roll back disks and the config entry
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"

    # Release all node resource locks
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      # we need to set the disks ID to the primary node, since the
      # preceding code might or might have not done it, depending on
      # disk template and other options
      for disk in iobj.disks:
        self.cfg.SetDiskID(disk, pnode_name)
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          # For internally-mirrored templates the OS install can run while
          # the mirror is still syncing, unless the sync was awaited above
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                        not self.op.wait_for_sync)
            feedback_fn("* pausing disk sync to install instance OS")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
            for idx, success in enumerate(result.payload):
                logging.warn("pause-sync of instance %s for disk %d failed",

          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
                                          self.op.debug_level)
            feedback_fn("* resuming disk sync")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
            for idx, success in enumerate(result.payload):
                logging.warn("resume-sync of instance %s for disk %d failed",
          os_add_result.Raise("Could not add os for instance %s"
                              " on node %s" % (instance, pnode_name))

      if self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        for idx, image in enumerate(self.src_images):
            # FIXME: pass debug option from opcode to backend
            dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                               constants.IEIO_FILE, (image, ),
                                               constants.IEIO_SCRIPT,
                                               (iobj.disks[idx], idx),
            transfers.append(dt)

          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        rename_from = self._old_instance_name

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make
        # a connection. In some cases stopping an instance can take a long
        # time, hence the shutdown timeout is added to the connection
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        rename_from = self.source_instance_name

        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"

      # Run rename script on newly imported instance
      assert iobj.name == instance
      feedback_fn("Running rename script for %s" % instance)
      result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                 self.op.debug_level)
      if result.fail_msg:
        # A failed rename is reported but does not fail the creation
        self.LogWarning("Failed to run rename script for %s on node"
                        " %s: %s" % (instance, pnode_name, result.fail_msg))

    assert not self.owned_locks(locking.LEVEL_NODE_RES)

      # Mark the instance administratively up before starting it
      iobj.admin_state = constants.ADMINST_UP
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)
class LUInstanceMultiAlloc(NoHooksLU):
  """Allocates multiple instances at the same time.

  Either every instance provides its own nodes or none does; in the
  latter case an iallocator (given or cluster default) places them all
  in one request, and one creation job per instance is submitted.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # Either all instances carry pnode/snode or none of them does
    nodes = []
    for inst in self.op.instances:
      if inst.iallocator is not None:
        raise errors.OpPrereqError("iallocator are not allowed to be set on"
                                   " instance objects", errors.ECODE_INVAL)
      nodes.append(bool(inst.pnode))
      if inst.disk_template in constants.DTS_INT_MIRROR:
        # Mirrored templates also need a secondary node
        nodes.append(bool(inst.snode))

    has_nodes = compat.any(nodes)
    if compat.all(nodes) ^ has_nodes:
      raise errors.OpPrereqError("There are instance objects providing"
                                 " pnode/snode while others do not",
                                 errors.ECODE_INVAL)

    if self.op.iallocator is None:
      # Fall back to the cluster-wide default iallocator, if any
      default_iallocator = self.cfg.GetDefaultIAllocator()
      if default_iallocator and has_nodes:
        self.op.iallocator = default_iallocator
      else:
        raise errors.OpPrereqError("No iallocator or nodes on the instances"
                                   " given and no cluster-wide default"
                                   " iallocator found; please specify either"
                                   " an iallocator or nodes on the instances"
                                   " or set a cluster-wide default iallocator",
                                   errors.ECODE_INVAL)

    dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
    if dups:
      raise errors.OpPrereqError("There are duplicate instance names: %s" %
                                 utils.CommaJoin(dups), errors.ECODE_INVAL)

  def ExpandNames(self):
    """Calculate the locks.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.iallocator:
      # The allocator may pick any node, hence lock them all
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
    else:
      nodeslist = []
      for inst in self.op.instances:
        inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
        nodeslist.append(inst.pnode)
        if inst.snode is not None:
          inst.snode = _ExpandNodeName(self.cfg, inst.snode)
          nodeslist.append(inst.snode)

      self.needed_locks[locking.LEVEL_NODE] = nodeslist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidential modification)
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)

  def CheckPrereq(self):
    """Check prerequisite.

    Runs the iallocator for all requested instances in one go and stores
    its result in C{self.ia_result}.

    """
    cluster = self.cfg.GetClusterInfo()
    default_vg = self.cfg.GetVGName()
    insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
                                         _ComputeNics(op, cluster, None,
                                                      self.cfg, self.proc),
                                         _ComputeFullBeParams(op, cluster))
             for op in self.op.instances]
    req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    self.ia_result = ial.result

    if self.op.dry_run:
      # FIX: this attribute was previously misspelled as "dry_run_rsult",
      # so dry-run invocations never returned the allocation result (the
      # job machinery reads "dry_run_result", cf. its use elsewhere in
      # this module)
      self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
        constants.JOB_IDS_KEY: [],
        })

  def _ConstructPartialResult(self):
    """Constructs the partial result.

    @rtype: dict
    @return: mapping with the allocatable and failed instance names

    """
    (allocatable, failed) = self.ia_result
    return {
      opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
        map(compat.fst, allocatable),
      opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
      }

  def Exec(self, feedback_fn):
    """Executes the opcode.

    @param feedback_fn: callable used to report progress (unused here)
    @return: L{ResultWithJobs} with one creation job per placed instance

    """
    op2inst = dict((op.instance_name, op) for op in self.op.instances)
    (allocatable, failed) = self.ia_result

    jobs = []
    for (name, nodes) in allocatable:
      op = op2inst.pop(name)

      # Fill the nodes chosen by the allocator into the per-instance opcode
      if len(nodes) > 1:
        (op.pnode, op.snode) = nodes
      else:
        (op.pnode,) = nodes

      jobs.append([op])

    # Every instance neither placed nor reported failed is a bug in the
    # allocator's answer
    missing = set(op2inst.keys()) - set(failed)
    assert not missing, \
      "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)

    return ResultWithJobs(jobs, **self._ConstructPartialResult())
10725 def _CheckRADOSFreeSpace():
10726 """Compute disk size requirements inside the RADOS cluster.
10729 # For the RADOS cluster we assume there is always enough space.
class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  instance console.

  """

  def ExpandNames(self):
    # Read-only operation: all locks can be shared
    self.share_locks = _ShareAll()
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    @return: console description dict, see L{_GetInstanceConsole}

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      # Not running: derive the most descriptive status from admin_state
      if instance.admin_state == constants.ADMINST_UP:
        state = constants.INSTST_ERRORDOWN
      elif instance.admin_state == constants.ADMINST_DOWN:
        state = constants.INSTST_ADMINDOWN
        state = constants.INSTST_ADMINOFFLINE
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
def _GetInstanceConsole(cluster, instance):
  """Build the console access information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict
  @return: serialized console object for the instance's hypervisor

  """
  # Fill parameters from the cluster defaults explicitly instead of
  # editing the instance and then saving the defaults in the instance
  # itself
  filled_beparams = cluster.FillBE(instance)
  filled_hvparams = cluster.FillHV(instance)

  hv = hypervisor.GetHypervisor(instance.hypervisor)
  console = hv.GetInstanceConsole(instance, filled_hvparams, filled_beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    remote_node = self.op.remote_node
    ialloc = self.op.iallocator
    if self.op.mode == constants.REPLACE_DISK_CHG:
      # Changing the secondary needs either a new node or an allocator
      if remote_node is None and ialloc is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

    elif remote_node is not None or ialloc is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Node-related locks are declared later, in DeclareLocks
    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODE_RES not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []

    self.needed_locks[locking.LEVEL_NODE_RES] = []

    # The actual work is delegated to a tasklet
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release,
                                   self.op.ignore_ipolicy)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock all groups used by instance optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = \
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Resource locks mirror the node locks computed above
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
    env.update(_BuildInstanceHookEnvByObject(self, instance))

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
      self.cfg.GetMasterNode(),
      instance.primary_node,
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # The group lock must be held whenever an allocator is used
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

    return LogicalUnit.CheckPrereq(self)
10941 class TLReplaceDisks(Tasklet):
10942 """Replaces disks for an instance.
10944 Note: Locking is not within the scope of this class.
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release, ignore_ipolicy):
    """Initializes this class.

    Stores the opcode parameters verbatim and pre-declares the runtime
    attributes that CheckPrereq/_CheckPrereq2 fill in later.

    """
    Tasklet.__init__(self, lu)

    # Parameters (copied from the opcode)
    self.instance_name = instance_name
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release
    self.ignore_ipolicy = ignore_ipolicy

    # Runtime data, computed during the prerequisite checks
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None
10973 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10974 """Compute a new secondary node using an IAllocator.
10977 req = iallocator.IAReqRelocate(name=instance_name,
10978 relocate_from=list(relocate_from))
10979 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
10981 ial.Run(iallocator_name)
10983 if not ial.success:
10984 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10985 " %s" % (iallocator_name, ial.info),
10986 errors.ECODE_NORES)
10988 remote_node_name = ial.result[0]
10990 lu.LogInfo("Selected new secondary for instance '%s': %s",
10991 instance_name, remote_node_name)
10993 return remote_node_name
  def _FindFaultyDisks(self, node_name):
    """Wrapper for L{_FindFaultyInstanceDisks}.

    Delegates to the module-level helper for this tasklet's instance.

    """
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    # Each disk must be found on every node of the instance
    for idx, dev in enumerate(instance.disks):
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = _BlockdevFind(self, node, dev, instance)

        # RPC failure or missing payload means the disk is not activated
        elif result.fail_msg or not result.payload:
11025 def CheckPrereq(self):
11026 """Check prerequisites.
11028 This checks that the instance is in the cluster.
11031 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11032 assert instance is not None, \
11033 "Cannot retrieve locked instance %s" % self.instance_name
11035 if instance.disk_template != constants.DT_DRBD8:
11036 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11037 " instances", errors.ECODE_INVAL)
11039 if len(instance.secondary_nodes) != 1:
11040 raise errors.OpPrereqError("The instance has a strange layout,"
11041 " expected one secondary but found %d" %
11042 len(instance.secondary_nodes),
11043 errors.ECODE_FAULT)
11045 if not self.delay_iallocator:
11046 self._CheckPrereq2()
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    # Determine the replacement secondary: explicit node or allocator choice
    if self.iallocator_name is None:
      remote_node = self.remote_node
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
        "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

      if remote_node == self.instance.primary_node:
        raise errors.OpPrereqError("The specified node is the primary node of"
                                   " the instance", errors.ECODE_INVAL)

      if remote_node == secondary_node:
        raise errors.OpPrereqError("The specified node is already the"
                                   " secondary node of the instance",
                                   errors.ECODE_INVAL)

    # An explicit disk list is incompatible with the auto/change modes
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      # Auto mode repairs whichever single node has faulty disks
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %

    # If not specified all disks should be replaced
      self.disks = range(len(self.instance.disks))

    # TODO: This is ugly, but right now we can't distinguish between internal
    # submitted opcode and external one. We should fix that.
    if self.remote_node_info:
      # We change the node, lets verify it still meets instance policy
      new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
      cluster = self.cfg.GetClusterInfo()
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
      _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
                              ignore=self.ignore_ipolicy)

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                              if node_name is not None)

    # Release unneeded node and node resource locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    # Deferred second half of the prerequisite checks (node evacuation)
    if self.delay_iallocator:
      self._CheckPrereq2()

    # Verify owned locks before starting operation
    owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
    assert set(owned_nodes) == set(self.node_secondary_ip), \
        ("Incorrect node locks, owning %s, expected %s" %
         (owned_nodes, self.node_secondary_ip.keys()))
    assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
            self.lu.owned_locks(locking.LEVEL_NODE_RES))

    owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
    assert list(owned_instances) == [self.instance_name], \
        "Instance '%s' not locked" % self.instance_name

    assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
        "Should not own any node group lock at this point"

      feedback_fn("No disks need replacement for instance '%s'" %
                  self.instance.name)

      feedback_fn("Replacing disk(s) %s for instance '%s'" %
                  (utils.CommaJoin(self.disks), self.instance.name))
      # NOTE(review): nearby feedback_fn calls pass a single pre-formatted
      # string; confirm the two-argument form below is supported
      feedback_fn("Current primary node: %s", self.instance.primary_node)
      # TODO(review): "seconary" is a typo ("secondary") in a user-visible
      # message
      feedback_fn("Current seconary node: %s",
                  utils.CommaJoin(self.instance.secondary_nodes))

    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)

    # Activate the instance disks if we're replacing them on a down instance
      _StartInstanceDisks(self.lu, self.instance, True)

      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)

      # Deactivate the instance disks if we're replacing them on a
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    assert not self.lu.owned_locks(locking.LEVEL_NODE)

    # Verify owned locks
    owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
    nodes = frozenset(self.node_secondary_ip)
    assert ((self.early_release and not owned_nodes) or
            (not self.early_release and not (set(owned_nodes) - nodes))), \
      ("Not owning the correct locks, early_release=%s, owned=%r,"
       " nodes=%r" % (self.early_release, owned_nodes, nodes))
  def _CheckVolumeGroup(self, nodes):
    """Verify the cluster's volume group exists on each of the given nodes.

    @param nodes: list of node names to check

    """
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
      raise errors.OpExecError("Can't list volume groups on the nodes")

      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
  def _CheckDisksExistence(self, nodes):
    """Verify each disk selected for replacement exists on the given nodes.

    @param nodes: list of node names to check

    """
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      # Only the disks selected for this replacement are checked
      if idx not in self.disks:

        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = _BlockdevFind(self, node, dev, self.instance)

        msg = result.fail_msg
        if msg or not result.payload:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    """Check that the selected disks are consistent on the given node.

    @param node_name: node whose copies of the disks are checked
    @param on_primary: whether the node is the instance's primary
    @param ldisk: whether to check the local disk state (passed through to
        L{_CheckDiskConsistency})

    """
    for idx, dev in enumerate(self.instance.disks):
      # Only the disks selected for this replacement are checked
      if idx not in self.disks:

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %

      if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
                                   on_primary, ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))
  def _CreateNewStorage(self, node_name):
    """Create new storage on the primary or secondary node.

    This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
    for idx, dev in enumerate(disks):
      # Only the disks selected for this replacement get new storage
      if idx not in self.disks:

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      # Fresh unique names for the replacement data/meta LV pair
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      (data_disk, meta_disk) = dev.children
      vg_data = data_disk.logical_id[0]
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_data, names[0]),
                             params=data_disk.params)
      vg_meta = meta_disk.logical_id[0]
      lv_meta = objects.Disk(dev_type=constants.LD_LV,
                             size=constants.DRBD_META_SIZE,
                             logical_id=(vg_meta, names[1]),
                             params=meta_disk.params)

      new_lvs = [lv_data, lv_meta]
      # Keep copies of the old children so they can be removed later
      old_lvs = [child.Copy() for child in dev.children]
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
                             _GetInstanceInfoText(self.instance), False)
# Verify that every DRBD device in iv_names exists and is not degraded on
# node_name; raises OpExecError otherwise.  Used after sync to double-check
# the result of a replacement.
11352 def _CheckDevices(self, node_name, iv_names):
11353 for name, (dev, _, _) in iv_names.iteritems():
11354 self.cfg.SetDiskID(dev, node_name)
11356 result = _BlockdevFind(self, node_name, dev, self.instance)
11358 msg = result.fail_msg
# RPC failure or missing payload both count as "device not found"
11359 if msg or not result.payload:
11361 msg = "disk not found"
11362 raise errors.OpExecError("Can't find DRBD device %s: %s" %
# A found device must also be fully synced/healthy
11365 if result.payload.is_degraded:
11366 raise errors.OpExecError("DRBD device %s is degraded!" % name)
# Best-effort removal of the replaced (old) logical volumes on node_name;
# failures only warn, since leftover LVs can be cleaned up manually.
# NOTE(review): the inner `for lv in old_lvs:` and the `if msg:` guard are
# elided in this numbered listing.
11368 def _RemoveOldStorage(self, node_name, iv_names):
11369 for name, (_, old_lvs, _) in iv_names.iteritems():
11370 self.lu.LogInfo("Remove logical volumes for %s" % name)
11373 self.cfg.SetDiskID(lv, node_name)
11375 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
# Deliberately non-fatal: warn and let the operator clean up
11377 self.lu.LogWarning("Can't remove old LV: %s" % msg,
11378 hint="remove unused LVs manually")
# Same-node DRBD8 disk replacement: keeps the DRBD device in place and swaps
# its backing LVs underneath it (detach old, double-rename, attach new).
# NOTE(review): numbered listing with elided lines throughout (e.g. the
# `steps_total` assignment and several error-path guards are not visible).
11380 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11381 """Replace a disk on the primary or secondary for DRBD 8.
11383 The algorithm for replace is quite complicated:
11385 1. for each disk to be replaced:
11387 1. create new LVs on the target node with unique names
11388 1. detach old LVs from the drbd device
11389 1. rename old LVs to name_replaced.<time_t>
11390 1. rename new LVs to old LVs
11391 1. attach the new LVs (with the old names now) to the drbd device
11393 1. wait for sync across all devices
11395 1. for each modified disk:
11397 1. remove old LVs (which have the name name_replaces.<time_t>)
11399 Failures are not very well handled.
11404 # Step: check device activation
11405 self.lu.LogStep(1, steps_total, "Check device existence")
11406 self._CheckDisksExistence([self.other_node, self.target_node])
11407 self._CheckVolumeGroup([self.target_node, self.other_node])
11409 # Step: check other node consistency
11410 self.lu.LogStep(2, steps_total, "Check peer consistency")
11411 self._CheckDisksConsistency(self.other_node,
11412 self.other_node == self.instance.primary_node,
11415 # Step: create new storage
11416 self.lu.LogStep(3, steps_total, "Allocate new storage")
11417 iv_names = self._CreateNewStorage(self.target_node)
11419 # Step: for each lv, detach+rename*2+attach
11420 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11421 for dev, old_lvs, new_lvs in iv_names.itervalues():
11422 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11424 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11426 result.Raise("Can't detach drbd from local storage on node"
11427 " %s for device %s" % (self.target_node, dev.iv_name))
11429 #cfg.Update(instance)
11431 # ok, we created the new LVs, so now we know we have the needed
11432 # storage; as such, we proceed on the target node to rename
11433 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11434 # using the assumption that logical_id == physical_id (which in
11435 # turn is the unique_id on that node)
11437 # FIXME(iustin): use a better name for the replaced LVs
11438 temp_suffix = int(time.time())
11439 ren_fn = lambda d, suff: (d.physical_id[0],
11440 d.physical_id[1] + "_replaced-%s" % suff)
11442 # Build the rename list based on what LVs exist on the node
11443 rename_old_to_new = []
11444 for to_ren in old_lvs:
11445 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
# Only LVs that actually exist on the node get a rename entry
11446 if not result.fail_msg and result.payload:
11448 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11450 self.lu.LogInfo("Renaming the old LVs on the target node")
11451 result = self.rpc.call_blockdev_rename(self.target_node,
11453 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11455 # Now we rename the new LVs to the old LVs
11456 self.lu.LogInfo("Renaming the new LVs on the target node")
11457 rename_new_to_old = [(new, old.physical_id)
11458 for old, new in zip(old_lvs, new_lvs)]
11459 result = self.rpc.call_blockdev_rename(self.target_node,
11461 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11463 # Intermediate steps of in memory modifications
11464 for old, new in zip(old_lvs, new_lvs):
# The new LV takes over the old LV's identity in the configuration
11465 new.logical_id = old.logical_id
11466 self.cfg.SetDiskID(new, self.target_node)
11468 # We need to modify old_lvs so that removal later removes the
11469 # right LVs, not the newly added ones; note that old_lvs is a
11471 for disk in old_lvs:
11472 disk.logical_id = ren_fn(disk, temp_suffix)
11473 self.cfg.SetDiskID(disk, self.target_node)
11475 # Now that the new lvs have the old name, we can add them to the device
11476 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11477 result = self.rpc.call_blockdev_addchildren(self.target_node,
11478 (dev, self.instance), new_lvs)
11479 msg = result.fail_msg
# Roll back: try to remove the freshly-created LVs before failing
11481 for new_lv in new_lvs:
11482 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11485 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11486 hint=("cleanup manually the unused logical"
11488 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
# Step numbering continues from 5 via the counter (Python 2 .next())
11490 cstep = itertools.count(5)
11492 if self.early_release:
11493 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11494 self._RemoveOldStorage(self.target_node, iv_names)
11495 # TODO: Check if releasing locks early still makes sense
11496 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11498 # Release all resource locks except those used by the instance
11499 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11500 keep=self.node_secondary_ip.keys())
11502 # Release all node locks while waiting for sync
11503 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11505 # TODO: Can the instance lock be downgraded here? Take the optional disk
11506 # shutdown in the caller into consideration.
11509 # This can fail as the old devices are degraded and _WaitForSync
11510 # does a combined result over all disks, so we don't check its return value
11511 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11512 _WaitForSync(self.lu, self.instance)
11514 # Check all devices manually
11515 self._CheckDevices(self.instance.primary_node, iv_names)
11517 # Step: remove old storage
11518 if not self.early_release:
11519 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11520 self._RemoveOldStorage(self.target_node, iv_names)
# Change-secondary DRBD8 replacement: build new LVs and DRBD devices on the
# new secondary, detach the primary's DRBD from the network, repoint the
# configuration, then reattach so DRBD resyncs to the new node.
# NOTE(review): numbered listing with elided lines throughout (e.g.
# `steps_total`, `iv_names = {}`, the `try:` around device creation, and
# several `if msg:` guards are not visible here).
11522 def _ExecDrbd8Secondary(self, feedback_fn):
11523 """Replace the secondary node for DRBD 8.
11525 The algorithm for replace is quite complicated:
11526 - for all disks of the instance:
11527 - create new LVs on the new node with same names
11528 - shutdown the drbd device on the old secondary
11529 - disconnect the drbd network on the primary
11530 - create the drbd device on the new secondary
11531 - network attach the drbd on the primary, using an artifice:
11532 the drbd code for Attach() will connect to the network if it
11533 finds a device which is connected to the good local disks but
11534 not network enabled
11535 - wait for sync across all devices
11536 - remove all disks from the old secondary
11538 Failures are not very well handled.
11543 pnode = self.instance.primary_node
11545 # Step: check device activation
11546 self.lu.LogStep(1, steps_total, "Check device existence")
11547 self._CheckDisksExistence([self.instance.primary_node])
11548 self._CheckVolumeGroup([self.instance.primary_node])
11550 # Step: check other node consistency
11551 self.lu.LogStep(2, steps_total, "Check peer consistency")
11552 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11554 # Step: create new storage
11555 self.lu.LogStep(3, steps_total, "Allocate new storage")
11556 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11557 for idx, dev in enumerate(disks):
11558 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11559 (self.new_node, idx))
11560 # we pass force_create=True to force LVM creation
11561 for new_lv in dev.children:
11562 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11563 True, _GetInstanceInfoText(self.instance), False)
11565 # Step 4: dbrd minors and drbd setups changes
11566 # after this, we must manually remove the drbd minors on both the
11567 # error and the success paths
11568 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
# One new minor per instance disk, all on the new secondary
11569 minors = self.cfg.AllocateDRBDMinor([self.new_node
11570 for dev in self.instance.disks],
11571 self.instance.name)
11572 logging.debug("Allocated minors %r", minors)
11575 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11576 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11577 (self.new_node, idx))
11578 # create new devices on new_node; note that we create two IDs:
11579 # one without port, so the drbd will be activated without
11580 # networking information on the new node at this stage, and one
11581 # with network, for the latter activation in step 4
11582 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
# p_minor is the primary's existing minor; which one depends on node order
11583 if self.instance.primary_node == o_node1:
11586 assert self.instance.primary_node == o_node2, "Three-node instance?"
11589 new_alone_id = (self.instance.primary_node, self.new_node, None,
11590 p_minor, new_minor, o_secret)
11591 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11592 p_minor, new_minor, o_secret)
11594 iv_names[idx] = (dev, dev.children, new_net_id)
11595 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11597 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11598 logical_id=new_alone_id,
11599 children=dev.children,
11602 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11605 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11607 _GetInstanceInfoText(self.instance), False)
# On failure, give the reserved minors back before propagating
11608 except errors.GenericError:
11609 self.cfg.ReleaseDRBDMinors(self.instance.name)
11612 # We have new devices, shutdown the drbd on the old secondary
11613 for idx, dev in enumerate(self.instance.disks):
11614 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11615 self.cfg.SetDiskID(dev, self.target_node)
11616 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11617 (dev, self.instance)).fail_msg
# Non-fatal: the old secondary may be unreachable; operator cleans up
11619 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11620 "node: %s" % (idx, msg),
11621 hint=("Please cleanup this device manually as"
11622 " soon as possible"))
11624 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11625 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11626 self.instance.disks)[pnode]
11628 msg = result.fail_msg
11630 # detaches didn't succeed (unlikely)
11631 self.cfg.ReleaseDRBDMinors(self.instance.name)
11632 raise errors.OpExecError("Can't detach the disks from the network on"
11633 " old node: %s" % (msg,))
11635 # if we managed to detach at least one, we update all the disks of
11636 # the instance to point to the new secondary
11637 self.lu.LogInfo("Updating instance configuration")
11638 for dev, _, new_logical_id in iv_names.itervalues():
11639 dev.logical_id = new_logical_id
11640 self.cfg.SetDiskID(dev, self.instance.primary_node)
11642 self.cfg.Update(self.instance, feedback_fn)
11644 # Release all node locks (the configuration has been updated)
11645 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11647 # and now perform the drbd attach
11648 self.lu.LogInfo("Attaching primary drbds to new secondary"
11649 " (standalone => connected)")
11650 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11652 self.node_secondary_ip,
11653 (self.instance.disks, self.instance),
11654 self.instance.name,
11656 for to_node, to_result in result.items():
11657 msg = to_result.fail_msg
# Attach failures only warn; sync status can be inspected afterwards
11659 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11661 hint=("please do a gnt-instance info to see the"
11662 " status of disks"))
# Step numbering continues from 5 via the counter (Python 2 .next())
11664 cstep = itertools.count(5)
11666 if self.early_release:
11667 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11668 self._RemoveOldStorage(self.target_node, iv_names)
11669 # TODO: Check if releasing locks early still makes sense
11670 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11672 # Release all resource locks except those used by the instance
11673 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11674 keep=self.node_secondary_ip.keys())
11676 # TODO: Can the instance lock be downgraded here? Take the optional disk
11677 # shutdown in the caller into consideration.
11680 # This can fail as the old devices are degraded and _WaitForSync
11681 # does a combined result over all disks, so we don't check its return value
11682 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11683 _WaitForSync(self.lu, self.instance)
11685 # Check all devices manually
11686 self._CheckDevices(self.instance.primary_node, iv_names)
11688 # Step: remove old storage
11689 if not self.early_release:
11690 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11691 self._RemoveOldStorage(self.target_node, iv_names)
# Logical unit running a "fix consistency" operation on a node's storage
# (e.g. repairing an LVM volume group), guarded by checks that no affected
# instance has faulty disks elsewhere.
# NOTE(review): numbered listing with elided lines (the `try:` inside
# _CheckFaultyDisks, the `continue` in CheckPrereq, the `else: raise`
# re-raise path, and the REQ_BGL attribute are not visible here).
11694 class LURepairNodeStorage(NoHooksLU):
11695 """Repairs the volume group on a node.
# Validate that the requested storage type supports the fix-consistency op.
11700 def CheckArguments(self):
11701 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11703 storage_type = self.op.storage_type
11705 if (constants.SO_FIX_CONSISTENCY not in
11706 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11707 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11708 " repaired" % storage_type,
11709 errors.ECODE_INVAL)
# Only the target node needs to be locked for the repair.
11711 def ExpandNames(self):
11712 self.needed_locks = {
11713 locking.LEVEL_NODE: [self.op.node_name],
11716 def _CheckFaultyDisks(self, instance, node_name):
11717 """Ensure faulty disks abort the opcode or at least warn."""
11719 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11721 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11722 " node '%s'" % (instance.name, node_name),
11723 errors.ECODE_STATE)
# Python 2 except syntax; with ignore_consistency the error is downgraded
# to a warning (presumably re-raised otherwise — elided here, TODO confirm)
11724 except errors.OpPrereqError, err:
11725 if self.op.ignore_consistency:
11726 self.proc.LogWarning(str(err.args[0]))
11730 def CheckPrereq(self):
11731 """Check prerequisites.
11734 # Check whether any instance on this node has faulty disks
11735 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
# Only running instances are checked (non-UP instances are skipped)
11736 if inst.admin_state != constants.ADMINST_UP:
11738 check_nodes = set(inst.all_nodes)
11739 check_nodes.discard(self.op.node_name)
11740 for inst_node_name in check_nodes:
11741 self._CheckFaultyDisks(inst, inst_node_name)
# Execute the repair RPC on the node and raise on failure.
11743 def Exec(self, feedback_fn):
11744 feedback_fn("Repairing storage unit '%s' on %s ..." %
11745 (self.op.name, self.op.node_name))
11747 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11748 result = self.rpc.call_storage_execute(self.op.node_name,
11749 self.op.storage_type, st_args,
11751 constants.SO_FIX_CONSISTENCY)
11752 result.Raise("Failed to repair storage unit '%s' on %s" %
11753 (self.op.name, self.op.node_name))
# Logical unit that evacuates instances off a node, either via an iallocator
# (any mode) or to an explicit remote node (secondary-only).  Locks are taken
# optimistically and re-verified in CheckPrereq.
# NOTE(review): numbered listing with elided lines (closing braces of dict
# literals, `else:`/`continue` branches, and the `jobs` assignment preamble
# in Exec are not visible here).
11756 class LUNodeEvacuate(NoHooksLU):
11757 """Evacuates instances off a list of nodes.
# Maps the opcode's evacuation mode onto the iallocator's node-evac mode
11762 _MODE2IALLOCATOR = {
11763 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11764 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11765 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
# Class-load-time sanity checks that the mapping is total in both directions
11767 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11768 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11769 constants.IALLOCATOR_NEVAC_MODES)
11771 def CheckArguments(self):
# Exactly one of iallocator / remote_node must be given
11772 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11774 def ExpandNames(self):
11775 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11777 if self.op.remote_node is not None:
11778 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11779 assert self.op.remote_node
11781 if self.op.remote_node == self.op.node_name:
11782 raise errors.OpPrereqError("Can not use evacuated node as a new"
11783 " secondary node", errors.ECODE_INVAL)
# With an explicit target node only secondary evacuation is supported
11785 if self.op.mode != constants.NODE_EVAC_SEC:
11786 raise errors.OpPrereqError("Without the use of an iallocator only"
11787 " secondary instances can be evacuated",
11788 errors.ECODE_INVAL)
11791 self.share_locks = _ShareAll()
11792 self.needed_locks = {
11793 locking.LEVEL_INSTANCE: [],
11794 locking.LEVEL_NODEGROUP: [],
11795 locking.LEVEL_NODE: [],
11798 # Determine nodes (via group) optimistically, needs verification once locks
11799 # have been acquired
11800 self.lock_nodes = self._DetermineNodes()
11802 def _DetermineNodes(self):
11803 """Gets the list of nodes to operate on.
11806 if self.op.remote_node is None:
11807 # Iallocator will choose any node(s) in the same group
11808 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11810 group_nodes = frozenset([self.op.remote_node])
11812 # Determine nodes to be locked
11813 return set([self.op.node_name]) | group_nodes
11815 def _DetermineInstances(self):
11816 """Builds list of instances to operate on.
11819 assert self.op.mode in constants.NODE_EVAC_MODES
11821 if self.op.mode == constants.NODE_EVAC_PRI:
11822 # Primary instances only
11823 inst_fn = _GetNodePrimaryInstances
11824 assert self.op.remote_node is None, \
11825 "Evacuating primary instances requires iallocator"
11826 elif self.op.mode == constants.NODE_EVAC_SEC:
11827 # Secondary instances only
11828 inst_fn = _GetNodeSecondaryInstances
11831 assert self.op.mode == constants.NODE_EVAC_ALL
11832 inst_fn = _GetNodeInstances
11833 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
# The ALL mode is currently rejected due to the iallocator limitation above
11835 raise errors.OpPrereqError("Due to an issue with the iallocator"
11836 " interface it is not possible to evacuate"
11837 " all instances at once; specify explicitly"
11838 " whether to evacuate primary or secondary"
11840 errors.ECODE_INVAL)
11842 return inst_fn(self.cfg, self.op.node_name)
11844 def DeclareLocks(self, level):
11845 if level == locking.LEVEL_INSTANCE:
11846 # Lock instances optimistically, needs verification once node and group
11847 # locks have been acquired
11848 self.needed_locks[locking.LEVEL_INSTANCE] = \
11849 set(i.name for i in self._DetermineInstances())
11851 elif level == locking.LEVEL_NODEGROUP:
11852 # Lock node groups for all potential target nodes optimistically, needs
11853 # verification once nodes have been acquired
11854 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11855 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11857 elif level == locking.LEVEL_NODE:
11858 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
# Re-verify everything that was determined optimistically before locking.
11860 def CheckPrereq(self):
11862 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11863 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11864 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11866 need_nodes = self._DetermineNodes()
11868 if not owned_nodes.issuperset(need_nodes):
11869 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11870 " locks were acquired, current nodes are"
11871 " are '%s', used to be '%s'; retry the"
11873 (self.op.node_name,
11874 utils.CommaJoin(need_nodes),
11875 utils.CommaJoin(owned_nodes)),
11876 errors.ECODE_STATE)
11878 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11879 if owned_groups != wanted_groups:
11880 raise errors.OpExecError("Node groups changed since locks were acquired,"
11881 " current groups are '%s', used to be '%s';"
11882 " retry the operation" %
11883 (utils.CommaJoin(wanted_groups),
11884 utils.CommaJoin(owned_groups)))
11886 # Determine affected instances
11887 self.instances = self._DetermineInstances()
11888 self.instance_names = [i.name for i in self.instances]
11890 if set(self.instance_names) != owned_instances:
11891 raise errors.OpExecError("Instances on node '%s' changed since locks"
11892 " were acquired, current instances are '%s',"
11893 " used to be '%s'; retry the operation" %
11894 (self.op.node_name,
11895 utils.CommaJoin(self.instance_names),
11896 utils.CommaJoin(owned_instances)))
11898 if self.instance_names:
11899 self.LogInfo("Evacuating instances from node '%s': %s",
11901 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11903 self.LogInfo("No instances to evacuate from node '%s'",
# The explicit target node must not already be a primary of any instance
11906 if self.op.remote_node is not None:
11907 for i in self.instances:
11908 if i.primary_node == self.op.remote_node:
11909 raise errors.OpPrereqError("Node %s is the primary node of"
11910 " instance %s, cannot use it as"
11912 (self.op.remote_node, i.name),
11913 errors.ECODE_INVAL)
11915 def Exec(self, feedback_fn):
11916 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11918 if not self.instance_names:
11919 # No instances to evacuate
11922 elif self.op.iallocator is not None:
11923 # TODO: Implement relocation to other group
11924 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
11925 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
11926 instances=list(self.instance_names))
11927 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11929 ial.Run(self.op.iallocator)
11931 if not ial.success:
11932 raise errors.OpPrereqError("Can't compute node evacuation using"
11933 " iallocator '%s': %s" %
11934 (self.op.iallocator, ial.info),
11935 errors.ECODE_NORES)
11937 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11939 elif self.op.remote_node is not None:
11940 assert self.op.mode == constants.NODE_EVAC_SEC
# One replace-disks (change secondary) job per instance
11942 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11943 remote_node=self.op.remote_node,
11945 mode=constants.REPLACE_DISK_CHG,
11946 early_release=self.op.early_release)]
11947 for instance_name in self.instance_names
11951 raise errors.ProgrammerError("No iallocator or remote node")
11953 return ResultWithJobs(jobs)
# Sets the early_release flag on an opcode if the opcode supports it;
# opcodes without the attribute are left untouched.
# NOTE(review): the `try:` header and the trailing `return op` are elided in
# this numbered listing.
11956 def _SetOpEarlyRelease(early_release, op):
11957 """Sets C{early_release} flag on opcodes if available.
11961 op.early_release = early_release
# Replace-disks opcodes always have the attribute, hence the sanity assert
11962 except AttributeError:
11963 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
# Formats an evacuation destination for display: the node list when
# use_nodes is set, otherwise the group (the group branch is elided in this
# numbered listing).
11968 def _NodeEvacDest(use_nodes, group, nodes):
11969 """Returns group or nodes depending on caller's choice.
11973 return utils.CommaJoin(nodes)
# NOTE(review): numbered listing with elided lines (the `if failed:` guard,
# `if moved:` guard, and the final `for ops in jobs` list line are not
# visible here).
11978 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11979 """Unpacks the result of change-group and node-evacuate iallocator requests.
11981 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11982 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11984 @type lu: L{LogicalUnit}
11985 @param lu: Logical unit instance
11986 @type alloc_result: tuple/list
11987 @param alloc_result: Result from iallocator
11988 @type early_release: bool
11989 @param early_release: Whether to release locks early if possible
11990 @type use_nodes: bool
11991 @param use_nodes: Whether to display node names instead of groups
11994 (moved, failed, jobs) = alloc_result
# Any failed instance aborts the whole operation after logging the reasons
11997 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11998 for (name, reason) in failed)
11999 lu.LogWarning("Unable to evacuate instances %s", failreason)
12000 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12003 lu.LogInfo("Instances to be moved: %s",
12004 utils.CommaJoin("%s (to %s)" %
12005 (name, _NodeEvacDest(use_nodes, group, nodes))
12006 for (name, group, nodes) in moved))
# Deserialize each job's opcodes and stamp them with the early_release flag
12008 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12009 map(opcodes.OpCode.LoadOpCode, ops))
# NOTE(review): the `if remainder != 0:` guard, the `mib += 1` round-up and
# the final `return mib` are elided in this numbered listing.
12013 def _DiskSizeInBytesToMebibytes(lu, size):
12014 """Converts a disk size in bytes to mebibytes.
12016 Warns and rounds up if the size isn't an even multiple of 1 MiB.
# divmod gives the whole MiB count plus the leftover bytes in one step
12019 (mib, remainder) = divmod(size, 1024 * 1024)
# Rounding up (rather than down) avoids wiping past-the-end of existing data
12022 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12023 " to not overwrite existing data (%s bytes will not be"
12024 " wiped)", (1024 * 1024) - remainder)
# Logical unit growing one disk of an instance: dry-run grow on all nodes,
# real grow, config update, optional wipe of the new space and sync wait.
# NOTE(review): numbered listing with elided lines (e.g. `REQ_BGL`, the
# `env = {` opener, `return env`/`return (nl, nl)`, `disk = self.disk`,
# several `if` guards and the dry-run/wipe keyword arguments are not
# visible here).
12030 class LUInstanceGrowDisk(LogicalUnit):
12031 """Grow a disk of an instance.
12034 HPATH = "disk-grow"
12035 HTYPE = constants.HTYPE_INSTANCE
12038 def ExpandNames(self):
12039 self._ExpandAndLockInstance()
12040 self.needed_locks[locking.LEVEL_NODE] = []
12041 self.needed_locks[locking.LEVEL_NODE_RES] = []
12042 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12043 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12045 def DeclareLocks(self, level):
12046 if level == locking.LEVEL_NODE:
12047 self._LockInstancesNodes()
12048 elif level == locking.LEVEL_NODE_RES:
# Resource locks mirror the node locks computed at LEVEL_NODE
12050 self.needed_locks[locking.LEVEL_NODE_RES] = \
12051 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12053 def BuildHooksEnv(self):
12054 """Build hooks env.
12056 This runs on the master, the primary and all the secondaries.
12060 "DISK": self.op.disk,
12061 "AMOUNT": self.op.amount,
12062 "ABSOLUTE": self.op.absolute,
12064 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12067 def BuildHooksNodes(self):
12068 """Build hooks nodes.
12071 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12074 def CheckPrereq(self):
12075 """Check prerequisites.
12077 This checks that the instance is in the cluster.
12080 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12081 assert instance is not None, \
12082 "Cannot retrieve locked instance %s" % self.op.instance_name
12083 nodenames = list(instance.all_nodes)
12084 for node in nodenames:
12085 _CheckNodeOnline(self, node)
12087 self.instance = instance
12089 if instance.disk_template not in constants.DTS_GROWABLE:
12090 raise errors.OpPrereqError("Instance's disk layout does not support"
12091 " growing", errors.ECODE_INVAL)
12093 self.disk = instance.FindDisk(self.op.disk)
# absolute mode: op.amount is the target size; relative mode: the increment
12095 if self.op.absolute:
12096 self.target = self.op.amount
12097 self.delta = self.target - self.disk.size
12099 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12100 "current disk size (%s)" %
12101 (utils.FormatUnit(self.target, "h"),
12102 utils.FormatUnit(self.disk.size, "h")),
12103 errors.ECODE_STATE)
12105 self.delta = self.op.amount
12106 self.target = self.disk.size + self.delta
12108 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12109 utils.FormatUnit(self.delta, "h"),
12110 errors.ECODE_INVAL)
# File-based templates skip the free-space check (not implemented for them)
12112 if instance.disk_template not in (constants.DT_FILE,
12113 constants.DT_SHARED_FILE,
12115 # TODO: check the free disk space for file, when that feature will be
12117 _CheckNodesFreeDiskPerVG(self, nodenames,
12118 self.disk.ComputeGrowth(self.delta))
12120 def Exec(self, feedback_fn):
12121 """Execute disk grow.
12124 instance = self.instance
12127 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12128 assert (self.owned_locks(locking.LEVEL_NODE) ==
12129 self.owned_locks(locking.LEVEL_NODE_RES))
12131 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12133 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12135 raise errors.OpExecError("Cannot activate block device to grow")
12137 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12138 (self.op.disk, instance.name,
12139 utils.FormatUnit(self.delta, "h"),
12140 utils.FormatUnit(self.target, "h")))
12142 # First run all grow ops in dry-run mode
12143 for node in instance.all_nodes:
12144 self.cfg.SetDiskID(disk, node)
12145 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12147 result.Raise("Dry-run grow request failed to node %s" % node)
12150 # Get disk size from primary node for wiping
12151 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12152 result.Raise("Failed to retrieve disk size from node '%s'" %
12153 instance.primary_node)
12155 (disk_size_in_bytes, ) = result.payload
12157 if disk_size_in_bytes is None:
12158 raise errors.OpExecError("Failed to retrieve disk size from primary"
12159 " node '%s'" % instance.primary_node)
# Remember the pre-grow size (in MiB) so only the new space is wiped later
12161 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12163 assert old_disk_size >= disk.size, \
12164 ("Retrieved disk size too small (got %s, should be at least %s)" %
12165 (old_disk_size, disk.size))
12167 old_disk_size = None
12169 # We know that (as far as we can test) operations across different
12170 # nodes will succeed, time to run it for real on the backing storage
12171 for node in instance.all_nodes:
12172 self.cfg.SetDiskID(disk, node)
12173 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12175 result.Raise("Grow request failed to node %s" % node)
12177 # And now execute it for logical storage, on the primary node
12178 node = instance.primary_node
12179 self.cfg.SetDiskID(disk, node)
12180 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12182 result.Raise("Grow request failed to node %s" % node)
# Persist the new size into the cluster configuration
12184 disk.RecordGrow(self.delta)
12185 self.cfg.Update(instance, feedback_fn)
12187 # Changes have been recorded, release node lock
12188 _ReleaseLocks(self, locking.LEVEL_NODE)
12190 # Downgrade lock while waiting for sync
12191 self.glm.downgrade(locking.LEVEL_INSTANCE)
# wipe_disks and old_disk_size were set together above; XOR checks that
12193 assert wipe_disks ^ (old_disk_size is None)
12196 assert instance.disks[self.op.disk] == disk
12198 # Wipe newly added disk space
12199 _WipeDisks(self, instance,
12200 disks=[(self.op.disk, disk, old_disk_size)])
12202 if self.op.wait_for_sync:
12203 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12205 self.proc.LogWarning("Disk sync-ing has not returned a good"
12206 " status; please check the instance")
# For non-running instances the disk was only activated for the grow
12207 if instance.admin_state != constants.ADMINST_UP:
12208 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12209 elif instance.admin_state != constants.ADMINST_UP:
12210 self.proc.LogWarning("Not shutting down the disk even if the instance is"
12211 " not supposed to be running because no wait for"
12212 " sync mode was requested")
12214 assert self.owned_locks(locking.LEVEL_NODE_RES)
12215 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12218 class LUInstanceQueryData(NoHooksLU):
12219 """Query runtime instance data.
# Method of LUInstanceQueryData (class header just above; the class continues
# past this excerpt).  Decides which instances are wanted and which locks to
# take; non-static queries force locking.
# NOTE(review): some `else:` lines are elided in this numbered listing.
12224 def ExpandNames(self):
12225 self.needed_locks = {}
12227 # Use locking if requested or when non-static information is wanted
12228 if not (self.op.static or self.op.use_locking):
12229 self.LogWarning("Non-static data requested, locks need to be acquired")
12230 self.op.use_locking = True
12232 if self.op.instances or not self.op.use_locking:
12233 # Expand instance names right here
12234 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12236 # Will use acquired locks
12237 self.wanted_names = None
12239 if self.op.use_locking:
12240 self.share_locks = _ShareAll()
# No explicit list means: query every instance (lock all)
12242 if self.wanted_names is None:
12243 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12245 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12247 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12248 self.needed_locks[locking.LEVEL_NODE] = []
12249 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
# Method of LUInstanceQueryData: when locking, take group locks for all
# owned instances' groups (optimistically) and then the instances' nodes.
12251 def DeclareLocks(self, level):
12252 if self.op.use_locking:
12253 if level == locking.LEVEL_NODEGROUP:
12254 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12256 # Lock all groups used by instances optimistically; this requires going
12257 # via the node before it's locked, requiring verification later on
12258 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12259 frozenset(group_uuid
12260 for instance_name in owned_instances
12262 self.cfg.GetInstanceNodeGroups(instance_name))
12264 elif level == locking.LEVEL_NODE:
12265 self._LockInstancesNodes()
# Method of LUInstanceQueryData: resolves the wanted instance list and, when
# locking, re-verifies the optimistically-taken group/node locks.
12267 def CheckPrereq(self):
12268 """Check prerequisites.
12270 This only checks the optional instance list against the existing names.
12273 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12274 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12275 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
# wanted_names is None only when instance locks were used to pick them
12277 if self.wanted_names is None:
12278 assert self.op.use_locking, "Locking was not used"
12279 self.wanted_names = owned_instances
12281 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12283 if self.op.use_locking:
12284 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
# Lockless path must not have acquired anything
12287 assert not (owned_instances or owned_groups or owned_nodes)
12289 self.wanted_instances = instances.values()
12291 def _ComputeBlockdevStatus(self, node, instance, dev):
12292 """Returns the status of a block device
12295 if self.op.static or not node:
12298 self.cfg.SetDiskID(dev, node)
12300 result = self.rpc.call_blockdev_find(node, dev)
12304 result.Raise("Can't compute disk status for %s" % instance.name)
12306 status = result.payload
12310 return (status.dev_path, status.major, status.minor,
12311 status.sync_percent, status.estimated_time,
12312 status.is_degraded, status.ldisk_status)
  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    Annotates the disk with the instance's disk parameters before
    delegating the actual work to L{_ComputeDiskStatusInner}.

    """
    (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
    return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12322 def _ComputeDiskStatusInner(self, instance, snode, dev):
12323 """Compute block device status.
12325 @attention: The device has to be annotated already.
12328 if dev.dev_type in constants.LDS_DRBD:
12329 # we change the snode then (otherwise we use the one passed in)
12330 if dev.logical_id[0] == instance.primary_node:
12331 snode = dev.logical_id[1]
12333 snode = dev.logical_id[0]
12335 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12337 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12340 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12347 "iv_name": dev.iv_name,
12348 "dev_type": dev.dev_type,
12349 "logical_id": dev.logical_id,
12350 "physical_id": dev.physical_id,
12351 "pstatus": dev_pstatus,
12352 "sstatus": dev_sstatus,
12353 "children": dev_children,
  def Exec(self, feedback_fn):
    """Gather and return data"""
    # NOTE(review): several lines appear to be missing from this copy of the
    # file (e.g. the "result = {}" initializer, some "else:" branches, a few
    # dict entries and the final "return result") -- compare with upstream
    # before modifying this method.
    cluster = self.cfg.GetClusterInfo()

    node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
    nodes = dict(self.cfg.GetMultiNodeInfo(node_names))

    groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
                                                 for node in nodes.values()))

    # Translate a node group UUID into its human-readable name
    group2name_fn = lambda uuid: groups[uuid].name

    for instance in self.wanted_instances:
      pnode = nodes[instance.primary_node]

      # Static-only data requested, or primary node unusable: no runtime RPC
      if self.op.static or pnode.offline:
        remote_state = None
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
            remote_state = instance.admin_state

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),

      snodes_group_uuids = [nodes[snode_name].group
                            for snode_name in instance.secondary_nodes]

      result[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "pnode_group_uuid": pnode.group,
        "pnode_group_name": group2name_fn(pnode.group),
        "snodes": instance.secondary_nodes,
        "snodes_group_uuids": snodes_group_uuids,
        "snodes_group_names": map(group2name_fn, snodes_group_uuids),
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
def PrepareContainerMods(mods, private_fn):
  """Prepares a list of container modifications by adding a private data field.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable for constructing a private data field for a
    modification
  @rtype: list

  """
  if private_fn is None:
    # No private data requested; attach None to every modification
    fn = lambda: None
  else:
    fn = private_fn

  return [(op, idx, params, fn()) for (op, idx, params) in mods]
#: Type description for changes as returned by L{ApplyContainerMods}'s
#: callbacks: either None or a list of (name, value) two-tuples
_TApplyContModsCbChanges = \
  ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
    ht.TNonEmptyString,
    ht.TAny,
    ])))
def ApplyContainerMods(kind, container, chgdesc, mods,
                       create_fn, modify_fn, remove_fn):
  """Applies descriptions in C{mods} to C{container}.

  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to modify
  @type chgdesc: None or list
  @param chgdesc: List of applied changes
  @type mods: list
  @param mods: Modifications as returned by L{PrepareContainerMods}
  @type create_fn: callable
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
    receives absolute item index, parameters and private data object as added
    by L{PrepareContainerMods}, returns tuple containing new item and changes
    as list
  @type modify_fn: callable
  @param modify_fn: Callback for modifying an existing item
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
    and private data object as added by L{PrepareContainerMods}, returns
    changes as list
  @type remove_fn: callable
  @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{PrepareContainerMods}

  """
  for (op, idx, params, private) in mods:
    if idx == -1:
      # Append/last item
      absidx = len(container) - 1
    elif idx < 0:
      raise IndexError("Not accepting negative indices other than -1")
    elif idx > len(container):
      raise IndexError("Got %s index %s, but there are only %s" %
                       (kind, idx, len(container)))
    else:
      absidx = idx

    changes = None

    if op == constants.DDM_ADD:
      # Calculate where item will be added
      if idx == -1:
        addidx = len(container)
      else:
        addidx = idx

      if create_fn is None:
        item = params
      else:
        (item, changes) = create_fn(addidx, params, private)

      if idx == -1:
        container.append(item)
      else:
        assert idx <= len(container)
        # list.insert does so before the specified index
        container.insert(idx, item)
    else:
      # Retrieve existing item
      try:
        item = container[absidx]
      except IndexError:
        raise IndexError("Invalid %s index %s" % (kind, idx))

      if op == constants.DDM_REMOVE:
        assert not params

        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]
      elif op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

    assert _TApplyContModsCbChanges(changes)

    if not (chgdesc is None or changes is None):
      chgdesc.extend(changes)
def _UpdateIvNames(base_index, disks):
  """Updates the C{iv_name} attribute of disks.

  @type disks: list of L{objects.Disk}

  """
  # Renumber the disks sequentially, starting at base_index
  for (idx, disk) in enumerate(disks):
    disk.iv_name = "disk/%s" % (base_index + idx, )
class _InstNicModPrivate:
  """Data structure for network interface modifications.

  Used by L{LUInstanceSetParams}.

  """
  def __init__(self):
    # NIC parameters without cluster defaults applied
    self.params = None
    # NIC parameters with cluster defaults filled in
    self.filled = None
12571 class LUInstanceSetParams(LogicalUnit):
12572 """Modifies an instances's parameters.
12575 HPATH = "instance-modify"
12576 HTYPE = constants.HTYPE_INSTANCE
12580 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12581 assert ht.TList(mods)
12582 assert not mods or len(mods[0]) in (2, 3)
12584 if mods and len(mods[0]) == 2:
12588 for op, params in mods:
12589 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12590 result.append((op, -1, params))
12594 raise errors.OpPrereqError("Only one %s add or remove operation is"
12595 " supported at a time" % kind,
12596 errors.ECODE_INVAL)
12598 result.append((constants.DDM_MODIFY, op, params))
12600 assert verify_fn(result)
12607 def _CheckMods(kind, mods, key_types, item_fn):
12608 """Ensures requested disk/NIC modifications are valid.
12611 for (op, _, params) in mods:
12612 assert ht.TDict(params)
12614 utils.ForceDictType(params, key_types)
12616 if op == constants.DDM_REMOVE:
12618 raise errors.OpPrereqError("No settings should be passed when"
12619 " removing a %s" % kind,
12620 errors.ECODE_INVAL)
12621 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12622 item_fn(op, params)
12624 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12627 def _VerifyDiskModification(op, params):
12628 """Verifies a disk modification.
12631 if op == constants.DDM_ADD:
12632 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12633 if mode not in constants.DISK_ACCESS_SET:
12634 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12635 errors.ECODE_INVAL)
12637 size = params.get(constants.IDISK_SIZE, None)
12639 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12640 constants.IDISK_SIZE, errors.ECODE_INVAL)
12644 except (TypeError, ValueError), err:
12645 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12646 errors.ECODE_INVAL)
12648 params[constants.IDISK_SIZE] = size
12650 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12651 raise errors.OpPrereqError("Disk size change not possible, use"
12652 " grow-disk", errors.ECODE_INVAL)
12655 def _VerifyNicModification(op, params):
12656 """Verifies a network interface modification.
12659 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12660 ip = params.get(constants.INIC_IP, None)
12661 req_net = params.get(constants.INIC_NETWORK, None)
12662 link = params.get(constants.NIC_LINK, None)
12663 mode = params.get(constants.NIC_MODE, None)
12664 if req_net is not None:
12665 if req_net.lower() == constants.VALUE_NONE:
12666 params[constants.INIC_NETWORK] = None
12668 elif link is not None or mode is not None:
12669 raise errors.OpPrereqError("If network is given"
12670 " mode or link should not",
12671 errors.ECODE_INVAL)
12673 if op == constants.DDM_ADD:
12674 macaddr = params.get(constants.INIC_MAC, None)
12675 if macaddr is None:
12676 params[constants.INIC_MAC] = constants.VALUE_AUTO
12679 if ip.lower() == constants.VALUE_NONE:
12680 params[constants.INIC_IP] = None
12682 if ip.lower() == constants.NIC_IP_POOL:
12683 if op == constants.DDM_ADD and req_net is None:
12684 raise errors.OpPrereqError("If ip=pool, parameter network"
12686 errors.ECODE_INVAL)
12688 if not netutils.IPAddress.IsValid(ip):
12689 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12690 errors.ECODE_INVAL)
12692 if constants.INIC_MAC in params:
12693 macaddr = params[constants.INIC_MAC]
12694 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12695 macaddr = utils.NormalizeAndValidateMac(macaddr)
12697 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12698 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12699 " modifying an existing NIC",
12700 errors.ECODE_INVAL)
12702 def CheckArguments(self):
12703 if not (self.op.nics or self.op.disks or self.op.disk_template or
12704 self.op.hvparams or self.op.beparams or self.op.os_name or
12705 self.op.offline is not None or self.op.runtime_mem):
12706 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12708 if self.op.hvparams:
12709 _CheckGlobalHvParams(self.op.hvparams)
12711 self.op.disks = self._UpgradeDiskNicMods(
12712 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12713 self.op.nics = self._UpgradeDiskNicMods(
12714 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12716 # Check disk modifications
12717 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12718 self._VerifyDiskModification)
12720 if self.op.disks and self.op.disk_template is not None:
12721 raise errors.OpPrereqError("Disk template conversion and other disk"
12722 " changes not supported at the same time",
12723 errors.ECODE_INVAL)
12725 if (self.op.disk_template and
12726 self.op.disk_template in constants.DTS_INT_MIRROR and
12727 self.op.remote_node is None):
12728 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12729 " one requires specifying a secondary node",
12730 errors.ECODE_INVAL)
12732 # Check NIC modifications
12733 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12734 self._VerifyNicModification)
12736 def ExpandNames(self):
12737 self._ExpandAndLockInstance()
12738 # Can't even acquire node locks in shared mode as upcoming changes in
12739 # Ganeti 2.6 will start to modify the node object on disk conversion
12740 self.needed_locks[locking.LEVEL_NODE] = []
12741 self.needed_locks[locking.LEVEL_NODE_RES] = []
12742 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12744 def DeclareLocks(self, level):
12745 # TODO: Acquire group lock in shared mode (disk parameters)
12746 if level == locking.LEVEL_NODE:
12747 self._LockInstancesNodes()
12748 if self.op.disk_template and self.op.remote_node:
12749 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12750 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12751 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12753 self.needed_locks[locking.LEVEL_NODE_RES] = \
12754 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12756 def BuildHooksEnv(self):
12757 """Build hooks env.
12759 This runs on the master, primary and secondaries.
12763 if constants.BE_MINMEM in self.be_new:
12764 args["minmem"] = self.be_new[constants.BE_MINMEM]
12765 if constants.BE_MAXMEM in self.be_new:
12766 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12767 if constants.BE_VCPUS in self.be_new:
12768 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12769 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12770 # information at all.
12772 if self._new_nics is not None:
12775 for nic in self._new_nics:
12776 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12777 mode = nicparams[constants.NIC_MODE]
12778 link = nicparams[constants.NIC_LINK]
12779 nics.append((nic.ip, nic.mac, mode, link, nic.network))
12781 args["nics"] = nics
12783 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12784 if self.op.disk_template:
12785 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12786 if self.op.runtime_mem:
12787 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12791 def BuildHooksNodes(self):
12792 """Build hooks nodes.
12795 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
  def _PrepareNicModification(self, params, private, old_ip, old_net,
                              old_params, cluster, pnode):
    # NOTE(review): this copy of the file appears to have lost several lines
    # (guard conditions, "else:" branches and some call continuations) --
    # compare with upstream before changing this method.  It validates a
    # requested NIC change, fills in generated MAC/IP values and stores the
    # resulting parameter dicts on the private data object.
    update_params_dict = dict([(key, params[key])
                               for key in constants.NICS_PARAMETERS

    req_link = update_params_dict.get(constants.NIC_LINK, None)
    req_mode = update_params_dict.get(constants.NIC_MODE, None)

    new_net = params.get(constants.INIC_NETWORK, old_net)
    if new_net is not None:
      netparams = self.cfg.GetGroupNetParams(new_net, pnode)
      if netparams is None:
        raise errors.OpPrereqError("No netparams found for the network"
                                   " %s, propably not connected." % new_net,
                                   errors.ECODE_INVAL)
      new_params = dict(netparams)
      new_params = _GetUpdatedParams(old_params, update_params_dict)

    utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)

    new_filled_params = cluster.SimpleFillNIC(new_params)
    objects.NIC.CheckParameterSyntax(new_filled_params)

    new_mode = new_filled_params[constants.NIC_MODE]
    if new_mode == constants.NIC_MODE_BRIDGED:
      bridge = new_filled_params[constants.NIC_LINK]
      msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
        msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
          self.warn.append(msg)
          raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)

    elif new_mode == constants.NIC_MODE_ROUTED:
      ip = params.get(constants.INIC_IP, old_ip)
        raise errors.OpPrereqError("Cannot set the NIC IP address to None"
                                   " on a routed NIC", errors.ECODE_INVAL)

    if constants.INIC_MAC in params:
      mac = params[constants.INIC_MAC]
        raise errors.OpPrereqError("Cannot unset the NIC MAC address",
                                   errors.ECODE_INVAL)
      elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        # otherwise generate the MAC address
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(self.proc.GetECId())
        # or validate/reserve the current one
        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address '%s' already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)
    elif new_net != old_net:
      # Changing networks: a new MAC prefix may require a regenerated MAC
      def get_net_prefix(net):
          uuid = self.cfg.LookupNetwork(net)
          nobj = self.cfg.GetNetwork(uuid)
          return nobj.mac_prefix
      new_prefix = get_net_prefix(new_net)
      old_prefix = get_net_prefix(old_net)
      if old_prefix != new_prefix:
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(self.proc.GetECId())

    #if there is a change in nic-network configuration
    new_ip = params.get(constants.INIC_IP, old_ip)
    if (new_ip, new_net) != (old_ip, old_net):
      if new_ip.lower() == constants.NIC_IP_POOL:
        # Allocate a fresh address from the network's pool
          new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("Unable to get a free IP"
                                     " from the address pool",
                                     errors.ECODE_STATE)
        self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
        params[constants.INIC_IP] = new_ip
      elif new_ip != old_ip or new_net != old_net:
          self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
          self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("IP %s not available in network %s" %
                                     errors.ECODE_NOTUNIQUE)
      elif new_ip.lower() == constants.NIC_IP_POOL:
        raise errors.OpPrereqError("ip=pool, but no network found",

      if self.op.conflicts_check:
        _CheckForConflictingIp(self, new_ip, pnode)

          self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
        except errors.AddressPoolError:
          logging.warning("Release IP %s not contained in network %s",

    # there are no changes in (net, ip) tuple
    elif (old_net is not None and
          (req_link is not None or req_mode is not None)):
      raise errors.OpPrereqError("Not allowed to change link or mode of"
                                 " a NIC that is connected to a network.",
                                 errors.ECODE_INVAL)

    private.params = new_params
    private.filled = new_filled_params
  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # NOTE(review): this copy of the file appears to have lost a number of
    # lines (several "else:" branches, call continuations and initializers
    # such as "self.warn = []") -- compare with upstream before changing
    # anything in this method.
    # checking the new params on the primary/secondary nodes
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)
    pnode_info = self.cfg.GetNodeInfo(pnode)
    self.diskparams = self.cfg.GetInstanceDiskParams(instance)

    # Prepare disk/NIC modifications
    self.diskmod = PrepareContainerMods(self.op.disks, None)
    self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)

    # Determine which OS the checks below should be performed against
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
      instance_os = instance.os

    assert not (self.op.disk_template and self.op.disks), \
      "Can't modify disk template and apply disk changes at the same time"

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
                          msg="cannot change disk template")
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        disks = [{constants.IDISK_SIZE: d.size,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

        snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
        snode_group = self.cfg.GetNodeGroup(snode_info.group)
        ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
        _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
                                ignore=self.op.ignore_ipolicy)
        if pnode_info.group != snode_info.group:
          self.LogWarning("The primary and secondary nodes are in two"
                          " different node groups; the disk parameters"
                          " from the first disk's node group will be"

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # Syntax check locally, then on all instance nodes
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_proposed = self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
      objects.UpgradeBeParams(i_bedict)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_proposed = self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
      self.be_new = self.be_inst = {}
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
    be_old = cluster.FillBE(instance)

    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
      # Verify mask is consistent with number of vCPUs. Can skip this
      # test if only 1 entry in the CPU mask, which means same mask
      # is applied to all vCPUs.
      if (len(cpu_list) > 1 and
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   (self.be_proposed[constants.BE_VCPUS],
                                    self.hv_proposed[constants.HV_CPU_MASK]),
                                   errors.ECODE_INVAL)

      # Only perform this test if a new CPU mask is given
      if constants.HV_CPU_MASK in self.hv_new:
        # Calculate the largest CPU number requested
        max_requested_cpu = max(map(max, cpu_list))
        # Check that all of the instance's nodes have enough physical CPUs to
        # satisfy the requested CPU mask
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
                                max_requested_cpu + 1, instance.hypervisor)

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)

    #TODO(dynmem): do the appropriate check involving MINMEM
    if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
        be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         [instance.hypervisor])
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
        (_, _, (pnhvinfo, )) = pninfo.payload
        if not isinstance(pnhvinfo.get("memory_free", None), int):
          self.warn.append("Node data from primary node %s doesn't contain"
                           " free memory information" % pnode)
        elif instance_info.fail_msg:
          self.warn.append("Can't get instance runtime information: %s" %
                           instance_info.fail_msg)
          if instance_info.payload:
            current_mem = int(instance_info.payload["memory"])
            # Assume instance not running
            # (there is a slight race condition here, but it's not very
            # probable, and we have no other way to check)
            # TODO: Describe race condition

          #TODO(dynmem): do the appropriate check involving MINMEM
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
                      pnhvinfo["memory_free"])
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from starting, due to %d MB of memory"
                                       " missing on its primary node" %
                                       miss_mem, errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          (_, _, (nhvinfo, )) = nres.payload
          if not isinstance(nhvinfo.get("memory_free", None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          #TODO(dynmem): do the appropriate check involving MINMEM
          elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)

    if self.op.runtime_mem:
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node)
      if not remote_info.payload: # not running already
        raise errors.OpPrereqError("Instance %s is not running" %
                                   instance.name, errors.ECODE_STATE)

      current_memory = remote_info.payload["memory"]
      if (not self.op.force and
          (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
           self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
        raise errors.OpPrereqError("Instance %s must have memory between %d"
                                   " and %d MB of memory unless --force is"
                                    self.be_proposed[constants.BE_MINMEM],
                                    self.be_proposed[constants.BE_MAXMEM]),
                                   errors.ECODE_INVAL)

      delta = self.op.runtime_mem - current_memory
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "ballooning memory for instance %s" %
                             instance.name, delta, instance.hypervisor)

    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances", errors.ECODE_INVAL)

    def _PrepareNicCreate(_, params, private):
      self._PrepareNicModification(params, private, None, None,
                                   {}, cluster, pnode)
      return (None, None)

    def _PrepareNicMod(_, nic, params, private):
      self._PrepareNicModification(params, private, nic.ip, nic.network,
                                   nic.nicparams, cluster, pnode)

    def _PrepareNicRemove(_, params, private):
      net = params.network
      if net is not None and ip is not None:
        self.cfg.ReleaseIp(net, ip, self.proc.GetECId())

    # Verify NIC changes (operating on copy)
    nics = instance.nics[:]
    ApplyContainerMods("NIC", nics, None, self.nicmod,
                       _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
    if len(nics) > constants.MAX_NICS:
      raise errors.OpPrereqError("Instance has too many network interfaces"
                                 " (%d), cannot add more" % constants.MAX_NICS,
                                 errors.ECODE_STATE)

    # Verify disk changes (operating on a copy)
    disks = instance.disks[:]
    ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
    if len(disks) > constants.MAX_DISKS:
      raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
                                 " more" % constants.MAX_DISKS,
                                 errors.ECODE_STATE)

    if self.op.offline is not None:
      if self.op.offline:
        msg = "can't change to offline"
        msg = "can't change to online"
      _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)

    # Pre-compute NIC changes (necessary to use result in hooks)
    self._nic_chgdesc = []
      # Operate on copies as this is still in prereq
      nics = [nic.Copy() for nic in instance.nics]
      ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
                         self._CreateNewNic, self._ApplyNicMods, None)
      self._new_nics = nics
      self._new_nics = None
  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    # NOTE(review): a few lines appear to be missing from this copy of the
    # file (call continuations and the "if disk_abort:" guard) -- compare
    # with upstream before changing this method.
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    assert instance.disk_template == constants.DT_PLAIN

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn,
    anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in anno_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in anno_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # Release node locks while waiting for sync
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

    # Node resource locks will be released by caller
13265 def _ConvertDrbdToPlain(self, feedback_fn):
13266 """Converts an instance from drbd to plain.
13269 instance = self.instance
13271 assert len(instance.secondary_nodes) == 1
13272 assert instance.disk_template == constants.DT_DRBD8
13274 pnode = instance.primary_node
13275 snode = instance.secondary_nodes[0]
13276 feedback_fn("Converting template to plain")
13278 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13279 new_disks = [d.children[0] for d in instance.disks]
13281 # copy over size and mode
13282 for parent, child in zip(old_disks, new_disks):
13283 child.size = parent.size
13284 child.mode = parent.mode
13286 # this is a DRBD disk, return its port to the pool
13287 # NOTE: this must be done right before the call to cfg.Update!
13288 for disk in old_disks:
13289 tcp_port = disk.logical_id[2]
13290 self.cfg.AddTcpUdpPort(tcp_port)
13292 # update instance structure
13293 instance.disks = new_disks
13294 instance.disk_template = constants.DT_PLAIN
13295 self.cfg.Update(instance, feedback_fn)
13297 # Release locks in case removing disks takes a while
13298 _ReleaseLocks(self, locking.LEVEL_NODE)
13300 feedback_fn("Removing volumes on the secondary node...")
13301 for disk in old_disks:
13302 self.cfg.SetDiskID(disk, snode)
13303 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
13305 self.LogWarning("Could not remove block device %s on node %s,"
13306 " continuing anyway: %s", disk.iv_name, snode, msg)
13308 feedback_fn("Removing unneeded volumes on the primary node...")
13309 for idx, disk in enumerate(old_disks):
13310 meta = disk.children[1]
13311 self.cfg.SetDiskID(meta, pnode)
13312 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
13314 self.LogWarning("Could not remove metadata for disk %d on node %s,"
13315 " continuing anyway: %s", idx, pnode, msg)
13317 def _CreateNewDisk(self, idx, params, _):
13318 """Creates a new disk.
13321 instance = self.instance
13324 if instance.disk_template in constants.DTS_FILEBASED:
13325 (file_driver, file_path) = instance.disks[0].logical_id
13326 file_path = os.path.dirname(file_path)
13328 file_driver = file_path = None
13331 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13332 instance.primary_node, instance.secondary_nodes,
13333 [params], file_path, file_driver, idx,
13334 self.Log, self.diskparams)[0]
13336 info = _GetInstanceInfoText(instance)
13338 logging.info("Creating volume %s for instance %s",
13339 disk.iv_name, instance.name)
13340 # Note: this needs to be kept in sync with _CreateDisks
13342 for node in instance.all_nodes:
13343 f_create = (node == instance.primary_node)
13345 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13346 except errors.OpExecError, err:
13347 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13348 disk.iv_name, disk, node, err)
13351 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13355 def _ModifyDisk(idx, disk, params, _):
13356 """Modifies a disk.
13359 disk.mode = params[constants.IDISK_MODE]
13362 ("disk.mode/%d" % idx, disk.mode),
13365 def _RemoveDisk(self, idx, root, _):
13369 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13370 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13371 self.cfg.SetDiskID(disk, node)
13372 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13374 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13375 " continuing anyway", idx, node, msg)
13377 # if this is a DRBD disk, return its port to the pool
13378 if root.dev_type in constants.LDS_DRBD:
13379 self.cfg.AddTcpUdpPort(root.logical_id[2])
13382 def _CreateNewNic(idx, params, private):
13383 """Creates data structure for a new network interface.
13386 mac = params[constants.INIC_MAC]
13387 ip = params.get(constants.INIC_IP, None)
13388 network = params.get(constants.INIC_NETWORK, None)
13389 #TODO: not private.filled?? can a nic have no nicparams??
13390 nicparams = private.filled
13392 return (objects.NIC(mac=mac, ip=ip, network=network, nicparams=nicparams), [
13394 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
13395 (mac, ip, private.filled[constants.NIC_MODE],
13396 private.filled[constants.NIC_LINK],
13401 def _ApplyNicMods(idx, nic, params, private):
13402 """Modifies a network interface.
13407 for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
13409 changes.append(("nic.%s/%d" % (key, idx), params[key]))
13410 setattr(nic, key, params[key])
13413 nic.nicparams = private.filled
13415 for (key, val) in nic.nicparams.items():
13416 changes.append(("nic.%s/%d" % (key, idx), val))
13420 def Exec(self, feedback_fn):
13421 """Modifies an instance.
13423 All parameters take effect only at the next restart of the instance.
13426 # Process here the warnings from CheckPrereq, as we don't have a
13427 # feedback_fn there.
13428 # TODO: Replace with self.LogWarning
13429 for warn in self.warn:
13430 feedback_fn("WARNING: %s" % warn)
13432 assert ((self.op.disk_template is None) ^
13433 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13434 "Not owning any node resource locks"
13437 instance = self.instance
13440 if self.op.runtime_mem:
13441 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13443 self.op.runtime_mem)
13444 rpcres.Raise("Cannot modify instance runtime memory")
13445 result.append(("runtime_memory", self.op.runtime_mem))
13447 # Apply disk changes
13448 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13449 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13450 _UpdateIvNames(0, instance.disks)
13452 if self.op.disk_template:
13454 check_nodes = set(instance.all_nodes)
13455 if self.op.remote_node:
13456 check_nodes.add(self.op.remote_node)
13457 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13458 owned = self.owned_locks(level)
13459 assert not (check_nodes - owned), \
13460 ("Not owning the correct locks, owning %r, expected at least %r" %
13461 (owned, check_nodes))
13463 r_shut = _ShutdownInstanceDisks(self, instance)
13465 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13466 " proceed with disk template conversion")
13467 mode = (instance.disk_template, self.op.disk_template)
13469 self._DISK_CONVERSIONS[mode](self, feedback_fn)
13471 self.cfg.ReleaseDRBDMinors(instance.name)
13473 result.append(("disk_template", self.op.disk_template))
13475 assert instance.disk_template == self.op.disk_template, \
13476 ("Expected disk template '%s', found '%s'" %
13477 (self.op.disk_template, instance.disk_template))
13479 # Release node and resource locks if there are any (they might already have
13480 # been released during disk conversion)
13481 _ReleaseLocks(self, locking.LEVEL_NODE)
13482 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13484 # Apply NIC changes
13485 if self._new_nics is not None:
13486 instance.nics = self._new_nics
13487 result.extend(self._nic_chgdesc)
13490 if self.op.hvparams:
13491 instance.hvparams = self.hv_inst
13492 for key, val in self.op.hvparams.iteritems():
13493 result.append(("hv/%s" % key, val))
13496 if self.op.beparams:
13497 instance.beparams = self.be_inst
13498 for key, val in self.op.beparams.iteritems():
13499 result.append(("be/%s" % key, val))
13502 if self.op.os_name:
13503 instance.os = self.op.os_name
13506 if self.op.osparams:
13507 instance.osparams = self.os_inst
13508 for key, val in self.op.osparams.iteritems():
13509 result.append(("os/%s" % key, val))
13511 if self.op.offline is None:
13514 elif self.op.offline:
13515 # Mark instance as offline
13516 self.cfg.MarkInstanceOffline(instance.name)
13517 result.append(("admin_state", constants.ADMINST_OFFLINE))
13519 # Mark instance as online, but stopped
13520 self.cfg.MarkInstanceDown(instance.name)
13521 result.append(("admin_state", constants.ADMINST_DOWN))
13523 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
13525 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13526 self.owned_locks(locking.LEVEL_NODE)), \
13527 "All node locks should have been released by now"
13531 _DISK_CONVERSIONS = {
13532 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13533 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
class LUInstanceChangeGroup(LogicalUnit):
  """Moves an instance to another node group via the iallocator.

  """
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = frozenset(self.req_target_uuids)
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    req = iallocator.IAReqGroupChange(instances=instances,
                                      target_groups=list(self.target_uuids))
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info), errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
                             ["node", "export"], self.op.use_locking)

  def ExpandNames(self):
    self.expq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.expq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    # Map node name to either False (query failed for that node) or the
    # list of export names found on it
    result = {}

    for (node, expname) in self.expq.OldStyleQuery(self):
      if expname is None:
        result[node] = False
      else:
        result.setdefault(node, []).append(expname)

    return result
class _ExportQuery(_QueryBase):
  FIELDS = query.EXPORT_FIELDS

  #: The node name is not a unique key for this query
  SORT_FIELD = "node"

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

    if self.do_locking:
      lu.share_locks = _ShareAll()
      lu.needed_locks = {
        locking.LEVEL_NODE: self.wanted,
        }

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)

    result = []

    for (node, nres) in lu.rpc.call_export_list(nodes).items():
      if nres.fail_msg:
        # query failed for this node; record it with no export name
        result.append((node, None))
      else:
        result.extend((node, expname) for expname in nres.payload)

    return result
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    For remote exports, creates an X509 key/certificate pair on the
    primary node and returns the handshake, signed key name and signed
    CA needed by the importing side; for other modes returns None.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None
13807 class LUBackupExport(LogicalUnit):
13808 """Export an instance to an image in the cluster.
13811 HPATH = "instance-export"
13812 HTYPE = constants.HTYPE_INSTANCE
13815 def CheckArguments(self):
13816 """Check the arguments.
13819 self.x509_key_name = self.op.x509_key_name
13820 self.dest_x509_ca_pem = self.op.destination_x509_ca
13822 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13823 if not self.x509_key_name:
13824 raise errors.OpPrereqError("Missing X509 key name for encryption",
13825 errors.ECODE_INVAL)
13827 if not self.dest_x509_ca_pem:
13828 raise errors.OpPrereqError("Missing destination X509 CA",
13829 errors.ECODE_INVAL)
13831 def ExpandNames(self):
13832 self._ExpandAndLockInstance()
13834 # Lock all nodes for local exports
13835 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13836 # FIXME: lock only instance primary and destination node
13838 # Sad but true, for now we have do lock all nodes, as we don't know where
13839 # the previous export might be, and in this LU we search for it and
13840 # remove it from its current node. In the future we could fix this by:
13841 # - making a tasklet to search (share-lock all), then create the
13842 # new one, then one to remove, after
13843 # - removing the removal operation altogether
13844 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13846 def DeclareLocks(self, level):
13847 """Last minute lock declaration."""
13848 # All nodes are locked anyway, so nothing to do here.
13850 def BuildHooksEnv(self):
13851 """Build hooks env.
13853 This will run on the master, primary node and target node.
13857 "EXPORT_MODE": self.op.mode,
13858 "EXPORT_NODE": self.op.target_node,
13859 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13860 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13861 # TODO: Generic function for boolean env variables
13862 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13865 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13869 def BuildHooksNodes(self):
13870 """Build hooks nodes.
13873 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13875 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13876 nl.append(self.op.target_node)
13880 def CheckPrereq(self):
13881 """Check prerequisites.
13883 This checks that the instance and node names are valid.
13886 instance_name = self.op.instance_name
13888 self.instance = self.cfg.GetInstanceInfo(instance_name)
13889 assert self.instance is not None, \
13890 "Cannot retrieve locked instance %s" % self.op.instance_name
13891 _CheckNodeOnline(self, self.instance.primary_node)
13893 if (self.op.remove_instance and
13894 self.instance.admin_state == constants.ADMINST_UP and
13895 not self.op.shutdown):
13896 raise errors.OpPrereqError("Can not remove instance without shutting it"
13897 " down before", errors.ECODE_STATE)
13899 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13900 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13901 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13902 assert self.dst_node is not None
13904 _CheckNodeOnline(self, self.dst_node.name)
13905 _CheckNodeNotDrained(self, self.dst_node.name)
13908 self.dest_disk_info = None
13909 self.dest_x509_ca = None
13911 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13912 self.dst_node = None
13914 if len(self.op.target_node) != len(self.instance.disks):
13915 raise errors.OpPrereqError(("Received destination information for %s"
13916 " disks, but instance %s has %s disks") %
13917 (len(self.op.target_node), instance_name,
13918 len(self.instance.disks)),
13919 errors.ECODE_INVAL)
13921 cds = _GetClusterDomainSecret()
13923 # Check X509 key name
13925 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13926 except (TypeError, ValueError), err:
13927 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
13928 errors.ECODE_INVAL)
13930 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13931 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13932 errors.ECODE_INVAL)
13934 # Load and verify CA
13936 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13937 except OpenSSL.crypto.Error, err:
13938 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13939 (err, ), errors.ECODE_INVAL)
13941 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13942 if errcode is not None:
13943 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13944 (msg, ), errors.ECODE_INVAL)
13946 self.dest_x509_ca = cert
13948 # Verify target information
13950 for idx, disk_data in enumerate(self.op.target_node):
13952 (host, port, magic) = \
13953 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13954 except errors.GenericError, err:
13955 raise errors.OpPrereqError("Target info for disk %s: %s" %
13956 (idx, err), errors.ECODE_INVAL)
13958 disk_info.append((host, port, magic))
13960 assert len(disk_info) == len(self.op.target_node)
13961 self.dest_disk_info = disk_info
13964 raise errors.ProgrammerError("Unhandled export mode %r" %
13967 # instance disk type verification
13968 # TODO: Implement export support for file-based disks
13969 for disk in self.instance.disks:
13970 if disk.dev_type == constants.LD_FILE:
13971 raise errors.OpPrereqError("Export not supported for instances with"
13972 " file-based disks", errors.ECODE_INVAL)
13974 def _CleanupExports(self, feedback_fn):
13975 """Removes exports of current instance from all other nodes.
13977 If an instance in a cluster with nodes A..D was exported to node C, its
13978 exports will be removed from the nodes A, B and D.
13981 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13983 nodelist = self.cfg.GetNodeList()
13984 nodelist.remove(self.dst_node.name)
13986 # on one-node clusters nodelist will be empty after the removal
13987 # if we proceed the backup would be removed because OpBackupQuery
13988 # substitutes an empty list with the full cluster node list.
13989 iname = self.instance.name
13991 feedback_fn("Removing old exports for instance %s" % iname)
13992 exportlist = self.rpc.call_export_list(nodelist)
13993 for node in exportlist:
13994 if exportlist[node].fail_msg:
13996 if iname in exportlist[node].payload:
13997 msg = self.rpc.call_export_remove(node, iname).fail_msg
13999 self.LogWarning("Could not remove older export for instance %s"
14000 " on node %s: %s", iname, node, msg)
14002 def Exec(self, feedback_fn):
14003 """Export an instance to an image in the cluster.
14006 assert self.op.mode in constants.EXPORT_MODES
14008 instance = self.instance
14009 src_node = instance.primary_node
14011 if self.op.shutdown:
14012 # shutdown the instance, but not the disks
14013 feedback_fn("Shutting down instance %s" % instance.name)
14014 result = self.rpc.call_instance_shutdown(src_node, instance,
14015 self.op.shutdown_timeout)
14016 # TODO: Maybe ignore failures if ignore_remove_failures is set
14017 result.Raise("Could not shutdown instance %s on"
14018 " node %s" % (instance.name, src_node))
14020 # set the disks ID correctly since call_instance_start needs the
14021 # correct drbd minor to create the symlinks
14022 for disk in instance.disks:
14023 self.cfg.SetDiskID(disk, src_node)
14025 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14028 # Activate the instance disks if we'exporting a stopped instance
14029 feedback_fn("Activating disks for %s" % instance.name)
14030 _StartInstanceDisks(self, instance, None)
14033 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14036 helper.CreateSnapshots()
14038 if (self.op.shutdown and
14039 instance.admin_state == constants.ADMINST_UP and
14040 not self.op.remove_instance):
14041 assert not activate_disks
14042 feedback_fn("Starting instance %s" % instance.name)
14043 result = self.rpc.call_instance_start(src_node,
14044 (instance, None, None), False)
14045 msg = result.fail_msg
14047 feedback_fn("Failed to start instance: %s" % msg)
14048 _ShutdownInstanceDisks(self, instance)
14049 raise errors.OpExecError("Could not start instance: %s" % msg)
14051 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14052 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14053 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14054 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14055 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14057 (key_name, _, _) = self.x509_key_name
14060 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14063 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14064 key_name, dest_ca_pem,
14069 # Check for backwards compatibility
14070 assert len(dresults) == len(instance.disks)
14071 assert compat.all(isinstance(i, bool) for i in dresults), \
14072 "Not all results are boolean: %r" % dresults
14076 feedback_fn("Deactivating disks for %s" % instance.name)
14077 _ShutdownInstanceDisks(self, instance)
14079 if not (compat.all(dresults) and fin_resu):
14082 failures.append("export finalization")
14083 if not compat.all(dresults):
14084 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14086 failures.append("disk export: disk(s) %s" % fdsk)
14088 raise errors.OpExecError("Export failed, errors in %s" %
14089 utils.CommaJoin(failures))
14091 # At this point, the export was successful, we can cleanup/finish
14093 # Remove instance if requested
14094 if self.op.remove_instance:
14095 feedback_fn("Removing instance %s" % instance.name)
14096 _RemoveInstance(self, feedback_fn, instance,
14097 self.op.ignore_remove_failures)
14099 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14100 self._CleanupExports(feedback_fn)
14102 return fin_resu, dresults
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
14152 class LUGroupAdd(LogicalUnit):
14153 """Logical unit for creating node groups.
14156 HPATH = "group-add"
14157 HTYPE = constants.HTYPE_GROUP
14160 def ExpandNames(self):
14161 # We need the new group's UUID here so that we can create and acquire the
14162 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14163 # that it should not check whether the UUID exists in the configuration.
14164 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14165 self.needed_locks = {}
14166 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14168 def CheckPrereq(self):
14169 """Check prerequisites.
14171 This checks that the given group name is not an existing node group
14176 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14177 except errors.OpPrereqError:
14180 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14181 " node group (UUID: %s)" %
14182 (self.op.group_name, existing_uuid),
14183 errors.ECODE_EXISTS)
14185 if self.op.ndparams:
14186 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14188 if self.op.hv_state:
14189 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14191 self.new_hv_state = None
14193 if self.op.disk_state:
14194 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14196 self.new_disk_state = None
14198 if self.op.diskparams:
14199 for templ in constants.DISK_TEMPLATES:
14200 if templ in self.op.diskparams:
14201 utils.ForceDictType(self.op.diskparams[templ],
14202 constants.DISK_DT_TYPES)
14203 self.new_diskparams = self.op.diskparams
14205 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14206 except errors.OpPrereqError, err:
14207 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14208 errors.ECODE_INVAL)
14210 self.new_diskparams = {}
14212 if self.op.ipolicy:
14213 cluster = self.cfg.GetClusterInfo()
14214 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14216 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14217 except errors.ConfigurationError, err:
14218 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14219 errors.ECODE_INVAL)
14221 def BuildHooksEnv(self):
14222 """Build hooks env.
14226 "GROUP_NAME": self.op.group_name,
14229 def BuildHooksNodes(self):
14230 """Build hooks nodes.
14233 mn = self.cfg.GetMasterNode()
14234 return ([mn], [mn])
14236 def Exec(self, feedback_fn):
14237 """Add the node group to the cluster.
14240 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14241 uuid=self.group_uuid,
14242 alloc_policy=self.op.alloc_policy,
14243 ndparams=self.op.ndparams,
14244 diskparams=self.new_diskparams,
14245 ipolicy=self.op.ipolicy,
14246 hv_state_static=self.new_hv_state,
14247 disk_state_static=self.new_disk_state)
14249 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14250 del self.remove_locks[locking.LEVEL_NODEGROUP]
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information later on.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1

      # Try to get all affected nodes' groups without having the group or node
      # lock yet. Needs verification later in the code flow.
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)

      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the optimistically-acquired group locks still match
    reality, and warns about (or refuses) assignments that would split
    mirrored instances across groups.

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    # Re-derive the set of groups the nodes belong to; if it differs from the
    # locks we hold, the nodes were moved between lock declaration and
    # acquisition, and we must abort.
    expected_locks = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if actual_locks != expected_locks:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(expected_locks),
                                utils.CommaJoin(actual_locks)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]

    self.cfg.AssignGroupNodes(mods)

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR
    are considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as a
      consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
    # Only record assignments that actually move a node to a new group.
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      # Primary plus all secondaries: the nodes the instance's disks span.
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      # Split before the change: nodes already in more than one group.
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      # Split after the change: apply the pending assignments first.
      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))
class _GroupQuery(_QueryBase):
  # Query implementation for node groups; field definitions live in the
  # query module.
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    # Queries run without locks, directly off the configuration.
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    self._cluster = lu.cfg.GetClusterInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      # No names requested: return all groups, sorted by name.
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    # Nothing to do: no locks were requested in ExpandNames.
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for
    # the latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        # An instance belongs to the group of its primary node.
        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData(self._cluster,
                                [self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances,
                                query.GQ_DISKPARAMS in self.requested_data)
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  Thin wrapper delegating all work to L{_GroupQuery}.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)
class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    # At least one of the modifiable parameters must be given.
    all_changes = [
      self.op.ndparams,
      self.op.diskparams,
      self.op.alloc_policy,
      self.op.hv_state,
      self.op.disk_state,
      self.op.ipolicy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

    # Instances are only read (for ipolicy violation warnings), so a shared
    # lock is enough.
    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

  @staticmethod
  def _UpdateAndVerifyDiskParams(old, new):
    """Updates and verifies disk parameters.

    @param old: existing parameter sub-dict for one disk template
    @param new: requested changes for that template
    @return: the merged and type-checked parameter dict

    """
    new_params = _GetUpdatedParams(old, new)
    utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
    return new_params

  def CheckPrereq(self):
    """Check prerequisites.

    Re-validates the optimistic instance locks, then computes the new
    parameter sets (ndparams, diskparams, hv/disk state, ipolicy) without
    applying them yet.

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    cluster = self.cfg.GetClusterInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.diskparams:
      diskparams = self.group.diskparams
      uavdp = self._UpdateAndVerifyDiskParams
      # For each disktemplate subdict update and verify the values
      new_diskparams = dict((dt,
                             uavdp(diskparams.get(dt, {}),
                                   self.op.diskparams[dt]))
                            for dt in constants.DISK_TEMPLATES
                            if dt in self.op.diskparams)
      # As we've all subdicts of diskparams ready, lets merge the actual
      # dict with all updated subdicts
      self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
      try:
        utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
      except errors.OpPrereqError, err:
        raise errors.OpPrereqError("While verify diskparams options: %s" % err,
                                   errors.ECODE_INVAL)

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.group.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.group.disk_state_static)

    if self.op.ipolicy:
      self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
                                            self.op.ipolicy,
                                            group_policy=True)

      # Warn (but do not fail) about instances that would violate the new
      # effective policy.
      new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
      inst_filter = lambda inst: inst.name in owned_instances
      instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
      gmi = ganeti.masterd.instance
      violations = \
          _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
                                                                  self.group),
                                        new_ipolicy, instances)

      if violations:
        self.LogWarning("After the ipolicy change the following instances"
                        " violate them: %s",
                        utils.CommaJoin(violations))

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    Applies the parameter sets prepared in CheckPrereq and returns a list of
    (parameter, new value) change pairs.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.diskparams:
      self.group.diskparams = self.new_diskparams
      result.append(("diskparams", str(self.group.diskparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    if self.op.hv_state:
      self.group.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      self.group.disk_state_static = self.new_disk_state

    if self.op.ipolicy:
      self.group.ipolicy = self.new_ipolicy

    self.cfg.Update(self.group, feedback_fn)
    return result
class LUGroupRemove(LogicalUnit):
  """Removes a node group from the cluster.

  """
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This will raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that is
    empty (i.e., contains no nodes), and that is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
                                 " removed" % self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    # The group is gone; drop its lock together with the LU.
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
class LUGroupRename(LogicalUnit):
  """Renames a node group.

  """
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      # The lookup failing means the name is free, which is what we want.
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Hooks run on the master plus all members of the renamed group.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name
class LUGroupEvacuate(LogicalUnit):
  """Evacuates all instances off a node group, using an iallocator to
  distribute them over the target groups.

  """
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    # All locks are shared: this LU only computes and submits jobs.
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              owned_groups, owned_nodes, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    # Ask the iallocator how to move the instances out of the group; the
    # result is turned into a set of jobs submitted on our behalf.
    req = iallocator.IAReqGroupChange(instances=instances,
                                      target_groups=self.target_uuids)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)
class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}

    # Resolve the tag target and decide which lock (if any) protects it.
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      lock_level = locking.LEVEL_NODE
      lock_name = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      lock_level = locking.LEVEL_INSTANCE
      lock_name = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
      lock_level = locking.LEVEL_NODEGROUP
      lock_name = self.group_uuid
    else:
      # Cluster-level tags take no per-object lock.
      lock_level = None
      lock_name = None

    if lock_level and getattr(self.op, "use_locking", True):
      self.needed_locks[lock_level] = lock_name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    Resolves C{self.target} to the taggable configuration object.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())
class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    # Collect (path, taggable-object) pairs for every tag-bearing object in
    # the cluster configuration.
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    # Persist the modified target object.
    self.cfg.Update(self.target, feedback_fn)
class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    # Every requested tag must currently exist on the target.
    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    # Persist the modified target object.
    self.cfg.Update(self.target, feedback_fn)
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    Sleeps on the master (locally) and/or on remote nodes (via RPC),
    depending on the opcode parameters.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
class LURestrictedCommand(NoHooksLU):
  """Logical unit for executing restricted commands.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.nodes:
      self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    self.needed_locks = {
      locking.LEVEL_NODE: self.op.nodes,
      }
    self.share_locks = {
      # Exclusive node locks only when the caller asked for locking.
      locking.LEVEL_NODE: not self.op.use_locking,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Execute restricted command and return output.

    @rtype: list of (bool, string) tuples
    @return: per-node (success, output-or-error-message) pairs, in the order
      of C{self.op.nodes}

    """
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    # Check if correct locks are held
    assert set(self.op.nodes).issubset(owned_nodes)

    rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)

    result = []

    for node_name in self.op.nodes:
      nres = rpcres[node_name]
      if nres.fail_msg:
        msg = ("Command '%s' on node '%s' failed: %s" %
               (self.op.command, node_name, nres.fail_msg))
        result.append((False, msg))
      else:
        result.append((True, nres.payload))

    return result
class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    # Counters used to verify the LU lifecycle methods are called in order.
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
      # Report how many test messages have been sent
      self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the director and mode test.

    """
    if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
                        constants.IALLOCATOR_MODE_MULTI_ALLOC):
      # Allocation modes need a full (fake) instance specification.
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      # Relocation works on an existing instance.
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      # "out" direction actually runs the allocator, so a name is required.
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @return: either the allocator input text (direction "in") or the
      allocator's output text (direction "out")

    """
    # Build the iallocator request matching the requested test mode.
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      req = iallocator.IAReqInstanceAlloc(name=self.op.name,
                                          memory=self.op.memory,
                                          disks=self.op.disks,
                                          disk_template=self.op.disk_template,
                                          os=self.op.os,
                                          tags=self.op.tags,
                                          nics=self.op.nics,
                                          vcpus=self.op.vcpus,
                                          spindle_use=self.op.spindle_use,
                                          hypervisor=self.op.hypervisor)
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      req = iallocator.IAReqRelocate(name=self.op.name,
                                     relocate_from=list(self.relocate_from))
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      req = iallocator.IAReqGroupChange(instances=self.op.instances,
                                        target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      req = iallocator.IAReqNodeEvac(instances=self.op.instances,
                                     evac_mode=self.op.evac_mode)
    elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
      disk_template = self.op.disk_template
      insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
                                             memory=self.op.memory,
                                             disks=self.op.disks,
                                             disk_template=disk_template,
                                             os=self.op.os,
                                             tags=self.op.tags,
                                             nics=self.op.nics,
                                             vcpus=self.op.vcpus,
                                             spindle_use=self.op.spindle_use,
                                             hypervisor=self.op.hypervisor)
               for idx in range(self.op.count)]
      req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
    else:
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    ial = iallocator.IAllocator(self.cfg, self.rpc, req)
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
15448 class LUNetworkAdd(LogicalUnit):
15449 """Logical unit for creating networks.
15452 HPATH = "network-add"
15453 HTYPE = constants.HTYPE_NETWORK
15456 def BuildHooksNodes(self):
15457 """Build hooks nodes.
15460 mn = self.cfg.GetMasterNode()
15461 return ([mn], [mn])
15463 def ExpandNames(self):
15464 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
15465 self.needed_locks = {}
15466 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
15468 def CheckPrereq(self):
15469 """Check prerequisites.
15471 This checks that the given group name is not an existing node group
15475 if self.op.network is None:
15476 raise errors.OpPrereqError("Network must be given",
15477 errors.ECODE_INVAL)
15479 uuid = self.cfg.LookupNetwork(self.op.network_name)
15482 raise errors.OpPrereqError("Network '%s' already defined" %
15483 self.op.network, errors.ECODE_EXISTS)
15486 def BuildHooksEnv(self):
15487 """Build hooks env.
15491 "NETWORK_NAME": self.op.network_name,
15492 "NETWORK_SUBNET": self.op.network,
15493 "NETWORK_GATEWAY": self.op.gateway,
15494 "NETWORK_SUBNET6": self.op.network6,
15495 "NETWORK_GATEWAY6": self.op.gateway6,
15496 "NETWORK_MAC_PREFIX": self.op.mac_prefix,
15497 "NETWORK_TYPE": self.op.network_type,
15501 def Exec(self, feedback_fn):
15502 """Add the ip pool to the cluster.
15505 nobj = objects.Network(name=self.op.network_name,
15506 network=self.op.network,
15507 gateway=self.op.gateway,
15508 network6=self.op.network6,
15509 gateway6=self.op.gateway6,
15510 mac_prefix=self.op.mac_prefix,
15511 network_type=self.op.network_type,
15512 uuid=self.network_uuid,
15514 # Initialize the associated address pool
15516 pool = network.AddressPool.InitializeNetwork(nobj)
15517 except errors.AddressPoolError, e:
15518 raise errors.OpExecError("Cannot create IP pool for this network. %s" % e)
15520 # Check if we need to reserve the nodes and the cluster master IP
15521 # These may not be allocated to any instances in routed mode, as
15522 # they wouldn't function anyway.
15523 for node in self.cfg.GetAllNodesInfo().values():
15524 for ip in [node.primary_ip, node.secondary_ip]:
15527 self.LogInfo("Reserved node %s's IP (%s)", node.name, ip)
15529 except errors.AddressPoolError:
15532 master_ip = self.cfg.GetClusterInfo().master_ip
15534 pool.Reserve(master_ip)
15535 self.LogInfo("Reserved cluster master IP (%s)", master_ip)
15536 except errors.AddressPoolError:
15539 if self.op.add_reserved_ips:
15540 for ip in self.op.add_reserved_ips:
15542 pool.Reserve(ip, external=True)
15543 except errors.AddressPoolError, e:
15544 raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
15546 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
15547 del self.remove_locks[locking.LEVEL_NETWORK]
class LUNetworkRemove(LogicalUnit):
  """Logical unit for removing networks.

  """
  HPATH = "network-remove"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)

    self.needed_locks = {
      locking.LEVEL_NETWORK: [self.network_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given network name exists as a network and that it
    is not connected to any node group.

    """
    if not self.network_uuid:
      raise errors.OpPrereqError("Network %s not found" % self.op.network_name,
                                 errors.ECODE_INVAL)

    # Verify that the network is not connected.
    node_groups = [group.name
                   for group in self.cfg.GetAllNodeGroupsInfo().values()
                   for network in group.networks.keys()
                   if network == self.network_uuid]

    if node_groups:
      # FIX: "Nework" -> "Network" in the warning message
      self.LogWarning("Network '%s' is connected to the following"
                      " node groups: %s" % (self.op.network_name,
                      utils.CommaJoin(utils.NiceSort(node_groups))))
      raise errors.OpPrereqError("Network still connected",
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "NETWORK_NAME": self.op.network_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Hooks run on the master node only.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the network.

    """
    try:
      self.cfg.RemoveNetwork(self.network_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
                               (self.op.network_name, self.network_uuid))
15614 class LUNetworkSetParams(LogicalUnit):
15615 """Modifies the parameters of a network.
15618 HPATH = "network-modify"
15619 HTYPE = constants.HTYPE_NETWORK
15622 def CheckArguments(self):
15623 if (self.op.gateway and
15624 (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
15625 raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
15626 " at once", errors.ECODE_INVAL)
15629 def ExpandNames(self):
15630 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15631 self.network = self.cfg.GetNetwork(self.network_uuid)
15632 self.needed_locks = {
15633 locking.LEVEL_NETWORK: [self.network_uuid],
15637 if self.network is None:
15638 raise errors.OpPrereqError("Could not retrieve network '%s' (UUID: %s)" %
15639 (self.op.network_name, self.network_uuid),
15640 errors.ECODE_INVAL)
15642 def CheckPrereq(self):
15643 """Check prerequisites.
15646 self.gateway = self.network.gateway
15647 self.network_type = self.network.network_type
15648 self.mac_prefix = self.network.mac_prefix
15649 self.network6 = self.network.network6
15650 self.gateway6 = self.network.gateway6
15652 self.pool = network.AddressPool(self.network)
15654 if self.op.gateway:
15655 if self.op.gateway == constants.VALUE_NONE:
15656 self.gateway = None
15658 self.gateway = self.op.gateway
15659 if self.pool.IsReserved(self.gateway):
15660 raise errors.OpPrereqError("%s is already reserved" %
15661 self.gateway, errors.ECODE_INVAL)
15663 if self.op.network_type:
15664 if self.op.network_type == constants.VALUE_NONE:
15665 self.network_type = None
15667 self.network_type = self.op.network_type
15669 if self.op.mac_prefix:
15670 if self.op.mac_prefix == constants.VALUE_NONE:
15671 self.mac_prefix = None
15673 self.mac_prefix = self.op.mac_prefix
15675 if self.op.gateway6:
15676 if self.op.gateway6 == constants.VALUE_NONE:
15677 self.gateway6 = None
15679 self.gateway6 = self.op.gateway6
15681 if self.op.network6:
15682 if self.op.network6 == constants.VALUE_NONE:
15683 self.network6 = None
15685 self.network6 = self.op.network6
15689 def BuildHooksEnv(self):
15690 """Build hooks env.
15694 "NETWORK_NAME": self.op.network_name,
15695 "NETWORK_SUBNET": self.network.network,
15696 "NETWORK_GATEWAY": self.gateway,
15697 "NETWORK_SUBNET6": self.network6,
15698 "NETWORK_GATEWAY6": self.gateway6,
15699 "NETWORK_MAC_PREFIX": self.mac_prefix,
15700 "NETWORK_TYPE": self.network_type,
15704 def BuildHooksNodes(self):
15705 """Build hooks nodes.
15708 mn = self.cfg.GetMasterNode()
15709 return ([mn], [mn])
15711 def Exec(self, feedback_fn):
15712 """Modifies the network.
15715 #TODO: reserve/release via temporary reservation manager
15716 # extend cfg.ReserveIp/ReleaseIp with the external flag
15717 if self.op.gateway:
15718 if self.gateway == self.network.gateway:
15719 self.LogWarning("Gateway is already %s" % self.gateway)
15722 self.pool.Reserve(self.gateway, external=True)
15723 if self.network.gateway:
15724 self.pool.Release(self.network.gateway, external=True)
15725 self.network.gateway = self.gateway
15727 if self.op.add_reserved_ips:
15728 for ip in self.op.add_reserved_ips:
15730 if self.pool.IsReserved(ip):
15731 self.LogWarning("IP %s is already reserved" % ip)
15733 self.pool.Reserve(ip, external=True)
15734 except errors.AddressPoolError, e:
15735 self.LogWarning("Cannot reserve ip %s. %s" % (ip, e))
15737 if self.op.remove_reserved_ips:
15738 for ip in self.op.remove_reserved_ips:
15739 if ip == self.network.gateway:
15740 self.LogWarning("Cannot unreserve Gateway's IP")
15743 if not self.pool.IsReserved(ip):
15744 self.LogWarning("IP %s is already unreserved" % ip)
15746 self.pool.Release(ip, external=True)
15747 except errors.AddressPoolError, e:
15748 self.LogWarning("Cannot release ip %s. %s" % (ip, e))
15750 if self.op.mac_prefix:
15751 self.network.mac_prefix = self.mac_prefix
15753 if self.op.network6:
15754 self.network.network6 = self.network6
15756 if self.op.gateway6:
15757 self.network.gateway6 = self.gateway6
15759 if self.op.network_type:
15760 self.network.network_type = self.network_type
15762 self.pool.Validate()
15764 self.cfg.Update(self.network, feedback_fn)
class _NetworkQuery(_QueryBase):
  FIELDS = query.NETWORK_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_networks = lu.cfg.GetAllNetworksInfo()
    name_to_uuid = dict((net.name, net.uuid)
                        for net in self._all_networks.values())

    if not self.names:
      # No names requested: return all known networks, sorted by name
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      all_uuid = frozenset(self._all_networks.keys())
      missing = []
      self.wanted = []

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of networks and their attributes.

    """
    do_instances = query.NETQ_INST in self.requested_data
    do_groups = do_instances or (query.NETQ_GROUP in self.requested_data)
    do_stats = query.NETQ_STATS in self.requested_data
    cluster = lu.cfg.GetClusterInfo()

    network_to_groups = None
    network_to_instances = None
    stats = None

    if do_groups:
      # For NETQ_GROUP, we need to map network->[groups]
      all_groups = lu.cfg.GetAllNodeGroupsInfo()
      network_to_groups = dict((uuid, []) for uuid in self.wanted)
      # NOTE(review): default_nicpp appears unused in this method — confirm
      # against the original before removing
      default_nicpp = cluster.nicparams[constants.PP_DEFAULT]

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        all_nodes = lu.cfg.GetAllNodesInfo()
        network_to_instances = dict((uuid, []) for uuid in self.wanted)

      for group in all_groups.values():
        if do_instances:
          group_nodes = [node.name for node in all_nodes.values()
                         if node.group == group.uuid]
          group_instances = [inst for inst in all_instances.values()
                             if inst.primary_node in group_nodes]

        for net_uuid in group.networks.keys():
          if net_uuid not in network_to_groups:
            continue
          netparams = group.networks[net_uuid]
          mode = netparams[constants.NIC_MODE]
          link = netparams[constants.NIC_LINK]
          info = group.name + '(' + mode + ', ' + link + ')'
          network_to_groups[net_uuid].append(info)

          if do_instances:
            for inst in group_instances:
              for nic in inst.nics:
                if nic.network == self._all_networks[net_uuid].name:
                  network_to_instances[net_uuid].append(inst.name)

    if do_stats:
      stats = {}
      for uuid, net in self._all_networks.items():
        if uuid in self.wanted:
          pool = network.AddressPool(net)
          stats[uuid] = {
            "free_count": pool.GetFreeCount(),
            "reserved_count": pool.GetReservedCount(),
            "map": pool.GetMap(),
            "external_reservations": ", ".join(pool.GetExternalReservations()),
            }

    return query.NetworkQueryData([self._all_networks[uuid]
                                   for uuid in self.wanted],
                                  network_to_groups,
                                  network_to_instances,
                                  stats)
class LUNetworkQuery(NoHooksLU):
  """Logical unit for querying networks.

  Thin wrapper: all work is delegated to a L{_NetworkQuery} helper.

  """
  def CheckArguments(self):
    # Build the query helper from the requested names and output fields;
    # the final boolean presumably disables locking — confirm against
    # _QueryBase's constructor
    self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
                            self.op.output_fields, False)

  def ExpandNames(self):
    # Name/lock expansion is handled entirely by the helper
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    # Return results in the old-style (list-of-rows) format
    return self.nq.OldStyleQuery(self)
class LUNetworkConnect(LogicalUnit):
  """Connect a network to a nodegroup

  """
  HPATH = "network-connect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name
    self.network_mode = self.op.network_mode
    self.network_link = self.op.network_link

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    self.network = self.cfg.GetNetwork(self.network_uuid)
    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    ret = {
      "GROUP_NAME": self.group_name,
      "GROUP_NETWORK_NAME": self.network_name,
      "GROUP_NETWORK_MODE": self.network_mode,
      "GROUP_NETWORK_LINK": self.network_link,
      }
    return ret

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Hooks run on all members of the target node group.

    """
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # FIX: replaced the lambda assigned to the single-letter name "l"
    # (PEP8 E731/E741) with a named helper
    def _FmtEntries(entries):
      """Format (instance, nic index, ip) triples for warning messages."""
      return ", ".join("%s: %s/%s" % (i[0], i[1], i[2]) for i in entries)

    if self.network is None:
      raise errors.OpPrereqError("Network %s does not exist" %
                                 self.network_name, errors.ECODE_INVAL)

    self.netparams = dict()
    self.netparams[constants.NIC_MODE] = self.network_mode
    self.netparams[constants.NIC_LINK] = self.network_link
    objects.NIC.CheckParameterSyntax(self.netparams)

    #if self.network_mode == constants.NIC_MODE_BRIDGED:
    #  _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
    self.connected = False
    if self.network_uuid in self.group.networks:
      self.LogWarning("Network '%s' is already mapped to group '%s'" %
                      (self.network_name, self.group.name))
      self.connected = True
      return

    # NOTE(review): _Contains is a private AddressPool method; a public
    # accessor should be added to the network module
    pool = network.AddressPool(self.network)
    if self.op.conflicts_check:
      # FIX: manual append loop replaced by a comprehension; "occurences"
      # typo corrected
      groupinstances = [self.cfg.GetInstanceInfo(n)
                        for n in self.cfg.GetNodeGroupInstances(self.group_uuid)]
      instances = [(instance.name, idx, nic.ip)
                   for instance in groupinstances
                   for idx, nic in enumerate(instance.nics)
                   if (not nic.network and pool._Contains(nic.ip))]
      if instances:
        self.LogWarning("Following occurrences use IPs from network %s"
                        " that is about to connect to nodegroup %s: %s" %
                        (self.network_name, self.group.name,
                         _FmtEntries(instances)))
        raise errors.OpPrereqError("Conflicting IPs found."
                                   " Please remove/modify"
                                   " corresponding NICs",
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Connect the network to the node group.

    """
    if self.connected:
      return

    self.group.networks[self.network_uuid] = self.netparams
    self.cfg.Update(self.group, feedback_fn)
class LUNetworkDisconnect(LogicalUnit):
  """Disconnect a network to a nodegroup

  """
  HPATH = "network-disconnect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    self.network = self.cfg.GetNetwork(self.network_uuid)
    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    ret = {
      "GROUP_NAME": self.group_name,
      "GROUP_NETWORK_NAME": self.network_name,
      }
    return ret

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Hooks run on all members of the target node group.

    """
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # FIX: replaced the lambda assigned to the single-letter name "l"
    # (PEP8 E731/E741) with a named helper, mirroring LUNetworkConnect
    def _FmtEntries(entries):
      """Format (instance, nic index, ip) triples for warning messages."""
      return ", ".join("%s: %s/%s" % (i[0], i[1], i[2]) for i in entries)

    self.connected = True
    if self.network_uuid not in self.group.networks:
      self.LogWarning("Network '%s' is"
                      " not mapped to group '%s'" %
                      (self.network_name, self.group.name))
      self.connected = False
      return

    if self.op.conflicts_check:
      groupinstances = [self.cfg.GetInstanceInfo(n)
                        for n in self.cfg.GetNodeGroupInstances(self.group_uuid)]
      instances = [(instance.name, idx, nic.ip)
                   for instance in groupinstances
                   for idx, nic in enumerate(instance.nics)
                   if nic.network == self.network_name]
      if instances:
        # FIX: "occurences" typo and "about to disconnected" grammar
        self.LogWarning("Following occurrences use IPs from network %s"
                        " that is about to be disconnected from the nodegroup"
                        " %s: %s" %
                        (self.network_name, self.group.name,
                         _FmtEntries(instances)))
        raise errors.OpPrereqError("Conflicting IPs."
                                   " Please remove/modify"
                                   " corresponding NICs",
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Disconnect the network from the node group.

    """
    if not self.connected:
      return

    del self.group.networks[self.network_uuid]
    self.cfg.Update(self.group, feedback_fn)
#: Query type implementations
# FIX: restored the mapping assignment; the assert below dereferences
# _QUERY_IMPL and would otherwise fail with a NameError at import time
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_NETWORK: _NetworkQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

# Sanity check: every query resource reachable via an opcode must have an
# implementation, and nothing more
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}
  @raise errors.OpPrereqError: if the query type is unknown

  """
  # FIX: corrected "implemtnation" typo in the docstring
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
16088 def _CheckForConflictingIp(lu, ip, node):
16089 """In case of conflicting ip raise error.
16092 @param ip: ip address
16094 @param node: node name
16097 (conf_net, conf_netparams) = lu.cfg.CheckIPInNodeGroup(ip, node)
16098 if conf_net is not None:
16099 raise errors.OpPrereqError("Conflicting IP found:"
16100 " %s <> %s." % (ip, conf_net),
16101 errors.ECODE_INVAL)
16103 return (None, None)