4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
56 from ganeti import query
57 from ganeti import qlang
58 from ganeti import opcodes
60 from ganeti import rpc
61 from ganeti import runtime
62 from ganeti.masterd import iallocator
64 import ganeti.masterd.instance # pylint: disable=W0611
#: Admin states in which an instance is considered (administratively) down
INSTANCE_DOWN = [constants.ADMINST_DOWN]
#: Admin states in which an instance may legitimately be up or down
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
#: Admin states in which an instance is not expected to be running
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
# NOTE(review): the closing "]))" of this frozenset expression appears
# truncated from this copy -- confirm against upstream.
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcode.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    # NOTE(review): the attribute assignments (storing jobs and the extra
    # keyword arguments) appear truncated from this copy -- confirm against
    # upstream.
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  # NOTE(review): the HPATH/HTYPE/REQ_BGL class-attribute defaults appear
  # truncated from this copy -- confirm against upstream.

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    @param processor: the mcpu processor driving this LU
    @param op: the opcode this LU will execute
    @param context: cluster context providing cfg and the lock manager (glm)
    @param rpc_runner: runner used for RPC calls to nodes

    """
    self.proc = processor
    # NOTE(review): "self.op = op" appears truncated from this copy; the
    # debug_level handling below reads self.op.
    self.cfg = context.cfg
    self.glm = context.glm
    # Readability alias for listing the locks currently owned by this LU
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # Logging helpers re-exported from the processor
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possible
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    # NOTE(review): intermediate comment lines appear truncated from this
    # copy; the assignment below is followed by an unconditional raise and
    # thus serves only as documentation of the exclusive-LU pattern.
    self.needed_locks = {} # Exclusive LUs don't need locks.

    raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        # NOTE(review): the per-tasklet tl.CheckPrereq() call appears
        # truncated from this copy -- confirm against upstream.

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        # NOTE(review): the per-tasklet tl.Exec(feedback_fn) call and the
        # "else:" wrapping the raise below appear truncated from this copy.
    raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None

    @return: the new Exec result, based on the previous result

    """
    # API must be kept, thus we ignore the unused argument and could
    # be a function warnings
    # pylint: disable=W0613,R0201
    # NOTE(review): "return lu_result" appears truncated from this copy; the
    # documented contract is to pass the previous result back unchanged.

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    # NOTE(review): an "else:" wrapping the assert below appears truncated
    # from this copy; the assert only applies when needed_locks was set.
    assert locking.LEVEL_INSTANCE not in self.needed_locks, \
      "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    If should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we're really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    # NOTE(review): "wanted_nodes = []" appears truncated from this copy.
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      # NOTE(review): an "if not primary_only:" guard around the extend
      # below appears truncated from this copy.
      wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    # NOTE(review): a final "else:" appears truncated from this copy; the
    # raise below handles unknown recalculation modes.
    raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  # No hooks path/type: the hooks runner skips LUs whose HPATH is None
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
441 """Tasklet base class.
443 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
444 they can mix legacy code with tasklets. Locking needs to be done in the LU,
445 tasklets know nothing about locks.
447 Subclasses must follow these rules:
448 - Implement CheckPrereq
  def __init__(self, lu):
    """Constructor for Tasklet.

    @type lu: L{LogicalUnit}
    @param lu: the logical unit owning this tasklet

    """
    # NOTE(review): the attribute assignments (e.g. storing lu) appear
    # truncated from this copy -- confirm against upstream.
  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError
487 """Base for query utility classes.
490 #: Attribute holding field definitions
496 def __init__(self, qfilter, fields, use_locking):
497 """Initializes this class.
500 self.use_locking = use_locking
502 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
503 namefield=self.SORT_FIELD)
504 self.requested_data = self.query.RequestedData()
505 self.names = self.query.RequestedNames()
507 # Sort only if no names were requested
508 self.sort_by_name = not self.names
510 self.do_locking = None
513 def _GetNames(self, lu, all_names, lock_level):
514 """Helper function to determine names asked for in the query.
518 names = lu.owned_locks(lock_level)
522 if self.wanted == locking.ALL_SET:
523 assert not self.names
524 # caller didn't specify names, so ordering is not important
525 return utils.NiceSort(names)
527 # caller specified names and we must keep the same order
529 assert not self.do_locking or lu.glm.is_owned(lock_level)
531 missing = set(self.wanted).difference(names)
533 raise errors.OpExecError("Some items were removed before retrieving"
534 " their data: %s" % missing)
536 # Return expanded names
  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    @param lu: the logical unit on whose behalf we execute

    """
    raise NotImplementedError()
  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    @param lu: the logical unit on whose behalf we execute
    @param level: locking level being acquired

    """
    raise NotImplementedError()
  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @param lu: the logical unit on whose behalf we execute
    @return: Query data object

    """
    raise NotImplementedError()
563 def NewStyleQuery(self, lu):
564 """Collect data and execute query.
567 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
568 sort_by_name=self.sort_by_name)
570 def OldStyleQuery(self, lu):
571 """Collect data and execute query.
574 return self.query.OldStyleQuery(self._GetQueryData(lu),
575 sort_by_name=self.sort_by_name)
579 """Returns a dict declaring all lock levels shared.
582 return dict.fromkeys(locking.LEVELS, 1)
def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of its children!)
  @param cfg: The config object
  @returns The annotated disk copies
  @see L{rpc.AnnotateDiskParams}

  """
  disk_params = cfg.GetInstanceDiskParams(instance)
  return rpc.AnnotateDiskParams(instance.disk_template, devs, disk_params)
600 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
602 """Checks if node groups for locked instances are still correct.
604 @type cfg: L{config.ConfigWriter}
605 @param cfg: Cluster configuration
606 @type instances: dict; string as key, L{objects.Instance} as value
607 @param instances: Dictionary, instance name as key, instance object as value
608 @type owned_groups: iterable of string
609 @param owned_groups: List of owned groups
610 @type owned_nodes: iterable of string
611 @param owned_nodes: List of owned nodes
612 @type cur_group_uuid: string or None
613 @param cur_group_uuid: Optional group UUID to check against instance's groups
616 for (name, inst) in instances.items():
617 assert owned_nodes.issuperset(inst.all_nodes), \
618 "Instance %s's nodes changed while we kept the lock" % name
620 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
622 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
623 "Instance %s has no node in group %s" % (name, cur_group_uuid)
626 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
628 """Checks if the owned node groups are still correct for an instance.
630 @type cfg: L{config.ConfigWriter}
631 @param cfg: The cluster configuration
632 @type instance_name: string
633 @param instance_name: Instance name
634 @type owned_groups: set or frozenset
635 @param owned_groups: List of currently owned node groups
636 @type primary_only: boolean
637 @param primary_only: Whether to check node groups for only the primary node
640 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
642 if not owned_groups.issuperset(inst_groups):
643 raise errors.OpPrereqError("Instance %s's node groups changed since"
644 " locks were acquired, current groups are"
645 " are '%s', owning groups '%s'; retry the"
648 utils.CommaJoin(inst_groups),
649 utils.CommaJoin(owned_groups)),
655 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
656 """Checks if the instances in a node group are still correct.
658 @type cfg: L{config.ConfigWriter}
659 @param cfg: The cluster configuration
660 @type group_uuid: string
661 @param group_uuid: Node group UUID
662 @type owned_instances: set or frozenset
663 @param owned_instances: List of currently owned instances
666 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
667 if owned_instances != wanted_instances:
668 raise errors.OpPrereqError("Instances in node group '%s' changed since"
669 " locks were acquired, wanted '%s', have '%s';"
670 " retry the operation" %
672 utils.CommaJoin(wanted_instances),
673 utils.CommaJoin(owned_instances)),
676 return wanted_instances
def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  ndparams = cfg.GetNdParams(node)
  return ndparams[constants.ND_OOB_PROGRAM]
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  # No names given: all known nodes, nicely sorted
  return utils.NiceSort(lu.cfg.GetNodeList())
def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    # No names given: all known instances, nicely sorted
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted
730 def _GetUpdatedParams(old_params, update_dict,
731 use_default=True, use_none=False):
732 """Return the new version of a parameter dictionary.
734 @type old_params: dict
735 @param old_params: old parameters
736 @type update_dict: dict
737 @param update_dict: dict containing new parameter values, or
738 constants.VALUE_DEFAULT to reset the parameter to its default
740 @param use_default: boolean
741 @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
742 values as 'to be deleted' values
743 @param use_none: boolean
744 @type use_none: whether to recognise C{None} values as 'to be
747 @return: the new parameter dictionary
750 params_copy = copy.deepcopy(old_params)
751 for key, val in update_dict.iteritems():
752 if ((use_default and val == constants.VALUE_DEFAULT) or
753 (use_none and val is None)):
759 params_copy[key] = val
def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.

  @param old_ipolicy: the old instance policy
  @param new_ipolicy: dict of new keys/values; values may be
      L{constants.VALUE_DEFAULT} (or empty) to request removal
  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries
  @return: the new, syntax-checked instance policy

  """
  # Only group policies may remove entries (falling back to cluster values)
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
                                 errors.ECODE_INVAL)
    if key in constants.IPOLICY_ISPECS:
      # Instance specs are sub-dicts merged key by key
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_none=use_none,
                                       use_default=use_default)
    else:
      if (not value or value == [constants.VALUE_DEFAULT] or
          value == constants.VALUE_DEFAULT):
        if group_policy:
          del ipolicy[key]
        else:
          raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster'" % key,
                                     errors.ECODE_INVAL)
      else:
        if key in constants.IPOLICY_PARAMETERS:
          # FIXME: we assume all such values are float
          try:
            ipolicy[key] = float(value)
          except (TypeError, ValueError) as err:
            raise errors.OpPrereqError("Invalid value for attribute"
                                       " '%s': '%s', error: %s" %
                                       (key, value, err), errors.ECODE_INVAL)
        else:
          # FIXME: we assume all others are lists; this should be redone
          ipolicy[key] = list(value)
  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
  except errors.ConfigurationError as err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                               errors.ECODE_INVAL)
  return ipolicy
811 def _UpdateAndVerifySubDict(base, updates, type_check):
812 """Updates and verifies a dict with sub dicts of the same type.
814 @param base: The dict with the old data
815 @param updates: The dict with the new data
816 @param type_check: Dict suitable to ForceDictType to verify correct types
817 @returns: A new dict with updated and verified values
821 new = _GetUpdatedParams(old, value)
822 utils.ForceDictType(new, type_check)
825 ret = copy.deepcopy(base)
826 ret.update(dict((key, fn(base.get(key, {}), value))
827 for key, value in updates.items()))
831 def _MergeAndVerifyHvState(op_input, obj_input):
832 """Combines the hv state from an opcode with the one of the object
834 @param op_input: The input dict from the opcode
835 @param obj_input: The input dict from the objects
836 @return: The verified and updated dict
840 invalid_hvs = set(op_input) - constants.HYPER_TYPES
842 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
843 " %s" % utils.CommaJoin(invalid_hvs),
845 if obj_input is None:
847 type_check = constants.HVSTS_PARAMETER_TYPES
848 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
853 def _MergeAndVerifyDiskState(op_input, obj_input):
854 """Combines the disk state from an opcode with the one of the object
856 @param op_input: The input dict from the opcode
857 @param obj_input: The input dict from the objects
858 @return: The verified and updated dict
861 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
863 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
864 utils.CommaJoin(invalid_dst),
866 type_check = constants.DSS_PARAMETER_TYPES
867 if obj_input is None:
869 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
871 for key, value in op_input.items())
876 def _ReleaseLocks(lu, level, names=None, keep=None):
877 """Releases locks owned by an LU.
879 @type lu: L{LogicalUnit}
880 @param level: Lock level
881 @type names: list or None
882 @param names: Names of locks to release
883 @type keep: list or None
884 @param keep: Names of locks to retain
887 assert not (keep is not None and names is not None), \
888 "Only one of the 'names' and the 'keep' parameters can be given"
890 if names is not None:
891 should_release = names.__contains__
893 should_release = lambda name: name not in keep
895 should_release = None
897 owned = lu.owned_locks(level)
899 # Not owning any lock at this level, do nothing
906 # Determine which locks to release
908 if should_release(name):
913 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
915 # Release just some locks
916 lu.glm.release(level, names=release)
918 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
921 lu.glm.release(level)
923 assert not lu.glm.is_owned(level), "No locks should be owned"
926 def _MapInstanceDisksToNodes(instances):
927 """Creates a map from (node, volume) to instance name.
929 @type instances: list of L{objects.Instance}
930 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
933 return dict(((node, vol), inst.name)
934 for inst in instances
935 for (node, vols) in inst.MapLVsByNode().items()
939 def _RunPostHook(lu, node_name):
940 """Runs the post-hook for an opcode on a single node.
943 hm = lu.proc.BuildHooksManager(lu)
945 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
946 except Exception, err: # pylint: disable=W0703
947 lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set
  @raise errors.OpPrereqError: if any selected field is unknown

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  @param params: dictionary of hypervisor parameters to check
  @raise errors.OpPrereqError: if any global parameter is present

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
984 def _CheckNodeOnline(lu, node, msg=None):
985 """Ensure that a given node is online.
987 @param lu: the LU on behalf of which we make the check
988 @param node: the node to check
989 @param msg: if passed, should be a message to replace the default one
990 @raise errors.OpPrereqError: if the node is offline
994 msg = "Can't use offline node"
995 if lu.cfg.GetNodeInfo(node).offline:
996 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
999 def _CheckNodeNotDrained(lu, node):
1000 """Ensure that a given node is not drained.
1002 @param lu: the LU on behalf of which we make the check
1003 @param node: the node to check
1004 @raise errors.OpPrereqError: if the node is drained
1007 if lu.cfg.GetNodeInfo(node).drained:
1008 raise errors.OpPrereqError("Can't use drained node %s" % node,
1012 def _CheckNodeVmCapable(lu, node):
1013 """Ensure that a given node is vm capable.
1015 @param lu: the LU on behalf of which we make the check
1016 @param node: the node to check
1017 @raise errors.OpPrereqError: if the node is not vm capable
1020 if not lu.cfg.GetNodeInfo(node).vm_capable:
1021 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    # The caller decides whether this is a prereq or an exec failure
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  @rtype: string
  @return: the contents of the cluster domain secret file

  """
  # strict=True makes empty or multi-line secret files an error
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)
def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param req_states: admin states the instance must be in
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    # The instance is supposed to be down; verify the hypervisor agrees
    pnode = instance.primary_node
    if not lu.cfg.GetNodeInfo(pnode).offline:
      ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
      ins_l.Raise("Can't contact node %s for instance information" % pnode,
                  prereq=True, ecode=errors.ECODE_ENVIRON)
      if instance.name in ins_l.payload:
        raise errors.OpPrereqError("Instance %s is running, %s" %
                                   (instance.name, msg), errors.ECODE_STATE)
    else:
      lu.LogWarning("Primary node offline, ignoring check that instance"
                    " is down")
def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
      not just 'disk')
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or an error string describing why the value is out of range

  """
  # "auto" and unset values are never range-checked
  if value in [None, constants.VALUE_AUTO]:
    return None
  # a missing bound defaults to the value itself, i.e. the check passes
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    if qualifier:
      fqn = "%s/%s" % (name, qualifier)
    else:
      fqn = name
    return ("%s value %s is not in range [%s, %s]" %
            (fqn, value, min_v, max_v))
  return None
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list of no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]

  # keep only the actual violation messages (the check returns None on
  # success); the comprehension is equivalent to filter(None, ...) but
  # returns a list under both Python 2 and 3
  return [res
          for res in (_compute_fn(name, qualifier, ipolicy, value)
                      for (name, qualifier, value) in test_settings)
          if res]
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @return: A list of violations, or an empty list if none are found
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)
def _ComputeIPolicyInstanceSpecViolation(
  ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @return: A list of violations, or an empty list if none are found
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)
def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @return: A list of violations, or an empty list if none are found
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    # moving inside the same group can never introduce a policy violation
    return []
  else:
    return _compute_fn(ipolicy, instance)
def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param lu: the LU on behalf of which the check is made
  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @raise errors.OpPrereqError: if the policy is violated and C{ignore} is False
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      # caller asked for best-effort: report but do not fail
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violates the new ipolicy but
      did not violate the old one

  """
  # set difference: only instances newly put in violation are reported
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))
1276 def _ExpandItemName(fn, name, kind):
1277 """Expand an item name.
1279 @param fn: the function to use for expansion
1280 @param name: requested item name
1281 @param kind: text description ('Node' or 'Instance')
1282 @return: the resolved (full) name
1283 @raise errors.OpPrereqError: if the item is not found
1286 full_name = fn(name)
1287 if full_name is None:
1288 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
def _ExpandNodeName(cfg, name):
  """Resolve a (possibly shortened) node name via L{_ExpandItemName}."""
  expander = cfg.ExpandNodeName
  return _ExpandItemName(expander, name, "Node")
def _ExpandInstanceName(cfg, name):
  """Resolve a (possibly shortened) instance name via L{_ExpandItemName}."""
  expander = cfg.ExpandInstanceName
  return _ExpandItemName(expander, name, "Instance")
1303 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1304 minmem, maxmem, vcpus, nics, disk_template, disks,
1305 bep, hvp, hypervisor_name, tags):
1306 """Builds instance related env variables for hooks
1308 This builds the hook environment from individual variables.
1311 @param name: the name of the instance
1312 @type primary_node: string
1313 @param primary_node: the name of the instance's primary node
1314 @type secondary_nodes: list
1315 @param secondary_nodes: list of secondary nodes as strings
1316 @type os_type: string
1317 @param os_type: the name of the instance's OS
1318 @type status: string
1319 @param status: the desired status of the instance
1320 @type minmem: string
1321 @param minmem: the minimum memory size of the instance
1322 @type maxmem: string
1323 @param maxmem: the maximum memory size of the instance
1325 @param vcpus: the count of VCPUs the instance has
1327 @param nics: list of tuples (ip, mac, mode, link) representing
1328 the NICs the instance has
1329 @type disk_template: string
1330 @param disk_template: the disk template of the instance
1332 @param disks: the list of (size, mode) pairs
1334 @param bep: the backend parameters for the instance
1336 @param hvp: the hypervisor parameters for the instance
1337 @type hypervisor_name: string
1338 @param hypervisor_name: the hypervisor for the instance
1340 @param tags: list of instance tags as strings
1342 @return: the hook environment for this instance
1347 "INSTANCE_NAME": name,
1348 "INSTANCE_PRIMARY": primary_node,
1349 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1350 "INSTANCE_OS_TYPE": os_type,
1351 "INSTANCE_STATUS": status,
1352 "INSTANCE_MINMEM": minmem,
1353 "INSTANCE_MAXMEM": maxmem,
1354 # TODO(2.7) remove deprecated "memory" value
1355 "INSTANCE_MEMORY": maxmem,
1356 "INSTANCE_VCPUS": vcpus,
1357 "INSTANCE_DISK_TEMPLATE": disk_template,
1358 "INSTANCE_HYPERVISOR": hypervisor_name,
1361 nic_count = len(nics)
1362 for idx, (ip, mac, mode, link) in enumerate(nics):
1365 env["INSTANCE_NIC%d_IP" % idx] = ip
1366 env["INSTANCE_NIC%d_MAC" % idx] = mac
1367 env["INSTANCE_NIC%d_MODE" % idx] = mode
1368 env["INSTANCE_NIC%d_LINK" % idx] = link
1369 if mode == constants.NIC_MODE_BRIDGED:
1370 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1374 env["INSTANCE_NIC_COUNT"] = nic_count
1377 disk_count = len(disks)
1378 for idx, (size, mode) in enumerate(disks):
1379 env["INSTANCE_DISK%d_SIZE" % idx] = size
1380 env["INSTANCE_DISK%d_MODE" % idx] = mode
1384 env["INSTANCE_DISK_COUNT"] = disk_count
1389 env["INSTANCE_TAGS"] = " ".join(tags)
1391 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1392 for key, value in source.items():
1393 env["INSTANCE_%s_%s" % (kind, key)] = value
1398 def _NICListToTuple(lu, nics):
1399 """Build a list of nic information tuples.
1401 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1402 value in LUInstanceQueryData.
1404 @type lu: L{LogicalUnit}
1405 @param lu: the logical unit on whose behalf we execute
1406 @type nics: list of L{objects.NIC}
1407 @param nics: list of nics to convert to hooks tuples
1411 cluster = lu.cfg.GetClusterInfo()
1415 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1416 mode = filled_params[constants.NIC_MODE]
1417 link = filled_params[constants.NIC_LINK]
1418 hooks_nics.append((ip, mac, mode, link))
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1462 def _AdjustCandidatePool(lu, exceptions):
1463 """Adjust the candidate pool after node operations.
1466 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1468 lu.LogInfo("Promoted nodes to master candidate role: %s",
1469 utils.CommaJoin(node.name for node in mod_list))
1470 for name in mod_list:
1471 lu.context.ReaddNode(name)
1472 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1474 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1478 def _DecideSelfPromotion(lu, exceptions=None):
1479 """Decide whether I should promote myself as a master candidate.
1482 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1483 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1484 # the new node will increase mc_max with one, so:
1485 mc_should = min(mc_should + 1, cp_size)
1486 return mc_now < mc_should
def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances who violates given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1502 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1503 """Check that the brigdes needed by a list of nics exist.
1506 cluster = lu.cfg.GetClusterInfo()
1507 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1508 brlist = [params[constants.NIC_LINK] for params in paramslist
1509 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1511 result = lu.rpc.call_bridges_exist(target_node, brlist)
1512 result.Raise("Error checking bridges on destination node '%s'" %
1513 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  @param lu: the LU on behalf of which the check is made
  @param instance: the instance whose NIC bridges are checked
  @param node: the node to check on; defaults to the instance's
      primary node

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity
  @raise errors.OpPrereqError: if the variant is invalid or unsupported

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    # the OS takes no variants; passing one is an error
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1549 def _GetNodeInstancesInner(cfg, fn):
1550 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  def _UsesNode(inst):
    # an instance "uses" the node if it appears among all of its nodes
    return node_name in inst.all_nodes

  return _GetNodeInstancesInner(cfg, _UsesNode)
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  def _IsPrimary(inst):
    return node_name == inst.primary_node

  return _GetNodeInstancesInner(cfg, _IsPrimary)
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  def _IsSecondary(inst):
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _IsSecondary)
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  @param cfg: cluster configuration object
  @param storage_type: one of the C{constants.ST_*} storage types
  @return: list of extra arguments for the storage backend

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  # all other storage types take no extra arguments
  return []
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  """Returns the indices of the instance's disks that are faulty on a node.

  @param cfg: cluster configuration object
  @param rpc_runner: RPC runner used to query the node
  @param instance: the instance whose disks are checked
  @param node_name: the node to query
  @param prereq: whether an RPC failure raises a prerequisite or an
      execution error
  @return: list of disk indices whose local mirror state is faulty

  """
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
                                                                instance))
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty
1607 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1608 """Check the sanity of iallocator and node arguments and use the
1609 cluster-wide iallocator if appropriate.
1611 Check that at most one of (iallocator, node) is specified. If none is
1612 specified, then the LU's opcode's iallocator slot is filled with the
1613 cluster-wide default iallocator.
1615 @type iallocator_slot: string
1616 @param iallocator_slot: the name of the opcode iallocator slot
1617 @type node_slot: string
1618 @param node_slot: the name of the opcode target node slot
1621 node = getattr(lu.op, node_slot, None)
1622 ialloc = getattr(lu.op, iallocator_slot, None)
1624 if node is not None and ialloc is not None:
1625 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1627 elif node is None and ialloc is None:
1628 default_iallocator = lu.cfg.GetDefaultIAllocator()
1629 if default_iallocator:
1630 setattr(lu.op, iallocator_slot, default_iallocator)
1632 raise errors.OpPrereqError("No iallocator or node given and no"
1633 " cluster-wide default iallocator found;"
1634 " please specify either an iallocator or a"
1635 " node, or set a cluster-wide default"
1636 " iallocator", errors.ECODE_INVAL)
1639 def _GetDefaultIAllocator(cfg, ialloc):
1640 """Decides on which iallocator to use.
1642 @type cfg: L{config.ConfigWriter}
1643 @param cfg: Cluster configuration object
1644 @type ialloc: string or None
1645 @param ialloc: Iallocator specified in opcode
1647 @return: Iallocator name
1651 # Use default iallocator
1652 ialloc = cfg.GetDefaultIAllocator()
1655 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1656 " opcode nor as a cluster-wide default",
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # hooks run only on the master node
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      # best effort: a failure here must not block cluster destruction
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name
def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception as err: # pylint: disable=W0703
    # anything can go wrong while reading/parsing; report it as an error
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1784 def _GetAllHypervisorParameters(cluster, instances):
1785 """Compute the set of all hypervisor parameters.
1787 @type cluster: L{objects.Cluster}
1788 @param cluster: the cluster object
1789 @param instances: list of L{objects.Instance}
1790 @param instances: additional instances from which to obtain parameters
1791 @rtype: list of (origin, hypervisor, parameters)
1792 @return: a list with all parameters found, indicating the hypervisor they
1793 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1798 for hv_name in cluster.enabled_hypervisors:
1799 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1801 for os_name, os_hvp in cluster.os_hvp.items():
1802 for hv_name, hv_params in os_hvp.items():
1804 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1805 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1807 # TODO: collapse identical parameter values in a single one
1808 for instance in instances:
1809 if instance.hvparams:
1810 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1811 cluster.FillHV(instance)))
1816 class _VerifyErrors(object):
1817 """Mix-in for cluster/group verify LUs.
1819 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1820 self.op and self._feedback_fn to be available.)
1824 ETYPE_FIELD = "code"
1825 ETYPE_ERROR = "ERROR"
1826 ETYPE_WARNING = "WARNING"
1828 def _Error(self, ecode, item, msg, *args, **kwargs):
1829 """Format an error message.
1831 Based on the opcode's error_codes parameter, either format a
1832 parseable error code, or a simpler error string.
1834 This must be called only from Exec and functions called from Exec.
1837 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1838 itype, etxt, _ = ecode
1839 # first complete the msg
1842 # then format the whole message
1843 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1844 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1850 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1851 # and finally report it via the feedback_fn
1852 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1854 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1855 """Log an error message if the passed condition is True.
1859 or self.op.debug_simulate_errors) # pylint: disable=E1101
1861 # If the error code is in the list of ignored errors, demote the error to a
1863 (_, etxt, _) = ecode
1864 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1865 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1868 self._Error(ecode, *args, **kwargs)
1870 # do not mark the operation as failed for WARN cases only
1871 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1872 self.bad = self.bad or cond
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      # verify a single group, no global config verification needed
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend(
      [opcodes.OpClusterVerifyGroup(group_name=group,
                                    ignore_errors=self.op.ignore_errors,
                                    depends=depends_fn())]
      for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        # only the config-verification opcode lacks skip_checks
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = False

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError as err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
    self.share_locks = _ShareAll()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Retrieve all information
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad
2014 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2015 """Verifies the status of a node group.
2018 HPATH = "cluster-verify"
2019 HTYPE = constants.HTYPE_CLUSTER
2022 _HOOKS_INDENT_RE = re.compile("^", re.M)
2024 class NodeImage(object):
2025 """A class representing the logical and physical status of a node.
2028 @ivar name: the node name to which this object refers
2029 @ivar volumes: a structure as returned from
2030 L{ganeti.backend.GetVolumeList} (runtime)
2031 @ivar instances: a list of running instances (runtime)
2032 @ivar pinst: list of configured primary instances (config)
2033 @ivar sinst: list of configured secondary instances (config)
2034 @ivar sbp: dictionary of {primary-node: list of instances} for all
2035 instances for which this node is secondary (config)
2036 @ivar mfree: free memory, as reported by hypervisor (runtime)
2037 @ivar dfree: free disk, as reported by the node (runtime)
2038 @ivar offline: the offline status (config)
2039 @type rpc_fail: boolean
2040 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2041 not whether the individual keys were correct) (runtime)
2042 @type lvm_fail: boolean
2043 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2044 @type hyp_fail: boolean
2045 @ivar hyp_fail: whether the RPC call didn't return the instance list
2046 @type ghost: boolean
2047 @ivar ghost: whether this is a known node or not (config)
2048 @type os_fail: boolean
2049 @ivar os_fail: whether the RPC call didn't return valid OS data
2051 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2052 @type vm_capable: boolean
2053 @ivar vm_capable: whether the node can host instances
2056 def __init__(self, offline=False, name=None, vm_capable=True):
2065 self.offline = offline
2066 self.vm_capable = vm_capable
2067 self.rpc_fail = False
2068 self.lvm_fail = False
2069 self.hyp_fail = False
2071 self.os_fail = False
2074 def ExpandNames(self):
2075 # This raises errors.OpPrereqError on its own:
2076 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2078 # Get instances in node group; this is unsafe and needs verification later
2080 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2082 self.needed_locks = {
2083 locking.LEVEL_INSTANCE: inst_names,
2084 locking.LEVEL_NODEGROUP: [self.group_uuid],
2085 locking.LEVEL_NODE: [],
2088 self.share_locks = _ShareAll()
2090 def DeclareLocks(self, level):
2091 if level == locking.LEVEL_NODE:
2092 # Get members of node group; this is unsafe and needs verification later
2093 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2095 all_inst_info = self.cfg.GetAllInstancesInfo()
2097 # In Exec(), we warn about mirrored instances that have primary and
2098 # secondary living in separate node groups. To fully verify that
2099 # volumes for these instances are healthy, we will need to do an
2100 # extra call to their secondaries. We ensure here those nodes will
2102 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2103 # Important: access only the instances whose lock is owned
2104 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2105 nodes.update(all_inst_info[inst].secondary_nodes)
2107 self.needed_locks[locking.LEVEL_NODE] = nodes
2109 def CheckPrereq(self):
2110 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2111 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2113 group_nodes = set(self.group_info.members)
2115 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2118 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2120 unlocked_instances = \
2121 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2124 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2125 utils.CommaJoin(unlocked_nodes),
2128 if unlocked_instances:
2129 raise errors.OpPrereqError("Missing lock for instances: %s" %
2130 utils.CommaJoin(unlocked_instances),
2133 self.all_node_info = self.cfg.GetAllNodesInfo()
2134 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2136 self.my_node_names = utils.NiceSort(group_nodes)
2137 self.my_inst_names = utils.NiceSort(group_instances)
2139 self.my_node_info = dict((name, self.all_node_info[name])
2140 for name in self.my_node_names)
2142 self.my_inst_info = dict((name, self.all_inst_info[name])
2143 for name in self.my_inst_names)
2145 # We detect here the nodes that will need the extra RPC calls for verifying
2146 # split LV volumes; they should be locked.
2147 extra_lv_nodes = set()
2149 for inst in self.my_inst_info.values():
2150 if inst.disk_template in constants.DTS_INT_MIRROR:
2151 for nname in inst.all_nodes:
2152 if self.all_node_info[nname].group != self.group_uuid:
2153 extra_lv_nodes.add(nname)
2155 unlocked_lv_nodes = \
2156 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2158 if unlocked_lv_nodes:
2159 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2160 utils.CommaJoin(unlocked_lv_nodes),
2162 self.extra_lv_nodes = list(extra_lv_nodes)
  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    # NOTE(review): this view appears to elide some lines (e.g. the binding
    # of "node" to ninfo.name and the early-return guards after fatal
    # checks, plus the final "return True") -- confirm against the full
    # file.
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, constants.CV_ENODERPC, node,
             "unable to verify node: no data returned")

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    # the remote answer must be a (protocol, release) pair
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, constants.CV_ENODERPC, node,
             "connection to node returned invalid data")

    # protocol version must match exactly
    test = local_version != remote_version[0]
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])

    # node seems compatible, we can actually try to look into its results

    # full package version; a mismatch is reported only as a warning
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  constants.CV_ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        # a non-None entry carries the hypervisor's error message
        test = hv_result is not None
        _ErrorIf(test, constants.CV_ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      # every entry in the list is a parameter-verification failure
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, constants.CV_ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    # NOTE(review): the "node = ninfo.name" binding, the "try:" opener for
    # MergeTime, the return after the invalid-time error and the "else:"
    # branch setting ntime_diff to None appear elided -- confirm against
    # the full file.
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")

    # Allow the reported node time to drift up to the configured clock skew
    # in either direction around the [start, end] RPC window
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)

    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
             "Node time diverges by at least %s from master node time",
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    # NOTE(review): some lines look elided here (a vg_name guard, the
    # "node" binding, the "test = vglist is None" assignment and early
    # returns) -- confirm against the full file.
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
    vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                          constants.MIN_VG_SIZE)
    # vgstatus is used both as the condition and the message, so it is
    # apparently truthy (an error string) only on failure
    _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)

    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")

    # check that ':' is not present in PV names, since it's a
    # special character for lvcreate (denotes the range of PEs to
    # use on the PV)
    for _, pvname, owner_vg in pvlist:
      test = ":" in pvname
      _ErrorIf(test, constants.CV_ENODELVM, node,
               "Invalid character ':' in PV '%s' of VG '%s'",
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    # NOTE(review): the empty-bridges early return and the "node" binding
    # appear elided in this view -- confirm against the full file.
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # a missing or non-list NV_BRIDGES answer means the payload is broken
    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, constants.CV_ENODENET, node,
             "did not return valid bridge information")

    # a non-empty list names the bridges absent on the node
    _ErrorIf(bool(missing), constants.CV_ENODENET, node,
             "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
  def _VerifyNodeUserScripts(self, ninfo, nresult):
    """Check the results of user scripts presence and executability on the node

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    # NOTE(review): the "node = ninfo.name" binding appears elided --
    # confirm against the full file.
    test = not constants.NV_USERSCRIPTS in nresult
    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
                  "did not return user scripts information")

    # the payload lists the scripts that are missing or not executable
    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)

    self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
                  "user scripts not present or not executable: %s" %
                  utils.CommaJoin(sorted(broken_scripts)))
  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    # NOTE(review): the "node" binding, the "for anode in nlist:" loop
    # header and an "else:" before the non-master message appear elided --
    # confirm against the full file.
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # SSH connectivity: the payload maps unreachable peer -> error message
    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, constants.CV_ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if nresult[constants.NV_NODELIST]:
      for a_node, a_msg in nresult[constants.NV_NODELIST].items():
        _ErrorIf(True, constants.CV_ENODESSH, node,
                 "ssh communication with node '%s': %s", a_node, a_msg)

    # TCP connectivity: the payload maps unreachable peer -> error message
    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if nresult[constants.NV_NODENETTEST]:
      nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
      _ErrorIf(True, constants.CV_ENODENET, node,
               "tcp communication with node '%s': %s",
               anode, nresult[constants.NV_NODENETTEST][anode])

    # Master IP reachability: payload is a boolean
    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not nresult[constants.NV_MASTERIP]:
      if node == self.master_node:
        msg = "the master node cannot reach the master IP (not configured?)"
        msg = "cannot reach the master IP"
      _ErrorIf(True, constants.CV_ENODENET, node, msg)
  def _VerifyInstance(self, instance, instanceconfig, node_image,
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    # NOTE(review): the signature continuation (the diskstatus parameter
    # and closing parenthesis), the CalculateGroupIPolicy continuation and
    # a few guards ("continue" for broken nodes, the message argument of
    # the EINSTANCEDOWN check) appear elided -- confirm against the full
    # file.
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    node_current = instanceconfig.primary_node

    # Map of node name -> list of LVs the instance should have there
    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    cluster = self.cfg.GetClusterInfo()
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
    err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
    _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_state == constants.ADMINST_UP:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",

    # Flatten per-node disk status into (node, success, status, index) rows
    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node image entry here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
               not success and not bad_snode,
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
                success and bdev_status.ldisk_status == constants.LDS_FAULTY),
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    # NOTE(review): the "continue" after the skip comment appears elided --
    # confirm against the full file.
    for node, n_img in node_image.items():
      if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
          self.all_node_info[node].group != self.group_uuid):
        # skip non-healthy nodes
      for volume in n_img.volumes:
        # a volume is orphaned if no instance should have it on this node
        # and it does not match any reserved-volume pattern
        test = ((node not in node_vol_should or
                 volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
                      "volume %s is unknown", volume)
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    # NOTE(review): the "continue" after the skip comment and the
    # "needed_mem = 0" initialisation appear elided -- confirm against the
    # full file.
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline or self.all_node_info[node].group != self.group_uuid:
        # we're skipping nodes marked offline and nodes in other groups from
        # the N+1 warning, since most likely we don't have good memory
        # information from them; we already list instances living on such
        # nodes, and that's enough warning
      #TODO(dynmem): also consider ballooning out other instances
      for prinode, instances in n_img.sbp.items():
        for instance in instances:
          # only auto-balanced instances count towards the needed memory
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MINMEM]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, constants.CV_ENODEN1, node,
                      "not enough memory to accomodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    # NOTE(review): this view appears to elide several statements (the
    # @classmethod decorator, the "files2nodefn = [" opener, the
    # "nodefiles = {}" initialiser, offline/failed-node "continue" guards
    # and the errorif call for unexpected files) -- confirm against the
    # full file.
    # Define functions determining which nodes to consider for a file
      (files_mc, lambda node: (node.master_candidate or
                               node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),

    # Build mapping from filename to list of nodes which should have the file
    for (files, fn) in files2nodefn:
      filenodes = nodeinfo
      filenodes = filter(fn, nodeinfo)
      nodefiles.update((filename,
                        frozenset(map(operator.attrgetter("name"), filenodes)))
                       for filename in files)

    assert set(nodefiles) == (files_all | files_mc | files_vm)

    # filename -> {checksum -> set of node names having that checksum}
    fileinfo = dict((filename, {}) for filename in nodefiles)
    ignore_nodes = set()

    for node in nodeinfo:
      ignore_nodes.add(node.name)

      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      ignore_nodes.add(node.name)

      # Build per-checksum mapping from filename to nodes having it
      for (filename, checksum) in node_files.items():
        assert filename in nodefiles
        fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes) - ignore_nodes

      expected_nodes = nodefiles[filename] - ignore_nodes

      # Nodes missing file
      missing_file = expected_nodes - with_file

      if filename in files_opt:
        # Optional files must exist on all nodes or on none of them
        errorif(missing_file and missing_file != expected_nodes,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no"
                " nodes (not found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
              "File %s is missing from node(s) %s", filename,
              utils.CommaJoin(utils.NiceSort(missing_file)))

      # Warn if a node has a file it shouldn't
      unexpected = with_file - expected_nodes
              constants.CV_ECLUSTERFILECHECK, None,
              "File %s should not exist on node(s) %s",
              filename, utils.CommaJoin(utils.NiceSort(unexpected)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      variants = ["variant %s on %s" %
                  (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                  for (idx, (checksum, nodes)) in
                  enumerate(sorted(checksums.items()))]

      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
    """Verifies and the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
      L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    # NOTE(review): the signature continuation ("drbd_map):"), the "node"
    # binding, the drbd_helper guard, the "node_drbd = {}" initialiser and
    # the if/else structure around the ghost-instance handling appear
    # elided -- confirm against the full file.
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    helper_result = nresult.get(constants.NV_DRBDHELPER, None)
    test = (helper_result is None)
    _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
             "no drbd usermode helper returned")
    # helper_result is a (status, payload) pair
    status, payload = helper_result
    _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
             "drbd usermode helper check unsuccessful: %s", payload)
    test = status and (payload != drbd_helper)
    _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
             "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      node_drbd[minor] = (instance, False)
      instance = instanceinfo[instance]
      node_drbd[minor] = (instance.name,
                          instance.admin_state == constants.ADMINST_UP)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    # we cannot check drbd status

    # expected minors that are inactive, and active minors nothing expects
    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    # NOTE(review): the "node" binding, the failure/early-return handling
    # after the validity check and the "os_dict = {}" initialiser appear
    # elided -- confirm against the full file.
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    # the payload must be a list of 7-element lists (one entry per OS)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, constants.CV_ENODEOS, node,
             "node hasn't returned valid OS data")

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict
  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    # NOTE(review): the "node" binding, the message argument closing the
    # extra-OS check and the "continue" guards (extra OS / invalid base OS)
    # appear elided -- confirm against the full file.
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      # only the first entry is effective; later ones are shadowed
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, constants.CV_ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",

      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      # base OS is invalid, skipping
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, constants.CV_ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))
  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    # NOTE(review): the "node = ninfo.name" binding appears elided --
    # confirm against the full file.
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      # each truthy entry is an error message about a bad OOB path
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    # NOTE(review): the "node" binding, the leading "if vg_name is None:"
    # branch and the final "else:" around the success assignments appear
    # elided -- confirm against the full file.
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # assume failure until the payload proves to be a valid dict
    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, constants.CV_ENODELVM, node,
               "rpc call to node failed (lvlist)")
    nimg.volumes = lvdata
    nimg.lvm_fail = False
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    # NOTE(review): the "if test:" / "else:" pair around the last two
    # assignments appears elided -- confirm against the full file.
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    # the instance list must come back as a list, anything else is an error
    test = not isinstance(idata, list)
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
                  "rpc call to node failed (instancelist): %s",
                  utils.SafeEncode(str(idata)))
    nimg.hyp_fail = True
    nimg.instances = idata
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    # NOTE(review): the "node" binding and the "try:" openers matching the
    # two except clauses appear elided -- confirm against the full file.
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, constants.CV_ENODEHV, node,
             "rpc call to node failed (hvinfo)")
    nimg.mfree = int(hv_info["memory_free"])
    except (ValueError, TypeError):
      _ErrorIf(True, constants.CV_ENODERPC, node,
               "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, constants.CV_ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
      except (ValueError, TypeError):
        _ErrorIf(True, constants.CV_ENODERPC, node,
                 "node returned invalid LVM info, check LVM status")
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(succes, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
      keys and disk information as values; the disk information is a
      list of tuples (success, payload)

    """
    # NOTE(review): several statements appear elided in this view (the
    # "node_disks = {}" and "instdisk = {}" initialisers, the "if not
    # disks: continue" guard, "devonly = []", the RPC call's remaining
    # arguments, the msg/offline handling in the result loop and the final
    # "return instdisk") -- confirm against the full file.
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      # all instances with a disk presence on this node (primary+secondary)
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      # No need to collect data
      node_disks[nname] = disks

      # _AnnotateDiskParams makes already copies of the disks
      for (inst, dev) in disks:
        (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
        self.cfg.SetDiskID(anno_disk, nname)
        devonly.append(anno_disk)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),

    assert len(result) == len(node_disks)

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      # No data from this node
      data = len(disks) * [(False, "node offline")]

      _ErrorIf(msg, constants.CV_ENODERPC, nname,
               "while getting disk information: %s", msg)
      # No data from this node
      data = len(disks) * [(False, msg)]
      for idx, i in enumerate(nres.payload):
        if isinstance(i, (tuple, list)) and len(i) == 2:
          logging.warning("Invalid result from node %s, entry %d: %s",
          data.append((False, "Invalid result from the remote node"))

      # fan the per-node per-disk statuses out to the per-instance map
      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
  def _SshNodeSelector(group_uuid, all_nodes):
    """Create endless iterators for all potential SSH check hosts.

    """
    # NOTE(review): the @staticmethod decorator, the second condition of
    # the node filter and the groupby key argument closing the return
    # expression appear elided -- confirm against the full file.
    # Only nodes outside the given group are candidates
    nodes = [node for node in all_nodes
             if (node.group != group_uuid and

    keyfunc = operator.attrgetter("group")

    # One cycling iterator per foreign group, each over that group's
    # sorted node names
    return map(itertools.cycle,
               [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
    """Choose which nodes should talk to which other nodes.

    We will make nodes contact all nodes in their group, and one node from
    every other group.

    @warning: This algorithm has a known issue if one node group is much
      smaller than others (e.g. just one node). In such a case all other
      nodes will talk to the single node.

    """
    # NOTE(review): the @classmethod decorator appears elided -- confirm
    # against the full file.
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
    sel = cls._SshNodeSelector(group_uuid, all_nodes)

    # For each online node, draw one peer name from every per-group
    # cycling selector
    return (online_nodes,
            dict((name, sorted([i.next() for i in sel]))
                 for name in online_nodes))
  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just ran in the post phase and their failure makes
    the output be logged in the verify output and the verification to fail.

    """
    # NOTE(review): the "env = {" opener and the final "return env" appear
    # elided in this view -- confirm against the full file.
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())

    # One NODE_TAGS_<name> entry per node of this group
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())
  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: a (pre, post) pair of node-name lists; hooks run on all nodes
      of the verified group in the post phase only

    """
    return ([], self.my_node_names)
3027 def Exec(self, feedback_fn):
3028 """Verify integrity of the node group, performing various test on nodes.
3031 # This method has too many local variables. pylint: disable=R0914
3032 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3034 if not self.my_node_names:
3036 feedback_fn("* Empty node group, skipping verification")
3040 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3041 verbose = self.op.verbose
3042 self._feedback_fn = feedback_fn
3044 vg_name = self.cfg.GetVGName()
3045 drbd_helper = self.cfg.GetDRBDHelper()
3046 cluster = self.cfg.GetClusterInfo()
3047 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3048 hypervisors = cluster.enabled_hypervisors
3049 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3051 i_non_redundant = [] # Non redundant instances
3052 i_non_a_balanced = [] # Non auto-balanced instances
3053 i_offline = 0 # Count of offline instances
3054 n_offline = 0 # Count of offline nodes
3055 n_drained = 0 # Count of nodes being drained
3056 node_vol_should = {}
3058 # FIXME: verify OS list
3061 filemap = _ComputeAncillaryFiles(cluster, False)
3063 # do local checksums
3064 master_node = self.master_node = self.cfg.GetMasterNode()
3065 master_ip = self.cfg.GetMasterIP()
3067 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3070 if self.cfg.GetUseExternalMipScript():
3071 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3073 node_verify_param = {
3074 constants.NV_FILELIST:
3075 utils.UniqueSequence(filename
3076 for files in filemap
3077 for filename in files),
3078 constants.NV_NODELIST:
3079 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3080 self.all_node_info.values()),
3081 constants.NV_HYPERVISOR: hypervisors,
3082 constants.NV_HVPARAMS:
3083 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3084 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3085 for node in node_data_list
3086 if not node.offline],
3087 constants.NV_INSTANCELIST: hypervisors,
3088 constants.NV_VERSION: None,
3089 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3090 constants.NV_NODESETUP: None,
3091 constants.NV_TIME: None,
3092 constants.NV_MASTERIP: (master_node, master_ip),
3093 constants.NV_OSLIST: None,
3094 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3095 constants.NV_USERSCRIPTS: user_scripts,
3098 if vg_name is not None:
3099 node_verify_param[constants.NV_VGLIST] = None
3100 node_verify_param[constants.NV_LVLIST] = vg_name
3101 node_verify_param[constants.NV_PVLIST] = [vg_name]
3102 node_verify_param[constants.NV_DRBDLIST] = None
3105 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3108 # FIXME: this needs to be changed per node-group, not cluster-wide
3110 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3111 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3112 bridges.add(default_nicpp[constants.NIC_LINK])
3113 for instance in self.my_inst_info.values():
3114 for nic in instance.nics:
3115 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3116 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3117 bridges.add(full_nic[constants.NIC_LINK])
3120 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3122 # Build our expected cluster state
3123 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3125 vm_capable=node.vm_capable))
3126 for node in node_data_list)
3130 for node in self.all_node_info.values():
3131 path = _SupportsOob(self.cfg, node)
3132 if path and path not in oob_paths:
3133 oob_paths.append(path)
3136 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3138 for instance in self.my_inst_names:
3139 inst_config = self.my_inst_info[instance]
3140 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3143 for nname in inst_config.all_nodes:
3144 if nname not in node_image:
3145 gnode = self.NodeImage(name=nname)
3146 gnode.ghost = (nname not in self.all_node_info)
3147 node_image[nname] = gnode
3149 inst_config.MapLVsByNode(node_vol_should)
3151 pnode = inst_config.primary_node
3152 node_image[pnode].pinst.append(instance)
3154 for snode in inst_config.secondary_nodes:
3155 nimg = node_image[snode]
3156 nimg.sinst.append(instance)
3157 if pnode not in nimg.sbp:
3158 nimg.sbp[pnode] = []
3159 nimg.sbp[pnode].append(instance)
3161 # At this point, we have the in-memory data structures complete,
3162 # except for the runtime information, which we'll gather next
3164 # Due to the way our RPC system works, exact response times cannot be
3165 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3166 # time before and after executing the request, we can at least have a time
3168 nvinfo_starttime = time.time()
3169 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3171 self.cfg.GetClusterName())
3172 nvinfo_endtime = time.time()
3174 if self.extra_lv_nodes and vg_name is not None:
3176 self.rpc.call_node_verify(self.extra_lv_nodes,
3177 {constants.NV_LVLIST: vg_name},
3178 self.cfg.GetClusterName())
3180 extra_lv_nvinfo = {}
3182 all_drbd_map = self.cfg.ComputeDRBDMap()
3184 feedback_fn("* Gathering disk information (%s nodes)" %
3185 len(self.my_node_names))
3186 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3189 feedback_fn("* Verifying configuration file consistency")
3191 # If not all nodes are being checked, we need to make sure the master node
3192 # and a non-checked vm_capable node are in the list.
3193 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3195 vf_nvinfo = all_nvinfo.copy()
3196 vf_node_info = list(self.my_node_info.values())
3197 additional_nodes = []
3198 if master_node not in self.my_node_info:
3199 additional_nodes.append(master_node)
3200 vf_node_info.append(self.all_node_info[master_node])
3201 # Add the first vm_capable node we find which is not included,
3202 # excluding the master node (which we already have)
3203 for node in absent_nodes:
3204 nodeinfo = self.all_node_info[node]
3205 if (nodeinfo.vm_capable and not nodeinfo.offline and
3206 node != master_node):
3207 additional_nodes.append(node)
3208 vf_node_info.append(self.all_node_info[node])
3210 key = constants.NV_FILELIST
3211 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3212 {key: node_verify_param[key]},
3213 self.cfg.GetClusterName()))
3215 vf_nvinfo = all_nvinfo
3216 vf_node_info = self.my_node_info.values()
3218 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3220 feedback_fn("* Verifying node status")
3224 for node_i in node_data_list:
3226 nimg = node_image[node]
3230 feedback_fn("* Skipping offline node %s" % (node,))
3234 if node == master_node:
3236 elif node_i.master_candidate:
3237 ntype = "master candidate"
3238 elif node_i.drained:
3244 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3246 msg = all_nvinfo[node].fail_msg
3247 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3250 nimg.rpc_fail = True
3253 nresult = all_nvinfo[node].payload
3255 nimg.call_ok = self._VerifyNode(node_i, nresult)
3256 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3257 self._VerifyNodeNetwork(node_i, nresult)
3258 self._VerifyNodeUserScripts(node_i, nresult)
3259 self._VerifyOob(node_i, nresult)
3262 self._VerifyNodeLVM(node_i, nresult, vg_name)
3263 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3266 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3267 self._UpdateNodeInstances(node_i, nresult, nimg)
3268 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3269 self._UpdateNodeOS(node_i, nresult, nimg)
3271 if not nimg.os_fail:
3272 if refos_img is None:
3274 self._VerifyNodeOS(node_i, nimg, refos_img)
3275 self._VerifyNodeBridges(node_i, nresult, bridges)
3277 # Check whether all running instancies are primary for the node. (This
3278 # can no longer be done from _VerifyInstance below, since some of the
3279 # wrong instances could be from other node groups.)
3280 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3282 for inst in non_primary_inst:
3283 test = inst in self.all_inst_info
3284 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3285 "instance should not run on node %s", node_i.name)
3286 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3287 "node is running unknown instance %s", inst)
3289 for node, result in extra_lv_nvinfo.items():
3290 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3291 node_image[node], vg_name)
3293 feedback_fn("* Verifying instance status")
3294 for instance in self.my_inst_names:
3296 feedback_fn("* Verifying instance %s" % instance)
3297 inst_config = self.my_inst_info[instance]
3298 self._VerifyInstance(instance, inst_config, node_image,
3300 inst_nodes_offline = []
3302 pnode = inst_config.primary_node
3303 pnode_img = node_image[pnode]
3304 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3305 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3306 " primary node failed", instance)
3308 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3310 constants.CV_EINSTANCEBADNODE, instance,
3311 "instance is marked as running and lives on offline node %s",
3312 inst_config.primary_node)
3314 # If the instance is non-redundant we cannot survive losing its primary
3315 # node, so we are not N+1 compliant. On the other hand we have no disk
3316 # templates with more than one secondary so that situation is not well
3318 # FIXME: does not support file-backed instances
3319 if not inst_config.secondary_nodes:
3320 i_non_redundant.append(instance)
3322 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3323 constants.CV_EINSTANCELAYOUT,
3324 instance, "instance has multiple secondary nodes: %s",
3325 utils.CommaJoin(inst_config.secondary_nodes),
3326 code=self.ETYPE_WARNING)
3328 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3329 pnode = inst_config.primary_node
3330 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3331 instance_groups = {}
3333 for node in instance_nodes:
3334 instance_groups.setdefault(self.all_node_info[node].group,
3338 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3339 # Sort so that we always list the primary node first.
3340 for group, nodes in sorted(instance_groups.items(),
3341 key=lambda (_, nodes): pnode in nodes,
3344 self._ErrorIf(len(instance_groups) > 1,
3345 constants.CV_EINSTANCESPLITGROUPS,
3346 instance, "instance has primary and secondary nodes in"
3347 " different groups: %s", utils.CommaJoin(pretty_list),
3348 code=self.ETYPE_WARNING)
3350 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3351 i_non_a_balanced.append(instance)
3353 for snode in inst_config.secondary_nodes:
3354 s_img = node_image[snode]
3355 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3356 snode, "instance %s, connection to secondary node failed",
3360 inst_nodes_offline.append(snode)
3362 # warn that the instance lives on offline nodes
3363 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3364 "instance has offline secondary node(s) %s",
3365 utils.CommaJoin(inst_nodes_offline))
3366 # ... or ghost/non-vm_capable nodes
3367 for node in inst_config.all_nodes:
3368 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3369 instance, "instance lives on ghost node %s", node)
3370 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3371 instance, "instance lives on non-vm_capable node %s", node)
3373 feedback_fn("* Verifying orphan volumes")
3374 reserved = utils.FieldSet(*cluster.reserved_lvs)
3376 # We will get spurious "unknown volume" warnings if any node of this group
3377 # is secondary for an instance whose primary is in another group. To avoid
3378 # them, we find these instances and add their volumes to node_vol_should.
3379 for inst in self.all_inst_info.values():
3380 for secondary in inst.secondary_nodes:
3381 if (secondary in self.my_node_info
3382 and inst.name not in self.my_inst_info):
3383 inst.MapLVsByNode(node_vol_should)
3386 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3388 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3389 feedback_fn("* Verifying N+1 Memory redundancy")
3390 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3392 feedback_fn("* Other Notes")
3394 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3395 % len(i_non_redundant))
3397 if i_non_a_balanced:
3398 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3399 % len(i_non_a_balanced))
3402 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3405 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3408 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3412 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3413 """Analyze the post-hooks' result
3415 This method analyses the hook result, handles it, and sends some
3416 nicely-formatted feedback back to the user.
3418 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3419 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3420 @param hooks_results: the results of the multi-node hooks rpc call
3421 @param feedback_fn: function used send feedback back to the caller
3422 @param lu_result: previous Exec result
3423 @return: the new Exec result, based on the previous result
3427 # We only really run POST phase hooks, only for non-empty groups,
3428 # and are only interested in their results
3429 if not self.my_node_names:
3432 elif phase == constants.HOOKS_PHASE_POST:
3433 # Used to change hooks' output to proper indentation
3434 feedback_fn("* Hooks Results")
3435 assert hooks_results, "invalid result from hooks"
3437 for node_name in hooks_results:
3438 res = hooks_results[node_name]
3440 test = msg and not res.offline
3441 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3442 "Communication failure in hooks execution: %s", msg)
3443 if res.offline or msg:
3444 # No need to investigate payload if node is offline or gave
3447 for script, hkr, output in res.payload:
3448 test = hkr == constants.HKR_FAIL
3449 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3450 "Script %s failed, output:", script)
3452 output = self._HOOKS_INDENT_RE.sub(" ", output)
3453 feedback_fn("%s" % output)
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  Does no work itself: it fans out one L{opcodes.OpGroupVerifyDisks}
  job per node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # All locks are shared: we only read the node group list here
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])
class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              owned_groups, owned_nodes, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}

    # only instances that are supposed to be running need their LVs online
    nv_dict = _MapInstanceDisksToNodes(
      [inst for inst in self.instances.values()
       if inst.admin_state == constants.ADMINST_UP])

    if nv_dict:
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
          if not (lv_online or inst is None):
            res_instances.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.iteritems():
        res_missing.setdefault(inst, []).append(list(key))

    return (res_nodes, list(res_instances), res_missing)
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE_RES: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    # node resource locks are shared, instance locks are exclusive since we
    # may update the configuration for those instances
    self.share_locks = {
      locking.LEVEL_NODE_RES: 1,
      locking.LEVEL_INSTANCE: 0,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True, level=level)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @return: True if any child disk size was changed

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    assert not (frozenset(per_node_disks.keys()) -
                self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)

    changed = []
    for node, dskl in per_node_disks.items():
      # query the sizes on a copy of the disks, with the disk IDs set for
      # this node
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # sizes are reported in bytes, the configuration stores mebibytes
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      # the new master IP must not already be in use on the network
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master_params.name)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      # always try to re-enable the master IP, even if the rename failed
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername
def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family, errors.ECODE_INVAL)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                               (netmask), errors.ECODE_INVAL)
3819 class LUClusterSetParams(LogicalUnit):
3820 """Change the parameters of the cluster.
3823 HPATH = "cluster-modify"
3824 HTYPE = constants.HTYPE_CLUSTER
3827 def CheckArguments(self):
3831 if self.op.uid_pool:
3832 uidpool.CheckUidPool(self.op.uid_pool)
3834 if self.op.add_uids:
3835 uidpool.CheckUidPool(self.op.add_uids)
3837 if self.op.remove_uids:
3838 uidpool.CheckUidPool(self.op.remove_uids)
3840 if self.op.master_netmask is not None:
3841 _ValidateNetmask(self.cfg, self.op.master_netmask)
3843 if self.op.diskparams:
3844 for dt_params in self.op.diskparams.values():
3845 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3847 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3848 except errors.OpPrereqError, err:
3849 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
3852 def ExpandNames(self):
3853 # FIXME: in the future maybe other cluster params won't require checking on
3854 # all nodes to be modified.
3855 self.needed_locks = {
3856 locking.LEVEL_NODE: locking.ALL_SET,
3857 locking.LEVEL_INSTANCE: locking.ALL_SET,
3858 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3860 self.share_locks = {
3861 locking.LEVEL_NODE: 1,
3862 locking.LEVEL_INSTANCE: 1,
3863 locking.LEVEL_NODEGROUP: 1,
3866 def BuildHooksEnv(self):
3871 "OP_TARGET": self.cfg.GetClusterName(),
3872 "NEW_VG_NAME": self.op.vg_name,
3875 def BuildHooksNodes(self):
3876 """Build hooks nodes.
3879 mn = self.cfg.GetMasterNode()
3882 def CheckPrereq(self):
3883 """Check prerequisites.
3885 This checks whether the given params don't conflict and
3886 if the given volume group is valid.
3889 if self.op.vg_name is not None and not self.op.vg_name:
3890 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3891 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3892 " instances exist", errors.ECODE_INVAL)
3894 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3895 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3896 raise errors.OpPrereqError("Cannot disable drbd helper while"
3897 " drbd-based instances exist",
3900 node_list = self.owned_locks(locking.LEVEL_NODE)
3902 # if vg_name not None, checks given volume group on all nodes
3904 vglist = self.rpc.call_vg_list(node_list)
3905 for node in node_list:
3906 msg = vglist[node].fail_msg
3908 # ignoring down node
3909 self.LogWarning("Error while gathering data on node %s"
3910 " (ignoring node): %s", node, msg)
3912 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3914 constants.MIN_VG_SIZE)
3916 raise errors.OpPrereqError("Error on node '%s': %s" %
3917 (node, vgstatus), errors.ECODE_ENVIRON)
3919 if self.op.drbd_helper:
3920 # checks given drbd helper on all nodes
3921 helpers = self.rpc.call_drbd_helper(node_list)
3922 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3924 self.LogInfo("Not checking drbd helper on offline node %s", node)
3926 msg = helpers[node].fail_msg
3928 raise errors.OpPrereqError("Error checking drbd helper on node"
3929 " '%s': %s" % (node, msg),
3930 errors.ECODE_ENVIRON)
3931 node_helper = helpers[node].payload
3932 if node_helper != self.op.drbd_helper:
3933 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3934 (node, node_helper), errors.ECODE_ENVIRON)
3936 self.cluster = cluster = self.cfg.GetClusterInfo()
3937 # validate params changes
3938 if self.op.beparams:
3939 objects.UpgradeBeParams(self.op.beparams)
3940 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3941 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3943 if self.op.ndparams:
3944 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3945 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3947 # TODO: we need a more general way to handle resetting
3948 # cluster-level parameters to default values
3949 if self.new_ndparams["oob_program"] == "":
3950 self.new_ndparams["oob_program"] = \
3951 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3953 if self.op.hv_state:
3954 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3955 self.cluster.hv_state_static)
3956 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3957 for hv, values in new_hv_state.items())
3959 if self.op.disk_state:
3960 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3961 self.cluster.disk_state_static)
3962 self.new_disk_state = \
3963 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3964 for name, values in svalues.items()))
3965 for storage, svalues in new_disk_state.items())
3968 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3971 all_instances = self.cfg.GetAllInstancesInfo().values()
3973 for group in self.cfg.GetAllNodeGroupsInfo().values():
3974 instances = frozenset([inst for inst in all_instances
3975 if compat.any(node in group.members
3976 for node in inst.all_nodes)])
3977 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3978 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
3979 new = _ComputeNewInstanceViolations(ipol,
3980 new_ipolicy, instances)
3982 violations.update(new)
3985 self.LogWarning("After the ipolicy change the following instances"
3986 " violate them: %s",
3987 utils.CommaJoin(utils.NiceSort(violations)))
3989 if self.op.nicparams:
3990 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3991 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3992 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3995 # check all instances for consistency
3996 for instance in self.cfg.GetAllInstancesInfo().values():
3997 for nic_idx, nic in enumerate(instance.nics):
3998 params_copy = copy.deepcopy(nic.nicparams)
3999 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4001 # check parameter syntax
4003 objects.NIC.CheckParameterSyntax(params_filled)
4004 except errors.ConfigurationError, err:
4005 nic_errors.append("Instance %s, nic/%d: %s" %
4006 (instance.name, nic_idx, err))
4008 # if we're moving instances to routed, check that they have an ip
4009 target_mode = params_filled[constants.NIC_MODE]
4010 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4011 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4012 " address" % (instance.name, nic_idx))
4014 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4015 "\n".join(nic_errors), errors.ECODE_INVAL)
4017 # hypervisor list/parameters
4018 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4019 if self.op.hvparams:
4020 for hv_name, hv_dict in self.op.hvparams.items():
4021 if hv_name not in self.new_hvparams:
4022 self.new_hvparams[hv_name] = hv_dict
4024 self.new_hvparams[hv_name].update(hv_dict)
4026 # disk template parameters
4027 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4028 if self.op.diskparams:
4029 for dt_name, dt_params in self.op.diskparams.items():
4030 if dt_name not in self.op.diskparams:
4031 self.new_diskparams[dt_name] = dt_params
4033 self.new_diskparams[dt_name].update(dt_params)
4035 # os hypervisor parameters
4036 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4038 for os_name, hvs in self.op.os_hvp.items():
4039 if os_name not in self.new_os_hvp:
4040 self.new_os_hvp[os_name] = hvs
4042 for hv_name, hv_dict in hvs.items():
4043 if hv_name not in self.new_os_hvp[os_name]:
4044 self.new_os_hvp[os_name][hv_name] = hv_dict
4046 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4049 self.new_osp = objects.FillDict(cluster.osparams, {})
4050 if self.op.osparams:
4051 for os_name, osp in self.op.osparams.items():
4052 if os_name not in self.new_osp:
4053 self.new_osp[os_name] = {}
4055 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4058 if not self.new_osp[os_name]:
4059 # we removed all parameters
4060 del self.new_osp[os_name]
4062 # check the parameter validity (remote check)
4063 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4064 os_name, self.new_osp[os_name])
4066 # changes to the hypervisor list
4067 if self.op.enabled_hypervisors is not None:
4068 self.hv_list = self.op.enabled_hypervisors
4069 for hv in self.hv_list:
4070 # if the hypervisor doesn't already exist in the cluster
4071 # hvparams, we initialize it to empty, and then (in both
4072 # cases) we make sure to fill the defaults, as we might not
4073 # have a complete defaults list if the hypervisor wasn't
4075 if hv not in new_hvp:
4077 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4078 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4080 self.hv_list = cluster.enabled_hypervisors
4082 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4083 # either the enabled list has changed, or the parameters have, validate
4084 for hv_name, hv_params in self.new_hvparams.items():
4085 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4086 (self.op.enabled_hypervisors and
4087 hv_name in self.op.enabled_hypervisors)):
4088 # either this is a new hypervisor, or its parameters have changed
4089 hv_class = hypervisor.GetHypervisor(hv_name)
4090 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4091 hv_class.CheckParameterSyntax(hv_params)
4092 _CheckHVParams(self, node_list, hv_name, hv_params)
4095 # no need to check any newly-enabled hypervisors, since the
4096 # defaults have already been checked in the above code-block
4097 for os_name, os_hvp in self.new_os_hvp.items():
4098 for hv_name, hv_params in os_hvp.items():
4099 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4100 # we need to fill in the new os_hvp on top of the actual hv_p
4101 cluster_defaults = self.new_hvparams.get(hv_name, {})
4102 new_osp = objects.FillDict(cluster_defaults, hv_params)
4103 hv_class = hypervisor.GetHypervisor(hv_name)
4104 hv_class.CheckParameterSyntax(new_osp)
4105 _CheckHVParams(self, node_list, hv_name, new_osp)
4107 if self.op.default_iallocator:
4108 alloc_script = utils.FindFile(self.op.default_iallocator,
4109 constants.IALLOCATOR_SEARCH_PATH,
4111 if alloc_script is None:
4112 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4113 " specified" % self.op.default_iallocator,
  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    Applies each requested change from self.op to the cluster config
    object and saves it; several option groups were pre-validated in
    CheckPrereq (the self.new_* attributes read below).

    NOTE(review): this extract is missing a number of source lines
    (guards, "else:" branches and call-argument continuations); gaps
    are flagged inline where they affect the visible flow.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      # NOTE(review): line(s) missing here (likely empty-value normalization)
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
        # NOTE(review): an "else:" line is missing above; this message is the
        # no-change branch in the original flow
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      # NOTE(review): line(s) missing here
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
        # NOTE(review): "else:" line and the string continuation are missing
        feedback_fn("Cluster DRBD helper already in desired state,"
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
      # NOTE(review): guard line (likely "if self.op.os_hvp:") missing here
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
      # NOTE(review): guard line (likely "if self.op.ipolicy:") missing here
      self.cluster.ipolicy = self.new_ipolicy
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams
    if self.op.diskparams:
      self.cluster.diskparams = self.new_diskparams
    if self.op.hv_state:
      self.cluster.hv_state_static = self.new_hv_state
    if self.op.disk_state:
      self.cluster.disk_state_static = self.new_disk_state

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      if self.op.maintain_node_health and not constants.ENABLE_CONFD:
        feedback_fn("Note: CONFD was disabled at build time, node health"
                    " maintenance is not useful (still enabling it)")
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    if self.op.use_external_mip_script is not None:
      self.cluster.use_external_mip_script = self.op.use_external_mip_script

    def helper_os(aname, mods, desc):
      # Apply DDM_ADD/DDM_REMOVE modifications to the named cluster OS list
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          # NOTE(review): membership-check line missing; this is the
          # already-present branch
          feedback_fn("OS %s already in %s, ignoring" % (val, desc))
        elif key == constants.DDM_REMOVE:
          # NOTE(review): membership-check line missing; this is the
          # not-found branch
          feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
          # NOTE(review): a final "else:" line appears missing before the raise
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      ems = self.cfg.GetUseExternalMipScript()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      # NOTE(review): trailing call-argument lines are missing in this extract
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (master_params.netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    if self.op.master_netmask:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
      # NOTE(review): one call-argument line appears missing mid-call
      result = self.rpc.call_node_change_master_netmask(master_params.name,
                                                        master_params.netmask,
                                                        self.op.master_netmask,
                                                        master_params.netdev)
      # NOTE(review): the failure-check guard line is missing here
      msg = "Could not change the master IP netmask: %s" % result.fail_msg
      self.cluster.master_netmask = self.op.master_netmask

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      ems = self.cfg.GetUseExternalMipScript()
      # NOTE(review): trailing call arguments and the failure guard are
      # missing in this extract
      result = self.rpc.call_node_activate_master_ip(master_params.name,
      self.LogWarning("Could not re-enable the master ip on"
                      " the master, please restart manually: %s",
def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  Uploads fname (only if it exists locally) to all given nodes via RPC
  and logs a warning for every node where the copy failed.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      # NOTE(review): an "if msg:" guard line is missing in this extract;
      # the rewrapped message below is the failure branch
      msg = ("Copy of file %s to node %s failed: %s" %
             (fname, to_node, msg))
      lu.proc.LogWarning(msg)
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  # NOTE(review): the "files_all = set([" opening line is missing here
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
  # NOTE(review): the list close and an "if not redist:" guard are missing
  files_all.update(constants.ALL_CERT_FILES)
  files_all.update(ssconf.SimpleStore().GetFileList())

  # we need to ship at least the RAPI certificate
  files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  if cluster.use_external_mip_script:
    files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  # NOTE(review): the "files_opt = set([" opening line is missing here
    constants.RAPI_USERS_FILE,

  # Files which should only be on master candidates
  # NOTE(review): the "files_mc = set()" init / redist guard lines are missing
  files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  # NOTE(review): the "files_vm = set(filename" opening line is missing here
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  # NOTE(review): the "files_opt |= set(filename" opening line is missing here
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          sum(map(len, [files_all, files_mc, files_vm]))), \
    "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
    "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  online_set = frozenset(online_nodes)
  vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    # NOTE(review): an "if additional_vm:" guard line is missing here
    vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  # NOTE(review): the "filemap = [" opening line is missing here
    (online_nodes, files_all),
    (vm_nodes, files_vm),

  # Upload each file to its node list
  for (node_list, files) in filemap:
    # NOTE(review): the inner "for fname in files:" line is missing here
    _UploadHelper(lu, node_list, fname)
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  def ExpandNames(self):
    # Shared lock on all nodes: pushing config does not modify node state
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    # NOTE(review): the dict-closing line is missing in this extract
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    # Saving the config triggers distribution of config/ssconf; the helper
    # then copies the remaining ancillary files
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)
class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    # NOTE(review): trailing call-argument lines (presumably master_params
    # and ems) are missing in this extract
    result = self.rpc.call_node_activate_master_ip(master_params.name,
    result.Raise("Could not activate the master IP")
class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    # NOTE(review): trailing call-argument lines (presumably master_params
    # and ems) are missing in this extract
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
    result.Raise("Could not deactivate the master IP")
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  Polls the primary node for mirror status of the given disks until they
  report synced (or, with oneshot, checks once), and returns whether the
  disks ended up non-degraded.

  NOTE(review): several source lines (the early return, the main polling
  loop header, guards and "else:" branches) are missing from this
  extract; gaps are flagged inline.

  """
  if not instance.disks or disks is not None and not disks:
    # NOTE(review): the early-return line is missing here

  disks = _ExpandCheckDisks(instance, disks)

  lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  # NOTE(review): a "for dev in disks:" loop header is missing here
  lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  degr_retries = 10 # in seconds, as we sleep 1 second each time
  # NOTE(review): the polling-loop header and loop-state init are missing here
  cumul_degraded = False
  rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
  msg = rstats.fail_msg
  # NOTE(review): an "if msg:" guard is missing here
  lu.LogWarning("Can't get any data from node %s: %s", node, msg)
  # NOTE(review): retry-accounting lines are missing here
  raise errors.RemoteError("Can't contact node %s for mirror data,"
                           " aborting." % node)
  rstats = rstats.payload

  for i, mstat in enumerate(rstats):
    # NOTE(review): a None-check guard for mstat is missing here
    lu.LogWarning("Can't compute data for node %s/%s",
                  node, disks[i].iv_name)

    cumul_degraded = (cumul_degraded or
                      (mstat.is_degraded and mstat.sync_percent is None))
    if mstat.sync_percent is not None:
      # NOTE(review): line(s) missing here
      if mstat.estimated_time is not None:
        rem_time = ("%s remaining (estimated)" %
                    utils.FormatSeconds(mstat.estimated_time))
        max_time = mstat.estimated_time
      # NOTE(review): an "else:" line is missing here
        rem_time = "no time estimate"
      lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                      (disks[i].iv_name, mstat.sync_percent, rem_time))

  # if we're done but degraded, let's do a few small retries, to
  # make sure we see a stable and not transient situation; therefore
  # we force restart of the loop
  if (done or oneshot) and cumul_degraded and degr_retries > 0:
    logging.info("Degraded disks found, %d retries left", degr_retries)
  # NOTE(review): several lines (sleep/continue/break handling) are missing
  time.sleep(min(60, max_time))

  lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
def _BlockdevFind(lu, node, dev, instance):
  """Locate a block device on a node, annotating its disk params first.

  The device is run through L{_AnnotateDiskParams} (which fills in the
  effective disk parameters for the owning instance) before the
  C{blockdev_find} RPC is issued.

  @param lu: the calling logical unit
  @param node: the node on which to look for the device
  @param dev: the disk object to locate
  @param instance: the instance object the device belongs to
  @return: the result of the C{blockdev_find} RPC call

  """
  (annotated_disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
  return lu.rpc.call_blockdev_find(node, annotated_disk)
def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
  """Wrapper around L{_CheckDiskConsistencyInner}.

  Annotates the disk parameters before delegating the actual check.

  """
  (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
  return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
  # NOTE(review): the call-continuation line (presumably "ldisk)") is
  # missing in this extract
def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
  # NOTE(review): the signature continuation line (presumably
  # "ldisk=False):") is missing in this extract
  """Check that mirrors are not degraded.

  @attention: The device has to be annotated already.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  # NOTE(review): a result-initialization line appears to be missing here

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    # NOTE(review): an "if msg:" guard and its failure branch are missing
    lu.LogWarning("Can't find disk on node %s: %s", node, msg)
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      # NOTE(review): "else:"/"if ldisk:" structure lines are missing around
      # the two accumulator updates below
      result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      result = result and not rstats.payload.is_degraded

  # NOTE(review): a "if dev.children:" guard may be missing here
  for child in dev.children:
    result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
    # NOTE(review): the call continuation and "return result" lines are
    # missing in this extract
class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  # Commands for which the master node must be skipped (it may be powered
  # off or cycled, which the master cannot do to itself)
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    # NOTE(review): an "else:" line is missing here
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
    # NOTE(review): the dict-closing line is missing in this extract

  def CheckPrereq(self):
    """Check prerequisites.

    This checks, among others, that:
     - the node exists in the configuration

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          # NOTE(review): trailing format arguments are missing here
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
        # NOTE(review): an "else:" line is missing here
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    # NOTE(review): an "else:" line is missing here (no node names given:
    # operate on all nodes)
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      # NOTE(review): a None-check guard for node is missing here
      raise errors.OpPrereqError("Node %s not found" % node_name,
      # NOTE(review): error-code continuation and "else:" lines are missing
      self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    # NOTE(review): the result-list init line is missing here

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)
      # NOTE(review): an "if not oob_program:" guard is missing here; the
      # RS_UNAVAIL entry is the no-OOB-support branch
      node_entry.append((constants.RS_UNAVAIL, None))

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      # NOTE(review): trailing call arguments are missing here
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
      # NOTE(review): an "if result.fail_msg:" guard is missing here
      self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                      node.name, result.fail_msg)
      node_entry.append((constants.RS_NODATA, None))
      # NOTE(review): "else:"/"try:" lines are missing around the payload
      # check below
      self._CheckPayload(result)
      except errors.OpExecError, err:
        self.LogWarning("Payload returned by node '%s' is not valid: %s",
        # NOTE(review): the LogWarning argument continuation is missing
        node_entry.append((constants.RS_NODATA, None))
      # NOTE(review): an "else:" line is missing here
      if self.op.command == constants.OOB_HEALTH:
        # For health we should log important events
        for item, status in result.payload:
          if status in [constants.OOB_STATUS_WARNING,
                        constants.OOB_STATUS_CRITICAL]:
            self.LogWarning("Item '%s' on node '%s' has status '%s'",
                            item, node.name, status)

      if self.op.command == constants.OOB_POWER_ON:
        # NOTE(review): the power-on state update line is missing here
      elif self.op.command == constants.OOB_POWER_OFF:
        node.powered = False
      elif self.op.command == constants.OOB_POWER_STATUS:
        powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
        if powered != node.powered:
          # NOTE(review): the trailing logging arguments are missing here
          logging.warning(("Recorded power state (%s) of node '%s' does not"
                           " match actual power state (%s)"), node.powered,

      # For configuration changing commands we should update the node
      if self.op.command in (constants.OOB_POWER_ON,
                             constants.OOB_POWER_OFF):
        self.cfg.Update(node, feedback_fn)

      node_entry.append((constants.RS_NORMAL, result.payload))

      if (self.op.command == constants.OOB_POWER_ON and
          idx < len(self.nodes) - 1):
        time.sleep(self.op.power_delay)

    # NOTE(review): the "return ret" line is missing here

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    # NOTE(review): the error-list init line is missing here
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      # NOTE(review): an "else:" line is missing here
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
            # NOTE(review): the format-argument line is missing here

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
      constants.OOB_POWER_ON,
      constants.OOB_POWER_OFF,
      constants.OOB_POWER_CYCLE,
      # NOTE(review): the list-closing line is missing here
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    # NOTE(review): an "if errs:" guard is missing here
    raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                             utils.CommaJoin(errs))
class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    # NOTE(review): an "if self.names:" guard line is missing here
    self.wanted = self.names
    # NOTE(review): an "else:" line is missing here
    self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    # NOTE(review): the body line (likely "pass") is missing here

  # NOTE(review): a "@staticmethod" decorator line appears to be missing
  # (the method takes no self/cls argument)
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into an a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    # NOTE(review): the accumulator init line (likely "all_os = {}") is
    # missing here
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        # NOTE(review): the "continue" line is missing here
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          # NOTE(review): the dict-entry init line is missing here
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    # NOTE(review): the "return all_os" line is missing here

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()
    # NOTE(review): the result-dict init line is missing here

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      # NOTE(review): the variants/parameters set-init lines are missing
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        # NOTE(review): validity-break lines are missing here

        (node_variants, node_params, node_api) = osl[0][3:6]
        # NOTE(review): an "if idx == 0:" guard appears to be missing;
        # the updates below initialize from the first node
        variants.update(node_variants)
        parameters.update(node_params)
        api_versions.update(node_api)
        # NOTE(review): an "else:" line appears to be missing here
        # Filter out inconsistent values
        variants.intersection_update(node_variants)
        parameters.intersection_update(node_params)
        api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
    # NOTE(review): the comprehension continuation (likely
    # "if name in data]") is missing in this extract
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  # NOTE(review): a "@staticmethod" decorator line appears to be missing
  # (the method takes no self/cls argument)
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    # NOTE(review): an "if status_filter:" guard is missing here
    status_filter.insert(0, qlang.OP_AND)
    # NOTE(review): an "else:" line is missing here
    status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    # NOTE(review): an "elif name_filter:" branch appears to be missing
    return status_filter

  def CheckArguments(self):
    # Query helper over OS fields; locking disabled (see _OsQuery)
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    # NOTE(review): the "return {" opening line is missing here
    "OP_TARGET": self.op.node_name,
    "NODE_NAME": self.op.node_name,

  def BuildHooksNodes(self):
    """Build hooks nodes.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    all_nodes = self.cfg.GetNodeList()
    # NOTE(review): a "try:" line appears to be missing here
    all_nodes.remove(self.op.node_name)
    # NOTE(review): the matching except-handler lines are missing here
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        # NOTE(review): the error-code continuation line is missing here
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
    self.op.node_name = node.name
    # NOTE(review): a line storing the node object (likely
    # "self.node = node") appears to be missing here

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    # NOTE(review): a line binding "node" (likely "node = self.node")
    # appears to be missing here
    logging.info("Stopping the node daemon and removing configs from node %s",
    # NOTE(review): the logging argument line is missing here

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
    # NOTE(review): the assertion-message continuation line is missing here

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    # NOTE(review): an "if msg:" guard is missing here; leaving-cluster
    # failures are only warned about, not fatal
    self.LogWarning("Errors encountered on the remote node while leaving"
                    " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      # NOTE(review): trailing call-argument lines are missing here
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
      result.Raise("Can't update hosts file with new host data")
    _RedistributeAncillaryFiles(self)
class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    # NOTE(review): an "if self.names:" guard line is missing here
    self.wanted = _GetWantedNodes(lu, self.names)
    # NOTE(review): an "else:" line is missing here
    self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    # NOTE(review): an "if self.do_locking:" guard appears to be missing
    # If any non-static field is requested we need to lock the nodes
    lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    # NOTE(review): the body line (likely "pass") is missing here

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
                                        [lu.cfg.GetHypervisorType()])
      live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    # NOTE(review): the "else:" fallback lines for live_data are missing

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    # NOTE(review): an "else:" line is missing here
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    # NOTE(review): the "else:" fallback lines for oob_support are missing

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    # NOTE(review): the "else:" fallback lines for groups are missing

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  Thin wrapper delegating all work to a L{_NodeQuery} helper instance.

  """
  # pylint: disable=W0142

  def CheckArguments(self):
    # Build the query helper from the requested output fields and a simple
    # name filter over self.op.names
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    # NOTE(review): an "else:" line is missing here
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    # NOTE(review): the output-list init line is missing here
    for node in nodenames:
      nresult = volumes[node]
      # NOTE(review): offline-node skip lines appear to be missing here
      msg = nresult.fail_msg
      # NOTE(review): an "if msg:" guard and "continue" are missing around
      # the warning below
      self.LogWarning("Can't compute volume data on node %s: %s", node, msg)

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        # NOTE(review): the per-volume row init line is missing here
        for field in self.op.output_fields:
          # NOTE(review): the "node" field branch is missing here
          elif field == "phys":
            # NOTE(review): the value-assignment line is missing here
          # NOTE(review): the "vg" field branch is missing here
          elif field == "name":
            # NOTE(review): the value-assignment line is missing here
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          # NOTE(review): an "else:" line is missing before the raise
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    # NOTE(review): the "return output" line is missing here
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    # NOTE(review): an "if self.op.nodes:" guard line is missing here
    self.needed_locks[locking.LEVEL_NODE] = \
      _GetWantedNodes(self, self.op.nodes)
    # NOTE(review): an "else:" line is missing here
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    # NOTE(review): an "else:" line is missing here
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    # NOTE(review): the result-list init line is missing here
    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      # NOTE(review): offline-node skip lines appear to be missing here

      msg = nresult.fail_msg
      # NOTE(review): an "if msg:" guard and "continue" are missing around
      # the warning below
      self.LogWarning("Can't get storage data from node %s: %s", node, msg)

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        # NOTE(review): row-binding and per-row output init lines are missing

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            # NOTE(review): the value-assignment line is missing here
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          # NOTE(review): an "else:" line is missing before the raise
            raise errors.ParameterError(field)

    # NOTE(review): output-append and return lines are missing here
# Query implementation for instances. Gathers configuration data and,
# depending on the requested fields, live runtime data (IQ_LIVE), disk
# usage (IQ_DISKUSAGE), console information (IQ_CONSOLE) and node/group
# details (IQ_NODES).
# NOTE(review): this numbered listing elides some original source lines
# (the embedded line numbers are non-contiguous); gaps are elision, not
# deleted logic.
5263 class _InstanceQuery(_QueryBase):
5264 FIELDS = query.INSTANCE_FIELDS
# Declare the locks needed for the query; all locks are taken in shared
# mode. Node-group locks are only needed when IQ_NODES data is requested.
5266 def ExpandNames(self, lu):
5267 lu.needed_locks = {}
5268 lu.share_locks = _ShareAll()
5271 self.wanted = _GetWantedInstances(lu, self.names)
5273 self.wanted = locking.ALL_SET
# Locking is only required when live data is requested and the caller
# asked for it.
5275 self.do_locking = (self.use_locking and
5276 query.IQ_LIVE in self.requested_data)
5278 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5279 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5280 lu.needed_locks[locking.LEVEL_NODE] = []
5281 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5283 self.do_grouplocks = (self.do_locking and
5284 query.IQ_NODES in self.requested_data)
5286 def DeclareLocks(self, lu, level):
5288 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5289 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5291 # Lock all groups used by instances optimistically; this requires going
5292 # via the node before it's locked, requiring verification later on
5293 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5295 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5296 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5297 elif level == locking.LEVEL_NODE:
5298 lu._LockInstancesNodes() # pylint: disable=W0212
# Verify that the optimistically-acquired group locks still match the
# instances' actual node groups (they may have changed in between).
5301 def _CheckGroupLocks(lu):
5302 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5303 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5305 # Check if node groups for locked instances are still correct
5306 for instance_name in owned_instances:
5307 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5309 def _GetQueryData(self, lu):
5310 """Computes the list of instances and their attributes.
5313 if self.do_grouplocks:
5314 self._CheckGroupLocks(lu)
5316 cluster = lu.cfg.GetClusterInfo()
5317 all_info = lu.cfg.GetAllInstancesInfo()
5319 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5321 instance_list = [all_info[name] for name in instance_names]
5322 nodes = frozenset(itertools.chain(*(inst.all_nodes
5323 for inst in instance_list)))
5324 hv_list = list(set([inst.hypervisor for inst in instance_list]))
# Instances reported running on a node other than their configured
# primary node are collected here.
5327 wrongnode_inst = set()
5329 # Gather data as requested
5330 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5332 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5334 result = node_data[name]
5336 # offline nodes will be in both lists
5337 assert result.fail_msg
5338 offline_nodes.append(name)
5340 bad_nodes.append(name)
5341 elif result.payload:
5342 for inst in result.payload:
5343 if inst in all_info:
5344 if all_info[inst].primary_node == name:
5345 live_data.update(result.payload)
5347 wrongnode_inst.add(inst)
5349 # orphan instance; we don't list it here as we don't
5350 # handle this case yet in the output of instance listing
5351 logging.warning("Orphan instance '%s' found on node %s",
5353 # else no instance is alive
# Compute per-instance disk usage from the configured disk sizes.
5357 if query.IQ_DISKUSAGE in self.requested_data:
5358 gmi = ganeti.masterd.instance
5359 disk_usage = dict((inst.name,
5360 gmi.ComputeDiskSize(inst.disk_template,
5361 [{constants.IDISK_SIZE: disk.size}
5362 for disk in inst.disks]))
5363 for inst in instance_list)
# Console information is only available for running instances; others
# map to None so every queried instance has an entry.
5367 if query.IQ_CONSOLE in self.requested_data:
5369 for inst in instance_list:
5370 if inst.name in live_data:
5371 # Instance is running
5372 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5374 consinfo[inst.name] = None
5375 assert set(consinfo.keys()) == set(instance_names)
5379 if query.IQ_NODES in self.requested_data:
5380 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5382 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5383 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5384 for uuid in set(map(operator.attrgetter("group"),
5390 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5391 disk_usage, offline_nodes, bad_nodes,
5392 live_data, wrongnode_inst, consinfo,
# Generic query LU: delegates all work to the query implementation class
# looked up from the opcode's "what" field (see _GetQueryImplementation).
# NOTE(review): listing elides some lines (non-contiguous numbering).
5396 class LUQuery(NoHooksLU):
5397 """Query for resources/items of a certain kind.
5400 # pylint: disable=W0142
5403 def CheckArguments(self):
# Resolve and instantiate the concrete _QueryBase subclass for the
# requested resource kind.
5404 qcls = _GetQueryImplementation(self.op.what)
5406 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5408 def ExpandNames(self):
5409 self.impl.ExpandNames(self)
5411 def DeclareLocks(self, level):
5412 self.impl.DeclareLocks(self, level)
5414 def Exec(self, feedback_fn):
5415 return self.impl.NewStyleQuery(self)
# Field-list query LU: returns the available fields for a resource kind
# without touching cluster state, hence no locks are needed.
5418 class LUQueryFields(NoHooksLU):
5419 """Query for resources/items of a certain kind.
5422 # pylint: disable=W0142
5425 def CheckArguments(self):
5426 self.qcls = _GetQueryImplementation(self.op.what)
5428 def ExpandNames(self):
# Purely informational query; no locks required.
5429 self.needed_locks = {}
5431 def Exec(self, feedback_fn):
5432 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
# LU that modifies fields of a storage volume (e.g. an LVM volume) on a
# single node via the storage_modify RPC.
# NOTE(review): listing elides some lines (non-contiguous numbering).
5435 class LUNodeModifyStorage(NoHooksLU):
5436 """Logical unit for modifying a storage volume on a node.
5441 def CheckArguments(self):
5442 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5444 storage_type = self.op.storage_type
# Only storage types listed in MODIFIABLE_STORAGE_FIELDS may be
# modified, and only the fields declared modifiable for that type.
5447 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5449 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5450 " modified" % storage_type,
5453 diff = set(self.op.changes.keys()) - modifiable
5455 raise errors.OpPrereqError("The following fields can not be modified for"
5456 " storage units of type '%s': %r" %
5457 (storage_type, list(diff)),
5460 def ExpandNames(self):
5461 self.needed_locks = {
5462 locking.LEVEL_NODE: self.op.node_name,
5465 def Exec(self, feedback_fn):
5466 """Computes the list of nodes and their attributes.
5469 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5470 result = self.rpc.call_storage_modify(self.op.node_name,
5471 self.op.storage_type, st_args,
5472 self.op.name, self.op.changes)
5473 result.Raise("Failed to modify storage unit '%s' on %s" %
5474 (self.op.name, self.op.node_name))
# LU that adds a node to the cluster (or re-adds an existing one when
# op.readd is set). CheckPrereq validates naming, IP uniqueness,
# single-/dual-homed consistency with the master, reachability and
# protocol version; Exec updates /etc/hosts, verifies ssh/hostname from
# the master, redistributes ancillary files and registers the node.
# NOTE(review): this numbered listing elides some original source lines
# (non-contiguous numbering); gaps are elision, not removed logic.
5477 class LUNodeAdd(LogicalUnit):
5478 """Logical unit for adding node to the cluster.
5482 HTYPE = constants.HTYPE_NODE
# Node flags copied verbatim between opcode, existing node object and
# the new node object.
5483 _NFLAGS = ["master_capable", "vm_capable"]
5485 def CheckArguments(self):
5486 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5487 # validate/normalize the node name
5488 self.hostname = netutils.GetHostname(name=self.op.node_name,
5489 family=self.primary_ip_family)
5490 self.op.node_name = self.hostname.name
# Re-adding the master or passing a group on re-add are both invalid.
5492 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5493 raise errors.OpPrereqError("Cannot readd the master node",
5496 if self.op.readd and self.op.group:
5497 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5498 " being readded", errors.ECODE_INVAL)
5500 def BuildHooksEnv(self):
5503 This will run on all nodes before, and on all nodes + the new node after.
5507 "OP_TARGET": self.op.node_name,
5508 "NODE_NAME": self.op.node_name,
5509 "NODE_PIP": self.op.primary_ip,
5510 "NODE_SIP": self.op.secondary_ip,
5511 "MASTER_CAPABLE": str(self.op.master_capable),
5512 "VM_CAPABLE": str(self.op.vm_capable),
5515 def BuildHooksNodes(self):
5516 """Build hooks nodes.
5519 # Exclude added node
5520 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5521 post_nodes = pre_nodes + [self.op.node_name, ]
5523 return (pre_nodes, post_nodes)
5525 def CheckPrereq(self):
5526 """Check prerequisites.
5529 - the new node is not already in the config
5531 - its parameters (single/dual homed) matches the cluster
5533 Any errors are signaled by raising errors.OpPrereqError.
5537 hostname = self.hostname
5538 node = hostname.name
5539 primary_ip = self.op.primary_ip = hostname.ip
# Default the secondary IP to the primary one; IPv6-primary clusters
# still require an explicit IPv4 secondary address.
5540 if self.op.secondary_ip is None:
5541 if self.primary_ip_family == netutils.IP6Address.family:
5542 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5543 " IPv4 address must be given as secondary",
5545 self.op.secondary_ip = primary_ip
5547 secondary_ip = self.op.secondary_ip
5548 if not netutils.IP4Address.IsValid(secondary_ip):
5549 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5550 " address" % secondary_ip, errors.ECODE_INVAL)
5552 node_list = cfg.GetNodeList()
5553 if not self.op.readd and node in node_list:
5554 raise errors.OpPrereqError("Node %s is already in the configuration" %
5555 node, errors.ECODE_EXISTS)
5556 elif self.op.readd and node not in node_list:
5557 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5560 self.changed_primary_ip = False
# Check the new node's addresses against every existing node: a re-add
# must keep the same secondary IP, and for other nodes neither address
# may collide with the new node's addresses.
5562 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5563 if self.op.readd and node == existing_node_name:
5564 if existing_node.secondary_ip != secondary_ip:
5565 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5566 " address configuration as before",
5568 if existing_node.primary_ip != primary_ip:
5569 self.changed_primary_ip = True
5573 if (existing_node.primary_ip == primary_ip or
5574 existing_node.secondary_ip == primary_ip or
5575 existing_node.primary_ip == secondary_ip or
5576 existing_node.secondary_ip == secondary_ip):
5577 raise errors.OpPrereqError("New node ip address(es) conflict with"
5578 " existing node %s" % existing_node.name,
5579 errors.ECODE_NOTUNIQUE)
5581 # After this 'if' block, None is no longer a valid value for the
5582 # _capable op attributes
5584 old_node = self.cfg.GetNodeInfo(node)
5585 assert old_node is not None, "Can't retrieve locked node %s" % node
# On re-add, unspecified _capable flags inherit the old node's values;
# otherwise (second loop) they default to True.
5586 for attr in self._NFLAGS:
5587 if getattr(self.op, attr) is None:
5588 setattr(self.op, attr, getattr(old_node, attr))
5590 for attr in self._NFLAGS:
5591 if getattr(self.op, attr) is None:
5592 setattr(self.op, attr, True)
5594 if self.op.readd and not self.op.vm_capable:
5595 pri, sec = cfg.GetNodeInstances(node)
5597 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5598 " flag set to false, but it already holds"
5599 " instances" % node,
5602 # check that the type of the node (single versus dual homed) is the
5603 # same as for the master
5604 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5605 master_singlehomed = myself.secondary_ip == myself.primary_ip
5606 newbie_singlehomed = secondary_ip == primary_ip
5607 if master_singlehomed != newbie_singlehomed:
5608 if master_singlehomed:
5609 raise errors.OpPrereqError("The master has no secondary ip but the"
5610 " new node has one",
5613 raise errors.OpPrereqError("The master has a secondary ip but the"
5614 " new node doesn't have one",
5617 # checks reachability
5618 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5619 raise errors.OpPrereqError("Node not reachable by ping",
5620 errors.ECODE_ENVIRON)
5622 if not newbie_singlehomed:
5623 # check reachability from my secondary ip to newbie's secondary ip
5624 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5625 source=myself.secondary_ip):
5626 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5627 " based ping to node daemon port",
5628 errors.ECODE_ENVIRON)
# Decide whether the node becomes a master candidate; only
# master-capable nodes are eligible.
5635 if self.op.master_capable:
5636 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5638 self.master_candidate = False
# On re-add, reuse the existing node object; otherwise build a fresh
# objects.Node in the looked-up group.
5641 self.new_node = old_node
5643 node_group = cfg.LookupNodeGroup(self.op.group)
5644 self.new_node = objects.Node(name=node,
5645 primary_ip=primary_ip,
5646 secondary_ip=secondary_ip,
5647 master_candidate=self.master_candidate,
5648 offline=False, drained=False,
5651 if self.op.ndparams:
5652 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5654 if self.op.hv_state:
5655 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5657 if self.op.disk_state:
5658 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5660 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5661 # it a property on the base class.
# The node is not yet in the configuration, so a DNS-only RPC runner is
# used to query its protocol version; it must match the master's.
5662 result = rpc.DnsOnlyRunner().call_version([node])[node]
5663 result.Raise("Can't get version information from node %s" % node)
5664 if constants.PROTOCOL_VERSION == result.payload:
5665 logging.info("Communication to node %s fine, sw version %s match",
5666 node, result.payload)
5668 raise errors.OpPrereqError("Version mismatch master version %s,"
5669 " node version %s" %
5670 (constants.PROTOCOL_VERSION, result.payload),
5671 errors.ECODE_ENVIRON)
5673 def Exec(self, feedback_fn):
5674 """Adds the new node to the cluster.
5677 new_node = self.new_node
5678 node = new_node.name
5680 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5683 # We adding a new node so we assume it's powered
5684 new_node.powered = True
5686 # for re-adds, reset the offline/drained/master-candidate flags;
5687 # we need to reset here, otherwise offline would prevent RPC calls
5688 # later in the procedure; this also means that if the re-add
5689 # fails, we are left with a non-offlined, broken node
5691 new_node.drained = new_node.offline = False # pylint: disable=W0201
5692 self.LogInfo("Readding a node, the offline/drained flags were reset")
5693 # if we demote the node, we do cleanup later in the procedure
5694 new_node.master_candidate = self.master_candidate
5695 if self.changed_primary_ip:
5696 new_node.primary_ip = self.op.primary_ip
5698 # copy the master/vm_capable flags
5699 for attr in self._NFLAGS:
5700 setattr(new_node, attr, getattr(self.op, attr))
5702 # notify the user about any possible mc promotion
5703 if new_node.master_candidate:
5704 self.LogInfo("Node will be a master candidate")
5706 if self.op.ndparams:
5707 new_node.ndparams = self.op.ndparams
5709 new_node.ndparams = {}
5711 if self.op.hv_state:
5712 new_node.hv_state_static = self.new_hv_state
5714 if self.op.disk_state:
5715 new_node.disk_state_static = self.new_disk_state
5717 # Add node to our /etc/hosts, and add key to known_hosts
5718 if self.cfg.GetClusterInfo().modify_etc_hosts:
5719 master_node = self.cfg.GetMasterNode()
5720 result = self.rpc.call_etc_hosts_modify(master_node,
5721 constants.ETC_HOSTS_ADD,
5724 result.Raise("Can't update hosts file with new host data")
5726 if new_node.secondary_ip != new_node.primary_ip:
5727 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
# Verify ssh/hostname connectivity to the new node from the master.
5730 node_verify_list = [self.cfg.GetMasterNode()]
5731 node_verify_param = {
5732 constants.NV_NODELIST: ([node], {}),
5733 # TODO: do a node-net-test as well?
5736 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5737 self.cfg.GetClusterName())
5738 for verifier in node_verify_list:
5739 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5740 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5742 for failed in nl_payload:
5743 feedback_fn("ssh/hostname verification failed"
5744 " (checking from %s): %s" %
5745 (verifier, nl_payload[failed]))
5746 raise errors.OpExecError("ssh/hostname verification failed")
# Re-add path: re-register the node, persist the config, and demote it
# from master candidate on the node itself if needed.
5749 _RedistributeAncillaryFiles(self)
5750 self.context.ReaddNode(new_node)
5751 # make sure we redistribute the config
5752 self.cfg.Update(new_node, feedback_fn)
5753 # and make sure the new node will not have old files around
5754 if not new_node.master_candidate:
5755 result = self.rpc.call_node_demote_from_mc(new_node.name)
5756 msg = result.fail_msg
5758 self.LogWarning("Node failed to demote itself from master"
5759 " candidate status: %s" % msg)
# Fresh-add path: push ancillary files to the new node, then add it.
5761 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5762 additional_vm=self.op.vm_capable)
5763 self.context.AddNode(new_node, self.proc.GetECId())
# LU that modifies node parameters: role flags (master candidate /
# drained / offline), capability flags, secondary IP, ndparams, power
# state and hv/disk state. Node roles are modelled as an exclusive
# state machine via the _F2R/_R2F tables.
# NOTE(review): this numbered listing elides some original source lines
# (non-contiguous numbering); gaps are elision, not removed logic.
5766 class LUNodeSetParams(LogicalUnit):
5767 """Modifies the parameters of a node.
5769 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5770 to the node role (as _ROLE_*)
5771 @cvar _R2F: a dictionary from node role to tuples of flags
5772 @cvar _FLAGS: a list of attribute names corresponding to the flags
5775 HPATH = "node-modify"
5776 HTYPE = constants.HTYPE_NODE
5778 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
# Flag-tuple -> role mapping; at most one of the three flags may be set.
5780 (True, False, False): _ROLE_CANDIDATE,
5781 (False, True, False): _ROLE_DRAINED,
5782 (False, False, True): _ROLE_OFFLINE,
5783 (False, False, False): _ROLE_REGULAR,
5785 _R2F = dict((v, k) for k, v in _F2R.items())
5786 _FLAGS = ["master_candidate", "drained", "offline"]
5788 def CheckArguments(self):
5789 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
# At least one modification must be requested, and at most one of the
# exclusive role flags may be set to True.
5790 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5791 self.op.master_capable, self.op.vm_capable,
5792 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5794 if all_mods.count(None) == len(all_mods):
5795 raise errors.OpPrereqError("Please pass at least one modification",
5797 if all_mods.count(True) > 1:
5798 raise errors.OpPrereqError("Can't set the node into more than one"
5799 " state at the same time",
5802 # Boolean value that tells us whether we might be demoting from MC
5803 self.might_demote = (self.op.master_candidate is False or
5804 self.op.offline is True or
5805 self.op.drained is True or
5806 self.op.master_capable is False)
5808 if self.op.secondary_ip:
5809 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5810 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5811 " address" % self.op.secondary_ip,
# lock_all: auto-promotion after a possible demotion needs all nodes;
# lock_instances: secondary-IP changes affect mirrored instances.
5814 self.lock_all = self.op.auto_promote and self.might_demote
5815 self.lock_instances = self.op.secondary_ip is not None
5817 def _InstanceFilter(self, instance):
5818 """Filter for getting affected instances.
5821 return (instance.disk_template in constants.DTS_INT_MIRROR and
5822 self.op.node_name in instance.all_nodes)
5824 def ExpandNames(self):
5826 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5828 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5830 # Since modifying a node can have severe effects on currently running
5831 # operations the resource lock is at least acquired in shared mode
5832 self.needed_locks[locking.LEVEL_NODE_RES] = \
5833 self.needed_locks[locking.LEVEL_NODE]
5835 # Get node resource and instance locks in shared mode; they are not used
5836 # for anything but read-only access
5837 self.share_locks[locking.LEVEL_NODE_RES] = 1
5838 self.share_locks[locking.LEVEL_INSTANCE] = 1
5840 if self.lock_instances:
5841 self.needed_locks[locking.LEVEL_INSTANCE] = \
5842 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5844 def BuildHooksEnv(self):
5847 This runs on the master node.
5851 "OP_TARGET": self.op.node_name,
5852 "MASTER_CANDIDATE": str(self.op.master_candidate),
5853 "OFFLINE": str(self.op.offline),
5854 "DRAINED": str(self.op.drained),
5855 "MASTER_CAPABLE": str(self.op.master_capable),
5856 "VM_CAPABLE": str(self.op.vm_capable),
5859 def BuildHooksNodes(self):
5860 """Build hooks nodes.
5863 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5866 def CheckPrereq(self):
5867 """Check prerequisites.
5869 This only checks the instance list against the existing names.
5872 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
# When changing the secondary IP, re-evaluate the affected-instance set
# and make sure it still matches the locks acquired in ExpandNames.
5874 if self.lock_instances:
5875 affected_instances = \
5876 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5878 # Verify instance locks
5879 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5880 wanted_instances = frozenset(affected_instances.keys())
5881 if wanted_instances - owned_instances:
5882 raise errors.OpPrereqError("Instances affected by changing node %s's"
5883 " secondary IP address have changed since"
5884 " locks were acquired, wanted '%s', have"
5885 " '%s'; retry the operation" %
5887 utils.CommaJoin(wanted_instances),
5888 utils.CommaJoin(owned_instances)),
5891 affected_instances = None
5893 if (self.op.master_candidate is not None or
5894 self.op.drained is not None or
5895 self.op.offline is not None):
5896 # we can't change the master's node flags
5897 if self.op.node_name == self.cfg.GetMasterNode():
5898 raise errors.OpPrereqError("The master role can be changed"
5899 " only via master-failover",
5902 if self.op.master_candidate and not node.master_capable:
5903 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5904 " it a master candidate" % node.name,
5907 if self.op.vm_capable is False:
5908 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5910 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5911 " the vm_capable flag" % node.name,
# A demotion without auto_promote must not drop the cluster below the
# required number of master candidates.
5914 if node.master_candidate and self.might_demote and not self.lock_all:
5915 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5916 # check if after removing the current node, we're missing master
5918 (mc_remaining, mc_should, _) = \
5919 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5920 if mc_remaining < mc_should:
5921 raise errors.OpPrereqError("Not enough master candidates, please"
5922 " pass auto promote option to allow"
5923 " promotion (--auto-promote or RAPI"
5924 " auto_promote=True)", errors.ECODE_STATE)
5926 self.old_flags = old_flags = (node.master_candidate,
5927 node.drained, node.offline)
5928 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5929 self.old_role = old_role = self._F2R[old_flags]
5931 # Check for ineffective changes
5932 for attr in self._FLAGS:
5933 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
5934 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5935 setattr(self.op, attr, None)
5937 # Past this point, any flag change to False means a transition
5938 # away from the respective state, as only real changes are kept
5940 # TODO: We might query the real power state if it supports OOB
5941 if _SupportsOob(self.cfg, node):
5942 if self.op.offline is False and not (node.powered or
5943 self.op.powered is True):
5944 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5945 " offline status can be reset") %
5946 self.op.node_name, errors.ECODE_STATE)
5947 elif self.op.powered is not None:
5948 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5949 " as it does not support out-of-band"
5950 " handling") % self.op.node_name,
5953 # If we're being deofflined/drained, we'll MC ourself if needed
5954 if (self.op.drained is False or self.op.offline is False or
5955 (self.op.master_capable and not node.master_capable)):
5956 if _DecideSelfPromotion(self):
5957 self.op.master_candidate = True
5958 self.LogInfo("Auto-promoting node to master candidate")
5960 # If we're no longer master capable, we'll demote ourselves from MC
5961 if self.op.master_capable is False and node.master_candidate:
5962 self.LogInfo("Demoting from master candidate")
5963 self.op.master_candidate = False
# Derive the new role from the (at most one) requested flag change.
5966 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5967 if self.op.master_candidate:
5968 new_role = self._ROLE_CANDIDATE
5969 elif self.op.drained:
5970 new_role = self._ROLE_DRAINED
5971 elif self.op.offline:
5972 new_role = self._ROLE_OFFLINE
5973 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5974 # False is still in new flags, which means we're un-setting (the
5976 new_role = self._ROLE_REGULAR
5977 else: # no new flags, nothing, keep old role
5980 self.new_role = new_role
5982 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5983 # Trying to transition out of offline status
5984 result = self.rpc.call_version([node.name])[node.name]
5986 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5987 " to report its version: %s" %
5988 (node.name, result.fail_msg),
5991 self.LogWarning("Transitioning node from offline to online state"
5992 " without using re-add. Please make sure the node"
5995 # When changing the secondary ip, verify if this is a single-homed to
5996 # multi-homed transition or vice versa, and apply the relevant
5998 if self.op.secondary_ip:
5999 # Ok even without locking, because this can't be changed by any LU
6000 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6001 master_singlehomed = master.secondary_ip == master.primary_ip
6002 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6003 if self.op.force and node.name == master.name:
6004 self.LogWarning("Transitioning from single-homed to multi-homed"
6005 " cluster. All nodes will require a secondary ip.")
6007 raise errors.OpPrereqError("Changing the secondary ip on a"
6008 " single-homed cluster requires the"
6009 " --force option to be passed, and the"
6010 " target node to be the master",
6012 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6013 if self.op.force and node.name == master.name:
6014 self.LogWarning("Transitioning from multi-homed to single-homed"
6015 " cluster. Secondary IPs will have to be removed.")
6017 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6018 " same as the primary IP on a multi-homed"
6019 " cluster, unless the --force option is"
6020 " passed, and the target node is the"
6021 " master", errors.ECODE_INVAL)
6023 assert not (frozenset(affected_instances) -
6024 self.owned_locks(locking.LEVEL_INSTANCE))
6027 if affected_instances:
6028 msg = ("Cannot change secondary IP address: offline node has"
6029 " instances (%s) configured to use it" %
6030 utils.CommaJoin(affected_instances.keys()))
6031 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6033 # On online nodes, check that no instances are running, and that
6034 # the node has the new ip and we can reach it.
6035 for instance in affected_instances.values():
6036 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6037 msg="cannot change secondary ip")
6039 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6040 if master.name != node.name:
6041 # check reachability from master secondary ip to new secondary ip
6042 if not netutils.TcpPing(self.op.secondary_ip,
6043 constants.DEFAULT_NODED_PORT,
6044 source=master.secondary_ip):
6045 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6046 " based ping to node daemon port",
6047 errors.ECODE_ENVIRON)
6049 if self.op.ndparams:
6050 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6051 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6052 self.new_ndparams = new_ndparams
6054 if self.op.hv_state:
6055 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6056 self.node.hv_state_static)
6058 if self.op.disk_state:
6059 self.new_disk_state = \
6060 _MergeAndVerifyDiskState(self.op.disk_state,
6061 self.node.disk_state_static)
# Apply all validated changes to the node object and persist them; the
# returned "result" list pairs changed attribute names with new values.
6063 def Exec(self, feedback_fn):
6068 old_role = self.old_role
6069 new_role = self.new_role
6073 if self.op.ndparams:
6074 node.ndparams = self.new_ndparams
6076 if self.op.powered is not None:
6077 node.powered = self.op.powered
6079 if self.op.hv_state:
6080 node.hv_state_static = self.new_hv_state
6082 if self.op.disk_state:
6083 node.disk_state_static = self.new_disk_state
6085 for attr in ["master_capable", "vm_capable"]:
6086 val = getattr(self.op, attr)
6088 setattr(node, attr, val)
6089 result.append((attr, str(val)))
6091 if new_role != old_role:
6092 # Tell the node to demote itself, if no longer MC and not offline
6093 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6094 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6096 self.LogWarning("Node failed to demote itself: %s", msg)
6098 new_flags = self._R2F[new_role]
6099 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6101 result.append((desc, str(nf)))
6102 (node.master_candidate, node.drained, node.offline) = new_flags
6104 # we locked all nodes, we adjust the CP before updating this node
6106 _AdjustCandidatePool(self, [node.name])
6108 if self.op.secondary_ip:
6109 node.secondary_ip = self.op.secondary_ip
6110 result.append(("secondary_ip", self.op.secondary_ip))
6112 # this will trigger configuration file update, if needed
6113 self.cfg.Update(node, feedback_fn)
6115 # this will trigger job queue propagation or cleanup if the mc
6117 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6118 self.context.ReaddNode(node)
# LU that powercycles (hard-reboots) a node via the hypervisor; refuses
# to touch the master unless forced. Deliberately lockless: this is a
# last-resort operation that must not block behind other jobs.
6123 class LUNodePowercycle(NoHooksLU):
6124 """Powercycles a node.
6129 def CheckArguments(self):
6130 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6131 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6132 raise errors.OpPrereqError("The node is the master and the force"
6133 " parameter was not set",
6136 def ExpandNames(self):
6137 """Locking for PowercycleNode.
6139 This is a last-resort option and shouldn't block on other
6140 jobs. Therefore, we grab no locks.
6143 self.needed_locks = {}
6145 def Exec(self, feedback_fn):
6149 result = self.rpc.call_node_powercycle(self.op.node_name,
6150 self.cfg.GetHypervisorType())
6151 result.Raise("Failed to schedule the reboot")
6152 return result.payload
# LU that returns a dictionary describing the cluster configuration
# (versions, parameters, enabled hypervisors, networking, etc.).
# Read-only; no locks needed.
# NOTE(review): listing elides some lines (non-contiguous numbering).
6155 class LUClusterQuery(NoHooksLU):
6156 """Query cluster configuration.
6161 def ExpandNames(self):
6162 self.needed_locks = {}
6164 def Exec(self, feedback_fn):
6165 """Return cluster config.
6168 cluster = self.cfg.GetClusterInfo()
6171 # Filter just for enabled hypervisors
6172 for os_name, hv_dict in cluster.os_hvp.items():
6173 os_hvp[os_name] = {}
6174 for hv_name, hv_params in hv_dict.items():
6175 if hv_name in cluster.enabled_hypervisors:
6176 os_hvp[os_name][hv_name] = hv_params
6178 # Convert ip_family to ip_version
6179 primary_ip_version = constants.IP4_VERSION
6180 if cluster.primary_ip_family == netutils.IP6Address.family:
6181 primary_ip_version = constants.IP6_VERSION
# Assemble the result dictionary; keys form the public query API and
# must not be renamed.
6184 "software_version": constants.RELEASE_VERSION,
6185 "protocol_version": constants.PROTOCOL_VERSION,
6186 "config_version": constants.CONFIG_VERSION,
6187 "os_api_version": max(constants.OS_API_VERSIONS),
6188 "export_version": constants.EXPORT_VERSION,
6189 "architecture": runtime.GetArchInfo(),
6190 "name": cluster.cluster_name,
6191 "master": cluster.master_node,
6192 "default_hypervisor": cluster.primary_hypervisor,
6193 "enabled_hypervisors": cluster.enabled_hypervisors,
6194 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6195 for hypervisor_name in cluster.enabled_hypervisors]),
6197 "beparams": cluster.beparams,
6198 "osparams": cluster.osparams,
6199 "ipolicy": cluster.ipolicy,
6200 "nicparams": cluster.nicparams,
6201 "ndparams": cluster.ndparams,
6202 "diskparams": cluster.diskparams,
6203 "candidate_pool_size": cluster.candidate_pool_size,
6204 "master_netdev": cluster.master_netdev,
6205 "master_netmask": cluster.master_netmask,
6206 "use_external_mip_script": cluster.use_external_mip_script,
6207 "volume_group_name": cluster.volume_group_name,
6208 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6209 "file_storage_dir": cluster.file_storage_dir,
6210 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6211 "maintain_node_health": cluster.maintain_node_health,
6212 "ctime": cluster.ctime,
6213 "mtime": cluster.mtime,
6214 "uuid": cluster.uuid,
6215 "tags": list(cluster.GetTags()),
6216 "uid_pool": cluster.uid_pool,
6217 "default_iallocator": cluster.default_iallocator,
6218 "reserved_lvs": cluster.reserved_lvs,
6219 "primary_ip_version": primary_ip_version,
6220 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6221 "hidden_os": cluster.hidden_os,
6222 "blacklisted_os": cluster.blacklisted_os,
# Thin LU wrapper around _ClusterQuery for the old-style query API;
# delegates name expansion, lock declaration and execution.
6228 class LUClusterConfigQuery(NoHooksLU):
6229 """Return configuration values.
6234 def CheckArguments(self):
6235 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6237 def ExpandNames(self):
6238 self.cq.ExpandNames(self)
6240 def DeclareLocks(self, level):
6241 self.cq.DeclareLocks(self, level)
6243 def Exec(self, feedback_fn):
6244 result = self.cq.OldStyleQuery(self)
# The cluster is a singleton, so the old-style query yields one row.
6246 assert len(result) == 1
# Query implementation for the cluster itself. Lockless by design:
# requesting locking is rejected as a prerequisite error.
6251 class _ClusterQuery(_QueryBase):
6252 FIELDS = query.CLUSTER_FIELDS
6254 #: Do not sort (there is only one item)
6257 def ExpandNames(self, lu):
6258 lu.needed_locks = {}
6260 # The following variables interact with _QueryBase._GetNames
6261 self.wanted = locking.ALL_SET
6262 self.do_locking = self.use_locking
6265 raise errors.OpPrereqError("Can not use locking for cluster queries",
6268 def DeclareLocks(self, lu, level):
6271 def _GetQueryData(self, lu):
6272 """Computes the list of nodes and their attributes.
6275 # Locking is not used
6276 assert not (compat.any(lu.glm.is_owned(level)
6277 for level in locking.LEVELS
6278 if level != locking.LEVEL_CLUSTER) or
6279 self.do_locking or self.use_locking)
# Each piece of data is only collected when requested; otherwise the
# NotImplemented sentinel signals "not gathered" to the query layer.
6281 if query.CQ_CONFIG in self.requested_data:
6282 cluster = lu.cfg.GetClusterInfo()
6284 cluster = NotImplemented
6286 if query.CQ_QUEUE_DRAINED in self.requested_data:
6287 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6289 drain_flag = NotImplemented
6291 if query.CQ_WATCHER_PAUSE in self.requested_data:
6292 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6294 watcher_pause = NotImplemented
6296 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
# LU that assembles (brings up) all block devices of an instance,
# optionally waiting for disk synchronization afterwards.
6299 class LUInstanceActivateDisks(NoHooksLU):
6300 """Bring up an instance's disks.
6305 def ExpandNames(self):
6306 self._ExpandAndLockInstance()
6307 self.needed_locks[locking.LEVEL_NODE] = []
6308 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6310 def DeclareLocks(self, level):
6311 if level == locking.LEVEL_NODE:
6312 self._LockInstancesNodes()
6314 def CheckPrereq(self):
6315 """Check prerequisites.
6317 This checks that the instance is in the cluster.
6320 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6321 assert self.instance is not None, \
6322 "Cannot retrieve locked instance %s" % self.op.instance_name
6323 _CheckNodeOnline(self, self.instance.primary_node)
6325 def Exec(self, feedback_fn):
6326 """Activate the disks.
6329 disks_ok, disks_info = \
6330 _AssembleInstanceDisks(self, self.instance,
6331 ignore_size=self.op.ignore_size)
6333 raise errors.OpExecError("Cannot activate block devices")
# Optionally block until all disks have finished resynchronizing.
6335 if self.op.wait_for_sync:
6336 if not _WaitForSync(self, self.instance):
6337 raise errors.OpExecError("Some disks of the instance are degraded!")
# _AssembleInstanceDisks: two-pass assembly of an instance's block devices via
# per-node RPC (call_blockdev_assemble): first all nodes as secondary, then
# the primary node only, to narrow the DRBD primary-switch race described in
# the inline comments.
# NOTE(review): elided listing — several guards ("if msg:", "if not disks_ok:",
# loop over disks at the end) and variable initialisations (disks_ok,
# device_info) are missing from this view.
6342 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6344 """Prepare the block devices for an instance.
6346 This sets up the block devices on all nodes.
6348 @type lu: L{LogicalUnit}
6349 @param lu: the logical unit on whose behalf we execute
6350 @type instance: L{objects.Instance}
6351 @param instance: the instance for whose disks we assemble
6352 @type disks: list of L{objects.Disk} or None
6353 @param disks: which disks to assemble (or all, if None)
6354 @type ignore_secondaries: boolean
6355 @param ignore_secondaries: if true, errors on secondary nodes
6356 won't result in an error return from the function
6357 @type ignore_size: boolean
6358 @param ignore_size: if true, the current known size of the disk
6359 will not be used during the disk activation, useful for cases
6360 when the size is wrong
6361 @return: False if the operation failed, otherwise a list of
6362 (host, instance_visible_name, node_visible_name)
6363 with the mapping from node devices to instance devices
6368 iname = instance.name
6369 disks = _ExpandCheckDisks(instance, disks)
6371 # With the two passes mechanism we try to reduce the window of
6372 # opportunity for the race condition of switching DRBD to primary
6373 # before handshaking occured, but we do not eliminate it
6375 # The proper fix would be to wait (with some limits) until the
6376 # connection has been made and drbd transitions from WFConnection
6377 # into any other network-connected state (Connected, SyncTarget,
6380 # 1st pass, assemble on all nodes in secondary mode
6381 for idx, inst_disk in enumerate(disks):
6382 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
# Copy + UnsetSize so the node is not forced to the possibly-stale
# recorded size when ignore_size is requested (guard elided here).
6384 node_disk = node_disk.Copy()
6385 node_disk.UnsetSize()
6386 lu.cfg.SetDiskID(node_disk, node)
6387 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6389 msg = result.fail_msg
6391 is_offline_secondary = (node in instance.secondary_nodes and
6393 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6394 " (is_primary=False, pass=1): %s",
6395 inst_disk.iv_name, node, msg)
6396 if not (ignore_secondaries or is_offline_secondary):
6399 # FIXME: race condition on drbd migration to primary
6401 # 2nd pass, do only the primary node
6402 for idx, inst_disk in enumerate(disks):
6405 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6406 if node != instance.primary_node:
6409 node_disk = node_disk.Copy()
6410 node_disk.UnsetSize()
6411 lu.cfg.SetDiskID(node_disk, node)
6412 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6414 msg = result.fail_msg
6416 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6417 " (is_primary=True, pass=2): %s",
6418 inst_disk.iv_name, node, msg)
6421 dev_path = result.payload
6423 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6425 # leave the disks configured for the primary node
6426 # this is a workaround that would be fixed better by
6427 # improving the logical/physical id handling
6429 lu.cfg.SetDiskID(disk, instance.primary_node)
6431 return disks_ok, device_info
# _StartInstanceDisks: thin wrapper over _AssembleInstanceDisks that, on
# failure, shuts the disks back down, hints about '--force' when applicable,
# and raises OpExecError.
# NOTE(review): the "if not disks_ok:" guard and part of the hint string are
# elided from this listing.
6434 def _StartInstanceDisks(lu, instance, force):
6435 """Start the disks of an instance.
6438 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6439 ignore_secondaries=force)
6441 _ShutdownInstanceDisks(lu, instance)
# force may be None (caller did not offer a force option) — only suggest
# '--force' when it was explicitly False.
6442 if force is not None and not force:
6443 lu.proc.LogWarning("", hint="If the message above refers to a"
6445 " you can retry the operation using '--force'.")
6446 raise errors.OpExecError("Disk consistency error")
# LUInstanceDeactivateDisks: shut down an instance's block devices; the
# "safe" path additionally verifies the instance is down first.
6449 class LUInstanceDeactivateDisks(NoHooksLU):
6450 """Shutdown an instance's disks.
6455 def ExpandNames(self):
6456 self._ExpandAndLockInstance()
6457 self.needed_locks[locking.LEVEL_NODE] = []
6458 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6460 def DeclareLocks(self, level):
6461 if level == locking.LEVEL_NODE:
6462 self._LockInstancesNodes()
6464 def CheckPrereq(self):
6465 """Check prerequisites.
6467 This checks that the instance is in the cluster.
6470 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6471 assert self.instance is not None, \
6472 "Cannot retrieve locked instance %s" % self.op.instance_name
6474 def Exec(self, feedback_fn):
6475 """Deactivate the disks
6478 instance = self.instance
# NOTE(review): the branch selecting forced vs. safe shutdown (presumably
# on self.op.force) is elided from this listing — confirm in the full file.
6480 _ShutdownInstanceDisks(self, instance)
6482 _SafeShutdownInstanceDisks(self, instance)
6485 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6486 """Shutdown block devices of an instance.
6488 This function checks if an instance is running, before calling
6489 _ShutdownInstanceDisks.
# Refuse to touch the disks unless the instance is administratively down.
6492 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6493 _ShutdownInstanceDisks(lu, instance, disks=disks)
# _ExpandCheckDisks: normalise a disks argument — None means "all instance
# disks"; an explicit list must be a subset of the instance's disks.
6496 def _ExpandCheckDisks(instance, disks):
6497 """Return the instance disks selected by the disks list
6499 @type disks: list of L{objects.Disk} or None
6500 @param disks: selected disks
6501 @rtype: list of L{objects.Disk}
6502 @return: selected instance disks to act on
# NOTE(review): the "if disks is None:" guard before this return and the
# "else:" / final "return disks" lines are elided from this listing.
6506 return instance.disks
6508 if not set(disks).issubset(instance.disks):
6509 raise errors.ProgrammerError("Can only act on disks belonging to the"
# _ShutdownInstanceDisks: issue blockdev-shutdown RPCs on every node of every
# selected disk. Failures on secondaries of offline nodes are tolerated;
# primary-node failures are fatal unless ignore_primary is set.
# NOTE(review): elided listing — the outer "for disk in disks:" line, the
# "if msg:" guard and the result-tracking variable are missing from view.
6514 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6515 """Shutdown block devices of an instance.
6517 This does the shutdown on all nodes of the instance.
6519 If the ignore_primary is false, errors on the primary node are
6524 disks = _ExpandCheckDisks(instance, disks)
6527 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6528 lu.cfg.SetDiskID(top_disk, node)
6529 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6530 msg = result.fail_msg
6532 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6533 disk.iv_name, node, msg)
# A failure counts as fatal on the primary (unless ignored) and on any
# non-offline secondary.
6534 if ((node == instance.primary_node and not ignore_primary) or
6535 (node != instance.primary_node and not result.offline)):
6540 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6541 """Checks if a node has enough free memory.
6543 This function check if a given node has the needed amount of free
6544 memory. In case the node has less memory or we cannot get the
6545 information from the node, this function raise an OpPrereqError
6548 @type lu: C{LogicalUnit}
6549 @param lu: a logical unit from which we get configuration data
6551 @param node: the node to check
6552 @type reason: C{str}
6553 @param reason: string to use in the error message
6554 @type requested: C{int}
6555 @param requested: the amount of memory in MiB to check for
6556 @type hypervisor_name: C{str}
6557 @param hypervisor_name: the hypervisor to ask for memory stats
6559 @return: node current free memory
6560 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6561 we cannot check the node
# Single-node, single-hypervisor node_info RPC; payload unpacking below
# expects exactly one hv result tuple.
6564 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6565 nodeinfo[node].Raise("Can't get data from node %s" % node,
6566 prereq=True, ecode=errors.ECODE_ENVIRON)
6567 (_, _, (hv_info, )) = nodeinfo[node].payload
6569 free_mem = hv_info.get("memory_free", None)
# A non-int value means the hypervisor could not report memory — treat as
# an environment error rather than comparing garbage.
6570 if not isinstance(free_mem, int):
6571 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6572 " was '%s'" % (node, free_mem),
6573 errors.ECODE_ENVIRON)
6574 if requested > free_mem:
6575 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6576 " needed %s MiB, available %s MiB" %
6577 (node, reason, requested, free_mem),
6582 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6583 """Checks if nodes have enough free disk space in the all VGs.
6585 This function check if all given nodes have the needed amount of
6586 free disk. In case any node has less disk or we cannot get the
6587 information from the node, this function raise an OpPrereqError
6590 @type lu: C{LogicalUnit}
6591 @param lu: a logical unit from which we get configuration data
6592 @type nodenames: C{list}
6593 @param nodenames: the list of node names to check
6594 @type req_sizes: C{dict}
6595 @param req_sizes: the hash of vg and corresponding amount of disk in
6597 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6598 or we cannot check the node
# Simply fan out to the per-VG check for each (vg, size) requirement.
6601 for vg, req_size in req_sizes.items():
6602 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6605 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6606 """Checks if nodes have enough free disk space in the specified VG.
6608 This function check if all given nodes have the needed amount of
6609 free disk. In case any node has less disk or we cannot get the
6610 information from the node, this function raise an OpPrereqError
6613 @type lu: C{LogicalUnit}
6614 @param lu: a logical unit from which we get configuration data
6615 @type nodenames: C{list}
6616 @param nodenames: the list of node names to check
6618 @param vg: the volume group to check
6619 @type requested: C{int}
6620 @param requested: the amount of disk in MiB to check for
6621 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6622 or we cannot check the node
# One node_info RPC for all nodes, asking only about the given VG.
6625 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6626 for node in nodenames:
6627 info = nodeinfo[node]
6628 info.Raise("Cannot get current information from node %s" % node,
6629 prereq=True, ecode=errors.ECODE_ENVIRON)
6630 (_, (vg_info, ), _) = info.payload
6631 vg_free = vg_info.get("vg_free", None)
# Non-int vg_free means the VG is missing/broken on that node.
6632 if not isinstance(vg_free, int):
6633 raise errors.OpPrereqError("Can't compute free disk space on node"
6634 " %s for vg %s, result was '%s'" %
6635 (node, vg, vg_free), errors.ECODE_ENVIRON)
6636 if requested > vg_free:
6637 raise errors.OpPrereqError("Not enough disk space on target node %s"
6638 " vg %s: required %d MiB, available %d MiB" %
6639 (node, vg, requested, vg_free),
6643 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6644 """Checks if nodes have enough physical CPUs
6646 This function checks if all given nodes have the needed number of
6647 physical CPUs. In case any node has less CPUs or we cannot get the
6648 information from the node, this function raises an OpPrereqError
6651 @type lu: C{LogicalUnit}
6652 @param lu: a logical unit from which we get configuration data
6653 @type nodenames: C{list}
6654 @param nodenames: the list of node names to check
6655 @type requested: C{int}
6656 @param requested: the minimum acceptable number of physical CPUs
6657 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6658 or we cannot check the node
# Same structure as the free-memory/disk checks: one RPC, then validate
# the per-node hypervisor payload.
6661 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6662 for node in nodenames:
6663 info = nodeinfo[node]
6664 info.Raise("Cannot get current information from node %s" % node,
6665 prereq=True, ecode=errors.ECODE_ENVIRON)
6666 (_, _, (hv_info, )) = info.payload
6667 num_cpus = hv_info.get("cpu_total", None)
6668 if not isinstance(num_cpus, int):
6669 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6670 " on node %s, result was '%s'" %
6671 (node, num_cpus), errors.ECODE_ENVIRON)
6672 if requested > num_cpus:
6673 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6674 "required" % (node, num_cpus, requested),
# LUInstanceStartup: start an instance — validate override be/hv parameters,
# check node/memory prerequisites, mark the instance up in the config,
# assemble its disks and issue the instance_start RPC.
# NOTE(review): elided listing — several guards ("if msg:", "else:", env dict
# construction, returns) are missing from this view; verify against the full
# file before editing.
6678 class LUInstanceStartup(LogicalUnit):
6679 """Starts an instance.
6682 HPATH = "instance-start"
6683 HTYPE = constants.HTYPE_INSTANCE
6686 def CheckArguments(self):
6688 if self.op.beparams:
6689 # fill the beparams dict
6690 objects.UpgradeBeParams(self.op.beparams)
6691 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6693 def ExpandNames(self):
6694 self._ExpandAndLockInstance()
6695 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6697 def DeclareLocks(self, level):
6698 if level == locking.LEVEL_NODE_RES:
6699 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6701 def BuildHooksEnv(self):
6704 This runs on master, primary and secondary nodes of the instance.
6708 "FORCE": self.op.force,
6711 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6715 def BuildHooksNodes(self):
6716 """Build hooks nodes.
6719 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6722 def CheckPrereq(self):
6723 """Check prerequisites.
6725 This checks that the instance is in the cluster.
6728 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6729 assert self.instance is not None, \
6730 "Cannot retrieve locked instance %s" % self.op.instance_name
# Validate hv parameter overrides locally and then cluster-wide.
6733 if self.op.hvparams:
6734 # check hypervisor parameter syntax (locally)
6735 cluster = self.cfg.GetClusterInfo()
6736 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6737 filled_hvp = cluster.FillHV(instance)
6738 filled_hvp.update(self.op.hvparams)
6739 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6740 hv_type.CheckParameterSyntax(filled_hvp)
6741 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6743 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6745 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
# An offline primary is tolerated only with ignore_offline_nodes; then the
# parameter overrides cannot be applied and are just warned about.
6747 if self.primary_offline and self.op.ignore_offline_nodes:
6748 self.proc.LogWarning("Ignoring offline primary node")
6750 if self.op.hvparams or self.op.beparams:
6751 self.proc.LogWarning("Overridden parameters are ignored")
6753 _CheckNodeOnline(self, instance.primary_node)
6755 bep = self.cfg.GetClusterInfo().FillBE(instance)
6756 bep.update(self.op.beparams)
6758 # check bridges existence
6759 _CheckInstanceBridgesExist(self, instance)
6761 remote_info = self.rpc.call_instance_info(instance.primary_node,
6763 instance.hypervisor)
6764 remote_info.Raise("Error checking node %s" % instance.primary_node,
6765 prereq=True, ecode=errors.ECODE_ENVIRON)
# Only require free memory when the instance is not already running.
6766 if not remote_info.payload: # not running already
6767 _CheckNodeFreeMemory(self, instance.primary_node,
6768 "starting instance %s" % instance.name,
6769 bep[constants.BE_MINMEM], instance.hypervisor)
6771 def Exec(self, feedback_fn):
6772 """Start the instance.
6775 instance = self.instance
6776 force = self.op.force
6778 if not self.op.no_remember:
6779 self.cfg.MarkInstanceUp(instance.name)
6781 if self.primary_offline:
6782 assert self.op.ignore_offline_nodes
6783 self.proc.LogInfo("Primary node offline, marked instance as started")
6785 node_current = instance.primary_node
6787 _StartInstanceDisks(self, instance, force)
6790 self.rpc.call_instance_start(node_current,
6791 (instance, self.op.hvparams,
6793 self.op.startup_paused)
6794 msg = result.fail_msg
# On start failure the disks are torn down again before raising.
6796 _ShutdownInstanceDisks(self, instance)
6797 raise errors.OpExecError("Could not start instance: %s" % msg)
# LUInstanceReboot: reboot an instance. Soft/hard reboots of a running
# instance go through the instance_reboot RPC; otherwise (or for full reboot)
# the instance is shut down, its disks cycled, and it is started again.
# NOTE(review): elided listing — "return" lines, "else:" branches and the
# reboot-type argument of call_instance_reboot are missing from this view.
6800 class LUInstanceReboot(LogicalUnit):
6801 """Reboot an instance.
6804 HPATH = "instance-reboot"
6805 HTYPE = constants.HTYPE_INSTANCE
6808 def ExpandNames(self):
6809 self._ExpandAndLockInstance()
6811 def BuildHooksEnv(self):
6814 This runs on master, primary and secondary nodes of the instance.
6818 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6819 "REBOOT_TYPE": self.op.reboot_type,
6820 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6823 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6827 def BuildHooksNodes(self):
6828 """Build hooks nodes.
6831 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6834 def CheckPrereq(self):
6835 """Check prerequisites.
6837 This checks that the instance is in the cluster.
6840 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6841 assert self.instance is not None, \
6842 "Cannot retrieve locked instance %s" % self.op.instance_name
6843 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6844 _CheckNodeOnline(self, instance.primary_node)
6846 # check bridges existence
6847 _CheckInstanceBridgesExist(self, instance)
6849 def Exec(self, feedback_fn):
6850 """Reboot the instance.
6853 instance = self.instance
6854 ignore_secondaries = self.op.ignore_secondaries
6855 reboot_type = self.op.reboot_type
6857 remote_info = self.rpc.call_instance_info(instance.primary_node,
6859 instance.hypervisor)
6860 remote_info.Raise("Error checking node %s" % instance.primary_node)
6861 instance_running = bool(remote_info.payload)
6863 node_current = instance.primary_node
# Fast path: soft/hard reboot of a running instance via a single RPC.
6865 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6866 constants.INSTANCE_REBOOT_HARD]:
6867 for disk in instance.disks:
6868 self.cfg.SetDiskID(disk, node_current)
6869 result = self.rpc.call_instance_reboot(node_current, instance,
6871 self.op.shutdown_timeout)
6872 result.Raise("Could not reboot instance")
# Slow path: full stop/start cycle, including the disks.
6874 if instance_running:
6875 result = self.rpc.call_instance_shutdown(node_current, instance,
6876 self.op.shutdown_timeout)
6877 result.Raise("Could not shutdown instance for full reboot")
6878 _ShutdownInstanceDisks(self, instance)
6880 self.LogInfo("Instance %s was already stopped, starting now",
6882 _StartInstanceDisks(self, instance, ignore_secondaries)
6883 result = self.rpc.call_instance_start(node_current,
6884 (instance, None, None), False)
6885 msg = result.fail_msg
6887 _ShutdownInstanceDisks(self, instance)
6888 raise errors.OpExecError("Could not start instance for"
6889 " full reboot: %s" % msg)
6891 self.cfg.MarkInstanceUp(instance.name)
# LUInstanceShutdown: stop an instance — mark it down in the config, issue
# the instance_shutdown RPC (with timeout) and deactivate its disks.
# NOTE(review): elided listing — "return" lines, "else:"/"if msg:" guards
# are missing from this view.
6894 class LUInstanceShutdown(LogicalUnit):
6895 """Shutdown an instance.
6898 HPATH = "instance-stop"
6899 HTYPE = constants.HTYPE_INSTANCE
6902 def ExpandNames(self):
6903 self._ExpandAndLockInstance()
6905 def BuildHooksEnv(self):
6908 This runs on master, primary and secondary nodes of the instance.
6911 env = _BuildInstanceHookEnvByObject(self, self.instance)
6912 env["TIMEOUT"] = self.op.timeout
6915 def BuildHooksNodes(self):
6916 """Build hooks nodes.
6919 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6922 def CheckPrereq(self):
6923 """Check prerequisites.
6925 This checks that the instance is in the cluster.
6928 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6929 assert self.instance is not None, \
6930 "Cannot retrieve locked instance %s" % self.op.instance_name
6932 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6934 self.primary_offline = \
6935 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6937 if self.primary_offline and self.op.ignore_offline_nodes:
6938 self.proc.LogWarning("Ignoring offline primary node")
6940 _CheckNodeOnline(self, self.instance.primary_node)
6942 def Exec(self, feedback_fn):
6943 """Shutdown the instance.
6946 instance = self.instance
6947 node_current = instance.primary_node
6948 timeout = self.op.timeout
6950 if not self.op.no_remember:
6951 self.cfg.MarkInstanceDown(instance.name)
6953 if self.primary_offline:
6954 assert self.op.ignore_offline_nodes
6955 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6957 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6958 msg = result.fail_msg
# A failed shutdown RPC is only warned about; disks are deactivated anyway.
6960 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6962 _ShutdownInstanceDisks(self, instance)
# LUInstanceReinstall: re-run the OS creation scripts on a stopped instance,
# optionally switching it to a different OS and applying OS parameter
# overrides.
# NOTE(review): elided listing — "else:" branches and some guards/returns
# are missing from this view.
6965 class LUInstanceReinstall(LogicalUnit):
6966 """Reinstall an instance.
6969 HPATH = "instance-reinstall"
6970 HTYPE = constants.HTYPE_INSTANCE
6973 def ExpandNames(self):
6974 self._ExpandAndLockInstance()
6976 def BuildHooksEnv(self):
6979 This runs on master, primary and secondary nodes of the instance.
6982 return _BuildInstanceHookEnvByObject(self, self.instance)
6984 def BuildHooksNodes(self):
6985 """Build hooks nodes.
6988 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6991 def CheckPrereq(self):
6992 """Check prerequisites.
6994 This checks that the instance is in the cluster and is not running.
6997 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6998 assert instance is not None, \
6999 "Cannot retrieve locked instance %s" % self.op.instance_name
7000 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7001 " offline, cannot reinstall")
# Diskless instances have nothing to reinstall onto.
7003 if instance.disk_template == constants.DT_DISKLESS:
7004 raise errors.OpPrereqError("Instance '%s' has no disks" %
7005 self.op.instance_name,
7007 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
# When changing OS, verify the target OS exists on the primary node.
7009 if self.op.os_type is not None:
7011 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7012 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7013 instance_os = self.op.os_type
7015 instance_os = instance.os
7017 nodelist = list(instance.all_nodes)
7019 if self.op.osparams:
7020 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7021 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7022 self.os_inst = i_osdict # the new dict (without defaults)
7026 self.instance = instance
7028 def Exec(self, feedback_fn):
7029 """Reinstall the instance.
7032 inst = self.instance
7034 if self.op.os_type is not None:
7035 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7036 inst.os = self.op.os_type
7037 # Write to configuration
7038 self.cfg.Update(inst, feedback_fn)
# Disks must be active while the OS create scripts run; they are shut
# down again afterwards (the try/finally structure is elided here).
7040 _StartInstanceDisks(self, inst, None)
7042 feedback_fn("Running the instance OS create scripts...")
7043 # FIXME: pass debug option from opcode to backend
7044 result = self.rpc.call_instance_os_add(inst.primary_node,
7045 (inst, self.os_inst), True,
7046 self.op.debug_level)
7047 result.Raise("Could not install OS for instance %s on node %s" %
7048 (inst.name, inst.primary_node))
7050 _ShutdownInstanceDisks(self, inst)
# LUInstanceRecreateDisks: recreate some or all of an instance's disks,
# optionally on a new set of nodes chosen explicitly or via an iallocator.
# Only disk size and mode may be changed while recreating (_MODIFYABLE).
# NOTE(review): elided, line-numbered listing — numerous guards ("if ...:",
# "else:", "try:", returns) and some continuation lines are missing from
# this view; verify any change against the full file.
7053 class LUInstanceRecreateDisks(LogicalUnit):
7054 """Recreate an instance's missing disks.
7057 HPATH = "instance-recreate-disks"
7058 HTYPE = constants.HTYPE_INSTANCE
7061 _MODIFYABLE = frozenset([
7062 constants.IDISK_SIZE,
7063 constants.IDISK_MODE,
7066 # New or changed disk parameters may have different semantics
7067 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7068 constants.IDISK_ADOPT,
7070 # TODO: Implement support changing VG while recreating
7072 constants.IDISK_METAVG,
7075 def _RunAllocator(self):
7076 """Run the allocator based on input opcode.
7079 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7082 # The allocator should actually run in "relocate" mode, but current
7083 # allocators don't support relocating all the nodes of an instance at
7084 # the same time. As a workaround we use "allocate" mode, but this is
7085 # suboptimal for two reasons:
7086 # - The instance name passed to the allocator is present in the list of
7087 # existing instances, so there could be a conflict within the
7088 # internal structures of the allocator. This doesn't happen with the
7089 # current allocators, but it's a liability.
7090 # - The allocator counts the resources used by the instance twice: once
7091 # because the instance exists already, and once because it tries to
7092 # allocate a new instance.
7093 # The allocator could choose some of the nodes on which the instance is
7094 # running, but that's not a problem. If the instance nodes are broken,
7095 # they should be already be marked as drained or offline, and hence
7096 # skipped by the allocator. If instance disks have been lost for other
7097 # reasons, then recreating the disks on the same nodes should be fine.
7098 disk_template = self.instance.disk_template
7099 spindle_use = be_full[constants.BE_SPINDLE_USE]
7100 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7101 disk_template=disk_template,
7102 tags=list(self.instance.GetTags()),
7103 os=self.instance.os,
7105 vcpus=be_full[constants.BE_VCPUS],
7106 memory=be_full[constants.BE_MAXMEM],
7107 spindle_use=spindle_use,
7108 disks=[{constants.IDISK_SIZE: d.size,
7109 constants.IDISK_MODE: d.mode}
7110 for d in self.instance.disks],
7111 hypervisor=self.instance.hypervisor)
7112 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7114 ial.Run(self.op.iallocator)
7116 assert ial.required_nodes == len(self.instance.all_nodes)
# Allocator failure or a wrong-sized result is a prerequisite error.
7119 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7120 " %s" % (self.op.iallocator, ial.info),
7123 if len(ial.result) != ial.required_nodes:
7124 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7125 " of nodes (%s), required %s" %
7126 (self.op.iallocator, len(ial.result),
7127 ial.required_nodes), errors.ECODE_FAULT)
7129 self.op.nodes = ial.result
7130 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7131 self.op.instance_name, self.op.iallocator,
7132 utils.CommaJoin(ial.result))
7134 def CheckArguments(self):
# Backward compatibility: a plain list of indices becomes (idx, {}) pairs.
7135 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7136 # Normalize and convert deprecated list of disk indices
7137 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7139 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7141 raise errors.OpPrereqError("Some disks have been specified more than"
7142 " once: %s" % utils.CommaJoin(duplicates),
7145 if self.op.iallocator and self.op.nodes:
7146 raise errors.OpPrereqError("Give either the iallocator or the new"
7147 " nodes, not both", errors.ECODE_INVAL)
7149 for (idx, params) in self.op.disks:
7150 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7151 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7153 raise errors.OpPrereqError("Parameters for disk %s try to change"
7154 " unmodifyable parameter(s): %s" %
7155 (idx, utils.CommaJoin(unsupported)),
7158 def ExpandNames(self):
7159 self._ExpandAndLockInstance()
7160 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7162 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7163 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7165 self.needed_locks[locking.LEVEL_NODE] = []
7166 if self.op.iallocator:
7167 # iallocator will select a new node in the same group
7168 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7169 self.needed_locks[locking.LEVEL_NODE_RES] = []
7171 def DeclareLocks(self, level):
7172 if level == locking.LEVEL_NODEGROUP:
7173 assert self.op.iallocator is not None
7174 assert not self.op.nodes
7175 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7176 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7177 # Lock the primary group used by the instance optimistically; this
7178 # requires going via the node before it's locked, requiring
7179 # verification later on
7180 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7181 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7183 elif level == locking.LEVEL_NODE:
7184 # If an allocator is used, then we lock all the nodes in the current
7185 # instance group, as we don't know yet which ones will be selected;
7186 # if we replace the nodes without using an allocator, we only need to
7187 # lock the old primary for doing RPCs (FIXME: we don't lock nodes for
7188 # RPC anymore), otherwise we need to lock all the instance nodes for
7190 if self.op.iallocator:
7191 assert not self.op.nodes
7192 assert not self.needed_locks[locking.LEVEL_NODE]
7193 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7195 # Lock member nodes of the group of the primary node
7196 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7197 self.needed_locks[locking.LEVEL_NODE].extend(
7198 self.cfg.GetNodeGroup(group_uuid).members)
7200 primary_only = bool(self.op.nodes)
7201 self._LockInstancesNodes(primary_only=primary_only)
7202 elif level == locking.LEVEL_NODE_RES:
# Resource locks mirror the node locks.
7204 self.needed_locks[locking.LEVEL_NODE_RES] = \
7205 self.needed_locks[locking.LEVEL_NODE][:]
7207 def BuildHooksEnv(self):
7210 This runs on master, primary and secondary nodes of the instance.
7213 return _BuildInstanceHookEnvByObject(self, self.instance)
7215 def BuildHooksNodes(self):
7216 """Build hooks nodes.
7219 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7222 def CheckPrereq(self):
7223 """Check prerequisites.
7225 This checks that the instance is in the cluster and is not running.
7228 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7229 assert instance is not None, \
7230 "Cannot retrieve locked instance %s" % self.op.instance_name
# Explicit node replacement must cover every node of the instance.
7232 if len(self.op.nodes) != len(instance.all_nodes):
7233 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7234 " %d replacement nodes were specified" %
7235 (instance.name, len(instance.all_nodes),
7236 len(self.op.nodes)),
7238 assert instance.disk_template != constants.DT_DRBD8 or \
7239 len(self.op.nodes) == 2
7240 assert instance.disk_template != constants.DT_PLAIN or \
7241 len(self.op.nodes) == 1
7242 primary_node = self.op.nodes[0]
7244 primary_node = instance.primary_node
7245 if not self.op.iallocator:
7246 _CheckNodeOnline(self, primary_node)
7248 if instance.disk_template == constants.DT_DISKLESS:
7249 raise errors.OpPrereqError("Instance '%s' has no disks" %
7250 self.op.instance_name, errors.ECODE_INVAL)
7252 # Verify if node group locks are still correct
7253 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7255 # Node group locks are acquired only for the primary node (and only
7256 # when the allocator is used)
7257 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7260 # if we replace nodes *and* the old primary is offline, we don't
7262 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7263 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7264 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7265 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7266 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7267 msg="cannot recreate disks")
# Build the idx -> changes mapping; no explicit list means "all disks".
7270 self.disks = dict(self.op.disks)
7272 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7274 maxidx = max(self.disks.keys())
7275 if maxidx >= len(instance.disks):
7276 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7279 if ((self.op.nodes or self.op.iallocator) and
7280 sorted(self.disks.keys()) != range(len(instance.disks))):
7281 raise errors.OpPrereqError("Can't recreate disks partially and"
7282 " change the nodes at the same time",
7285 self.instance = instance
7287 if self.op.iallocator:
7288 self._RunAllocator()
7290 # Release unneeded node and node resource locks
7291 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7292 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7294 def Exec(self, feedback_fn):
7295 """Recreate the disks.
7298 instance = self.instance
7300 assert (self.owned_locks(locking.LEVEL_NODE) ==
7301 self.owned_locks(locking.LEVEL_NODE_RES))
7304 mods = [] # keeps track of needed changes
7306 for idx, disk in enumerate(instance.disks):
7308 changes = self.disks[idx]
7310 # Disk should not be recreated
7314 # update secondaries for disks, if needed
7315 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7316 # need to update the nodes and minors
7317 assert len(self.op.nodes) == 2
7318 assert len(disk.logical_id) == 6 # otherwise disk internals
# Keep port and secret, replace nodes and allocate fresh DRBD minors.
7320 (_, _, old_port, _, _, old_secret) = disk.logical_id
7321 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7322 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7323 new_minors[0], new_minors[1], old_secret)
7324 assert len(disk.logical_id) == len(new_id)
7328 mods.append((idx, new_id, changes))
7330 # now that we have passed all asserts above, we can apply the mods
7331 # in a single run (to avoid partial changes)
7332 for idx, new_id, changes in mods:
7333 disk = instance.disks[idx]
7334 if new_id is not None:
7335 assert disk.dev_type == constants.LD_DRBD8
7336 disk.logical_id = new_id
7338 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7339 mode=changes.get(constants.IDISK_MODE, None))
7341 # change primary node, if needed
7343 instance.primary_node = self.op.nodes[0]
7344 self.LogWarning("Changing the instance's nodes, you will have to"
7345 " remove any disks left on the older nodes manually")
7348 self.cfg.Update(instance, feedback_fn)
# Finally create the (missing) disks, skipping those not selected.
7350 _CreateDisks(self, instance, to_skip=to_skip)
class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  NOTE(review): this excerpt looks line-sampled; some original lines
  (guards, returns, try/finally) are not visible below.
  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    # NOTE(review): "return env" not visible in this excerpt

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    # NOTE(review): "return (nl, nl)" not visible in this excerpt

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                        msg="cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      if hostname.name != new_name:
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                   (hostname.name, self.op.new_name),
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)

    # NOTE(review): the enclosing try/finally (disks shut down in the
    # finally clause) is not visible in this excerpt
    result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                               old_name, self.op.debug_level)
    msg = result.fail_msg
    # NOTE(review): presumably guarded by "if msg:" — rename failure is
    # only warned about, since the config rename already happened
    msg = ("Could not run OS rename script for instance %s on node %s"
           " (but the instance has been renamed in Ganeti): %s" %
           (inst.name, inst.primary_node, msg))
    self.proc.LogWarning(msg)
    _ShutdownInstanceDisks(self, inst)
class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  NOTE(review): line-sampled excerpt; some guard lines are missing below.
  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy the node locks over to the node-resource level
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    # NOTE(review): "return env" not visible in this excerpt

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return (nl, nl_post)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    # NOTE(review): presumably "if msg:" guards the branch below, and an
    # "else:" precedes the raise — those lines are not visible here
    if self.op.ignore_failures:
      feedback_fn("Warning: can't shutdown instance: %s" % msg)
    raise errors.OpExecError("Could not shutdown instance %s on"
                             (instance.name, instance.primary_node, msg))

    # both lock levels must cover all of the instance's nodes before removal
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))
    assert not (set(instance.all_nodes) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Not owning correct locks"

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  Removes the instance's block devices, drops it from the cluster
  configuration and schedules the removal of its lock.

  @param lu: the calling LogicalUnit (provides cfg, locks, rpc)
  @param feedback_fn: callable used to report warnings to the client
  @param instance: the instance object to remove
  @param ignore_failures: if True, disk-removal failures only warn
  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)
  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  Thin wrapper delegating all work to an _InstanceQuery helper object.
  """
  # pylint: disable=W0142

  def CheckArguments(self):
    # build the query helper from the opcode's filter/fields/locking flags
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
                             self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    # returns results in the legacy (pre-query2) list-of-lists format
    return self.iq.OldStyleQuery(self)
class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  The actual work is delegated to a TLMigrateInstance tasklet configured
  for failover (shutdown + restart on the other node).

  NOTE(review): line-sampled excerpt; some original lines (dict opener,
  else-branches, returns) are not visible below.
  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check the arguments.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    # NOTE(review): additional keyword lines (e.g. cleanup/failover flags)
    # of this constructor call are not visible in this excerpt
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout,
                                       ignore_ipolicy=self.op.ignore_ipolicy)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        # NOTE(review): an "else:" presumably separates the two
        # assignments — not visible in this excerpt
        self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                 self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    # NOTE(review): the "env = {" opener for this dict literal is not
    # visible in this excerpt
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    # NOTE(review): presumably an "else:" precedes the next line
    env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))
    # NOTE(review): "return env" not visible in this excerpt

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])
class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  The actual work is delegated to a TLMigrateInstance tasklet.
  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    # FIX: this second pair used to re-initialize LEVEL_NODE instead of
    # LEVEL_NODE_RES; without a LEVEL_NODE_RES entry in needed_locks the
    # node-resource lock level is never processed and the LEVEL_NODE_RES
    # branch of DeclareLocks below is dead code.  Compare the sibling
    # LUInstanceFailover.ExpandNames, which declares both levels.
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    self._migrater = \
      TLMigrateInstance(self, self.op.instance_name,
                        cleanup=self.op.cleanup,
                        failover=False,
                        fallback=self.op.allow_failover,
                        allow_runtime_changes=self.op.allow_runtime_changes,
                        ignore_ipolicy=self.op.ignore_ipolicy)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        # externally-mirrored disks can migrate anywhere: lock either the
        # whole node set (iallocator decides) or the explicit node pair
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])
class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  Shuts the instance down, creates new disks on the target node, copies
  the data over via blockdev export/assemble, then removes the old disks.

  NOTE(review): line-sampled excerpt; several original lines (try:,
  else:, error lists, returns) are not visible below.
  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    # NOTE(review): the "env = {" opener is not visible in this excerpt
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    # NOTE(review): "return env" not visible in this excerpt

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # NOTE(review): the "nl = [" opener is not visible in this excerpt
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    # moving only works for plain-LVM and file-based disk layouts
    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)
    cluster = self.cfg.GetClusterInfo()
    group_info = self.cfg.GetNodeGroup(node.group)
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
    _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
                            ignore=self.op.ignore_ipolicy)

    if instance.admin_state == constants.ADMINST_UP:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MAXMEM],
                           instance.hypervisor)
      # NOTE(review): the LogInfo below presumably sits in an "else:"
      # branch — the else line is not visible in this excerpt
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existance
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    # NOTE(review): presumably "if msg:" guards the branch below and an
    # "else:" precedes the raise — those lines are not visible here
    if self.op.ignore_consistency:
      self.proc.LogWarning("Could not shutdown instance %s on node %s."
                           " Proceeding anyway. Please make sure node"
                           " %s is down. Error details: %s",
                           instance.name, source_node, source_node, msg)
    raise errors.OpExecError("Could not shutdown instance %s on"
                             (instance.name, source_node, msg))

    # create the target disks
    # NOTE(review): "try:" for this block is not visible in this excerpt
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      # on failure, drop the half-created disks and the reserved minors
      _RemoveDisks(self, instance, target_node=target_node)
      self.cfg.ReleaseDRBDMinors(instance.name)

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
                                               instance.name, True, idx)
      # NOTE(review): "if result.fail_msg:" presumably guards the next
      # lines — errs accumulates per-disk failures
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, (disk, instance),
                                             target_node, dev_path,
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)

    # NOTE(review): presumably guarded by "if errs:"
      self.LogWarning("Some disks failed to copy, aborting")
      _RemoveDisks(self, instance, target_node=target_node)
      self.cfg.ReleaseDRBDMinors(instance.name)
      raise errors.OpExecError("Errors during disk copy: %s" %

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      # NOTE(review): presumably guarded by "if not disks_ok:"
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node,
                                            (instance, None, None), False)
      msg = result.fail_msg
      # NOTE(review): presumably guarded by "if msg:"
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  Submits one OpInstanceMigrate job per primary instance of the node and
  returns them via ResultWithJobs.

  NOTE(review): line-sampled excerpt; some original lines (method bodies,
  closing brackets, returns) are not visible below.
  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE

  def CheckArguments(self):
    # NOTE(review): body not visible in this excerpt (presumably "pass")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      # NOTE(review): closing "}" not visible in this excerpt

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    # NOTE(review): the "return {" opener is not visible in this excerpt
      "NODE_NAME": self.op.node_name,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    # NOTE(review): "return (nl, nl)" not visible in this excerpt

  def CheckPrereq(self):
    # NOTE(review): body not visible in this excerpt (presumably "pass")

  def Exec(self, feedback_fn):
    # Prepare jobs for migration instances
    allow_runtime_changes = self.op.allow_runtime_changes
    # NOTE(review): the "jobs = [" opener of this list comprehension is
    # not visible in this excerpt
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
                                 iallocator=self.op.iallocator,
                                 target_node=self.op.target_node,
                                 allow_runtime_changes=allow_runtime_changes,
                                 ignore_ipolicy=self.op.ignore_ipolicy)]
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)
8030 class TLMigrateInstance(Tasklet):
8031 """Tasklet class for instance migration.
8034 @ivar live: whether the migration will be done live or non-live;
8035 this variable is initalized only after CheckPrereq has run
8036 @type cleanup: boolean
8037 @ivar cleanup: Wheater we cleanup from a failed migration
8038 @type iallocator: string
8039 @ivar iallocator: The iallocator used to determine target_node
8040 @type target_node: string
8041 @ivar target_node: If given, the target_node to reallocate the instance to
8042 @type failover: boolean
8043 @ivar failover: Whether operation results in failover or migration
8044 @type fallback: boolean
8045 @ivar fallback: Whether fallback to failover is allowed if migration not
8047 @type ignore_consistency: boolean
8048 @ivar ignore_consistency: Wheter we should ignore consistency between source
8050 @type shutdown_timeout: int
8051 @ivar shutdown_timeout: In case of failover timeout of the shutdown
8052 @type ignore_ipolicy: bool
8053 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8058 _MIGRATION_POLL_INTERVAL = 1 # seconds
8059 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
  def __init__(self, lu, instance_name, cleanup=False,
               failover=False, fallback=False,
               ignore_consistency=False,
               allow_runtime_changes=True,
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
               ignore_ipolicy=False):
    """Initializes this class.

    Stores the migration/failover parameters; see the class docstring
    for the meaning of each attribute.
    """
    Tasklet.__init__(self, lu)

    # Parameters (mirrored from the constructor arguments)
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later
    self.failover = failover
    self.fallback = fallback
    self.ignore_consistency = ignore_consistency
    self.shutdown_timeout = shutdown_timeout
    self.ignore_ipolicy = ignore_ipolicy
    self.allow_runtime_changes = allow_runtime_changes
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, validates the
    target node (explicit, iallocator-chosen or the DRBD secondary),
    and decides between live migration and failover.

    NOTE(review): line-sampled excerpt; several else-branches and
    continuation lines are not visible below.
    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None
    self.instance = instance
    cluster = self.cfg.GetClusterInfo()

    if (not self.cleanup and
        not instance.admin_state == constants.ADMINST_UP and
        not self.failover and self.fallback):
      self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
                      " switching to failover")
      self.failover = True

    if instance.disk_template not in constants.DTS_MIRRORED:
      # NOTE(review): the lines computing ``text`` ("failover"/"migration")
      # are not visible in this excerpt
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " %s" % (instance.disk_template, text),

    if instance.disk_template in constants.DTS_EXT_MIRROR:
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

      if self.lu.op.iallocator:
        self._RunAllocator()
      # NOTE(review): presumably an "else:" precedes the comment below
        # We set self.target_node as it is required by
        self.target_node = self.lu.op.target_node

      # Check that the target node is correct in terms of instance policy
      nodeinfo = self.cfg.GetNodeInfo(self.target_node)
      group_info = self.cfg.GetNodeGroup(nodeinfo.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
      _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
                              ignore=self.ignore_ipolicy)

      # self.target_node is already populated, either directly or by the
      # iallocator run above
      target_node = self.target_node
      if self.target_node == instance.primary_node:
        raise errors.OpPrereqError("Cannot migrate instance %s"
                                   " to its primary (%s)" %
                                   (instance.name, instance.primary_node),

      if len(self.lu.tasklets) == 1:
        # It is safe to release locks only when we're the only tasklet
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                      keep=[instance.primary_node, self.target_node])

    # NOTE(review): presumably an "else:" (internally-mirrored templates)
    # precedes this branch — the else line is not visible here
      secondary_nodes = instance.secondary_nodes
      if not secondary_nodes:
        raise errors.ConfigurationError("No secondary node but using"
                                        " %s disk template" %
                                        instance.disk_template)
      target_node = secondary_nodes[0]
      if self.lu.op.iallocator or (self.lu.op.target_node and
                                   self.lu.op.target_node != target_node):
        text = "failed over"
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be %s to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   (instance.disk_template, text),
      nodeinfo = self.cfg.GetNodeInfo(target_node)
      group_info = self.cfg.GetNodeGroup(nodeinfo.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
      _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
                              ignore=self.ignore_ipolicy)

    i_be = cluster.FillBE(instance)

    # check memory requirements on the secondary node
    if (not self.cleanup and
        (not self.failover or instance.admin_state == constants.ADMINST_UP)):
      self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
                                               "migrating instance %s" %
                                               i_be[constants.BE_MINMEM],
                                               instance.hypervisor)
    # NOTE(review): presumably an "else:" precedes the next statement
      self.lu.LogInfo("Not checking memory on the secondary node as"
                      " instance will not be started")

    # check if failover must be forced instead of migration
    if (not self.cleanup and not self.failover and
        i_be[constants.BE_ALWAYS_FAILOVER]):
      # NOTE(review): presumably "if self.fallback:" guards the next lines
      self.lu.LogInfo("Instance configured to always failover; fallback"
      self.failover = True
      # NOTE(review): presumably an "else:" precedes the raise below
      raise errors.OpPrereqError("This instance has been configured to"
                                 " always failover, please allow failover",

    # check bridge existance
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      if not self.failover:
        result = self.rpc.call_instance_migratable(instance.primary_node,
        if result.fail_msg and self.fallback:
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
          self.failover = True
        # NOTE(review): presumably an "else:" precedes the Raise below
          result.Raise("Can't migrate, please use failover",
                       prereq=True, ecode=errors.ECODE_STATE)

    assert not (self.failover and self.cleanup)

    if not self.failover:
      if self.lu.op.live is not None and self.lu.op.mode is not None:
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                   " parameters are accepted",
      if self.lu.op.live is not None:
        # NOTE(review): presumably "if self.lu.op.live:" / "else:" wrap
        # the two assignments below
        self.lu.op.mode = constants.HT_MIGRATION_LIVE
        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
        # reset the 'live' parameter to None so that repeated
        # invocations of CheckPrereq do not raise an exception
        self.lu.op.live = None
      elif self.lu.op.mode is None:
        # read the default value from the hypervisor
        i_hv = cluster.FillHV(self.instance, skip_globals=False)
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE

    # Failover is never live

    if not (self.failover or self.cleanup):
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.hypervisor)
      remote_info.Raise("Error checking instance on node %s" %
                        instance.primary_node)
      instance_running = bool(remote_info.payload)
      if instance_running:
        self.current_mem = int(remote_info.payload["memory"])
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    Uses an IAReqRelocate request to pick a target node; stores the
    result in self.target_node.
    """
    # FIXME: add a self.ignore_ipolicy option
    req = iallocator.IAReqRelocate(name=self.instance_name,
                                   relocate_from=[self.instance.primary_node])
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.lu.op.iallocator)

    # NOTE(review): presumably guarded by "if not ial.success:" — the
    # guard line is not visible in this excerpt
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.lu.op.iallocator, ial.info),
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.lu.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.target_node = ial.result[0]
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                    self.instance_name, self.lu.op.iallocator,
                    utils.CommaJoin(ial.result))
  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    NOTE(review): the surrounding loop ("while not all_done:") and the
    per-iteration initializers are not visible in this excerpt.
    """
    self.feedback_fn("* wait until resync is done")
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            (self.instance.disks,
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if min_percent < 100:
        self.feedback_fn(" - progress: %.1f%%" % min_percent)
  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    Closes all of the instance's block devices on the given node so the
    DRBD devices drop their primary role there.
    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)
  def _GoStandalone(self):
    """Disconnect from the network.

    Puts the instance's DRBD devices on all involved nodes into
    standalone (disconnected) mode.
    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)
  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    @param multimaster: if True, reattach the DRBD devices in
        dual-master mode (needed while the migration runs), otherwise
        in single-master mode
    """
    # NOTE(review): the "if multimaster: msg = ... / else:" lines are
    # not visible in this excerpt
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           (self.instance.disks, self.instance),
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)
  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused; you will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all;"
                               " in this case it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    # NOTE(review): presumably an "else:" precedes the next statement
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    if instance.disk_template in constants.DTS_INT_MIRROR:
      self._EnsureSecondary(demoted_node)
      # NOTE(review): the "try:" for the next call is not visible here
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore here errors, since if the device is standalone, it
      # won't be able to sync

      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")
8403 def _RevertDiskStatus(self):
8404 """Try to revert the disk status after a failed migration.
8407 target_node = self.target_node
# Externally mirrored templates have no DRBD state to revert.
# NOTE(review): the body of this "if" (presumably an early return) is
# elided from this extract -- confirm against upstream.
8408 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
# Demote the target node back to secondary and reconnect the pair in
# single-master mode.
# NOTE(review): the "try:" matching the "except" below is elided here.
8412 self._EnsureSecondary(target_node)
8413 self._GoStandalone()
8414 self._GoReconnect(False)
8415 self._WaitUntilSync()
8416 except errors.OpExecError, err:
# Best effort only: warn instead of raising, recovery is manual.
8417 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8418 " please try to recover the instance manually;"
8419 " error '%s'" % str(err))
8421 def _AbortMigration(self):
8422 """Call the hypervisor code to abort a started migration.
8425 instance = self.instance
8426 target_node = self.target_node
8427 source_node = self.source_node
8428 migration_info = self.migration_info
# Abort on the destination node first.
8430 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8434 abort_msg = abort_result.fail_msg
# NOTE(review): the "if abort_msg:" guard appears to be elided here --
# confirm against upstream.
8436 logging.error("Aborting migration failed on target node %s: %s",
8437 target_node, abort_msg)
8438 # Don't raise an exception here, as we still have to try to revert the
8439 # disk status, even if this step failed.
# Then tell the source node that the migration did not succeed
# (success=False).
8441 abort_result = self.rpc.call_instance_finalize_migration_src(
8442 source_node, instance, False, self.live)
8443 abort_msg = abort_result.fail_msg
8445 logging.error("Aborting migration failed on source node %s: %s",
8446 source_node, abort_msg)
8448 def _ExecMigration(self):
8449 """Migrate an instance.
8451 The migrate is done by:
8452 - change the disks into dual-master mode
8453 - wait until disks are fully synchronized again
8454 - migrate the instance
8455 - change disks on the new secondary node (the old primary) to secondary
8456 - wait until disks are fully synchronized
8457 - change disks into single-master mode
8460 instance = self.instance
8461 target_node = self.target_node
8462 source_node = self.source_node
# Differing hypervisor versions are not fatal, but the user is warned
# before the live migration starts.
8464 # Check for hypervisor version mismatch and warn the user.
8465 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8466 None, [self.instance.hypervisor])
8467 for ninfo in nodeinfo.values():
8468 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8470 (_, _, (src_info, )) = nodeinfo[source_node].payload
8471 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8473 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8474 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8475 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8476 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8477 if src_version != dst_version:
8478 self.feedback_fn("* warning: hypervisor version mismatch between"
8479 " source (%s) and target (%s) node" %
8480 (src_version, dst_version))
# Refuse to migrate on top of degraded disks; the mirror must be in sync.
8482 self.feedback_fn("* checking disk consistency between source and target")
8483 for (idx, dev) in enumerate(instance.disks):
8484 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8485 raise errors.OpExecError("Disk %s is degraded or not fully"
8486 " synchronized on target node,"
8487 " aborting migration" % idx)
# If the target node lacks memory, balloon the instance down first (when
# runtime changes are allowed), otherwise fail early.
8489 if self.current_mem > self.tgt_free_mem:
8490 if not self.allow_runtime_changes:
8491 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8492 " free memory to fit instance %s on target"
8493 " node %s (have %dMB, need %dMB)" %
8494 (instance.name, target_node,
8495 self.tgt_free_mem, self.current_mem))
8496 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8497 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8500 rpcres.Raise("Cannot modify instance runtime memory")
8502 # First get the migration information from the remote node
8503 result = self.rpc.call_migration_info(source_node, instance)
8504 msg = result.fail_msg
# NOTE(review): the "if msg:" guard for this error path appears to be
# elided here -- confirm against upstream.
8506 log_err = ("Failed fetching source migration information from %s: %s" %
8508 logging.error(log_err)
8509 raise errors.OpExecError(log_err)
8511 self.migration_info = migration_info = result.payload
# Internally mirrored disks are switched to dual-primary (master/master)
# so both nodes can access them during the live migration.
8513 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8514 # Then switch the disks to master/master mode
8515 self._EnsureSecondary(target_node)
8516 self._GoStandalone()
8517 self._GoReconnect(True)
8518 self._WaitUntilSync()
8520 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8521 result = self.rpc.call_accept_instance(target_node,
8524 self.nodes_ip[target_node])
8526 msg = result.fail_msg
# Any failure from here on aborts the migration and reverts disk state.
# NOTE(review): the "if msg:" guard appears to be elided here.
8528 logging.error("Instance pre-migration failed, trying to revert"
8529 " disk status: %s", msg)
8530 self.feedback_fn("Pre-migration failed, aborting")
8531 self._AbortMigration()
8532 self._RevertDiskStatus()
8533 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8534 (instance.name, msg))
8536 self.feedback_fn("* migrating instance to %s" % target_node)
8537 result = self.rpc.call_instance_migrate(source_node, instance,
8538 self.nodes_ip[target_node],
8540 msg = result.fail_msg
# NOTE(review): the "if msg:" guard appears to be elided here.
8542 logging.error("Instance migration failed, trying to revert"
8543 " disk status: %s", msg)
8544 self.feedback_fn("Migration failed, aborting")
8545 self._AbortMigration()
8546 self._RevertDiskStatus()
8547 raise errors.OpExecError("Could not migrate instance %s: %s" %
8548 (instance.name, msg))
# Poll the hypervisor for migration status until it leaves the ACTIVE
# state, reporting RAM transfer progress at most once per feedback
# interval. NOTE(review): the enclosing loop header ("while True:") and
# its "break" appear to be elided in this extract.
8550 self.feedback_fn("* starting memory transfer")
8551 last_feedback = time.time()
8553 result = self.rpc.call_instance_get_migration_status(source_node,
8555 msg = result.fail_msg
8556 ms = result.payload # MigrationStatus instance
8557 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8558 logging.error("Instance migration failed, trying to revert"
8559 " disk status: %s", msg)
8560 self.feedback_fn("Migration failed, aborting")
8561 self._AbortMigration()
8562 self._RevertDiskStatus()
8563 raise errors.OpExecError("Could not migrate instance %s: %s" %
8564 (instance.name, msg))
8566 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8567 self.feedback_fn("* memory transfer complete")
8570 if (utils.TimeoutExpired(last_feedback,
8571 self._MIGRATION_FEEDBACK_INTERVAL) and
8572 ms.transferred_ram is not None):
8573 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8574 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8575 last_feedback = time.time()
8577 time.sleep(self._MIGRATION_POLL_INTERVAL)
# Finalize on the source node: the instance now runs on the target.
8579 result = self.rpc.call_instance_finalize_migration_src(source_node,
8583 msg = result.fail_msg
# NOTE(review): the "if msg:" guard appears to be elided here.
8585 logging.error("Instance migration succeeded, but finalization failed"
8586 " on the source node: %s", msg)
8587 raise errors.OpExecError("Could not finalize instance migration: %s" %
8590 instance.primary_node = target_node
8592 # distribute new instance config to the other nodes
8593 self.cfg.Update(instance, self.feedback_fn)
8595 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8599 msg = result.fail_msg
8601 logging.error("Instance migration succeeded, but finalization failed"
8602 " on the target node: %s", msg)
8603 raise errors.OpExecError("Could not finalize instance migration: %s" %
# Demote the old primary to secondary and return the mirror to
# single-master mode.
8606 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8607 self._EnsureSecondary(source_node)
8608 self._WaitUntilSync()
8609 self._GoStandalone()
8610 self._GoReconnect(False)
8611 self._WaitUntilSync()
8613 # If the instance's disk template is `rbd' and there was a successful
8614 # migration, unmap the device from the source node.
8615 if self.instance.disk_template == constants.DT_RBD:
8616 disks = _ExpandCheckDisks(instance, instance.disks)
8617 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
# NOTE(review): the "for disk in disks:" header for this loop appears to
# be elided in this extract -- confirm against upstream.
8619 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8620 msg = result.fail_msg
# Unmap failures are non-fatal at this point: only log instructions for
# manual cleanup.
8622 logging.error("Migration was successful, but couldn't unmap the"
8623 " block device %s on source node %s: %s",
8624 disk.iv_name, source_node, msg)
8625 logging.error("You need to unmap the device %s manually on %s",
8626 disk.iv_name, source_node)
8628 self.feedback_fn("* done")
8630 def _ExecFailover(self):
8631 """Failover an instance.
8633 The failover is done by shutting it down on its present node and
8634 starting it on the secondary.
8637 instance = self.instance
8638 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8640 source_node = instance.primary_node
8641 target_node = self.target_node
# Disk consistency is only checked for instances that are marked
# administratively up; degraded disks are tolerated when the primary is
# offline or consistency checks were explicitly waived.
8643 if instance.admin_state == constants.ADMINST_UP:
8644 self.feedback_fn("* checking disk consistency between source and target")
8645 for (idx, dev) in enumerate(instance.disks):
8646 # for drbd, these are drbd over lvm
8647 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8649 if primary_node.offline:
8650 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8652 (primary_node.name, idx, target_node))
8653 elif not self.ignore_consistency:
8654 raise errors.OpExecError("Disk %s is degraded on target node,"
8655 " aborting failover" % idx)
# NOTE(review): the "else:" matching the admin_state check appears to be
# elided before this feedback call -- confirm against upstream.
8657 self.feedback_fn("* not checking disk consistency as instance is not"
8660 self.feedback_fn("* shutting down instance on source node")
8661 logging.info("Shutting down instance %s on node %s",
8662 instance.name, source_node)
8664 result = self.rpc.call_instance_shutdown(source_node, instance,
8665 self.shutdown_timeout)
8666 msg = result.fail_msg
# A failed shutdown is only acceptable when consistency is ignored or
# the primary node is already offline; otherwise abort.
8668 if self.ignore_consistency or primary_node.offline:
8669 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8670 " proceeding anyway; please make sure node"
8671 " %s is down; error details: %s",
8672 instance.name, source_node, source_node, msg)
8674 raise errors.OpExecError("Could not shutdown instance %s on"
8676 (instance.name, source_node, msg))
8678 self.feedback_fn("* deactivating the instance's disks on source node")
8679 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8680 raise errors.OpExecError("Can't shut down the instance's disks")
8682 instance.primary_node = target_node
8683 # distribute new instance config to the other nodes
8684 self.cfg.Update(instance, self.feedback_fn)
8686 # Only start the instance if it's marked as up
8687 if instance.admin_state == constants.ADMINST_UP:
8688 self.feedback_fn("* activating the instance's disks on target node %s" %
8690 logging.info("Starting instance %s on node %s",
8691 instance.name, target_node)
8693 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8694 ignore_secondaries=True)
# NOTE(review): the "if not disks_ok:" guard appears to be elided here.
8696 _ShutdownInstanceDisks(self.lu, instance)
8697 raise errors.OpExecError("Can't activate the instance's disks")
8699 self.feedback_fn("* starting the instance on the target node %s" %
8701 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8703 msg = result.fail_msg
# NOTE(review): the "if msg:" guard appears to be elided here.
8705 _ShutdownInstanceDisks(self.lu, instance)
8706 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8707 (instance.name, target_node, msg))
8709 def Exec(self, feedback_fn):
8710 """Perform the migration.
8713 self.feedback_fn = feedback_fn
8714 self.source_node = self.instance.primary_node
8716 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8717 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8718 self.target_node = self.instance.secondary_nodes[0]
8719 # Otherwise self.target_node has been populated either
8720 # directly, or through an iallocator.
8722 self.all_nodes = [self.source_node, self.target_node]
# Cache the secondary IPs of both nodes for the migration RPCs.
8723 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8724 in self.cfg.GetMultiNodeInfo(self.all_nodes))
# NOTE(review): the branch selection between failover, cleanup and
# migration (the surrounding if/else statements) is elided in this
# extract -- confirm against upstream.
8727 feedback_fn("Failover instance %s" % self.instance.name)
8728 self._ExecFailover()
8730 feedback_fn("Migrating instance %s" % self.instance.name)
8733 return self._ExecCleanup()
8735 return self._ExecMigration()
8738 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8740 """Wrapper around L{_CreateBlockDevInner}.
8742 This method annotates the root device first.
# Annotate the (single) root disk with its template's parameters before
# delegating to the recursive creator.
8745 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8746 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8750 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8752 """Create a tree of block devices on a given node.
8754 If this device type has to be created on secondaries, create it and
8757 If not, just recurse to children keeping the same 'force' value.
8759 @attention: The device has to be annotated already.
8761 @param lu: the lu on whose behalf we execute
8762 @param node: the node on which to create the device
8763 @type instance: L{objects.Instance}
8764 @param instance: the instance which owns the device
8765 @type device: L{objects.Disk}
8766 @param device: the device to create
8767 @type force_create: boolean
8768 @param force_create: whether to force creation of this device; this
8769 will be change to True whenever we find a device which has
8770 CreateOnSecondary() attribute
8771 @param info: the extra 'metadata' we should attach to the device
8772 (this will be represented as a LVM tag)
8773 @type force_open: boolean
8774 @param force_open: this parameter will be passes to the
8775 L{backend.BlockdevCreate} function where it specifies
8776 whether we run on primary or not, and it affects both
8777 the child assembly and the device own Open() execution
8780 if device.CreateOnSecondary():
# NOTE(review): the statement forcing creation for secondary-creatable
# devices is elided here -- confirm against upstream.
# Recurse into the children first, so leaves exist before their parent.
8784 for child in device.children:
8785 _CreateBlockDevInner(lu, node, instance, child, force_create,
# NOTE(review): the recursion's trailing arguments and the early return
# for non-forced creation are partially elided in this extract.
8788 if not force_create:
8791 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8794 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8795 """Create a single block device on a given node.
8797 This will not recurse over children of the device, so they must be
8800 @param lu: the lu on whose behalf we execute
8801 @param node: the node on which to create the device
8802 @type instance: L{objects.Instance}
8803 @param instance: the instance which owns the device
8804 @type device: L{objects.Disk}
8805 @param device: the device to create
8806 @param info: the extra 'metadata' we should attach to the device
8807 (this will be represented as a LVM tag)
8808 @type force_open: boolean
8809 @param force_open: this parameter will be passes to the
8810 L{backend.BlockdevCreate} function where it specifies
8811 whether we run on primary or not, and it affects both
8812 the child assembly and the device own Open() execution
# Resolve the node-specific device ID before issuing the RPC.
8815 lu.cfg.SetDiskID(device, node)
8816 result = lu.rpc.call_blockdev_create(node, device, device.size,
8817 instance.name, force_open, info)
8818 result.Raise("Can't create block device %s on"
8819 " node %s for instance %s" % (device, node, instance.name))
# Record the backend-assigned physical ID the first time this device is
# created.
8820 if device.physical_id is None:
8821 device.physical_id = result.payload
8824 def _GenerateUniqueNames(lu, exts):
8825 """Generate a suitable LV name.
8827 This will generate a logical volume name for the given instance.
# NOTE(review): the initialization of "results", the loop over "exts"
# ("for val in exts:") and the final return appear to be elided in this
# extract -- confirm against upstream.
# Each generated name is a cluster-unique ID followed by the suffix.
8832 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8833 results.append("%s%s" % (new_id, val))
8837 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8838 iv_name, p_minor, s_minor):
8839 """Generate a drbd8 device complete with its children.
# Exactly two VG names and two LV names are expected: data and metadata.
8842 assert len(vgnames) == len(names) == 2
8843 port = lu.cfg.AllocatePort()
8844 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
# The DRBD device is backed by a data LV and a fixed-size metadata LV.
8846 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8847 logical_id=(vgnames[0], names[0]),
8849 dev_meta = objects.Disk(dev_type=constants.LD_LV,
8850 size=constants.DRBD_META_SIZE,
8851 logical_id=(vgnames[1], names[1]),
8853 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8854 logical_id=(primary, secondary, port,
8857 children=[dev_data, dev_meta],
8858 iv_name=iv_name, params={})
# NOTE(review): the final "return drbd_dev" appears to be elided in this
# extract -- confirm against upstream.
# Mapping of disk template to the name prefix used when generating disk
# names (consulted by _GenerateDiskTemplate via .get()).
# NOTE(review): further entries and the closing brace appear to be elided
# in this extract -- confirm against upstream.
8862 _DISK_TEMPLATE_NAME_PREFIX = {
8863 constants.DT_PLAIN: "",
8864 constants.DT_RBD: ".rbd",
# Mapping of disk template to the logical-disk (LD_*) device type used by
# _GenerateDiskTemplate for the generated disks.
# NOTE(review): the closing brace appears to be elided in this extract.
8868 _DISK_TEMPLATE_DEVICE_TYPE = {
8869 constants.DT_PLAIN: constants.LD_LV,
8870 constants.DT_FILE: constants.LD_FILE,
8871 constants.DT_SHARED_FILE: constants.LD_FILE,
8872 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8873 constants.DT_RBD: constants.LD_RBD,
8877 def _GenerateDiskTemplate(
8878 lu, template_name, instance_name, primary_node, secondary_nodes,
8879 disk_info, file_storage_dir, file_driver, base_index,
8880 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8881 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8882 """Generate the entire disk layout for a given template type.
8885 #TODO: compute space requirements
8887 vgname = lu.cfg.GetVGName()
8888 disk_count = len(disk_info)
# Diskless instances get no disks; DRBD8 is handled specially since each
# disk needs a data/meta LV pair plus a minor on both nodes.
8891 if template_name == constants.DT_DISKLESS:
8893 elif template_name == constants.DT_DRBD8:
8894 if len(secondary_nodes) != 1:
8895 raise errors.ProgrammerError("Wrong template configuration")
8896 remote_node = secondary_nodes[0]
8897 minors = lu.cfg.AllocateDRBDMinor(
8898 [primary_node, remote_node] * len(disk_info), instance_name)
8900 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8902 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
# Two LV names are generated per disk: one data, one metadata.
8905 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8906 for i in range(disk_count)]):
8907 names.append(lv_prefix + "_data")
8908 names.append(lv_prefix + "_meta")
8909 for idx, disk in enumerate(disk_info):
8910 disk_index = idx + base_index
8911 data_vg = disk.get(constants.IDISK_VG, vgname)
8912 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8913 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8914 disk[constants.IDISK_SIZE],
8916 names[idx * 2:idx * 2 + 2],
8917 "disk/%d" % disk_index,
8918 minors[idx * 2], minors[idx * 2 + 1])
8919 disk_dev.mode = disk[constants.IDISK_MODE]
8920 disks.append(disk_dev)
# NOTE(review): part of the template dispatch (the branch structure
# around this ProgrammerError) appears to be elided in this extract --
# confirm against upstream.
8923 raise errors.ProgrammerError("Wrong template configuration")
# File-based templates must verify that the relevant storage type is
# enabled on the cluster before generating disks.
8925 if template_name == constants.DT_FILE:
8927 elif template_name == constants.DT_SHARED_FILE:
8928 _req_shr_file_storage()
8930 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8931 if name_prefix is None:
8934 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8935 (name_prefix, base_index + i)
8936 for i in range(disk_count)])
# Each template encodes its backing storage in the disk's logical_id;
# pick the matching constructor function.
8938 if template_name == constants.DT_PLAIN:
8939 def logical_id_fn(idx, _, disk):
8940 vg = disk.get(constants.IDISK_VG, vgname)
8941 return (vg, names[idx])
8942 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8944 lambda _, disk_index, disk: (file_driver,
8945 "%s/disk%d" % (file_storage_dir,
8947 elif template_name == constants.DT_BLOCK:
8949 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8950 disk[constants.IDISK_ADOPT])
8951 elif template_name == constants.DT_RBD:
8952 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8954 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8956 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8958 for idx, disk in enumerate(disk_info):
8959 disk_index = idx + base_index
8960 size = disk[constants.IDISK_SIZE]
8961 feedback_fn("* disk %s, size %s" %
8962 (disk_index, utils.FormatUnit(size, "h")))
8963 disks.append(objects.Disk(dev_type=dev_type, size=size,
8964 logical_id=logical_id_fn(idx, disk_index, disk),
8965 iv_name="disk/%d" % disk_index,
8966 mode=disk[constants.IDISK_MODE],
# NOTE(review): the constructor's trailing arguments and the final
# "return disks" appear to be elided in this extract.
8972 def _GetInstanceInfoText(instance):
8973 """Compute that text that should be added to the disk's metadata.
8976 return "originstname+%s" % instance.name
8979 def _CalcEta(time_taken, written, total_size):
8980 """Calculates the ETA based on size written and total size.
8982 @param time_taken: The time taken so far
8983 @param written: amount written so far
8984 @param total_size: The total size of data to be written
8985 @return: The remaining time in seconds
8988 avg_time = time_taken / float(written)
8989 return (total_size - written) * avg_time
8992 def _WipeDisks(lu, instance):
8993 """Wipes instance disks.
8995 @type lu: L{LogicalUnit}
8996 @param lu: the logical unit on whose behalf we execute
8997 @type instance: L{objects.Instance}
8998 @param instance: the instance whose disks we should wipe
8999 @return: the success of the wipe
9002 node = instance.primary_node
9004 for device in instance.disks:
9005 lu.cfg.SetDiskID(device, node)
# Pause mirror syncing while wiping so the wipe I/O does not compete
# with resync traffic.
9007 logging.info("Pause sync of instance %s disks", instance.name)
9008 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9009 (instance.disks, instance),
9011 result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)
9013 for idx, success in enumerate(result.payload):
# NOTE(review): the "if not success:" guard appears to be elided here.
9015 logging.warn("pause-sync of instance %s for disks %d failed",
9019 for idx, device in enumerate(instance.disks):
9020 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9021 # MAX_WIPE_CHUNK at max
9022 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
9023 constants.MIN_WIPE_CHUNK_PERCENT)
9024 # we _must_ make this an int, otherwise rounding errors will
9026 wipe_chunk_size = int(wipe_chunk_size)
9028 lu.LogInfo("* Wiping disk %d", idx)
9029 logging.info("Wiping disk %d for instance %s, node %s using"
9030 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
# NOTE(review): the initialization of "offset"/"size"/"last_output" for
# the wipe loop appears to be elided here -- confirm against upstream.
9035 start_time = time.time()
9037 while offset < size:
9038 wipe_size = min(wipe_chunk_size, size - offset)
9039 logging.debug("Wiping disk %d, offset %s, chunk %s",
9040 idx, offset, wipe_size)
9041 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9043 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9044 (idx, offset, wipe_size))
# Emit progress with an ETA at most once per minute.
9047 if now - last_output >= 60:
9048 eta = _CalcEta(now - start_time, offset, size)
9049 lu.LogInfo(" - done: %.1f%% ETA: %s" %
9050 (offset / float(size) * 100, utils.FormatSeconds(eta)))
# Resume syncing afterwards; failures here are reported as warnings
# rather than raised, leaving recovery to the administrator.
9053 logging.info("Resume sync of instance %s disks", instance.name)
9055 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9056 (instance.disks, instance),
9060 lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
9061 " please have a look at the status and troubleshoot"
9062 " the issue: %s", node, result.fail_msg)
9064 for idx, success in enumerate(result.payload):
9066 lu.LogWarning("Resume sync of disk %d failed, please have a"
9067 " look at the status and troubleshoot the issue", idx)
9068 logging.warn("resume-sync of instance %s for disks %d failed",
9072 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9073 """Create all disks for an instance.
9075 This abstracts away some work from AddInstance.
9077 @type lu: L{LogicalUnit}
9078 @param lu: the logical unit on whose behalf we execute
9079 @type instance: L{objects.Instance}
9080 @param instance: the instance whose disks we should create
9082 @param to_skip: list of indices to skip
9083 @type target_node: string
9084 @param target_node: if passed, overrides the target node for creation
9086 @return: the success of the creation
9089 info = _GetInstanceInfoText(instance)
9090 if target_node is None:
9091 pnode = instance.primary_node
9092 all_nodes = instance.all_nodes
# NOTE(review): the "else:" branch deriving pnode/all_nodes from
# target_node appears to be elided here -- confirm against upstream.
# File-based disks live under a directory that must exist on the node
# before any disk can be created.
9097 if instance.disk_template in constants.DTS_FILEBASED:
9098 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9099 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9101 result.Raise("Failed to create directory '%s' on"
9102 " node %s" % (file_storage_dir, pnode))
9104 # Note: this needs to be kept in sync with adding of disks in
9105 # LUInstanceSetParams
9106 for idx, device in enumerate(instance.disks):
9107 if to_skip and idx in to_skip:
9109 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
# Creation is forced (f_create) only on the primary node.
9111 for node in all_nodes:
9112 f_create = node == pnode
9113 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9116 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9117 """Remove all disks for an instance.
9119 This abstracts away some work from `AddInstance()` and
9120 `RemoveInstance()`. Note that in case some of the devices couldn't
9121 be removed, the removal will continue with the other ones (compare
9122 with `_CreateDisks()`).
9124 @type lu: L{LogicalUnit}
9125 @param lu: the logical unit on whose behalf we execute
9126 @type instance: L{objects.Instance}
9127 @param instance: the instance whose disks we should remove
9128 @type target_node: string
9129 @param target_node: used to override the node on which to remove the disks
9131 @return: the success of the removal
9134 logging.info("Removing block devices for instance %s", instance.name)
9137 ports_to_release = set()
9138 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9139 for (idx, device) in enumerate(anno_disks):
# NOTE(review): the "if target_node:" selection between the two edata
# assignments appears to be elided here -- confirm against upstream.
9141 edata = [(target_node, device)]
9143 edata = device.ComputeNodeTree(instance.primary_node)
9144 for node, disk in edata:
9145 lu.cfg.SetDiskID(disk, node)
9146 result = lu.rpc.call_blockdev_remove(node, disk)
9148 lu.LogWarning("Could not remove disk %s on node %s,"
9149 " continuing anyway: %s", idx, node, result.fail_msg)
# Failures on offline non-primary nodes do not count against the
# overall result.
9150 if not (result.offline and node != instance.primary_node):
9153 # if this is a DRBD disk, return its port to the pool
9154 if device.dev_type in constants.LDS_DRBD:
9155 ports_to_release.add(device.logical_id[2])
# Only return the TCP/UDP ports to the pool once we know all devices
# were removed (or failures are deliberately ignored).
9157 if all_result or ignore_failures:
9158 for port in ports_to_release:
9159 lu.cfg.AddTcpUdpPort(port)
9161 if instance.disk_template == constants.DT_FILE:
9162 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
# NOTE(review): the target-node selection for the directory removal is
# partially elided here -- confirm against upstream.
9166 tgt = instance.primary_node
9167 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9169 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9170 file_storage_dir, instance.primary_node, result.fail_msg)
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  @type disk_template: string
  @param disk_template: the instance's disk template
  @type disks: list
  @param disks: list of disk definition dicts (using the IDISK_* keys)
  @rtype: dict
  @return: mapping of volume-group name to the required size, in
      mebibytes; empty for templates not backed by a volume group
  @raise errors.ProgrammerError: for an unknown disk template

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      vg_name = disk[constants.IDISK_VG]
      # The running total must be looked up by the VG *name*; using the
      # IDISK_VG constant as the key would make disks sharing a VG
      # overwrite each other's size instead of accumulating.
      vgs[vg_name] = \
        vgs.get(vg_name, 0) + disk[constants.IDISK_SIZE] + payload
    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
    }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
9208 def _FilterVmNodes(lu, nodenames):
9209 """Filters out non-vm_capable nodes from a list.
9211 @type lu: L{LogicalUnit}
9212 @param lu: the logical unit for which we check
9213 @type nodenames: list
9214 @param nodenames: the list of nodes on which we should check
9216 @return: the list of vm-capable nodes
9219 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9220 return [name for name in nodenames if name not in vm_nodes]
9223 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9224 """Hypervisor parameter validation.
9226 This function abstract the hypervisor parameter validation to be
9227 used in both instance create and instance modify.
9229 @type lu: L{LogicalUnit}
9230 @param lu: the logical unit for which we check
9231 @type nodenames: list
9232 @param nodenames: the list of nodes on which we should check
9233 @type hvname: string
9234 @param hvname: the name of the hypervisor we should use
9235 @type hvparams: dict
9236 @param hvparams: the parameters which we need to check
9237 @raise errors.OpPrereqError: if the parameters are not valid
# Only vm_capable nodes can validate hypervisor parameters.
9240 nodenames = _FilterVmNodes(lu, nodenames)
9242 cluster = lu.cfg.GetClusterInfo()
# Merge the given parameters on top of the cluster-level defaults for
# this hypervisor before validating.
9243 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9245 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9246 for node in nodenames:
# NOTE(review): the per-node result lookup (assignment to "info") appears
# to be elided here -- confirm against upstream.
9250 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9253 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9254 """OS parameters validation.
9256 @type lu: L{LogicalUnit}
9257 @param lu: the logical unit for which we check
9258 @type required: boolean
9259 @param required: whether the validation should fail if the OS is not
9261 @type nodenames: list
9262 @param nodenames: the list of nodes on which we should check
9263 @type osname: string
9264 @param osname: the name of the OS we should use
9265 @type osparams: dict
9266 @param osparams: the parameters which we need to check
9267 @raise errors.OpPrereqError: if the parameters are not valid
9270 nodenames = _FilterVmNodes(lu, nodenames)
9271 result = lu.rpc.call_os_validate(nodenames, required, osname,
9272 [constants.OS_VALIDATE_PARAMETERS],
9274 for node, nres in result.items():
9275 # we don't check for offline cases since this should be run only
9276 # against the master node and/or an instance's nodes
9277 nres.Raise("OS Parameters validation failed on node %s" % node)
# A false payload means the OS was not found on that node, so parameter
# validation is skipped there.
9278 if not nres.payload:
9279 lu.LogInfo("OS %s not found on node %s, validation skipped",
9283 class LUInstanceCreate(LogicalUnit):
9284 """Create an instance.
9287 HPATH = "instance-add"
9288 HTYPE = constants.HTYPE_INSTANCE
9291 def CheckArguments(self):
9295 # do not require name_check to ease forward/backward compatibility
9297 if self.op.no_install and self.op.start:
9298 self.LogInfo("No-installation mode selected, disabling startup")
9299 self.op.start = False
9300 # validate/normalize the instance name
9301 self.op.instance_name = \
9302 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9304 if self.op.ip_check and not self.op.name_check:
9305 # TODO: make the ip check more flexible and not depend on the name check
9306 raise errors.OpPrereqError("Cannot do IP address check without a name"
9307 " check", errors.ECODE_INVAL)
9309 # check nics' parameter names
9310 for nic in self.op.nics:
9311 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9313 # check disks. parameter names and consistent adopt/no-adopt strategy
9314 has_adopt = has_no_adopt = False
9315 for disk in self.op.disks:
9316 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9317 if constants.IDISK_ADOPT in disk:
9321 if has_adopt and has_no_adopt:
9322 raise errors.OpPrereqError("Either all disks are adopted or none is",
9325 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9326 raise errors.OpPrereqError("Disk adoption is not supported for the"
9327 " '%s' disk template" %
9328 self.op.disk_template,
9330 if self.op.iallocator is not None:
9331 raise errors.OpPrereqError("Disk adoption not allowed with an"
9332 " iallocator script", errors.ECODE_INVAL)
9333 if self.op.mode == constants.INSTANCE_IMPORT:
9334 raise errors.OpPrereqError("Disk adoption not allowed for"
9335 " instance import", errors.ECODE_INVAL)
9337 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9338 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9339 " but no 'adopt' parameter given" %
9340 self.op.disk_template,
9343 self.adopt_disks = has_adopt
9345 # instance name verification
9346 if self.op.name_check:
9347 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9348 self.op.instance_name = self.hostname1.name
9349 # used in CheckPrereq for ip ping check
9350 self.check_ip = self.hostname1.ip
9352 self.check_ip = None
9354 # file storage checks
9355 if (self.op.file_driver and
9356 not self.op.file_driver in constants.FILE_DRIVER):
9357 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9358 self.op.file_driver, errors.ECODE_INVAL)
9360 if self.op.disk_template == constants.DT_FILE:
9361 opcodes.RequireFileStorage()
9362 elif self.op.disk_template == constants.DT_SHARED_FILE:
9363 opcodes.RequireSharedFileStorage()
9365 ### Node/iallocator related checks
9366 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9368 if self.op.pnode is not None:
9369 if self.op.disk_template in constants.DTS_INT_MIRROR:
9370 if self.op.snode is None:
9371 raise errors.OpPrereqError("The networked disk templates need"
9372 " a mirror node", errors.ECODE_INVAL)
9374 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9376 self.op.snode = None
9378 self._cds = _GetClusterDomainSecret()
9380 if self.op.mode == constants.INSTANCE_IMPORT:
9381 # On import force_variant must be True, because if we forced it at
9382 # initial install, our only chance when importing it back is that it
9384 self.op.force_variant = True
9386 if self.op.no_install:
9387 self.LogInfo("No-installation mode has no effect during import")
9389 elif self.op.mode == constants.INSTANCE_CREATE:
9390 if self.op.os_type is None:
9391 raise errors.OpPrereqError("No guest OS specified",
9393 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9394 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9395 " installation" % self.op.os_type,
9397 if self.op.disk_template is None:
9398 raise errors.OpPrereqError("No disk template specified",
9401 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9402 # Check handshake to ensure both clusters have the same domain secret
9403 src_handshake = self.op.source_handshake
9404 if not src_handshake:
9405 raise errors.OpPrereqError("Missing source handshake",
9408 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9411 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9414 # Load and check source CA
9415 self.source_x509_ca_pem = self.op.source_x509_ca
9416 if not self.source_x509_ca_pem:
9417 raise errors.OpPrereqError("Missing source X509 CA",
9421 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9423 except OpenSSL.crypto.Error, err:
9424 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9425 (err, ), errors.ECODE_INVAL)
9427 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9428 if errcode is not None:
9429 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9432 self.source_x509_ca = cert
9434 src_instance_name = self.op.source_instance_name
9435 if not src_instance_name:
9436 raise errors.OpPrereqError("Missing source instance name",
9439 self.source_instance_name = \
9440 netutils.GetHostname(name=src_instance_name).name
9443 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9444 self.op.mode, errors.ECODE_INVAL)
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation: the new instance
    name itself, plus either all nodes (when an iallocator will choose
    the nodes later) or the explicitly requested primary/secondary
    nodes; for imports, the source node is locked as well.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    # reserve the new instance's name via the lock manager so a concurrent
    # creation with the same name cannot proceed
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
      # specifying a group on instance creation and then selecting nodes from
      # that group
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
    # NOTE(review): the "else:" introducing the explicit-node branch below
    # appears to be elided from this excerpt -- confirm against full source
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidental modification)
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      # a missing source path defaults to the instance name (relative path)
      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        # source node unknown yet: lock all nodes so the export can be
        # searched for later (see _ReadExportInfo)
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
      # NOTE(review): the error-code argument above and the "else:" branch
      # for a known source node appear to be elided from this excerpt
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          # normalize relative export paths below the cluster export dir
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    Builds an L{iallocator.IAReqInstanceAlloc} request from the already
    computed instance parameters, runs the configured iallocator script
    and stores the chosen primary node (and secondary node, when the
    disk template requires two nodes) back into the opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    memory = self.be_full[constants.BE_MAXMEM]
    spindle_use = self.be_full[constants.BE_SPINDLE_USE]
    req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
                                        disk_template=self.op.disk_template,
                                        vcpus=self.be_full[constants.BE_VCPUS],
                                        spindle_use=spindle_use,
                                        hypervisor=self.op.hypervisor)
    # NOTE(review): further request keyword arguments (presumably the local
    # "memory" and "nics" values computed above, and the disks) appear to be
    # elided from this excerpt -- confirm the full argument list
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    # NOTE(review): the allocator failure check guarding this raise
    # (presumably "if not ial.success:") is elided from this excerpt
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    # two required nodes means a mirrored template: second result entry is
    # the secondary node
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    # NOTE(review): the opening of the env dict literal (presumably
    # "env = {") is elided from this excerpt
      "ADD_MODE": self.op.mode,
    if self.op.mode == constants.INSTANCE_IMPORT:
      # import-specific environment: where the export data comes from
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      minmem=self.be_full[constants.BE_MINMEM],
      maxmem=self.be_full[constants.BE_MAXMEM],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      # only size and mode of each disk are exposed to the hooks
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      hypervisor_name=self.op.hypervisor,
    # NOTE(review): trailing keyword arguments, the closing parenthesis and
    # the final "return env" appear to be elided from this excerpt
  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # hooks run on the master node plus the instance's primary and
    # secondary nodes
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    # NOTE(review): the return statement (presumably "return (nl, nl)")
    # is elided from this excerpt
  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      # no source node given: search all locked nodes for an export
      # matching the (relative) source path
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      for node in exp_list:
        if exp_list[node].fail_msg:
          # NOTE(review): the statement skipping unreachable nodes
          # (presumably "continue") is elided from this excerpt
        if src_path in exp_list[node].payload:
          # found it: fix up opcode so later phases see absolute values
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
      # NOTE(review): the remaining join argument, the loop exit and the
      # not-found guard for the raise below are elided from this excerpt
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    # the payload is a serialized ConfigParser-style export description
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    # NOTE(review): the final "return export_info" is elided from this
    # excerpt (the docstring promises the export information is returned)
  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    the export declares them.

    @type einfo: ConfigParser-style object (see L{_ReadExportInfo})
    @param einfo: the parsed export information

    """
    # the OS always comes from the export
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
        # NOTE(review): the option-name argument and validation guard layout
        # are partially elided from this excerpt
        if self.op.disk_template not in constants.DISK_TEMPLATES:
          raise errors.OpPrereqError("Disk template specified in configuration"
                                     " file is not one of the allowed values:"
                                     " ".join(constants.DISK_TEMPLATES),
      # NOTE(review): the "else:" for an export lacking disk_template info
      # appears to be elided here
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",

    if not self.op.disks:
      # NOTE(review): the accumulator initialization (presumably
      # "disks = []") is elided from this excerpt
      # TODO: import the disk iv_name too
      for idx in range(constants.MAX_DISKS):
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
      self.op.disks = disks
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",

    if not self.op.nics:
      # NOTE(review): the nic-list accumulator and per-nic dict setup are
      # elided from this excerpt
      for idx in range(constants.MAX_NICS):
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
          # read every nic parameter plus ip/mac from the export
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
        # Compatibility for the old "memory" be param
        if name == constants.BE_MEMORY:
          if constants.BE_MAXMEM not in self.op.beparams:
            self.op.beparams[constants.BE_MAXMEM] = value
          if constants.BE_MINMEM not in self.op.beparams:
            self.op.beparams[constants.BE_MINMEM] = value
    # NOTE(review): the "else:" selecting the old-style fallback below
    # appears to be elided from this excerpt
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value
  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    Drops from the opcode's hv/be/nic/os parameter dicts every entry
    whose value equals the cluster-level default, so the instance keeps
    inheriting future changes to those defaults instead of pinning the
    current values.

    @param cluster: the cluster configuration object (provides the
        C{SimpleFill*} default-resolution helpers)

    """
    # hvparams: compare against the fully-filled defaults for this
    # hypervisor/OS combination
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params (per-nic dicts)
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          # NOTE(review): the deletion statement (presumably
          # "del nic[name]") is elided from this excerpt
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]
  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    Sets C{self.instance_file_storage_dir}: C{None} for non-file-based
    disk templates, otherwise the cluster storage dir joined with the
    optional per-instance subdirectory and the instance name.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      # NOTE(review): the initialization of "joinargs" (presumably
      # "joinargs = []") is elided from this excerpt
      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      # NOTE(review): the "else:" selecting the plain file storage dir
      # appears to be elided here
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined",
      joinargs.append(cfg_storagedir)

      # optional user-specified subdirectory below the cluster dir
      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      # the instance name is always the final path component
      joinargs.append(self.op.instance_name)

      # pylint: disable=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
  def CheckPrereq(self): # pylint: disable=R0914
    """Check prerequisites.

    Validates and fills in all instance parameters (hypervisor, backend,
    OS, nics, disks), optionally runs the iallocator to pick nodes,
    releases unneeded node locks and performs node-side checks (online,
    capable, free disk/memory, OS availability, adoption data).

    Side effects: populates C{self.nics}, C{self.disks}, C{self.be_full},
    C{self.hv_full}, C{self.os_full}, C{self.pnode}, C{self.secondaries}
    and C{self.dry_run_result}.

    NOTE(review): several statements of this method (else-branches,
    accumulator initializations, try: lines, guard conditions) are elided
    from this excerpt; inline notes below mark the obvious gaps.

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)
      self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
    # NOTE(review): "else:" elided here
      self._old_instance_name = None

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    # resolve 'auto'/unset hypervisor to the cluster default
    if (self.op.hypervisor is None or
        self.op.hypervisor == constants.VALUE_AUTO):
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 (self.op.hypervisor, ",".join(enabled_hvs)),

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
    for param, value in self.op.beparams.iteritems():
      if value == constants.VALUE_AUTO:
        self.op.beparams[param] = default_beparams[param]
    objects.UpgradeBeParams(self.op.beparams)
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if requested via identify_defaults
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup -- NOTE(review): "self.nics = []" init appears elided
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get(constants.INIC_MODE, None)
      nic_mode = nic_mode_req
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      # NOTE(review): "else:" elided here
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        # NOTE(review): assignment for the no-ip case elided here
      elif ip.lower() == constants.VALUE_AUTO:
        # 'auto' means: use the ip resolved during name checking
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
        nic_ip = self.hostname1.ip
      # NOTE(review): "else:" branch for an explicit ip elided here
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",

      # MAC address verification
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        # NOTE(review): the "try:" opening this reservation elided here
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      # Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      if link == constants.VALUE_AUTO:
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
      # NOTE(review): nicparams dict init and the guards for the two
      # assignments below are elided here
        nicparams[constants.NIC_MODE] = nic_mode
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      # NOTE(review): the missing-size guard and the "try:" converting the
      # size to an integer are elided here
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,

      data_vg = disk.get(constants.IDISK_VG, default_vg)
      # NOTE(review): "new_disk = {" dict opener elided here
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
      # optional parameters are copied only when present
      if constants.IDISK_METAVG in disk:
        new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:
      # collect the dump file for each disk ("False" for missing dumps)
      for idx in range(len(self.disks)):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        # NOTE(review): "else:" elided here
          disk_images.append(False)

      self.src_images = disk_images

      # when re-importing under the same name, reuse the exported MACs
      # for nics left on 'auto'
      if self.op.instance_name == self._old_instance_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    # let the iallocator pick the nodes, if one was requested
    if self.op.iallocator is not None:
      self._RunAllocator()

    # Release all unneeded node locks
    _ReleaseLocks(self, locking.LEVEL_NODE,
                  keep=filter(None, [self.op.pnode, self.op.snode,
    _ReleaseLocks(self, locking.LEVEL_NODE_RES,
                  keep=filter(None, [self.op.pnode, self.op.snode,

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    # NOTE(review): the offline/drained guards for the two raises below
    # are elided here
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

      snode = self.cfg.GetNodeInfo(self.op.snode)
      if pnode.group != snode.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"

    nodenames = [pnode.name] + self.secondaries

    # Verify instance specs
    spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
    # NOTE(review): "ispec = {" dict opener elided here
      constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
      constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
      constants.ISPEC_DISK_COUNT: len(self.disks),
      constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
      constants.ISPEC_NIC_COUNT: len(self.nics),
      constants.ISPEC_SPINDLE_USE: spindle_use,

    group_info = self.cfg.GetNodeGroup(pnode.group)
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
    res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
    if not self.op.ignore_ipolicy and res:
      raise errors.OpPrereqError(("Instance allocation to group %s violates"
                                  " policy: %s") % (pnode.group,
                                                    utils.CommaJoin(res)),
                                 errors.ECODE_INVAL)

    if not self.adopt_disks:
      if self.op.disk_template == constants.DT_RBD:
        # _CheckRADOSFreeSpace() is just a placeholder.
        # Any function that checks prerequisites can be placed here.
        # Check if there is enough space on the RADOS cluster.
        _CheckRADOSFreeSpace()
      # NOTE(review): "else:" elided here
        # Check lv size requirements, if not adopting
        req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
        _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        # NOTE(review): the "try:" opening this reservation elided here
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      # NOTE(review): "if delta:" guard elided here
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      # NOTE(review): "if online_lvs:" guard elided here
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      # NOTE(review): "if baddisks:" guard elided here
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      # NOTE(review): "if delta:" guard elided here
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      # update disk sizes from the actual block device sizes
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    #TODO(dynmem): use MINMEM for checking
    _CheckNodeFreeMemory(self, self.pnode.name,
                         "creating instance %s" % self.op.instance_name,
                         self.be_full[constants.BE_MAXMEM],
                         self.op.hypervisor)

    self.dry_run_result = list(nodenames)
10135 def Exec(self, feedback_fn):
10136 """Create and add the instance to the cluster.
10139 instance = self.op.instance_name
10140 pnode_name = self.pnode.name
10142 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10143 self.owned_locks(locking.LEVEL_NODE)), \
10144 "Node locks differ from node resource locks"
10146 ht_kind = self.op.hypervisor
10147 if ht_kind in constants.HTS_REQ_PORT:
10148 network_port = self.cfg.AllocatePort()
10150 network_port = None
10152 # This is ugly but we got a chicken-egg problem here
10153 # We can only take the group disk parameters, as the instance
10154 # has no disks yet (we are generating them right here).
10155 node = self.cfg.GetNodeInfo(pnode_name)
10156 nodegroup = self.cfg.GetNodeGroup(node.group)
10157 disks = _GenerateDiskTemplate(self,
10158 self.op.disk_template,
10159 instance, pnode_name,
10162 self.instance_file_storage_dir,
10163 self.op.file_driver,
10166 self.cfg.GetGroupDiskParams(nodegroup))
10168 iobj = objects.Instance(name=instance, os=self.op.os_type,
10169 primary_node=pnode_name,
10170 nics=self.nics, disks=disks,
10171 disk_template=self.op.disk_template,
10172 admin_state=constants.ADMINST_DOWN,
10173 network_port=network_port,
10174 beparams=self.op.beparams,
10175 hvparams=self.op.hvparams,
10176 hypervisor=self.op.hypervisor,
10177 osparams=self.op.osparams,
10181 for tag in self.op.tags:
10184 if self.adopt_disks:
10185 if self.op.disk_template == constants.DT_PLAIN:
10186 # rename LVs to the newly-generated names; we need to construct
10187 # 'fake' LV disks with the old data, plus the new unique_id
10188 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10190 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10191 rename_to.append(t_dsk.logical_id)
10192 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10193 self.cfg.SetDiskID(t_dsk, pnode_name)
10194 result = self.rpc.call_blockdev_rename(pnode_name,
10195 zip(tmp_disks, rename_to))
10196 result.Raise("Failed to rename adoped LVs")
10198 feedback_fn("* creating instance disks...")
10200 _CreateDisks(self, iobj)
10201 except errors.OpExecError:
10202 self.LogWarning("Device creation failed, reverting...")
10204 _RemoveDisks(self, iobj)
10206 self.cfg.ReleaseDRBDMinors(instance)
10209 feedback_fn("adding instance %s to cluster config" % instance)
10211 self.cfg.AddInstance(iobj, self.proc.GetECId())
10213 # Declare that we don't want to remove the instance lock anymore, as we've
10214 # added the instance to the config
10215 del self.remove_locks[locking.LEVEL_INSTANCE]
10217 if self.op.mode == constants.INSTANCE_IMPORT:
10218 # Release unused nodes
10219 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10221 # Release all nodes
10222 _ReleaseLocks(self, locking.LEVEL_NODE)
10225 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10226 feedback_fn("* wiping instance disks...")
10228 _WipeDisks(self, iobj)
10229 except errors.OpExecError, err:
10230 logging.exception("Wiping disks failed")
10231 self.LogWarning("Wiping instance disks failed (%s)", err)
10235 # Something is already wrong with the disks, don't do anything else
10237 elif self.op.wait_for_sync:
10238 disk_abort = not _WaitForSync(self, iobj)
10239 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10240 # make sure the disks are not degraded (still sync-ing is ok)
10241 feedback_fn("* checking mirrors status")
10242 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10247 _RemoveDisks(self, iobj)
10248 self.cfg.RemoveInstance(iobj.name)
10249 # Make sure the instance lock gets removed
10250 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10251 raise errors.OpExecError("There are some degraded disks for"
10254 # Release all node resource locks
10255 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10257 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10258 # we need to set the disks ID to the primary node, since the
10259 # preceding code might or might have not done it, depending on
10260 # disk template and other options
10261 for disk in iobj.disks:
10262 self.cfg.SetDiskID(disk, pnode_name)
10263 if self.op.mode == constants.INSTANCE_CREATE:
10264 if not self.op.no_install:
10265 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10266 not self.op.wait_for_sync)
10268 feedback_fn("* pausing disk sync to install instance OS")
10269 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10272 for idx, success in enumerate(result.payload):
10274 logging.warn("pause-sync of instance %s for disk %d failed",
10277 feedback_fn("* running the instance OS create scripts...")
10278 # FIXME: pass debug option from opcode to backend
10280 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10281 self.op.debug_level)
10283 feedback_fn("* resuming disk sync")
10284 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10287 for idx, success in enumerate(result.payload):
10289 logging.warn("resume-sync of instance %s for disk %d failed",
10292 os_add_result.Raise("Could not add os for instance %s"
10293 " on node %s" % (instance, pnode_name))
10296 if self.op.mode == constants.INSTANCE_IMPORT:
10297 feedback_fn("* running the instance OS import scripts...")
10301 for idx, image in enumerate(self.src_images):
10305 # FIXME: pass debug option from opcode to backend
10306 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10307 constants.IEIO_FILE, (image, ),
10308 constants.IEIO_SCRIPT,
10309 (iobj.disks[idx], idx),
10311 transfers.append(dt)
10314 masterd.instance.TransferInstanceData(self, feedback_fn,
10315 self.op.src_node, pnode_name,
10316 self.pnode.secondary_ip,
10318 if not compat.all(import_result):
10319 self.LogWarning("Some disks for instance %s on node %s were not"
10320 " imported successfully" % (instance, pnode_name))
10322 rename_from = self._old_instance_name
10324 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10325 feedback_fn("* preparing remote import...")
10326 # The source cluster will stop the instance before attempting to make
10327 # a connection. In some cases stopping an instance can take a long
10328 # time, hence the shutdown timeout is added to the connection
10330 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10331 self.op.source_shutdown_timeout)
10332 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10334 assert iobj.primary_node == self.pnode.name
10336 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10337 self.source_x509_ca,
10338 self._cds, timeouts)
10339 if not compat.all(disk_results):
10340 # TODO: Should the instance still be started, even if some disks
10341 # failed to import (valid for local imports, too)?
10342 self.LogWarning("Some disks for instance %s on node %s were not"
10343 " imported successfully" % (instance, pnode_name))
10345 rename_from = self.source_instance_name
10348 # also checked in the prereq part
10349 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10352 # Run rename script on newly imported instance
10353 assert iobj.name == instance
10354 feedback_fn("Running rename script for %s" % instance)
10355 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10357 self.op.debug_level)
10358 if result.fail_msg:
10359 self.LogWarning("Failed to run rename script for %s on node"
10360 " %s: %s" % (instance, pnode_name, result.fail_msg))
10362 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10365 iobj.admin_state = constants.ADMINST_UP
10366 self.cfg.Update(iobj, feedback_fn)
10367 logging.info("Starting instance %s on node %s", instance, pnode_name)
10368 feedback_fn("* starting instance...")
10369 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10371 result.Raise("Could not start instance")
10373 return list(iobj.all_nodes)
10376 def _CheckRADOSFreeSpace():
10377 """Compute disk size requirements inside the RADOS cluster.
10380 # For the RADOS cluster we assume there is always enough space.
10384 class LUInstanceConsole(NoHooksLU):
10385   """Connect to an instance's console.
10387   This is somewhat special in that it returns the command line that
10388   you need to run on the master node in order to connect to the
10394   def ExpandNames(self):
# Read-only LU: all locks can be shared; only the instance lock is needed.
10395     self.share_locks = _ShareAll()
10396     self._ExpandAndLockInstance()
10398   def CheckPrereq(self):
10399     """Check prerequisites.
10401     This checks that the instance is in the cluster.
10404     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10405     assert self.instance is not None, \
10406       "Cannot retrieve locked instance %s" % self.op.instance_name
# A console can only be requested while the primary node is online.
10407     _CheckNodeOnline(self, self.instance.primary_node)
10409   def Exec(self, feedback_fn):
10410     """Connect to the console of an instance
10413     instance = self.instance
10414     node = instance.primary_node
# Ask the primary node which instances it is currently running.
10416     node_insts = self.rpc.call_instance_list([node],
10417                                              [instance.hypervisor])[node]
10418     node_insts.Raise("Can't get node information from %s" % node)
# If the instance is not running, translate its admin state into a
# user-visible instance state for the error message.
10420     if instance.name not in node_insts.payload:
10421       if instance.admin_state == constants.ADMINST_UP:
10422         state = constants.INSTST_ERRORDOWN
10423       elif instance.admin_state == constants.ADMINST_DOWN:
10424         state = constants.INSTST_ADMINDOWN
10426         state = constants.INSTST_ADMINOFFLINE
10427       raise errors.OpExecError("Instance %s is not running (state %s)" %
10428                                (instance.name, state))
10430     logging.debug("Connecting to console of %s on %s", instance.name, node)
10432     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
def _GetInstanceConsole(cluster, instance):
  """Build the console description for the given instance.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster configuration object
  @type instance: L{objects.Instance}
  @param instance: the instance whose console is requested
  @rtype: dict
  @return: the serialized console object

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # Fill the backend/hypervisor parameters into separate dicts instead of
  # editing the instance, so cluster defaults never get saved into it.
  filled_beparams = cluster.FillBE(instance)
  filled_hvparams = cluster.FillHV(instance)
  console = hyper.GetInstanceConsole(instance, filled_hvparams,
                                     filled_beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
10456 class LUInstanceReplaceDisks(LogicalUnit):
10457   """Replace the disks of an instance.
10460   HPATH = "mirrors-replace"
10461   HTYPE = constants.HTYPE_INSTANCE
10464   def CheckArguments(self):
# Argument validation is delegated to the tasklet's static helper.
10465     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10466                                   self.op.iallocator)
10468   def ExpandNames(self):
10469     self._ExpandAndLockInstance()
10471     assert locking.LEVEL_NODE not in self.needed_locks
10472     assert locking.LEVEL_NODE_RES not in self.needed_locks
10473     assert locking.LEVEL_NODEGROUP not in self.needed_locks
10475     assert self.op.iallocator is None or self.op.remote_node is None, \
10476       "Conflicting options"
10478     if self.op.remote_node is not None:
10479       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10481       # Warning: do not remove the locking of the new secondary here
10482       # unless DRBD8.AddChildren is changed to work in parallel;
10483       # currently it doesn't since parallel invocations of
10484       # FindUnusedMinor will conflict
10485       self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10486       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10488       self.needed_locks[locking.LEVEL_NODE] = []
10489       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10491     if self.op.iallocator is not None:
10492       # iallocator will select a new node in the same group
10493       self.needed_locks[locking.LEVEL_NODEGROUP] = []
10495     self.needed_locks[locking.LEVEL_NODE_RES] = []
# The actual work is done by the TLReplaceDisks tasklet below.
10497     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10498                                    self.op.iallocator, self.op.remote_node,
10499                                    self.op.disks, False, self.op.early_release,
10500                                    self.op.ignore_ipolicy)
10502     self.tasklets = [self.replacer]
10504   def DeclareLocks(self, level):
10505     if level == locking.LEVEL_NODEGROUP:
10506       assert self.op.remote_node is None
10507       assert self.op.iallocator is not None
10508       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10510       self.share_locks[locking.LEVEL_NODEGROUP] = 1
10511       # Lock all groups used by instance optimistically; this requires going
10512       # via the node before it's locked, requiring verification later on
10513       self.needed_locks[locking.LEVEL_NODEGROUP] = \
10514         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10516     elif level == locking.LEVEL_NODE:
10517       if self.op.iallocator is not None:
10518         assert self.op.remote_node is None
10519         assert not self.needed_locks[locking.LEVEL_NODE]
10521         # Lock member nodes of all locked groups
10522         self.needed_locks[locking.LEVEL_NODE] = \
10524           for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10525           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10527         self._LockInstancesNodes()
10528     elif level == locking.LEVEL_NODE_RES:
# Resource locks mirror the node locks computed above.
10530       self.needed_locks[locking.LEVEL_NODE_RES] = \
10531         self.needed_locks[locking.LEVEL_NODE]
10533   def BuildHooksEnv(self):
10534     """Build hooks env.
10536     This runs on the master, the primary and all the secondaries.
10539     instance = self.replacer.instance
10541       "MODE": self.op.mode,
10542       "NEW_SECONDARY": self.op.remote_node,
10543       "OLD_SECONDARY": instance.secondary_nodes[0],
10545     env.update(_BuildInstanceHookEnvByObject(self, instance))
10548   def BuildHooksNodes(self):
10549     """Build hooks nodes.
10552     instance = self.replacer.instance
10554       self.cfg.GetMasterNode(),
10555       instance.primary_node,
10557     if self.op.remote_node is not None:
10558       nl.append(self.op.remote_node)
10561   def CheckPrereq(self):
10562     """Check prerequisites.
10565     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10566             self.op.iallocator is None)
10568     # Verify if node group locks are still correct
10569     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10571       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10573     return LogicalUnit.CheckPrereq(self)
10576 class TLReplaceDisks(Tasklet):
10577 """Replaces disks for an instance.
10579 Note: Locking is not within the scope of this class.
10582   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10583                disks, delay_iallocator, early_release, ignore_ipolicy):
10584     """Initializes this class.
10587     Tasklet.__init__(self, lu)
# Parameters are stored verbatim for later use by CheckPrereq/Exec.
10590     self.instance_name = instance_name
10592     self.iallocator_name = iallocator_name
10593     self.remote_node = remote_node
10595     self.delay_iallocator = delay_iallocator
10596     self.early_release = early_release
10597     self.ignore_ipolicy = ignore_ipolicy
# Runtime data: filled in by CheckPrereq/_CheckPrereq2, not by callers.
10600     self.instance = None
10601     self.new_node = None
10602     self.target_node = None
10603     self.other_node = None
10604     self.remote_node_info = None
10605     self.node_secondary_ip = None
10608 def CheckArguments(mode, remote_node, ialloc):
10609 """Helper function for users of this class.
10612 # check for valid parameter combination
10613 if mode == constants.REPLACE_DISK_CHG:
10614 if remote_node is None and ialloc is None:
10615 raise errors.OpPrereqError("When changing the secondary either an"
10616 " iallocator script must be used or the"
10617 " new node given", errors.ECODE_INVAL)
10619 if remote_node is not None and ialloc is not None:
10620 raise errors.OpPrereqError("Give either the iallocator or the new"
10621 " secondary, not both", errors.ECODE_INVAL)
10623 elif remote_node is not None or ialloc is not None:
10624 # Not replacing the secondary
10625 raise errors.OpPrereqError("The iallocator and new node options can"
10626 " only be used when changing the"
10627 " secondary node", errors.ECODE_INVAL)
10630   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10631     """Compute a new secondary node using an IAllocator.
10634     req = iallocator.IAReqRelocate(name=instance_name,
10635                                    relocate_from=list(relocate_from))
10636     ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
10638     ial.Run(iallocator_name)
10640     if not ial.success:
10641       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10642                                  " %s" % (iallocator_name, ial.info),
10643                                  errors.ECODE_NORES)
# The allocator must return exactly the number of nodes it was asked for.
10645     if len(ial.result) != ial.required_nodes:
10646       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10647                                  " of nodes (%s), required %s" %
10649                                   len(ial.result), ial.required_nodes),
10650                                  errors.ECODE_FAULT)
10652     remote_node_name = ial.result[0]
10654     lu.LogInfo("Selected new secondary for instance '%s': %s",
10655                instance_name, remote_node_name)
10657     return remote_node_name
10659   def _FindFaultyDisks(self, node_name):
10660     """Wrapper for L{_FindFaultyInstanceDisks}.
# Delegates to the module-level helper using this tasklet's config, rpc
# and instance.
10663     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10666   def _CheckDisksActivated(self, instance):
10667     """Checks if the instance disks are activated.
10669     @param instance: The instance to check disks
10670     @return: True if they are activated, False otherwise
10673     nodes = instance.all_nodes
# Every disk is probed on every node of the instance via blockdev_find.
10675     for idx, dev in enumerate(instance.disks):
10677         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10678         self.cfg.SetDiskID(dev, node)
10680         result = _BlockdevFind(self, node, dev, instance)
# A missing payload or an RPC failure means the disk is not activated.
10684         elif result.fail_msg or not result.payload:
10689   def CheckPrereq(self):
10690     """Check prerequisites.
10692     This checks that the instance is in the cluster.
10695     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10696     assert instance is not None, \
10697       "Cannot retrieve locked instance %s" % self.instance_name
# Disk replacement is only implemented for DRBD8-based instances.
10699     if instance.disk_template != constants.DT_DRBD8:
10700       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10701                                  " instances", errors.ECODE_INVAL)
10703     if len(instance.secondary_nodes) != 1:
10704       raise errors.OpPrereqError("The instance has a strange layout,"
10705                                  " expected one secondary but found %d" %
10706                                  len(instance.secondary_nodes),
10707                                  errors.ECODE_FAULT)
# When the iallocator run is delayed, the second half of the checks is
# run from Exec instead (see _CheckPrereq2).
10709     if not self.delay_iallocator:
10710       self._CheckPrereq2()
10712   def _CheckPrereq2(self):
10713     """Check prerequisites, second part.
10715     This function should always be part of CheckPrereq. It was separated and is
10716     now called from Exec because during node evacuation iallocator was only
10717     called with an unmodified cluster model, not taking planned changes into
10721     instance = self.instance
10722     secondary_node = instance.secondary_nodes[0]
# Either the caller gave an explicit new secondary, or the iallocator
# picks one.
10724     if self.iallocator_name is None:
10725       remote_node = self.remote_node
10727       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10728                                        instance.name, instance.secondary_nodes)
10730     if remote_node is None:
10731       self.remote_node_info = None
10733       assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10734         "Remote node '%s' is not locked" % remote_node
10736       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10737       assert self.remote_node_info is not None, \
10738         "Cannot retrieve locked node %s" % remote_node
10740     if remote_node == self.instance.primary_node:
10741       raise errors.OpPrereqError("The specified node is the primary node of"
10742                                  " the instance", errors.ECODE_INVAL)
10744     if remote_node == secondary_node:
10745       raise errors.OpPrereqError("The specified node is already the"
10746                                  " secondary node of the instance",
10747                                  errors.ECODE_INVAL)
# Explicit disk lists are only meaningful for same-node replacement.
10749     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10750                                     constants.REPLACE_DISK_CHG):
10751       raise errors.OpPrereqError("Cannot specify disks to be replaced",
10752                                  errors.ECODE_INVAL)
10754     if self.mode == constants.REPLACE_DISK_AUTO:
10755       if not self._CheckDisksActivated(instance):
10756         raise errors.OpPrereqError("Please run activate-disks on instance %s"
10757                                    " first" % self.instance_name,
10758                                    errors.ECODE_STATE)
# Auto mode repairs whichever side has faulty disks; it cannot handle
# faults on both nodes at once.
10759       faulty_primary = self._FindFaultyDisks(instance.primary_node)
10760       faulty_secondary = self._FindFaultyDisks(secondary_node)
10762       if faulty_primary and faulty_secondary:
10763         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10764                                    " one node and can not be repaired"
10765                                    " automatically" % self.instance_name,
10766                                    errors.ECODE_STATE)
10769         self.disks = faulty_primary
10770         self.target_node = instance.primary_node
10771         self.other_node = secondary_node
10772         check_nodes = [self.target_node, self.other_node]
10773       elif faulty_secondary:
10774         self.disks = faulty_secondary
10775         self.target_node = secondary_node
10776         self.other_node = instance.primary_node
10777         check_nodes = [self.target_node, self.other_node]
10783       # Non-automatic modes
10784       if self.mode == constants.REPLACE_DISK_PRI:
10785         self.target_node = instance.primary_node
10786         self.other_node = secondary_node
10787         check_nodes = [self.target_node, self.other_node]
10789       elif self.mode == constants.REPLACE_DISK_SEC:
10790         self.target_node = secondary_node
10791         self.other_node = instance.primary_node
10792         check_nodes = [self.target_node, self.other_node]
10794       elif self.mode == constants.REPLACE_DISK_CHG:
10795         self.new_node = remote_node
10796         self.other_node = instance.primary_node
10797         self.target_node = secondary_node
10798         check_nodes = [self.new_node, self.other_node]
10800         _CheckNodeNotDrained(self.lu, remote_node)
10801         _CheckNodeVmCapable(self.lu, remote_node)
10803         old_node_info = self.cfg.GetNodeInfo(secondary_node)
10804         assert old_node_info is not None
10805         if old_node_info.offline and not self.early_release:
10806           # doesn't make sense to delay the release
10807           self.early_release = True
10808           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10809                           " early-release mode", secondary_node)
10812         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10815     # If not specified all disks should be replaced
10817       self.disks = range(len(self.instance.disks))
10819     # TODO: This is ugly, but right now we can't distinguish between internal
10820     # submitted opcode and external one. We should fix that.
10821     if self.remote_node_info:
10822       # We change the node, lets verify it still meets instance policy
10823       new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10824       cluster = self.cfg.GetClusterInfo()
10825       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
10827       _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10828                               ignore=self.ignore_ipolicy)
10830     for node in check_nodes:
10831       _CheckNodeOnline(self.lu, node)
10833     touched_nodes = frozenset(node_name for node_name in [self.new_node,
10836                               if node_name is not None)
10838     # Release unneeded node and node resource locks
10839     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10840     _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10842     # Release any owned node group
10843     if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10844       _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10846     # Check whether disks are valid
10847     for disk_idx in self.disks:
10848       instance.FindDisk(disk_idx)
10850     # Get secondary node IP addresses
10851     self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10852                                   in self.cfg.GetMultiNodeInfo(touched_nodes))
10854   def Exec(self, feedback_fn):
10855     """Execute disk replacement.
10857     This dispatches the disk replacement to the appropriate handler.
10860     if self.delay_iallocator:
# Second half of the prereq checks was postponed; run it now.
10861       self._CheckPrereq2()
10864     # Verify owned locks before starting operation
10865     owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10866     assert set(owned_nodes) == set(self.node_secondary_ip), \
10867         ("Incorrect node locks, owning %s, expected %s" %
10868          (owned_nodes, self.node_secondary_ip.keys()))
10869     assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10870             self.lu.owned_locks(locking.LEVEL_NODE_RES))
10872     owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10873     assert list(owned_instances) == [self.instance_name], \
10874         "Instance '%s' not locked" % self.instance_name
10876     assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10877         "Should not own any node group lock at this point"
10880       feedback_fn("No disks need replacement")
10883     feedback_fn("Replacing disk(s) %s for %s" %
10884                 (utils.CommaJoin(self.disks), self.instance.name))
10886     activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10888     # Activate the instance disks if we're replacing them on a down instance
10890       _StartInstanceDisks(self.lu, self.instance, True)
10893       # Should we replace the secondary node?
10894       if self.new_node is not None:
10895         fn = self._ExecDrbd8Secondary
10897         fn = self._ExecDrbd8DiskOnly
10899       result = fn(feedback_fn)
10901       # Deactivate the instance disks if we're replacing them on a
10904         _SafeShutdownInstanceDisks(self.lu, self.instance)
10906     assert not self.lu.owned_locks(locking.LEVEL_NODE)
10909     # Verify owned locks
10910     owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10911     nodes = frozenset(self.node_secondary_ip)
# With early release all resource locks must be gone by now; otherwise
# only locks on the touched nodes may remain.
10912     assert ((self.early_release and not owned_nodes) or
10913             (not self.early_release and not (set(owned_nodes) - nodes))), \
10914       ("Not owning the correct locks, early_release=%s, owned=%r,"
10915        " nodes=%r" % (self.early_release, owned_nodes, nodes))
10919   def _CheckVolumeGroup(self, nodes):
10920     self.lu.LogInfo("Checking volume groups")
10922     vgname = self.cfg.GetVGName()
10924     # Make sure volume group exists on all involved nodes
10925     results = self.rpc.call_vg_list(nodes)
10927       raise errors.OpExecError("Can't list volume groups on the nodes")
# Each node's reply must be successful and contain the cluster VG.
10930       res = results[node]
10931       res.Raise("Error checking node %s" % node)
10932       if vgname not in res.payload:
10933         raise errors.OpExecError("Volume group '%s' not found on node %s" %
10936   def _CheckDisksExistence(self, nodes):
10937     # Check disk existence
# Only the disks selected for replacement (self.disks) are verified.
10938     for idx, dev in enumerate(self.instance.disks):
10939       if idx not in self.disks:
10943         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10944         self.cfg.SetDiskID(dev, node)
10946         result = _BlockdevFind(self, node, dev, self.instance)
10948         msg = result.fail_msg
# Either an RPC failure or an empty payload means the disk is missing.
10949         if msg or not result.payload:
10951             msg = "disk not found"
10952           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10955   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
# Verify that the selected disks are consistent on the given node;
# degraded storage makes a replace unsafe, so abort in that case.
10956     for idx, dev in enumerate(self.instance.disks):
10957       if idx not in self.disks:
10960       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10963       if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10964                                    on_primary, ldisk=ldisk):
10965         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10966                                  " replace disks for instance %s" %
10967                                  (node_name, self.instance.name))
10969   def _CreateNewStorage(self, node_name):
10970     """Create new storage on the primary or secondary node.
10972     This is only used for same-node replaces, not for changing the
10973     secondary node, hence we don't want to modify the existing disk.
10978     disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10979     for idx, dev in enumerate(disks):
10980       if idx not in self.disks:
10983       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10985       self.cfg.SetDiskID(dev, node_name)
# New data/meta LVs get fresh unique names; the old ones stay in place
# until the swap is complete.
10987       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10988       names = _GenerateUniqueNames(self.lu, lv_names)
10990       (data_disk, meta_disk) = dev.children
10991       vg_data = data_disk.logical_id[0]
10992       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10993                              logical_id=(vg_data, names[0]),
10994                              params=data_disk.params)
10995       vg_meta = meta_disk.logical_id[0]
10996       lv_meta = objects.Disk(dev_type=constants.LD_LV,
10997                              size=constants.DRBD_META_SIZE,
10998                              logical_id=(vg_meta, names[1]),
10999                              params=meta_disk.params)
11001       new_lvs = [lv_data, lv_meta]
# Keep copies of the old children so they can be removed later.
11002       old_lvs = [child.Copy() for child in dev.children]
11003       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11005       # we pass force_create=True to force the LVM creation
11006       for new_lv in new_lvs:
11007         _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11008                              _GetInstanceInfoText(self.instance), False)
11012   def _CheckDevices(self, node_name, iv_names):
# Verify every DRBD device in iv_names is present and not degraded on
# the given node.
11013     for name, (dev, _, _) in iv_names.iteritems():
11014       self.cfg.SetDiskID(dev, node_name)
11016       result = _BlockdevFind(self, node_name, dev, self.instance)
11018       msg = result.fail_msg
11019       if msg or not result.payload:
11021           msg = "disk not found"
11022         raise errors.OpExecError("Can't find DRBD device %s: %s" %
11025       if result.payload.is_degraded:
11026         raise errors.OpExecError("DRBD device %s is degraded!" % name)
11028   def _RemoveOldStorage(self, node_name, iv_names):
# Best-effort removal of the replaced LVs: failures only produce a
# warning, since the instance already runs on the new storage.
11029     for name, (_, old_lvs, _) in iv_names.iteritems():
11030       self.lu.LogInfo("Remove logical volumes for %s" % name)
11033         self.cfg.SetDiskID(lv, node_name)
11035         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11037           self.lu.LogWarning("Can't remove old LV: %s" % msg,
11038                              hint="remove unused LVs manually")
11040   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11041     """Replace a disk on the primary or secondary for DRBD 8.
11043     The algorithm for replace is quite complicated:
11045     1. for each disk to be replaced:
11047       1. create new LVs on the target node with unique names
11048       1. detach old LVs from the drbd device
11049       1. rename old LVs to name_replaced.<time_t>
11050       1. rename new LVs to old LVs
11051       1. attach the new LVs (with the old names now) to the drbd device
11053     1. wait for sync across all devices
11055     1. for each modified disk:
11057       1. remove old LVs (which have the name name_replaces.<time_t>)
11059     Failures are not very well handled.
11064     # Step: check device activation
11065     self.lu.LogStep(1, steps_total, "Check device existence")
11066     self._CheckDisksExistence([self.other_node, self.target_node])
11067     self._CheckVolumeGroup([self.target_node, self.other_node])
11069     # Step: check other node consistency
11070     self.lu.LogStep(2, steps_total, "Check peer consistency")
11071     self._CheckDisksConsistency(self.other_node,
11072                                 self.other_node == self.instance.primary_node,
11075     # Step: create new storage
11076     self.lu.LogStep(3, steps_total, "Allocate new storage")
11077     iv_names = self._CreateNewStorage(self.target_node)
11079     # Step: for each lv, detach+rename*2+attach
11080     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11081     for dev, old_lvs, new_lvs in iv_names.itervalues():
11082       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11084       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11086       result.Raise("Can't detach drbd from local storage on node"
11087                    " %s for device %s" % (self.target_node, dev.iv_name))
11089       #cfg.Update(instance)
11091       # ok, we created the new LVs, so now we know we have the needed
11092       # storage; as such, we proceed on the target node to rename
11093       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11094       # using the assumption that logical_id == physical_id (which in
11095       # turn is the unique_id on that node)
11097       # FIXME(iustin): use a better name for the replaced LVs
11098       temp_suffix = int(time.time())
11099       ren_fn = lambda d, suff: (d.physical_id[0],
11100                                 d.physical_id[1] + "_replaced-%s" % suff)
11102       # Build the rename list based on what LVs exist on the node
11103       rename_old_to_new = []
11104       for to_ren in old_lvs:
11105         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
# Only rename LVs that actually exist on the node (best effort).
11106         if not result.fail_msg and result.payload:
11108           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11110       self.lu.LogInfo("Renaming the old LVs on the target node")
11111       result = self.rpc.call_blockdev_rename(self.target_node,
11113       result.Raise("Can't rename old LVs on node %s" % self.target_node)
11115       # Now we rename the new LVs to the old LVs
11116       self.lu.LogInfo("Renaming the new LVs on the target node")
11117       rename_new_to_old = [(new, old.physical_id)
11118                            for old, new in zip(old_lvs, new_lvs)]
11119       result = self.rpc.call_blockdev_rename(self.target_node,
11121       result.Raise("Can't rename new LVs on node %s" % self.target_node)
11123       # Intermediate steps of in memory modifications
11124       for old, new in zip(old_lvs, new_lvs):
11125         new.logical_id = old.logical_id
11126         self.cfg.SetDiskID(new, self.target_node)
11128       # We need to modify old_lvs so that removal later removes the
11129       # right LVs, not the newly added ones; note that old_lvs is a
11131       for disk in old_lvs:
11132         disk.logical_id = ren_fn(disk, temp_suffix)
11133         self.cfg.SetDiskID(disk, self.target_node)
11135       # Now that the new lvs have the old name, we can add them to the device
11136       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11137       result = self.rpc.call_blockdev_addchildren(self.target_node,
11138                                                   (dev, self.instance), new_lvs)
11139       msg = result.fail_msg
# On failure, roll back by removing the freshly created LVs before
# aborting the whole replace.
11141         for new_lv in new_lvs:
11142           msg2 = self.rpc.call_blockdev_remove(self.target_node,
11145             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11146                                hint=("cleanup manually the unused logical"
11148         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11150     cstep = itertools.count(5)
11152     if self.early_release:
11153       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11154       self._RemoveOldStorage(self.target_node, iv_names)
11155       # TODO: Check if releasing locks early still makes sense
11156       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11158       # Release all resource locks except those used by the instance
11159       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11160                     keep=self.node_secondary_ip.keys())
11162     # Release all node locks while waiting for sync
11163     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11165     # TODO: Can the instance lock be downgraded here? Take the optional disk
11166     # shutdown in the caller into consideration.
11169     # This can fail as the old devices are degraded and _WaitForSync
11170     # does a combined result over all disks, so we don't check its return value
11171     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11172     _WaitForSync(self.lu, self.instance)
11174     # Check all devices manually
11175     self._CheckDevices(self.instance.primary_node, iv_names)
11177     # Step: remove old storage
11178     if not self.early_release:
11179       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11180       self._RemoveOldStorage(self.target_node, iv_names)
11182 def _ExecDrbd8Secondary(self, feedback_fn):
11183 """Replace the secondary node for DRBD 8.
11185 The algorithm for replace is quite complicated:
11186 - for all disks of the instance:
11187 - create new LVs on the new node with same names
11188 - shutdown the drbd device on the old secondary
11189 - disconnect the drbd network on the primary
11190 - create the drbd device on the new secondary
11191 - network attach the drbd on the primary, using an artifice:
11192 the drbd code for Attach() will connect to the network if it
11193 finds a device which is connected to the good local disks but
11194 not network enabled
11195 - wait for sync across all devices
11196 - remove all disks from the old secondary
11198 Failures are not very well handled.
11203 pnode = self.instance.primary_node
11205 # Step: check device activation
11206 self.lu.LogStep(1, steps_total, "Check device existence")
11207 self._CheckDisksExistence([self.instance.primary_node])
11208 self._CheckVolumeGroup([self.instance.primary_node])
11210 # Step: check other node consistency
11211 self.lu.LogStep(2, steps_total, "Check peer consistency")
11212 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11214 # Step: create new storage
11215 self.lu.LogStep(3, steps_total, "Allocate new storage")
11216 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11217 for idx, dev in enumerate(disks):
11218 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11219 (self.new_node, idx))
11220 # we pass force_create=True to force LVM creation
11221 for new_lv in dev.children:
11222 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11223 True, _GetInstanceInfoText(self.instance), False)
11225 # Step 4: dbrd minors and drbd setups changes
11226 # after this, we must manually remove the drbd minors on both the
11227 # error and the success paths
11228 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11229 minors = self.cfg.AllocateDRBDMinor([self.new_node
11230 for dev in self.instance.disks],
11231 self.instance.name)
11232 logging.debug("Allocated minors %r", minors)
11235 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11236 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11237 (self.new_node, idx))
11238 # create new devices on new_node; note that we create two IDs:
11239 # one without port, so the drbd will be activated without
11240 # networking information on the new node at this stage, and one
11241 # with network, for the latter activation in step 4
11242 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11243 if self.instance.primary_node == o_node1:
11246 assert self.instance.primary_node == o_node2, "Three-node instance?"
11249 new_alone_id = (self.instance.primary_node, self.new_node, None,
11250 p_minor, new_minor, o_secret)
11251 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11252 p_minor, new_minor, o_secret)
11254 iv_names[idx] = (dev, dev.children, new_net_id)
11255 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11257 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11258 logical_id=new_alone_id,
11259 children=dev.children,
11262 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11265 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11267 _GetInstanceInfoText(self.instance), False)
11268 except errors.GenericError:
11269 self.cfg.ReleaseDRBDMinors(self.instance.name)
11272 # We have new devices, shutdown the drbd on the old secondary
11273 for idx, dev in enumerate(self.instance.disks):
11274 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11275 self.cfg.SetDiskID(dev, self.target_node)
11276 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11277 (dev, self.instance)).fail_msg
11279 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11280 "node: %s" % (idx, msg),
11281 hint=("Please cleanup this device manually as"
11282 " soon as possible"))
11284 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11285 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11286 self.instance.disks)[pnode]
11288 msg = result.fail_msg
11290 # detaches didn't succeed (unlikely)
11291 self.cfg.ReleaseDRBDMinors(self.instance.name)
11292 raise errors.OpExecError("Can't detach the disks from the network on"
11293 " old node: %s" % (msg,))
11295 # if we managed to detach at least one, we update all the disks of
11296 # the instance to point to the new secondary
11297 self.lu.LogInfo("Updating instance configuration")
11298 for dev, _, new_logical_id in iv_names.itervalues():
11299 dev.logical_id = new_logical_id
11300 self.cfg.SetDiskID(dev, self.instance.primary_node)
11302 self.cfg.Update(self.instance, feedback_fn)
11304 # Release all node locks (the configuration has been updated)
11305 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11307 # and now perform the drbd attach
11308 self.lu.LogInfo("Attaching primary drbds to new secondary"
11309 " (standalone => connected)")
11310 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11312 self.node_secondary_ip,
11313 (self.instance.disks, self.instance),
11314 self.instance.name,
11316 for to_node, to_result in result.items():
11317 msg = to_result.fail_msg
11319 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11321 hint=("please do a gnt-instance info to see the"
11322 " status of disks"))
11324 cstep = itertools.count(5)
11326 if self.early_release:
11327 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11328 self._RemoveOldStorage(self.target_node, iv_names)
11329 # TODO: Check if releasing locks early still makes sense
11330 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11332 # Release all resource locks except those used by the instance
11333 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11334 keep=self.node_secondary_ip.keys())
11336 # TODO: Can the instance lock be downgraded here? Take the optional disk
11337 # shutdown in the caller into consideration.
11340 # This can fail as the old devices are degraded and _WaitForSync
11341 # does a combined result over all disks, so we don't check its return value
11342 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11343 _WaitForSync(self.lu, self.instance)
11345 # Check all devices manually
11346 self._CheckDevices(self.instance.primary_node, iv_names)
11348 # Step: remove old storage
11349 if not self.early_release:
11350 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11351 self._RemoveOldStorage(self.target_node, iv_names)
class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Check that the node exists and the storage type is repairable."""
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # Only the affected node needs to be locked
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError as err:
      if self.op.ignore_consistency:
        # Degrade the hard failure to a warning if the user asked for it
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if inst.admin_state != constants.ADMINST_UP:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    """Run the consistency-fix operation on the node."""
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  REQ_BGL = False

  # Mapping between opcode evacuation modes and the corresponding iallocator
  # node-evacuation request modes
  _MODE2IALLOCATOR = {
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
    }

  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
          constants.IALLOCATOR_NEVAC_MODES)

  def CheckArguments(self):
    # Exactly one of iallocator/remote_node must be given
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.NODE_EVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    # Determine nodes (via group) optimistically, needs verification once locks
    # have been acquired
    self.lock_nodes = self._DetermineNodes()

  def _DetermineNodes(self):
    """Gets the list of nodes to operate on.

    @rtype: set of strings
    @return: Names of the evacuated node plus all potential target nodes

    """
    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    return set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    @return: Instances affected by the requested evacuation mode

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 " instances",
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups for all potential target nodes optimistically, needs
      # verification once nodes have been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    # Verify locks: the optimistically-computed node/group/instance sets must
    # still match reality now that the locks are held
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    need_nodes = self._DetermineNodes()

    if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " '%s', used to be '%s'; retry the"
                                 " operation" %
                                 (self.op.node_name,
                                  utils.CommaJoin(need_nodes),
                                  utils.CommaJoin(owned_nodes)),
                                 errors.ECODE_STATE)

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s';"
                               " retry the operation" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'; retry the operation" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      evac_mode = self._MODE2IALLOCATOR[self.op.mode]
      req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
                                     instances=list(self.instance_names))
      ial = iallocator.IAllocator(self.cfg, self.rpc, req)

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      # Without an iallocator only secondary replacement is possible; build
      # one replace-disks job per instance
      assert self.op.mode == constants.NODE_EVAC_SEC
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)
11616 def _SetOpEarlyRelease(early_release, op):
11617 """Sets C{early_release} flag on opcodes if available.
11621 op.early_release = early_release
11622 except AttributeError:
11623 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11628 def _NodeEvacDest(use_nodes, group, nodes):
11629 """Returns group or nodes depending on caller's choice.
11633 return utils.CommaJoin(nodes)
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups
  @rtype: list of lists of opcodes
  @return: Jobs (one list of opcodes per job) ready for submission

  """
  (moved, failed, jobs) = alloc_result

  if failed:
    # Any failed instance aborts the whole operation
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
                                 for (name, reason) in failed)
    lu.LogWarning("Unable to evacuate instances %s", failreason)
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  # Deserialize the opcodes and propagate the early-release flag to each one
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]
class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      "ABSOLUTE": self.op.absolute,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if self.op.absolute:
      # "amount" is the requested final size; derive the delta
      self.target = self.op.amount
      self.delta = self.target - self.disk.size
      if self.delta < 0:
        raise errors.OpPrereqError("Requested size (%s) is smaller than "
                                   "current disk size (%s)" %
                                   (utils.FormatUnit(self.target, "h"),
                                    utils.FormatUnit(self.disk.size, "h")),
                                   errors.ECODE_STATE)
    else:
      # "amount" is the increment; derive the final size
      self.delta = self.op.amount
      self.target = self.disk.size + self.delta
      if self.delta < 0:
        raise errors.OpPrereqError("Requested increment (%s) is negative" %
                                   utils.FormatUnit(self.delta, "h"),
                                   errors.ECODE_INVAL)

    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE,
                                      constants.DT_RBD):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.delta))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.delta, "h"),
                 utils.FormatUnit(self.target, "h")))

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           True, True)
      result.Raise("Grow request failed to node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real on the backing storage
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           False, True)
      result.Raise("Grow request failed to node %s" % node)

    # And now execute it for logical storage, on the primary node
    node = instance.primary_node
    self.cfg.SetDiskID(disk, node)
    result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                         False, False)
    result.Raise("Grow request failed to node %s" % node)

    disk.RecordGrow(self.delta)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")

    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODEGROUP] = []
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking:
      if level == locking.LEVEL_NODEGROUP:
        owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        self.needed_locks[locking.LEVEL_NODEGROUP] = \
          frozenset(group_uuid
                    for instance_name in owned_instances
                    for group_uuid in
                      self.cfg.GetInstanceNodeGroups(instance_name))

      elif level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = owned_instances

    instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))

    if self.op.use_locking:
      # Verify the optimistically-acquired group locks still cover the nodes
      _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
                                None)
    else:
      assert not (owned_instances or owned_groups or owned_nodes)

    self.wanted_instances = instances.values()

  def _ComputeBlockdevStatus(self, node, instance, dev):
    """Returns the status of a block device

    @return: None for static queries, offline nodes or a missing device,
      otherwise a tuple with the device's runtime status fields

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance.name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)

    return self._ComputeDiskStatusInner(instance, snode, anno_dev)

  def _ComputeDiskStatusInner(self, instance, snode, dev):
    """Compute block device status.

    @attention: The device has to be annotated already.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatusInner,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
    nodes = dict(self.cfg.GetMultiNodeInfo(node_names))

    groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
                                                 for node in nodes.values()))

    group2name_fn = lambda uuid: groups[uuid].name

    for instance in self.wanted_instances:
      pnode = nodes[instance.primary_node]

      if self.op.static or pnode.offline:
        # Static queries and offline primaries can only provide configuration
        # data, not runtime state
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
          else:
            remote_state = instance.admin_state

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      snodes_group_uuids = [nodes[snode_name].group
                            for snode_name in instance.secondary_nodes]

      result[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "pnode_group_uuid": pnode.group,
        "pnode_group_name": group2name_fn(pnode.group),
        "snodes": instance.secondary_nodes,
        "snodes_group_uuids": snodes_group_uuids,
        "snodes_group_names": map(group2name_fn, snodes_group_uuids),
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result
def PrepareContainerMods(mods, private_fn):
  """Prepares a list of container modifications by adding a private data field.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable for constructing a private data field for a
    modification; if C{None} the private field is set to C{None}
  @rtype: list of tuples; (operation, index, parameters, private)

  """
  if private_fn is None:
    fn = lambda: None
  else:
    fn = private_fn

  return [(op, idx, params, fn()) for (op, idx, params) in mods]
#: Type description for changes as returned by L{ApplyContainerMods}'s
#: callbacks
_TApplyContModsCbChanges = \
  ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
    ht.TNonEmptyString,
    ht.TAny,
    ])))
def ApplyContainerMods(kind, container, chgdesc, mods,
                       create_fn, modify_fn, remove_fn):
  """Applies descriptions in C{mods} to C{container}.

  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to modify
  @type chgdesc: None or list
  @param chgdesc: List of applied changes
  @type mods: list
  @param mods: Modifications as returned by L{PrepareContainerMods}
  @type create_fn: callable
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
    receives absolute item index, parameters and private data object as added
    by L{PrepareContainerMods}, returns tuple containing new item and changes
    as list
  @type modify_fn: callable
  @param modify_fn: Callback for modifying an existing item
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
    and private data object as added by L{PrepareContainerMods}, returns
    changes as list
  @type remove_fn: callable
  @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{PrepareContainerMods}
  @raise IndexError: If an index is invalid for the container

  """
  for (op, idx, params, private) in mods:
    if idx == -1:
      # Append
      absidx = len(container) - 1
    elif idx < 0:
      raise IndexError("Not accepting negative indices other than -1")
    elif idx > len(container):
      raise IndexError("Got %s index %s, but there are only %s" %
                       (kind, idx, len(container)))
    else:
      absidx = idx

    changes = None

    if op == constants.DDM_ADD:
      # Calculate where item will be added
      if idx == -1:
        addidx = len(container)
      else:
        addidx = idx

      if create_fn is None:
        item = params
      else:
        (item, changes) = create_fn(addidx, params, private)

      if idx == -1:
        container.append(item)
      else:
        assert idx >= 0
        assert idx <= len(container)
        # list.insert does so before the specified index
        container.insert(idx, item)
    else:
      # Retrieve existing item
      try:
        item = container[absidx]
      except IndexError:
        raise IndexError("Invalid %s index %s" % (kind, idx))

      if op == constants.DDM_REMOVE:
        assert not params

        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]
      elif op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

    assert _TApplyContModsCbChanges(changes)

    if not (chgdesc is None or changes is None):
      chgdesc.extend(changes)
12162 def _UpdateIvNames(base_index, disks):
12163 """Updates the C{iv_name} attribute of disks.
12165 @type disks: list of L{objects.Disk}
12168 for (idx, disk) in enumerate(disks):
12169 disk.iv_name = "disk/%s" % (base_index + idx, )
12172 class _InstNicModPrivate:
12173 """Data structure for network interface modifications.
12175 Used by L{LUInstanceSetParams}.
12178 def __init__(self):
12183 class LUInstanceSetParams(LogicalUnit):
12184 """Modifies an instances's parameters.
12187 HPATH = "instance-modify"
12188 HTYPE = constants.HTYPE_INSTANCE
12192 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12193 assert ht.TList(mods)
12194 assert not mods or len(mods[0]) in (2, 3)
12196 if mods and len(mods[0]) == 2:
12200 for op, params in mods:
12201 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12202 result.append((op, -1, params))
12206 raise errors.OpPrereqError("Only one %s add or remove operation is"
12207 " supported at a time" % kind,
12208 errors.ECODE_INVAL)
12210 result.append((constants.DDM_MODIFY, op, params))
12212 assert verify_fn(result)
12219 def _CheckMods(kind, mods, key_types, item_fn):
12220 """Ensures requested disk/NIC modifications are valid.
12223 for (op, _, params) in mods:
12224 assert ht.TDict(params)
12226 utils.ForceDictType(params, key_types)
12228 if op == constants.DDM_REMOVE:
12230 raise errors.OpPrereqError("No settings should be passed when"
12231 " removing a %s" % kind,
12232 errors.ECODE_INVAL)
12233 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12234 item_fn(op, params)
12236 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12239 def _VerifyDiskModification(op, params):
12240 """Verifies a disk modification.
12243 if op == constants.DDM_ADD:
12244 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12245 if mode not in constants.DISK_ACCESS_SET:
12246 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12247 errors.ECODE_INVAL)
12249 size = params.get(constants.IDISK_SIZE, None)
12251 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12252 constants.IDISK_SIZE, errors.ECODE_INVAL)
12256 except (TypeError, ValueError), err:
12257 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12258 errors.ECODE_INVAL)
12260 params[constants.IDISK_SIZE] = size
12262 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12263 raise errors.OpPrereqError("Disk size change not possible, use"
12264 " grow-disk", errors.ECODE_INVAL)
12267 def _VerifyNicModification(op, params):
12268 """Verifies a network interface modification.
# Normalizes NIC parameters in-place in "params":
#  - IP "none" -> None, otherwise must be a syntactically valid address
#  - legacy "bridge" is mutually exclusive with "link"
#  - a missing MAC on ADD defaults to auto-generation; an explicit MAC is
#    normalized/validated, and "auto" is rejected on MODIFY
# NOTE(review): this extract is missing lines (e.g. the first branch of the
# IP check); comments describe only what is visible.
12271 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12272 ip = params.get(constants.INIC_IP, None)
# "none" (case-insensitive) explicitly clears the IP address
12275 elif ip.lower() == constants.VALUE_NONE:
12276 params[constants.INIC_IP] = None
12277 elif not netutils.IPAddress.IsValid(ip):
12278 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12279 errors.ECODE_INVAL)
# Legacy "bridge" parameter is still accepted but cannot be combined with
# the newer "link" parameter; "none" clears either of them
12281 bridge = params.get("bridge", None)
12282 link = params.get(constants.INIC_LINK, None)
12283 if bridge and link:
12284 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12285 " at the same time", errors.ECODE_INVAL)
12286 elif bridge and bridge.lower() == constants.VALUE_NONE:
12287 params["bridge"] = None
12288 elif link and link.lower() == constants.VALUE_NONE:
12289 params[constants.INIC_LINK] = None
# When adding a NIC without a MAC, request automatic generation
12291 if op == constants.DDM_ADD:
12292 macaddr = params.get(constants.INIC_MAC, None)
12293 if macaddr is None:
12294 params[constants.INIC_MAC] = constants.VALUE_AUTO
12296 if constants.INIC_MAC in params:
12297 macaddr = params[constants.INIC_MAC]
# Concrete MAC addresses are normalized and syntax-checked
12298 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12299 macaddr = utils.NormalizeAndValidateMac(macaddr)
# "auto" only makes sense when creating a NIC, not when modifying one
12301 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12302 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12303 " modifying an existing NIC",
12304 errors.ECODE_INVAL)
12306 def CheckArguments(self):
# Validates the opcode arguments for instance modification: at least one
# change must be requested, disk/NIC modification lists are upgraded to
# the canonical (op, index, params) form, and incompatible combinations
# (disk changes + template conversion; mirrored template without a
# secondary node) are rejected early.
12307 if not (self.op.nics or self.op.disks or self.op.disk_template or
12308 self.op.hvparams or self.op.beparams or self.op.os_name or
12309 self.op.offline is not None or self.op.runtime_mem):
12310 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12312 if self.op.hvparams:
# Hypervisor parameters that are global-only must not be set per-instance
12313 _CheckGlobalHvParams(self.op.hvparams)
# Convert legacy-style disk/NIC modification lists to the new format
12315 self.op.disks = self._UpgradeDiskNicMods(
12316 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12317 self.op.nics = self._UpgradeDiskNicMods(
12318 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12320 # Check disk modifications
12321 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12322 self._VerifyDiskModification)
# Disk template conversion is an exclusive operation
12324 if self.op.disks and self.op.disk_template is not None:
12325 raise errors.OpPrereqError("Disk template conversion and other disk"
12326 " changes not supported at the same time",
12327 errors.ECODE_INVAL)
# Converting to an internally-mirrored template (e.g. DRBD) needs a
# secondary node to mirror to
12329 if (self.op.disk_template and
12330 self.op.disk_template in constants.DTS_INT_MIRROR and
12331 self.op.remote_node is None):
12332 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12333 " one requires specifying a secondary node",
12334 errors.ECODE_INVAL)
12336 # Check NIC modifications
12337 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12338 self._VerifyNicModification)
12340 def ExpandNames(self):
# Locks the instance and prepares (empty) node/node-resource lock lists
# that are filled in later by DeclareLocks via lock recalculation.
12341 self._ExpandAndLockInstance()
12342 # Can't even acquire node locks in shared mode as upcoming changes in
12343 # Ganeti 2.6 will start to modify the node object on disk conversion
12344 self.needed_locks[locking.LEVEL_NODE] = []
12345 self.needed_locks[locking.LEVEL_NODE_RES] = []
12346 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12348 def DeclareLocks(self, level):
# Per-level lock declaration: at LEVEL_NODE lock the instance's nodes
# (plus the new secondary when converting the disk template); at
# LEVEL_NODE_RES mirror the node lock list when a conversion is requested.
12349 # TODO: Acquire group lock in shared mode (disk parameters)
12350 if level == locking.LEVEL_NODE:
12351 self._LockInstancesNodes()
12352 if self.op.disk_template and self.op.remote_node:
# Expand the user-supplied secondary node name and lock it as well
12353 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12354 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12355 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
# Copy node locks (slice -> independent list) for the resource level
12357 self.needed_locks[locking.LEVEL_NODE_RES] = \
12358 self.needed_locks[locking.LEVEL_NODE][:]
12360 def BuildHooksEnv(self):
12361 """Build hooks env.
12363 This runs on the master, primary and secondaries.
# Collects the changed backend parameters (min/max memory, vcpus) and the
# new NIC list into hook override arguments, then builds the standard
# instance hook environment on top of them.
# NOTE(review): extract is missing lines here (e.g. the "args" dict
# initialization); comments describe only visible code.
12367 if constants.BE_MINMEM in self.be_new:
12368 args["minmem"] = self.be_new[constants.BE_MINMEM]
12369 if constants.BE_MAXMEM in self.be_new:
12370 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12371 if constants.BE_VCPUS in self.be_new:
12372 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12373 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12374 # information at all.
12376 if self._new_nics is not None:
# Export NICs as (ip, mac, mode, link) tuples, with nicparams filled
# from the cluster defaults
12379 for nic in self._new_nics:
12380 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12381 mode = nicparams[constants.NIC_MODE]
12382 link = nicparams[constants.NIC_LINK]
12383 nics.append((nic.ip, nic.mac, mode, link))
12385 args["nics"] = nics
12387 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12388 if self.op.disk_template:
12389 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12390 if self.op.runtime_mem:
12391 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12395 def BuildHooksNodes(self):
12396 """Build hooks nodes.
# Hooks run on the master node plus all of the instance's nodes.
# NOTE(review): the closing docstring and the return statement are on
# lines missing from this extract.
12399 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12402 def _PrepareNicModification(self, params, private, old_ip, old_params,
# Validates and pre-computes a single NIC change: merges the requested
# parameters over the old ones, fills in cluster defaults, checks the
# resulting mode (bridge existence for bridged mode, IP presence for
# routed mode) and reserves/generates the MAC address. The results are
# stored on "private" for later application in Exec.
12404 update_params_dict = dict([(key, params[key])
12405 for key in constants.NICS_PARAMETERS
# Legacy "bridge" is mapped onto the "link" nicparam
12408 if "bridge" in params:
12409 update_params_dict[constants.NIC_LINK] = params["bridge"]
12411 new_params = _GetUpdatedParams(old_params, update_params_dict)
12412 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
# Fill with cluster-level defaults and syntax-check the result
12414 new_filled_params = cluster.SimpleFillNIC(new_params)
12415 objects.NIC.CheckParameterSyntax(new_filled_params)
12417 new_mode = new_filled_params[constants.NIC_MODE]
12418 if new_mode == constants.NIC_MODE_BRIDGED:
# For bridged NICs, verify the target bridge exists on the primary node;
# a failure is either collected as a warning or raised, depending on a
# condition on a line missing from this extract (presumably --force)
12419 bridge = new_filled_params[constants.NIC_LINK]
12420 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12422 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12424 self.warn.append(msg)
12426 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12428 elif new_mode == constants.NIC_MODE_ROUTED:
# Routed NICs must have an IP address (new one, or the existing one)
12429 ip = params.get(constants.INIC_IP, old_ip)
12431 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12432 " on a routed NIC", errors.ECODE_INVAL)
12434 if constants.INIC_MAC in params:
12435 mac = params[constants.INIC_MAC]
12437 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12438 errors.ECODE_INVAL)
12439 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12440 # otherwise generate the MAC address
12441 params[constants.INIC_MAC] = \
12442 self.cfg.GenerateMAC(self.proc.GetECId())
12444 # or validate/reserve the current one
12446 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12447 except errors.ReservationError:
12448 raise errors.OpPrereqError("MAC address '%s' already in use"
12449 " in cluster" % mac,
12450 errors.ECODE_NOTUNIQUE)
# Stash the computed parameters for the Exec phase
12452 private.params = new_params
12453 private.filled = new_filled_params
12455 def CheckPrereq(self):
12456 """Check prerequisites.
12458 This only checks the instance list against the existing names.
# Large prerequisite check for instance modification. It:
#  - loads the instance/cluster objects and the disk params
#  - validates a requested disk template conversion (supported pair,
#    instance down, secondary node sane, enough space, ipolicy)
#  - computes and validates new hv/be/os parameter dictionaries
#  - cross-checks vCPU count against a CPU mask and physical CPUs
#  - verifies memory headroom on primary/secondary nodes for memory
#    increases and for runtime memory ballooning
#  - applies disk/NIC modifications to copies to validate them and
#    pre-computes the NIC change descriptions for hooks
# NOTE(review): this extract is missing lines throughout (conditions,
# else-branches); comments below describe only visible code.
12461 # checking the new params on the primary/secondary nodes
12463 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12464 cluster = self.cluster = self.cfg.GetClusterInfo()
12465 assert self.instance is not None, \
12466 "Cannot retrieve locked instance %s" % self.op.instance_name
12467 pnode = instance.primary_node
12468 nodelist = list(instance.all_nodes)
12469 pnode_info = self.cfg.GetNodeInfo(pnode)
12470 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12472 # Prepare disk/NIC modifications
12473 self.diskmod = PrepareContainerMods(self.op.disks, None)
12474 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
# OS change: verify the target OS exists on the primary node unless forced
12477 if self.op.os_name and not self.op.force:
12478 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12479 self.op.force_variant)
12480 instance_os = self.op.os_name
12482 instance_os = instance.os
12484 assert not (self.op.disk_template and self.op.disks), \
12485 "Can't modify disk template and apply disk changes at the same time"
12487 if self.op.disk_template:
# Conversion to the same template is a no-op and therefore an error
12488 if instance.disk_template == self.op.disk_template:
12489 raise errors.OpPrereqError("Instance already has disk template %s" %
12490 instance.disk_template, errors.ECODE_INVAL)
# Only the pairs listed in _DISK_CONVERSIONS are supported
12492 if (instance.disk_template,
12493 self.op.disk_template) not in self._DISK_CONVERSIONS:
12494 raise errors.OpPrereqError("Unsupported disk template conversion from"
12495 " %s to %s" % (instance.disk_template,
12496 self.op.disk_template),
12497 errors.ECODE_INVAL)
12498 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12499 msg="cannot change disk template")
12500 if self.op.disk_template in constants.DTS_INT_MIRROR:
12501 if self.op.remote_node == pnode:
12502 raise errors.OpPrereqError("Given new secondary node %s is the same"
12503 " as the primary node of the instance" %
12504 self.op.remote_node, errors.ECODE_STATE)
12505 _CheckNodeOnline(self, self.op.remote_node)
12506 _CheckNodeNotDrained(self, self.op.remote_node)
12507 # FIXME: here we assume that the old instance type is DT_PLAIN
12508 assert instance.disk_template == constants.DT_PLAIN
# Verify the new secondary has enough free space per VG for mirrors
12509 disks = [{constants.IDISK_SIZE: d.size,
12510 constants.IDISK_VG: d.logical_id[0]}
12511 for d in instance.disks]
12512 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12513 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
# Check the instance against the target group's instance policy
12515 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12516 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12517 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
12519 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12520 ignore=self.op.ignore_ipolicy)
12521 if pnode_info.group != snode_info.group:
12522 self.LogWarning("The primary and secondary nodes are in two"
12523 " different node groups; the disk parameters"
12524 " from the first disk's node group will be"
12527 # hvparams processing
12528 if self.op.hvparams:
12529 hv_type = instance.hypervisor
12530 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12531 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12532 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
# Syntax check locally, then verify on all of the instance's nodes
12535 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12536 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12537 self.hv_proposed = self.hv_new = hv_new # the new actual values
12538 self.hv_inst = i_hvdict # the new dict (without defaults)
12540 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12542 self.hv_new = self.hv_inst = {}
12544 # beparams processing
12545 if self.op.beparams:
12546 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12548 objects.UpgradeBeParams(i_bedict)
12549 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12550 be_new = cluster.SimpleFillBE(i_bedict)
12551 self.be_proposed = self.be_new = be_new # the new actual values
12552 self.be_inst = i_bedict # the new dict (without defaults)
12554 self.be_new = self.be_inst = {}
12555 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12556 be_old = cluster.FillBE(instance)
12558 # CPU param validation -- checking every time a parameter is
12559 # changed to cover all cases where either CPU mask or vcpus have
12561 if (constants.BE_VCPUS in self.be_proposed and
12562 constants.HV_CPU_MASK in self.hv_proposed):
12564 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12565 # Verify mask is consistent with number of vCPUs. Can skip this
12566 # test if only 1 entry in the CPU mask, which means same mask
12567 # is applied to all vCPUs.
12568 if (len(cpu_list) > 1 and
12569 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12570 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12572 (self.be_proposed[constants.BE_VCPUS],
12573 self.hv_proposed[constants.HV_CPU_MASK]),
12574 errors.ECODE_INVAL)
12576 # Only perform this test if a new CPU mask is given
12577 if constants.HV_CPU_MASK in self.hv_new:
12578 # Calculate the largest CPU number requested
12579 max_requested_cpu = max(map(max, cpu_list))
12580 # Check that all of the instance's nodes have enough physical CPUs to
12581 # satisfy the requested CPU mask
12582 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12583 max_requested_cpu + 1, instance.hypervisor)
12585 # osparams processing
12586 if self.op.osparams:
12587 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12588 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12589 self.os_inst = i_osdict # the new dict (without defaults)
12595 #TODO(dynmem): do the appropriate check involving MINMEM
# Memory increase: verify free memory on the primary node (and, with
# auto_balance, on secondaries) unless forced
12596 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12597 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12598 mem_check_list = [pnode]
12599 if be_new[constants.BE_AUTO_BALANCE]:
12600 # either we changed auto_balance to yes or it was from before
12601 mem_check_list.extend(instance.secondary_nodes)
12602 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12603 instance.hypervisor)
12604 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12605 [instance.hypervisor])
12606 pninfo = nodeinfo[pnode]
12607 msg = pninfo.fail_msg
12609 # Assume the primary node is unreachable and go ahead
12610 self.warn.append("Can't get info from primary node %s: %s" %
12613 (_, _, (pnhvinfo, )) = pninfo.payload
12614 if not isinstance(pnhvinfo.get("memory_free", None), int):
12615 self.warn.append("Node data from primary node %s doesn't contain"
12616 " free memory information" % pnode)
12617 elif instance_info.fail_msg:
12618 self.warn.append("Can't get instance runtime information: %s" %
12619 instance_info.fail_msg)
12621 if instance_info.payload:
12622 current_mem = int(instance_info.payload["memory"])
12624 # Assume instance not running
12625 # (there is a slight race condition here, but it's not very
12626 # probable, and we have no other way to check)
12627 # TODO: Describe race condition
12629 #TODO(dynmem): do the appropriate check involving MINMEM
12630 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12631 pnhvinfo["memory_free"])
12633 raise errors.OpPrereqError("This change will prevent the instance"
12634 " from starting, due to %d MB of memory"
12635 " missing on its primary node" %
12636 miss_mem, errors.ECODE_NORES)
12638 if be_new[constants.BE_AUTO_BALANCE]:
12639 for node, nres in nodeinfo.items():
12640 if node not in instance.secondary_nodes:
12642 nres.Raise("Can't get info from secondary node %s" % node,
12643 prereq=True, ecode=errors.ECODE_STATE)
12644 (_, _, (nhvinfo, )) = nres.payload
12645 if not isinstance(nhvinfo.get("memory_free", None), int):
12646 raise errors.OpPrereqError("Secondary node %s didn't return free"
12647 " memory information" % node,
12648 errors.ECODE_STATE)
12649 #TODO(dynmem): do the appropriate check involving MINMEM
12650 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12651 raise errors.OpPrereqError("This change will prevent the instance"
12652 " from failover to its secondary node"
12653 " %s, due to not enough memory" % node,
12654 errors.ECODE_STATE)
# Runtime memory ballooning: instance must be running and the requested
# amount must stay within [minmem, maxmem] unless forced
12656 if self.op.runtime_mem:
12657 remote_info = self.rpc.call_instance_info(instance.primary_node,
12659 instance.hypervisor)
12660 remote_info.Raise("Error checking node %s" % instance.primary_node)
12661 if not remote_info.payload: # not running already
12662 raise errors.OpPrereqError("Instance %s is not running" %
12663 instance.name, errors.ECODE_STATE)
12665 current_memory = remote_info.payload["memory"]
12666 if (not self.op.force and
12667 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12668 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12669 raise errors.OpPrereqError("Instance %s must have memory between %d"
12670 " and %d MB of memory unless --force is"
12673 self.be_proposed[constants.BE_MINMEM],
12674 self.be_proposed[constants.BE_MAXMEM]),
12675 errors.ECODE_INVAL)
# Growing the runtime memory needs free memory on the primary node
12677 if self.op.runtime_mem > current_memory:
12678 _CheckNodeFreeMemory(self, instance.primary_node,
12679 "ballooning memory for instance %s" %
12681 self.op.memory - current_memory,
12682 instance.hypervisor)
12684 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12685 raise errors.OpPrereqError("Disk operations not supported for"
12686 " diskless instances", errors.ECODE_INVAL)
# Local callbacks binding cluster/pnode for NIC validation below
12688 def _PrepareNicCreate(_, params, private):
12689 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12690 return (None, None)
12692 def _PrepareNicMod(_, nic, params, private):
12693 self._PrepareNicModification(params, private, nic.ip,
12694 nic.nicparams, cluster, pnode)
12697 # Verify NIC changes (operating on copy)
12698 nics = instance.nics[:]
12699 ApplyContainerMods("NIC", nics, None, self.nicmod,
12700 _PrepareNicCreate, _PrepareNicMod, None)
12701 if len(nics) > constants.MAX_NICS:
12702 raise errors.OpPrereqError("Instance has too many network interfaces"
12703 " (%d), cannot add more" % constants.MAX_NICS,
12704 errors.ECODE_STATE)
12706 # Verify disk changes (operating on a copy)
12707 disks = instance.disks[:]
12708 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12709 if len(disks) > constants.MAX_DISKS:
12710 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12711 " more" % constants.MAX_DISKS,
12712 errors.ECODE_STATE)
12714 if self.op.offline is not None:
12715 if self.op.offline:
12716 msg = "can't change to offline"
12718 msg = "can't change to online"
# Offline/online transitions are only allowed from certain admin states
12719 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12721 # Pre-compute NIC changes (necessary to use result in hooks)
12722 self._nic_chgdesc = []
12724 # Operate on copies as this is still in prereq
12725 nics = [nic.Copy() for nic in instance.nics]
12726 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12727 self._CreateNewNic, self._ApplyNicMods, None)
12728 self._new_nics = nics
12730 self._new_nics = None
12732 def _ConvertPlainToDrbd(self, feedback_fn):
12733 """Converts an instance from plain to drbd.
# Conversion steps visible here: generate the DRBD disk layout from the
# existing plain disks, create the missing data/meta LVs on both nodes,
# rename the original LVs into place, create the DRBD devices on top,
# update the instance object/config, and wait for the mirrors to sync.
12736 feedback_fn("Converting template to drbd")
12737 instance = self.instance
12738 pnode = instance.primary_node
12739 snode = self.op.remote_node
12741 assert instance.disk_template == constants.DT_PLAIN
12743 # create a fake disk info for _GenerateDiskTemplate
12744 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12745 constants.IDISK_VG: d.logical_id[0]}
12746 for d in instance.disks]
12747 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12748 instance.name, pnode, [snode],
12749 disk_info, None, None, 0, feedback_fn,
12751 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12753 info = _GetInstanceInfoText(instance)
12754 feedback_fn("Creating additional volumes...")
12755 # first, create the missing data and meta devices
12756 for disk in anno_disks:
12757 # unfortunately this is... not too nice
# On the primary only the meta device (children[1]) is new; on the
# secondary both children must be created
12758 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12760 for child in disk.children:
12761 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12762 # at this stage, all new LVs have been created, we can rename the
12764 feedback_fn("Renaming original volumes...")
12765 rename_list = [(o, n.children[0].logical_id)
12766 for (o, n) in zip(instance.disks, new_disks)]
12767 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12768 result.Raise("Failed to rename original LVs")
12770 feedback_fn("Initializing DRBD devices...")
12771 # all child devices are in place, we can now create the DRBD devices
12772 for disk in anno_disks:
12773 for node in [pnode, snode]:
12774 f_create = node == pnode
12775 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12777 # at this point, the instance has been modified
12778 instance.disk_template = constants.DT_DRBD8
12779 instance.disks = new_disks
12780 self.cfg.Update(instance, feedback_fn)
12782 # Release node locks while waiting for sync
12783 _ReleaseLocks(self, locking.LEVEL_NODE)
12785 # disks are created, waiting for sync
12786 disk_abort = not _WaitForSync(self, instance,
12787 oneshot=not self.op.wait_for_sync)
12789 raise errors.OpExecError("There are some degraded disks for"
12790 " this instance, please cleanup manually")
12792 # Node resource locks will be released by caller
12794 def _ConvertDrbdToPlain(self, feedback_fn):
12795 """Converts an instance from drbd to plain.
# Keeps each DRBD disk's first child (the data LV) as the new plain disk,
# returns the DRBD TCP ports to the pool, updates the config, and then
# removes the now-unneeded volumes on the secondary and the meta volumes
# on the primary (failures there are only warnings).
12798 instance = self.instance
12800 assert len(instance.secondary_nodes) == 1
12801 assert instance.disk_template == constants.DT_DRBD8
12803 pnode = instance.primary_node
12804 snode = instance.secondary_nodes[0]
12805 feedback_fn("Converting template to plain")
12807 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12808 new_disks = [d.children[0] for d in instance.disks]
12810 # copy over size and mode
12811 for parent, child in zip(old_disks, new_disks):
12812 child.size = parent.size
12813 child.mode = parent.mode
12815 # this is a DRBD disk, return its port to the pool
12816 # NOTE: this must be done right before the call to cfg.Update!
12817 for disk in old_disks:
12818 tcp_port = disk.logical_id[2]
12819 self.cfg.AddTcpUdpPort(tcp_port)
12821 # update instance structure
12822 instance.disks = new_disks
12823 instance.disk_template = constants.DT_PLAIN
12824 self.cfg.Update(instance, feedback_fn)
12826 # Release locks in case removing disks takes a while
12827 _ReleaseLocks(self, locking.LEVEL_NODE)
12829 feedback_fn("Removing volumes on the secondary node...")
12830 for disk in old_disks:
12831 self.cfg.SetDiskID(disk, snode)
12832 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
# Removal failures are non-fatal: warn and continue
12834 self.LogWarning("Could not remove block device %s on node %s,"
12835 " continuing anyway: %s", disk.iv_name, snode, msg)
12837 feedback_fn("Removing unneeded volumes on the primary node...")
12838 for idx, disk in enumerate(old_disks):
# children[1] is the DRBD metadata volume
12839 meta = disk.children[1]
12840 self.cfg.SetDiskID(meta, pnode)
12841 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12843 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12844 " continuing anyway: %s", idx, pnode, msg)
12846 def _CreateNewDisk(self, idx, params, _):
12847 """Creates a new disk.
# ApplyContainerMods callback: generates a new disk object at index "idx"
# matching the instance's disk template, creates the block devices on all
# of the instance's nodes, and returns a change description.
12850 instance = self.instance
# For file-based templates, reuse the directory of the first disk's path
12853 if instance.disk_template in constants.DTS_FILEBASED:
12854 (file_driver, file_path) = instance.disks[0].logical_id
12855 file_path = os.path.dirname(file_path)
12857 file_driver = file_path = None
12860 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12861 instance.primary_node, instance.secondary_nodes,
12862 [params], file_path, file_driver, idx,
12863 self.Log, self.diskparams)[0]
12865 info = _GetInstanceInfoText(instance)
12867 logging.info("Creating volume %s for instance %s",
12868 disk.iv_name, instance.name)
12869 # Note: this needs to be kept in sync with _CreateDisks
12871 for node in instance.all_nodes:
12872 f_create = (node == instance.primary_node)
12874 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
# Creation failure on a node is logged as a warning, not raised
12875 except errors.OpExecError, err:
12876 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12877 disk.iv_name, disk, node, err)
12880 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12884 def _ModifyDisk(idx, disk, params, _):
12885 """Modifies a disk.
# ApplyContainerMods callback: only the access mode of an existing disk
# can be changed; produces the matching change description.
12888 disk.mode = params[constants.IDISK_MODE]
12891 ("disk.mode/%d" % idx, disk.mode),
12894 def _RemoveDisk(self, idx, root, _):
# ApplyContainerMods callback: removes the block devices backing disk
# "idx" on every node in its device tree (failures only warn) and, for
# DRBD disks, returns the TCP port to the cluster pool.
12898 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
12899 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
12900 self.cfg.SetDiskID(disk, node)
12901 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12903 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12904 " continuing anyway", idx, node, msg)
12906 # if this is a DRBD disk, return its port to the pool
12907 if root.dev_type in constants.LDS_DRBD:
12908 self.cfg.AddTcpUdpPort(root.logical_id[2])
12911 def _CreateNewNic(idx, params, private):
12912 """Creates data structure for a new network interface.
# ApplyContainerMods callback: builds the NIC object from the parameters
# pre-validated in _PrepareNicModification (mac/ip plus private.params)
# and returns it together with a human-readable change description.
12915 mac = params[constants.INIC_MAC]
12916 ip = params.get(constants.INIC_IP, None)
12917 nicparams = private.params
12919 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12921 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12922 (mac, ip, private.filled[constants.NIC_MODE],
12923 private.filled[constants.NIC_LINK])),
12927 def _ApplyNicMods(idx, nic, params, private):
12928 """Modifies a network interface.
# ApplyContainerMods callback: applies mac/ip changes directly to the NIC
# object, replaces its nicparams with the pre-computed private.params and
# collects a change description entry for every modified key.
12933 for key in [constants.INIC_MAC, constants.INIC_IP]:
12935 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12936 setattr(nic, key, params[key])
12939 nic.nicparams = private.params
12941 for (key, val) in params.items():
12942 changes.append(("nic.%s/%d" % (key, idx), val))
12946 def Exec(self, feedback_fn):
12947 """Modifies an instance.
12949 All parameters take effect only at the next restart of the instance.
# Execution phase: emits warnings collected during CheckPrereq, applies
# runtime memory ballooning, disk modifications, the optional disk
# template conversion, NIC changes and the hv/be/os parameter updates,
# adjusts the offline/online admin state, and finally writes the updated
# instance to the configuration. Returns (via "result") the list of
# (field, value) change descriptions.
12952 # Process here the warnings from CheckPrereq, as we don't have a
12953 # feedback_fn there.
12954 # TODO: Replace with self.LogWarning
12955 for warn in self.warn:
12956 feedback_fn("WARNING: %s" % warn)
# Node resource locks are held exactly when a template conversion runs
12958 assert ((self.op.disk_template is None) ^
12959 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12960 "Not owning any node resource locks"
12963 instance = self.instance
12966 if self.op.runtime_mem:
12967 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12969 self.op.runtime_mem)
12970 rpcres.Raise("Cannot modify instance runtime memory")
12971 result.append(("runtime_memory", self.op.runtime_mem))
12973 # Apply disk changes
12974 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12975 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12976 _UpdateIvNames(0, instance.disks)
12978 if self.op.disk_template:
12980 check_nodes = set(instance.all_nodes)
12981 if self.op.remote_node:
12982 check_nodes.add(self.op.remote_node)
12983 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12984 owned = self.owned_locks(level)
12985 assert not (check_nodes - owned), \
12986 ("Not owning the correct locks, owning %r, expected at least %r" %
12987 (owned, check_nodes))
# Disks must be shut down before the template can be converted
12989 r_shut = _ShutdownInstanceDisks(self, instance)
12991 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12992 " proceed with disk template conversion")
12993 mode = (instance.disk_template, self.op.disk_template)
# Dispatch to _ConvertPlainToDrbd/_ConvertDrbdToPlain via the class map
12995 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12997 self.cfg.ReleaseDRBDMinors(instance.name)
12999 result.append(("disk_template", self.op.disk_template))
13001 assert instance.disk_template == self.op.disk_template, \
13002 ("Expected disk template '%s', found '%s'" %
13003 (self.op.disk_template, instance.disk_template))
13005 # Release node and resource locks if there are any (they might already have
13006 # been released during disk conversion)
13007 _ReleaseLocks(self, locking.LEVEL_NODE)
13008 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13010 # Apply NIC changes
13011 if self._new_nics is not None:
# NIC objects were pre-computed (on copies) in CheckPrereq
13012 instance.nics = self._new_nics
13013 result.extend(self._nic_chgdesc)
13016 if self.op.hvparams:
13017 instance.hvparams = self.hv_inst
13018 for key, val in self.op.hvparams.iteritems():
13019 result.append(("hv/%s" % key, val))
13022 if self.op.beparams:
13023 instance.beparams = self.be_inst
13024 for key, val in self.op.beparams.iteritems():
13025 result.append(("be/%s" % key, val))
13028 if self.op.os_name:
13029 instance.os = self.op.os_name
13032 if self.op.osparams:
13033 instance.osparams = self.os_inst
13034 for key, val in self.op.osparams.iteritems():
13035 result.append(("os/%s" % key, val))
13037 if self.op.offline is None:
# Ignored case: no admin-state change was requested
13040 elif self.op.offline:
13041 # Mark instance as offline
13042 self.cfg.MarkInstanceOffline(instance.name)
13043 result.append(("admin_state", constants.ADMINST_OFFLINE))
13045 # Mark instance as online, but stopped
13046 self.cfg.MarkInstanceDown(instance.name)
13047 result.append(("admin_state", constants.ADMINST_DOWN))
13049 self.cfg.Update(instance, feedback_fn)
13051 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13052 self.owned_locks(locking.LEVEL_NODE)), \
13053 "All node locks should have been released by now"
# Supported disk template conversions, mapping (old, new) template pairs
# to the unbound conversion method (called as fn(self, feedback_fn)).
13057 _DISK_CONVERSIONS = {
13058 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13059 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13063 class LUInstanceChangeGroup(LogicalUnit):
# Logical unit moving an instance to a different node group, using the
# instance allocator to compute the evacuation jobs.
13064 HPATH = "instance-change-group"
13065 HTYPE = constants.HTYPE_INSTANCE
13068 def ExpandNames(self):
# All locks are taken in shared mode; target group names are resolved to
# UUIDs up front (None means "any other group").
13069 self.share_locks = _ShareAll()
13070 self.needed_locks = {
13071 locking.LEVEL_NODEGROUP: [],
13072 locking.LEVEL_NODE: [],
13075 self._ExpandAndLockInstance()
13077 if self.op.target_groups:
13078 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13079 self.op.target_groups)
13081 self.req_target_uuids = None
13083 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13085 def DeclareLocks(self, level):
# Locks node groups first (requested targets plus the instance's own
# groups, or all groups), then the nodes implied by those groups.
13086 if level == locking.LEVEL_NODEGROUP:
13087 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13089 if self.req_target_uuids:
13090 lock_groups = set(self.req_target_uuids)
13092 # Lock all groups used by instance optimistically; this requires going
13093 # via the node before it's locked, requiring verification later on
13094 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13095 lock_groups.update(instance_groups)
13097 # No target groups, need to lock all of them
13098 lock_groups = locking.ALL_SET
13100 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13102 elif level == locking.LEVEL_NODE:
13103 if self.req_target_uuids:
13104 # Lock all nodes used by instances
13105 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13106 self._LockInstancesNodes()
13108 # Lock all nodes in all potential target groups
13109 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13110 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13111 member_nodes = [node_name
13112 for group in lock_groups
13113 for node_name in self.cfg.GetNodeGroup(group).members]
13114 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13116 # Lock all nodes as all groups are potential targets
13117 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13119 def CheckPrereq(self):
# Verifies that the optimistically-taken locks still match reality and
# computes self.target_uuids (requested groups, or every group the
# instance is not already in); targets overlapping the instance's own
# groups, or an empty target set, are errors.
13120 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13121 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13122 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13124 assert (self.req_target_uuids is None or
13125 owned_groups.issuperset(self.req_target_uuids))
13126 assert owned_instances == set([self.op.instance_name])
13128 # Get instance information
13129 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13131 # Check if node groups for locked instance are still correct
13132 assert owned_nodes.issuperset(self.instance.all_nodes), \
13133 ("Instance %s's nodes changed while we kept the lock" %
13134 self.op.instance_name)
13136 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13139 if self.req_target_uuids:
13140 # User requested specific target groups
13141 self.target_uuids = frozenset(self.req_target_uuids)
13143 # All groups except those used by the instance are potential targets
13144 self.target_uuids = owned_groups - inst_groups
13146 conflicting_groups = self.target_uuids & inst_groups
13147 if conflicting_groups:
13148 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13149 " used by the instance '%s'" %
13150 (utils.CommaJoin(conflicting_groups),
13151 self.op.instance_name),
13152 errors.ECODE_INVAL)
13154 if not self.target_uuids:
13155 raise errors.OpPrereqError("There are no possible target groups",
13156 errors.ECODE_INVAL)
13158 def BuildHooksEnv(self):
13159 """Build hooks env.
# Exposes the candidate target groups plus the standard instance
# environment to hooks.
13162 assert self.target_uuids
13165 "TARGET_GROUPS": " ".join(self.target_uuids),
13168 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13172 def BuildHooksNodes(self):
13173 """Build hooks nodes.
# Hooks run on the master node only.
13176 mn = self.cfg.GetMasterNode()
13177 return ([mn], [mn])
13179 def Exec(self, feedback_fn):
# Asks the instance allocator for a group-change solution and converts
# the resulting node-evacuation plan into follow-up jobs.
13180 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13182 assert instances == [self.op.instance_name], "Instance not locked"
13184 req = iallocator.IAReqGroupChange(instances=instances,
13185 target_groups=list(self.target_uuids))
13186 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
13188 ial.Run(self.op.iallocator)
13190 if not ial.success:
13191 raise errors.OpPrereqError("Can't compute solution for changing group of"
13192 " instance '%s' using iallocator '%s': %s" %
13193 (self.op.instance_name, self.op.iallocator,
13194 ial.info), errors.ECODE_NORES)
13196 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13198 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13199 " instance '%s'", len(jobs), self.op.instance_name)
13201 return ResultWithJobs(jobs)
13204 class LUBackupQuery(NoHooksLU):
13205 """Query the exports list
# Thin wrapper around _ExportQuery producing the old-style result format:
# a dict mapping node name -> list of export names, or False for nodes
# that could not be contacted.
13210 def CheckArguments(self):
13211 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13212 ["node", "export"], self.op.use_locking)
13214 def ExpandNames(self):
# Delegate lock setup to the query helper
13215 self.expq.ExpandNames(self)
13217 def DeclareLocks(self, level):
13218 self.expq.DeclareLocks(self, level)
13220 def Exec(self, feedback_fn):
# A "None" export name marks an unreachable node (result[node] = False)
13223 for (node, expname) in self.expq.OldStyleQuery(self):
13224 if expname is None:
13225 result[node] = False
13227 result.setdefault(node, []).append(expname)
class _ExportQuery(_QueryBase):
  FIELDS = query.EXPORT_FIELDS

  #: The node name is not a unique key for this query
  SORT_FIELD = "node"

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    # NOTE(review): the "if self.names:/else:" pair was restored after listing
    # corruption dropped it — verify against upstream history.
    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

    if self.do_locking:
      lu.share_locks = _ShareAll()
      lu.needed_locks = {
        locking.LEVEL_NODE: self.wanted,
        }

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)

    result = []

    # A failed node query yields a (node, None) marker; a successful one
    # yields one (node, export name) pair per export found on the node
    for (node, nres) in lu.rpc.call_export_list(nodes).items():
      if nres.fail_msg:
        result.append((node, None))
      else:
        result.extend((node, expname) for expname in nres.payload)

    return result
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    For remote exports, creates an X509 key/certificate on the primary node
    and returns the handshake/key/CA information the destination needs;
    local exports need no preparation and return None.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      # The key name is HMAC-signed with the cluster domain secret so the
      # destination can later prove it was issued by this cluster
      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None
13333 class LUBackupExport(LogicalUnit):
13334 """Export an instance to an image in the cluster.
13337 HPATH = "instance-export"
13338 HTYPE = constants.HTYPE_INSTANCE
13341 def CheckArguments(self):
13342 """Check the arguments.
13345 self.x509_key_name = self.op.x509_key_name
13346 self.dest_x509_ca_pem = self.op.destination_x509_ca
13348 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13349 if not self.x509_key_name:
13350 raise errors.OpPrereqError("Missing X509 key name for encryption",
13351 errors.ECODE_INVAL)
13353 if not self.dest_x509_ca_pem:
13354 raise errors.OpPrereqError("Missing destination X509 CA",
13355 errors.ECODE_INVAL)
13357 def ExpandNames(self):
13358 self._ExpandAndLockInstance()
13360 # Lock all nodes for local exports
13361 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13362 # FIXME: lock only instance primary and destination node
13364 # Sad but true, for now we have do lock all nodes, as we don't know where
13365 # the previous export might be, and in this LU we search for it and
13366 # remove it from its current node. In the future we could fix this by:
13367 # - making a tasklet to search (share-lock all), then create the
13368 # new one, then one to remove, after
13369 # - removing the removal operation altogether
13370 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13372 def DeclareLocks(self, level):
13373 """Last minute lock declaration."""
13374 # All nodes are locked anyway, so nothing to do here.
13376 def BuildHooksEnv(self):
13377 """Build hooks env.
13379 This will run on the master, primary node and target node.
13383 "EXPORT_MODE": self.op.mode,
13384 "EXPORT_NODE": self.op.target_node,
13385 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13386 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13387 # TODO: Generic function for boolean env variables
13388 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13391 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13395 def BuildHooksNodes(self):
13396 """Build hooks nodes.
13399 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13401 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13402 nl.append(self.op.target_node)
13406 def CheckPrereq(self):
13407 """Check prerequisites.
13409 This checks that the instance and node names are valid.
13412 instance_name = self.op.instance_name
13414 self.instance = self.cfg.GetInstanceInfo(instance_name)
13415 assert self.instance is not None, \
13416 "Cannot retrieve locked instance %s" % self.op.instance_name
13417 _CheckNodeOnline(self, self.instance.primary_node)
13419 if (self.op.remove_instance and
13420 self.instance.admin_state == constants.ADMINST_UP and
13421 not self.op.shutdown):
13422 raise errors.OpPrereqError("Can not remove instance without shutting it"
13423 " down before", errors.ECODE_STATE)
13425 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13426 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13427 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13428 assert self.dst_node is not None
13430 _CheckNodeOnline(self, self.dst_node.name)
13431 _CheckNodeNotDrained(self, self.dst_node.name)
13434 self.dest_disk_info = None
13435 self.dest_x509_ca = None
13437 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13438 self.dst_node = None
13440 if len(self.op.target_node) != len(self.instance.disks):
13441 raise errors.OpPrereqError(("Received destination information for %s"
13442 " disks, but instance %s has %s disks") %
13443 (len(self.op.target_node), instance_name,
13444 len(self.instance.disks)),
13445 errors.ECODE_INVAL)
13447 cds = _GetClusterDomainSecret()
13449 # Check X509 key name
13451 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13452 except (TypeError, ValueError), err:
13453 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
13454 errors.ECODE_INVAL)
13456 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13457 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13458 errors.ECODE_INVAL)
13460 # Load and verify CA
13462 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13463 except OpenSSL.crypto.Error, err:
13464 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13465 (err, ), errors.ECODE_INVAL)
13467 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13468 if errcode is not None:
13469 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13470 (msg, ), errors.ECODE_INVAL)
13472 self.dest_x509_ca = cert
13474 # Verify target information
13476 for idx, disk_data in enumerate(self.op.target_node):
13478 (host, port, magic) = \
13479 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13480 except errors.GenericError, err:
13481 raise errors.OpPrereqError("Target info for disk %s: %s" %
13482 (idx, err), errors.ECODE_INVAL)
13484 disk_info.append((host, port, magic))
13486 assert len(disk_info) == len(self.op.target_node)
13487 self.dest_disk_info = disk_info
13490 raise errors.ProgrammerError("Unhandled export mode %r" %
13493 # instance disk type verification
13494 # TODO: Implement export support for file-based disks
13495 for disk in self.instance.disks:
13496 if disk.dev_type == constants.LD_FILE:
13497 raise errors.OpPrereqError("Export not supported for instances with"
13498 " file-based disks", errors.ECODE_INVAL)
13500 def _CleanupExports(self, feedback_fn):
13501 """Removes exports of current instance from all other nodes.
13503 If an instance in a cluster with nodes A..D was exported to node C, its
13504 exports will be removed from the nodes A, B and D.
13507 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13509 nodelist = self.cfg.GetNodeList()
13510 nodelist.remove(self.dst_node.name)
13512 # on one-node clusters nodelist will be empty after the removal
13513 # if we proceed the backup would be removed because OpBackupQuery
13514 # substitutes an empty list with the full cluster node list.
13515 iname = self.instance.name
13517 feedback_fn("Removing old exports for instance %s" % iname)
13518 exportlist = self.rpc.call_export_list(nodelist)
13519 for node in exportlist:
13520 if exportlist[node].fail_msg:
13522 if iname in exportlist[node].payload:
13523 msg = self.rpc.call_export_remove(node, iname).fail_msg
13525 self.LogWarning("Could not remove older export for instance %s"
13526 " on node %s: %s", iname, node, msg)
13528 def Exec(self, feedback_fn):
13529 """Export an instance to an image in the cluster.
13532 assert self.op.mode in constants.EXPORT_MODES
13534 instance = self.instance
13535 src_node = instance.primary_node
13537 if self.op.shutdown:
13538 # shutdown the instance, but not the disks
13539 feedback_fn("Shutting down instance %s" % instance.name)
13540 result = self.rpc.call_instance_shutdown(src_node, instance,
13541 self.op.shutdown_timeout)
13542 # TODO: Maybe ignore failures if ignore_remove_failures is set
13543 result.Raise("Could not shutdown instance %s on"
13544 " node %s" % (instance.name, src_node))
13546 # set the disks ID correctly since call_instance_start needs the
13547 # correct drbd minor to create the symlinks
13548 for disk in instance.disks:
13549 self.cfg.SetDiskID(disk, src_node)
13551 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13554 # Activate the instance disks if we'exporting a stopped instance
13555 feedback_fn("Activating disks for %s" % instance.name)
13556 _StartInstanceDisks(self, instance, None)
13559 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13562 helper.CreateSnapshots()
13564 if (self.op.shutdown and
13565 instance.admin_state == constants.ADMINST_UP and
13566 not self.op.remove_instance):
13567 assert not activate_disks
13568 feedback_fn("Starting instance %s" % instance.name)
13569 result = self.rpc.call_instance_start(src_node,
13570 (instance, None, None), False)
13571 msg = result.fail_msg
13573 feedback_fn("Failed to start instance: %s" % msg)
13574 _ShutdownInstanceDisks(self, instance)
13575 raise errors.OpExecError("Could not start instance: %s" % msg)
13577 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13578 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13579 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13580 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13581 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13583 (key_name, _, _) = self.x509_key_name
13586 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13589 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13590 key_name, dest_ca_pem,
13595 # Check for backwards compatibility
13596 assert len(dresults) == len(instance.disks)
13597 assert compat.all(isinstance(i, bool) for i in dresults), \
13598 "Not all results are boolean: %r" % dresults
13602 feedback_fn("Deactivating disks for %s" % instance.name)
13603 _ShutdownInstanceDisks(self, instance)
13605 if not (compat.all(dresults) and fin_resu):
13608 failures.append("export finalization")
13609 if not compat.all(dresults):
13610 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13612 failures.append("disk export: disk(s) %s" % fdsk)
13614 raise errors.OpExecError("Export failed, errors in %s" %
13615 utils.CommaJoin(failures))
13617 # At this point, the export was successful, we can cleanup/finish
13619 # Remove instance if requested
13620 if self.op.remove_instance:
13621 feedback_fn("Removing instance %s" % instance.name)
13622 _RemoveInstance(self, feedback_fn, instance,
13623 self.op.ignore_remove_failures)
13625 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13626 self._CleanupExports(feedback_fn)
13628 return fin_resu, dresults
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    # NOTE(review): the fqdn_warn/found bookkeeping and the "continue"/"if
    # msg:" lines were restored after listing corruption dropped them.
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
13678 class LUGroupAdd(LogicalUnit):
13679 """Logical unit for creating node groups.
13682 HPATH = "group-add"
13683 HTYPE = constants.HTYPE_GROUP
13686 def ExpandNames(self):
13687 # We need the new group's UUID here so that we can create and acquire the
13688 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13689 # that it should not check whether the UUID exists in the configuration.
13690 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13691 self.needed_locks = {}
13692 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13694 def CheckPrereq(self):
13695 """Check prerequisites.
13697 This checks that the given group name is not an existing node group
13702 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13703 except errors.OpPrereqError:
13706 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13707 " node group (UUID: %s)" %
13708 (self.op.group_name, existing_uuid),
13709 errors.ECODE_EXISTS)
13711 if self.op.ndparams:
13712 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13714 if self.op.hv_state:
13715 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13717 self.new_hv_state = None
13719 if self.op.disk_state:
13720 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13722 self.new_disk_state = None
13724 if self.op.diskparams:
13725 for templ in constants.DISK_TEMPLATES:
13726 if templ in self.op.diskparams:
13727 utils.ForceDictType(self.op.diskparams[templ],
13728 constants.DISK_DT_TYPES)
13729 self.new_diskparams = self.op.diskparams
13731 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13732 except errors.OpPrereqError, err:
13733 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
13734 errors.ECODE_INVAL)
13736 self.new_diskparams = {}
13738 if self.op.ipolicy:
13739 cluster = self.cfg.GetClusterInfo()
13740 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13742 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13743 except errors.ConfigurationError, err:
13744 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13745 errors.ECODE_INVAL)
13747 def BuildHooksEnv(self):
13748 """Build hooks env.
13752 "GROUP_NAME": self.op.group_name,
13755 def BuildHooksNodes(self):
13756 """Build hooks nodes.
13759 mn = self.cfg.GetMasterNode()
13760 return ([mn], [mn])
13762 def Exec(self, feedback_fn):
13763 """Add the node group to the cluster.
13766 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13767 uuid=self.group_uuid,
13768 alloc_policy=self.op.alloc_policy,
13769 ndparams=self.op.ndparams,
13770 diskparams=self.new_diskparams,
13771 ipolicy=self.op.ipolicy,
13772 hv_state_static=self.new_hv_state,
13773 disk_state_static=self.new_disk_state)
13775 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13776 del self.remove_locks[locking.LEVEL_NODEGROUP]
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information later on.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1

      # Try to get all affected nodes' groups without having the group or node
      # lock yet. Needs verification later in the code flow.
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)

      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    expected_locks = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if actual_locks != expected_locks:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(expected_locks),
                                utils.CommaJoin(actual_locks)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    # NOTE(review): the "if new_splits:"/"else:" structure was restored after
    # listing corruption dropped it — verify against upstream history.
    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]

    self.cfg.AssignGroupNodes(mods)

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR
    are considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as
      a consequence of this change, and a list of instances that were
      previously split and this change does not fix.

    """
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      # Split before the change: the instance's nodes are already in more
      # than one group
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      # Split after applying the proposed changes
      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))
class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    self._cluster = lu.cfg.GetClusterInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    # NOTE(review): the "if not self.names:"/"else:" structure and the
    # missing/wanted initializations were restored after listing corruption
    # dropped them — verify against upstream history.
    if not self.names:
      # No names given: return all groups, sorted by name
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData(self._cluster,
                                [self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances,
                                query.GQ_DISKPARAMS in self.requested_data)
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    # Delegate all work to the group query helper; no locking used
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)
14012 class LUGroupSetParams(LogicalUnit):
14013 """Modifies the parameters of a node group.
14016 HPATH = "group-modify"
14017 HTYPE = constants.HTYPE_GROUP
14020 def CheckArguments(self):
14023 self.op.diskparams,
14024 self.op.alloc_policy,
14026 self.op.disk_state,
14030 if all_changes.count(None) == len(all_changes):
14031 raise errors.OpPrereqError("Please pass at least one modification",
14032 errors.ECODE_INVAL)
14034 def ExpandNames(self):
14035 # This raises errors.OpPrereqError on its own:
14036 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14038 self.needed_locks = {
14039 locking.LEVEL_INSTANCE: [],
14040 locking.LEVEL_NODEGROUP: [self.group_uuid],
14043 self.share_locks[locking.LEVEL_INSTANCE] = 1
14045 def DeclareLocks(self, level):
14046 if level == locking.LEVEL_INSTANCE:
14047 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14049 # Lock instances optimistically, needs verification once group lock has
14051 self.needed_locks[locking.LEVEL_INSTANCE] = \
14052 self.cfg.GetNodeGroupInstances(self.group_uuid)
14055 def _UpdateAndVerifyDiskParams(old, new):
14056 """Updates and verifies disk parameters.
14059 new_params = _GetUpdatedParams(old, new)
14060 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14063 def CheckPrereq(self):
14064 """Check prerequisites.
14067 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14069 # Check if locked instances are still correct
14070 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14072 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14073 cluster = self.cfg.GetClusterInfo()
14075 if self.group is None:
14076 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14077 (self.op.group_name, self.group_uuid))
14079 if self.op.ndparams:
14080 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14081 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14082 self.new_ndparams = new_ndparams
14084 if self.op.diskparams:
14085 diskparams = self.group.diskparams
14086 uavdp = self._UpdateAndVerifyDiskParams
14087 # For each disktemplate subdict update and verify the values
14088 new_diskparams = dict((dt,
14089 uavdp(diskparams.get(dt, {}),
14090 self.op.diskparams[dt]))
14091 for dt in constants.DISK_TEMPLATES
14092 if dt in self.op.diskparams)
14093 # As we've all subdicts of diskparams ready, lets merge the actual
14094 # dict with all updated subdicts
14095 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14097 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14098 except errors.OpPrereqError, err:
14099 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14100 errors.ECODE_INVAL)
14102 if self.op.hv_state:
14103 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14104 self.group.hv_state_static)
14106 if self.op.disk_state:
14107 self.new_disk_state = \
14108 _MergeAndVerifyDiskState(self.op.disk_state,
14109 self.group.disk_state_static)
14111 if self.op.ipolicy:
14112 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14116 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14117 inst_filter = lambda inst: inst.name in owned_instances
14118 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14119 gmi = ganeti.masterd.instance
14121 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14123 new_ipolicy, instances)
14126 self.LogWarning("After the ipolicy change the following instances"
14127 " violate them: %s",
14128 utils.CommaJoin(violations))
14130 def BuildHooksEnv(self):
14131 """Build hooks env.
14135 "GROUP_NAME": self.op.group_name,
14136 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14139 def BuildHooksNodes(self):
14140 """Build hooks nodes.
14143 mn = self.cfg.GetMasterNode()
14144 return ([mn], [mn])
14146 def Exec(self, feedback_fn):
14147 """Modifies the node group.
14152 if self.op.ndparams:
14153 self.group.ndparams = self.new_ndparams
14154 result.append(("ndparams", str(self.group.ndparams)))
14156 if self.op.diskparams:
14157 self.group.diskparams = self.new_diskparams
14158 result.append(("diskparams", str(self.group.diskparams)))
14160 if self.op.alloc_policy:
14161 self.group.alloc_policy = self.op.alloc_policy
14163 if self.op.hv_state:
14164 self.group.hv_state_static = self.new_hv_state
14166 if self.op.disk_state:
14167 self.group.disk_state_static = self.new_disk_state
14169 if self.op.ipolicy:
14170 self.group.ipolicy = self.new_ipolicy
14172 self.cfg.Update(self.group, feedback_fn)
class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This will raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that is
    empty (i.e., contains no nodes), and that is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
                                 " removed" % self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    # NOTE(review): the try/except/else structure was restored after listing
    # corruption dropped the "try:"/"pass"/"else:" lines.
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Hooks run on the master and on every node of the renamed group.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name
class LUGroupEvacuate(LogicalUnit):
  """Evacuate all instances from a node group into other groups.

  """
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    """Verify that the optimistically-taken locks are still valid.

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              owned_groups, owned_nodes, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Ask the iallocator for an evacuation plan and submit the jobs.

    """
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    req = iallocator.IAReqGroupChange(instances=instances,
                                      target_groups=self.target_uuids)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)
class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}

    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      lock_level = locking.LEVEL_NODE
      lock_name = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      lock_level = locking.LEVEL_INSTANCE
      lock_name = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
      lock_level = locking.LEVEL_NODEGROUP
      lock_name = self.group_uuid
    else:
      # Cluster-level tags need no per-object lock.
      lock_level = None
      lock_name = None

    if lock_level and getattr(self.op, "use_locking", True):
      self.needed_locks[lock_level] = lock_name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())
14528 class LUTagsSearch(NoHooksLU):
14529 """Searches the tags for a given pattern.
14534 def ExpandNames(self):
14535 self.needed_locks = {}
14537 def CheckPrereq(self):
14538 """Check prerequisites.
14540 This checks the pattern passed for validity by compiling it.
14544 self.re = re.compile(self.op.pattern)
14545 except re.error, err:
14546 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14547 (self.op.pattern, err), errors.ECODE_INVAL)
14549 def Exec(self, feedback_fn):
14550 """Returns the tag list.
14554 tgts = [("/cluster", cfg.GetClusterInfo())]
14555 ilist = cfg.GetAllInstancesInfo().values()
14556 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14557 nlist = cfg.GetAllNodesInfo().values()
14558 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14559 tgts.extend(("/nodegroup/%s" % n.name, n)
14560 for n in cfg.GetAllNodeGroupsInfo().values())
14562 for path, target in tgts:
14563 for tag in target.GetTags():
14564 if self.re.search(tag):
14565 results.append((path, tag))
14569 class LUTagsSet(TagsLU):
14570 """Sets a tag on a given object.
14575 def CheckPrereq(self):
14576 """Check prerequisites.
14578 This checks the type and length of the tag name and value.
14581 TagsLU.CheckPrereq(self)
14582 for tag in self.op.tags:
14583 objects.TaggableObject.ValidateTag(tag)
14585 def Exec(self, feedback_fn):
14590 for tag in self.op.tags:
14591 self.target.AddTag(tag)
14592 except errors.TagError, err:
14593 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14594 self.cfg.Update(self.target, feedback_fn)
class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    # Every requested tag must currently exist on the target.
    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
14679 class LUTestJqueue(NoHooksLU):
14680 """Utility LU to test some aspects of the job queue.
14685 # Must be lower than default timeout for WaitForJobChange to see whether it
14686 # notices changed jobs
14687 _CLIENT_CONNECT_TIMEOUT = 20.0
14688 _CLIENT_CONFIRM_TIMEOUT = 60.0
14691 def _NotifyUsingSocket(cls, cb, errcls):
14692 """Opens a Unix socket and waits for another program to connect.
14695 @param cb: Callback to send socket name to client
14696 @type errcls: class
14697 @param errcls: Exception class to use for errors
14700 # Using a temporary directory as there's no easy way to create temporary
14701 # sockets without writing a custom loop around tempfile.mktemp and
14703 tmpdir = tempfile.mkdtemp()
14705 tmpsock = utils.PathJoin(tmpdir, "sock")
14707 logging.debug("Creating temporary socket at %s", tmpsock)
14708 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14713 # Send details to client
14716 # Wait for client to connect before continuing
14717 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14719 (conn, _) = sock.accept()
14720 except socket.error, err:
14721 raise errcls("Client didn't connect in time (%s)" % err)
14725 # Remove as soon as client is connected
14726 shutil.rmtree(tmpdir)
14728 # Wait for client to close
14731 # pylint: disable=E1101
14732 # Instance of '_socketobject' has no ... member
14733 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14735 except socket.error, err:
14736 raise errcls("Client failed to confirm notification (%s)" % err)
14740 def _SendNotification(self, test, arg, sockname):
14741 """Sends a notification to the client.
14744 @param test: Test name
14745 @param arg: Test argument (depends on test)
14746 @type sockname: string
14747 @param sockname: Socket path
14750 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14752 def _Notify(self, prereq, test, arg):
14753 """Notifies the client of a test.
14756 @param prereq: Whether this is a prereq-phase test
14758 @param test: Test name
14759 @param arg: Test argument (depends on test)
14763 errcls = errors.OpPrereqError
14765 errcls = errors.OpExecError
14767 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14771 def CheckArguments(self):
14772 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14773 self.expandnames_calls = 0
14775 def ExpandNames(self):
14776 checkargs_calls = getattr(self, "checkargs_calls", 0)
14777 if checkargs_calls < 1:
14778 raise errors.ProgrammerError("CheckArguments was not called")
14780 self.expandnames_calls += 1
14782 if self.op.notify_waitlock:
14783 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14785 self.LogInfo("Expanding names")
14787 # Get lock on master node (just to get a lock, not for a particular reason)
14788 self.needed_locks = {
14789 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14792 def Exec(self, feedback_fn):
14793 if self.expandnames_calls < 1:
14794 raise errors.ProgrammerError("ExpandNames was not called")
14796 if self.op.notify_exec:
14797 self._Notify(False, constants.JQT_EXEC, None)
14799 self.LogInfo("Executing")
14801 if self.op.log_messages:
14802 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14803 for idx, msg in enumerate(self.op.log_messages):
14804 self.LogInfo("Sending log message %s", idx + 1)
14805 feedback_fn(constants.JQT_MSGPREFIX + msg)
14806 # Report how many test messages have been sent
14807 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14810 raise errors.OpExecError("Opcode failure was requested")
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the director and mode test.

    """
    if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
                        constants.IALLOCATOR_MODE_MULTI_ALLOC):
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      req = iallocator.IAReqInstanceAlloc(name=self.op.name,
                                          memory=self.op.memory,
                                          disks=self.op.disks,
                                          disk_template=self.op.disk_template,
                                          os=self.op.os,
                                          tags=self.op.tags,
                                          nics=self.op.nics,
                                          vcpus=self.op.vcpus,
                                          spindle_use=self.op.spindle_use,
                                          hypervisor=self.op.hypervisor)
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      req = iallocator.IAReqRelocate(name=self.op.name,
                                     relocate_from=list(self.relocate_from))
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      req = iallocator.IAReqGroupChange(instances=self.op.instances,
                                        target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      req = iallocator.IAReqNodeEvac(instances=self.op.instances,
                                     evac_mode=self.op.evac_mode)
    elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
      disk_template = self.op.disk_template
      insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
                                             memory=self.op.memory,
                                             disks=self.op.disks,
                                             disk_template=disk_template,
                                             os=self.op.os,
                                             tags=self.op.tags,
                                             nics=self.op.nics,
                                             vcpus=self.op.vcpus,
                                             spindle_use=self.op.spindle_use,
                                             hypervisor=self.op.hypervisor)
               for idx in range(self.op.count)]
      req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
    else:
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    ial = iallocator.IAllocator(self.cfg, self.rpc, req)
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

# Every opcode-reachable query resource must have an implementation here.
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)