# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import re
import logging
import copy
import itertools

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

from ganeti import rpc
from ganeti import runtime

import ganeti.masterd.instance # pylint: disable=W0611


#: Size of DRBD meta block device
_DRBD_META_SIZE = 128

# States of instances
INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcode.OpCode}
96 @param jobs: A list of lists of opcode objects
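

# Illustrative sketch (not part of the original module): an LU's Exec can
# return follow-up jobs together with extra return values, e.g.:
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpClusterVerifyConfig()]]
#     return ResultWithJobs(jobs, submitted="config verification")
#
# mcpu._ProcessResult then submits the jobs and merges the job IDs into the
# opcode result; the "submitted" keyword is a made-up example value.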


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing it separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and could
    # be a function warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we really have been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]
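
  # A typical concurrent LU (sketch, hypothetical usage): request instance
  # locks in ExpandNames, ask for node-lock recalculation, and delegate to
  # this helper from DeclareLocks:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes(primary_only=True)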


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU,
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
463 """Check prerequisites for this tasklets.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or
486 raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
499 def __init__(self, qfilter, fields, use_locking):
500 """Initializes this class.
503 self.use_locking = use_locking
505 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
506 namefield=self.SORT_FIELD)
507 self.requested_data = self.query.RequestedData()
508 self.names = self.query.RequestedNames()
510 # Sort only if no names were requested
511 self.sort_by_name = not self.names
513 self.do_locking = None
516 def _GetNames(self, lu, all_names, lock_level):
517 """Helper function to determine names asked for in the query.
521 names = lu.owned_locks(lock_level)
525 if self.wanted == locking.ALL_SET:
526 assert not self.names
527 # caller didn't specify names, so ordering is not important
528 return utils.NiceSort(names)
530 # caller specified names and we must keep the same order
532 assert not self.do_locking or lu.glm.is_owned(lock_level)
534 missing = set(self.wanted).difference(names)
536 raise errors.OpExecError("Some items were removed before retrieving"
537 " their data: %s" % missing)
539 # Return expanded names
542 def ExpandNames(self, lu):
543 """Expand names for this query.
545 See L{LogicalUnit.ExpandNames}.
548 raise NotImplementedError()
550 def DeclareLocks(self, lu, level):
551 """Declare locks for this query.
553 See L{LogicalUnit.DeclareLocks}.
556 raise NotImplementedError()
558 def _GetQueryData(self, lu):
559 """Collects all data for this query.
561 @return: Query data object
564 raise NotImplementedError()
566 def NewStyleQuery(self, lu):
567 """Collect data and execute query.
570 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
571 sort_by_name=self.sort_by_name)
573 def OldStyleQuery(self, lu):
574 """Collect data and execute query.
577 return self.query.OldStyleQuery(self._GetQueryData(lu),
578 sort_by_name=self.sort_by_name)
582 """Returns a dict declaring all lock levels shared.
585 return dict.fromkeys(locking.LEVELS, 1)
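

# Usage sketch: a read-only LU can combine _ShareAll() with its lock
# declaration so every level is acquired in shared mode (this mirrors how
# LUClusterVerifyConfig below uses it):
#
#   self.share_locks = _ShareAll()
#   self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}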


def _MakeLegacyNodeInfo(data):
  """Formats the data returned by L{rpc.RpcRunner.call_node_info}.

  Converts the data into a single dictionary. This is fine for most use cases,
  but some require information from more than one volume group or hypervisor.

  """
  (bootid, (vg_info, ), (hv_info, )) = data

  return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
    "bootid": bootid,
    })
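

# Example with made-up payload: given
#   data = ("boot-id-1", ({"vg_free": 1024},), ({"memory_free": 2048},))
# the call returns the single flat dict
#   {"vg_free": 1024, "memory_free": 2048, "bootid": "boot-id-1"}
# since the volume group and hypervisor dicts are disjoint and merged.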


def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
                              cur_group_uuid):
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  """
  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups"
                               " are '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())

  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val

  return params_copy
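

# Worked example (hypothetical values): with use_default=True,
#   _GetUpdatedParams({"a": 1, "b": 2},
#                     {"a": constants.VALUE_DEFAULT, "c": 3})
# returns {"b": 2, "c": 3}: "a" is deleted (reset to its default) and "c"
# is added; the input dictionaries are left untouched thanks to deepcopy.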


def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  """
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
                                 errors.ECODE_INVAL)
    if key in constants.IPOLICY_ISPECS:
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_none=use_none,
                                       use_default=use_default)
    else:
      if not value or value == [constants.VALUE_DEFAULT]:
        if group_policy:
          del ipolicy[key]
        else:
          raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster" % key,
                                     errors.ECODE_INVAL)
      else:
        if key in constants.IPOLICY_PARAMETERS:
          # FIXME: we assume all such values are float
          try:
            ipolicy[key] = float(value)
          except (TypeError, ValueError), err:
            raise errors.OpPrereqError("Invalid value for attribute"
                                       " '%s': '%s', error: %s" %
                                       (key, value, err), errors.ECODE_INVAL)
        else:
          # FIXME: we assume all others are lists; this should be redone
          ipolicy[key] = list(value)
  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy)
  except errors.ConfigurationError, err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                               errors.ECODE_INVAL)
  return ipolicy


def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @return: A new dict with updated and verified values

  """
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))

  return ret
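

# Sketch with hypothetical data and type_check={"x": constants.VTYPE_INT}:
# updating {"grp1": {"x": 1}} with {"grp1": {"x": 2}, "grp2": {"x": 3}}
# yields {"grp1": {"x": 2}, "grp2": {"x": 3}}, each sub-dict being passed
# through _GetUpdatedParams and verified with utils.ForceDictType.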


def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
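

# Typical call pattern (sketch): once an LU has narrowed its work down to a
# single node it can drop the node locks it no longer needs while keeping
# the relevant one, e.g.:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])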


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
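

# Example with made-up names: for an instance "inst1" whose
# MapLVsByNode() returns {"node1": ["xenvg/lv0", "xenvg/lv1"]}, the result
# is {("node1", "xenvg/lv0"): "inst1", ("node1", "xenvg/lv1"): "inst1"},
# i.e. one entry per (node, volume) pair.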


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = static
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
    ins_l.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)

    if instance.name in ins_l.payload:
      raise errors.OpPrereqError("Instance %s is running, %s" %
                                 (instance.name, msg), errors.ECODE_STATE)


def _ComputeMinMaxSpec(name, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or an element not meeting the criteria

  """
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    return ("%s value %s is not in range [%s, %s]" %
            (name, value, min_v, max_v))
  return None
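

# Worked example with a hypothetical ipolicy: if
# ipolicy[constants.ISPECS_MIN] is {"disk-size": 128} and
# ipolicy[constants.ISPECS_MAX] is {"disk-size": 1024}, then a disk-size
# value of 512 returns None (in range) while 64 yields
# "disk-size value 64 is not in range [128, 1024]".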


def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, mem_size),
    (constants.ISPEC_CPU_COUNT, cpu_count),
    (constants.ISPEC_DISK_COUNT, disk_count),
    (constants.ISPEC_NIC_COUNT, nic_count),
    (constants.ISPEC_SPINDLE_USE, spindle_use),
    ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)

  return filter(None,
                (_compute_fn(name, ipolicy, value)
                 for (name, value) in test_settings))


def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
                                         _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meet the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)


def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes the set of instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violate the new ipolicy but did not before

  """
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)

  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
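

# Sketch: both wrappers take a possibly abbreviated name and return the
# canonical one, e.g. _ExpandNodeName(cfg, "node1") could resolve to
# "node1.example.com" (hypothetical name), while an unknown name raises
# OpPrereqError with ECODE_NOENT.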


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
    }
  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
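

# Worked example (made-up numbers): with candidate_pool_size = 10, three
# current candidates (mc_now = 3) and mc_should = 4, the new node raises
# the target to min(4 + 1, 10) = 5; since 3 < 5 the node promotes itself.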


def _CalculateGroupIPolicy(cluster, group):
  """Calculate instance policy for group.

  """
  return cluster.SimpleFillIPolicy(group.ipolicy)


def _ComputeViolatingInstances(ipolicy, instances):
  """Computes the set of instances that violate the given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)


def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors: # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if cond:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
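

# Sketch of the two output formats produced by _Error, using a made-up
# error-code tuple ("node", "ENODESSH", ...) and item "node1": with
# op.error_codes set, the machine-parseable form
# "ERROR:ENODESSH:node:node1:<message>" is emitted; otherwise the
# human-readable "ERROR: node node1: <message>" is used.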


class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              ignore_errors=self.op.ignore_errors,
                                              depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = False

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
    self.share_locks = _ShareAll()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Retrieve all information
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" % (node.name,
                     utils.CommaJoin(dangling_instances.get(node.name,
                                                            ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad
2007 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2008 """Verifies the status of a node group.
2011 HPATH = "cluster-verify"
2012 HTYPE = constants.HTYPE_CLUSTER
2015 _HOOKS_INDENT_RE = re.compile("^", re.M)
2017 class NodeImage(object):
2018 """A class representing the logical and physical status of a node.
2021 @ivar name: the node name to which this object refers
2022 @ivar volumes: a structure as returned from
2023 L{ganeti.backend.GetVolumeList} (runtime)
2024 @ivar instances: a list of running instances (runtime)
2025 @ivar pinst: list of configured primary instances (config)
2026 @ivar sinst: list of configured secondary instances (config)
2027 @ivar sbp: dictionary of {primary-node: list of instances} for all
2028 instances for which this node is secondary (config)
2029 @ivar mfree: free memory, as reported by hypervisor (runtime)
2030 @ivar dfree: free disk, as reported by the node (runtime)
2031 @ivar offline: the offline status (config)
2032 @type rpc_fail: boolean
@ivar rpc_fail: whether the RPC verify call failed (overall,
not whether the individual keys were correct) (runtime)
2035 @type lvm_fail: boolean
2036 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2037 @type hyp_fail: boolean
2038 @ivar hyp_fail: whether the RPC call didn't return the instance list
2039 @type ghost: boolean
@ivar ghost: whether this node is unknown to the configuration (config)
2041 @type os_fail: boolean
2042 @ivar os_fail: whether the RPC call didn't return valid OS data
2044 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2045 @type vm_capable: boolean
2046 @ivar vm_capable: whether the node can host instances
2049 def __init__(self, offline=False, name=None, vm_capable=True):
2058 self.offline = offline
2059 self.vm_capable = vm_capable
2060 self.rpc_fail = False
2061 self.lvm_fail = False
2062 self.hyp_fail = False
2064 self.os_fail = False
2067 def ExpandNames(self):
2068 # This raises errors.OpPrereqError on its own:
2069 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2071 # Get instances in node group; this is unsafe and needs verification later
2073 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2075 self.needed_locks = {
2076 locking.LEVEL_INSTANCE: inst_names,
2077 locking.LEVEL_NODEGROUP: [self.group_uuid],
2078 locking.LEVEL_NODE: [],
2081 self.share_locks = _ShareAll()
2083 def DeclareLocks(self, level):
2084 if level == locking.LEVEL_NODE:
2085 # Get members of node group; this is unsafe and needs verification later
2086 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2088 all_inst_info = self.cfg.GetAllInstancesInfo()
2090 # In Exec(), we warn about mirrored instances that have primary and
2091 # secondary living in separate node groups. To fully verify that
2092 # volumes for these instances are healthy, we will need to do an
# extra call to their secondaries. We ensure here those nodes will be locked.
2095 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2096 # Important: access only the instances whose lock is owned
2097 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2098 nodes.update(all_inst_info[inst].secondary_nodes)
2100 self.needed_locks[locking.LEVEL_NODE] = nodes
2102 def CheckPrereq(self):
2103 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2104 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2106 group_nodes = set(self.group_info.members)
2108 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2111 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2113 unlocked_instances = \
2114 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
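# The two checks below are the usual optimistic-locking re-check: the node
# and instance sets were computed before the locks were actually acquired,
# so anything found here without a lock means the group changed in the
# meantime and the operation has to be retried.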
2117 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2118 utils.CommaJoin(unlocked_nodes),
2121 if unlocked_instances:
2122 raise errors.OpPrereqError("Missing lock for instances: %s" %
2123 utils.CommaJoin(unlocked_instances),
2126 self.all_node_info = self.cfg.GetAllNodesInfo()
2127 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2129 self.my_node_names = utils.NiceSort(group_nodes)
2130 self.my_inst_names = utils.NiceSort(group_instances)
2132 self.my_node_info = dict((name, self.all_node_info[name])
2133 for name in self.my_node_names)
2135 self.my_inst_info = dict((name, self.all_inst_info[name])
2136 for name in self.my_inst_names)
2138 # We detect here the nodes that will need the extra RPC calls for verifying
2139 # split LV volumes; they should be locked.
2140 extra_lv_nodes = set()
2142 for inst in self.my_inst_info.values():
2143 if inst.disk_template in constants.DTS_INT_MIRROR:
2144 for nname in inst.all_nodes:
2145 if self.all_node_info[nname].group != self.group_uuid:
2146 extra_lv_nodes.add(nname)
2148 unlocked_lv_nodes = \
2149 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2151 if unlocked_lv_nodes:
2152 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2153 utils.CommaJoin(unlocked_lv_nodes),
2155 self.extra_lv_nodes = list(extra_lv_nodes)
2157 def _VerifyNode(self, ninfo, nresult):
2158 """Perform some basic validation on data returned from a node.
- check the result data structure is well formed and has all the mandatory fields
2162 - check ganeti version
2164 @type ninfo: L{objects.Node}
2165 @param ninfo: the node to check
2166 @param nresult: the results from the node
2168 @return: whether overall this call was successful (and we can expect
reasonable values in the response)
2173 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2175 # main result, nresult should be a non-empty dict
2176 test = not nresult or not isinstance(nresult, dict)
2177 _ErrorIf(test, constants.CV_ENODERPC, node,
2178 "unable to verify node: no data returned")
2182 # compares ganeti version
2183 local_version = constants.PROTOCOL_VERSION
2184 remote_version = nresult.get("version", None)
2185 test = not (remote_version and
2186 isinstance(remote_version, (list, tuple)) and
2187 len(remote_version) == 2)
2188 _ErrorIf(test, constants.CV_ENODERPC, node,
2189 "connection to node returned invalid data")
2193 test = local_version != remote_version[0]
2194 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2195 "incompatible protocol versions: master %s,"
2196 " node %s", local_version, remote_version[0])
2200 # node seems compatible, we can actually try to look into its results
2202 # full package version
2203 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2204 constants.CV_ENODEVERSION, node,
2205 "software version mismatch: master %s, node %s",
2206 constants.RELEASE_VERSION, remote_version[1],
2207 code=self.ETYPE_WARNING)
2209 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2210 if ninfo.vm_capable and isinstance(hyp_result, dict):
2211 for hv_name, hv_result in hyp_result.iteritems():
2212 test = hv_result is not None
2213 _ErrorIf(test, constants.CV_ENODEHV, node,
2214 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2216 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2217 if ninfo.vm_capable and isinstance(hvp_result, list):
2218 for item, hv_name, hv_result in hvp_result:
2219 _ErrorIf(True, constants.CV_ENODEHV, node,
2220 "hypervisor %s parameter verify failure (source %s): %s",
2221 hv_name, item, hv_result)
2223 test = nresult.get(constants.NV_NODESETUP,
2224 ["Missing NODESETUP results"])
2225 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2230 def _VerifyNodeTime(self, ninfo, nresult,
2231 nvinfo_starttime, nvinfo_endtime):
2232 """Check the node time.
2234 @type ninfo: L{objects.Node}
2235 @param ninfo: the node to check
2236 @param nresult: the remote results for the node
2237 @param nvinfo_starttime: the start time of the RPC call
2238 @param nvinfo_endtime: the end time of the RPC call
2242 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2244 ntime = nresult.get(constants.NV_TIME, None)
2246 ntime_merged = utils.MergeTime(ntime)
2247 except (ValueError, TypeError):
2248 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2251 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2252 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2253 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2254 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
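# Worked example (hypothetical numbers): with NODE_MAX_CLOCK_SKEW of 150s
# and an RPC window of [1000.0, 1002.0], a node reporting 1160.5 exceeds
# the upper bound and is flagged as diverging by abs(1160.5 - 1002.0),
# i.e. "158.5s".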
2258 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2259 "Node time diverges by at least %s from master node time",
2262 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2263 """Check the node LVM results.
2265 @type ninfo: L{objects.Node}
2266 @param ninfo: the node to check
2267 @param nresult: the remote results for the node
2268 @param vg_name: the configured VG name
2275 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2277 # checks vg existence and size > 20G
2278 vglist = nresult.get(constants.NV_VGLIST, None)
2280 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2282 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2283 constants.MIN_VG_SIZE)
2284 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2287 pvlist = nresult.get(constants.NV_PVLIST, None)
2288 test = pvlist is None
2289 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2291 # check that ':' is not present in PV names, since it's a
# special character for lvcreate (denotes the range of PEs to use on the PV).
2294 for _, pvname, owner_vg in pvlist:
2295 test = ":" in pvname
2296 _ErrorIf(test, constants.CV_ENODELVM, node,
2297 "Invalid character ':' in PV '%s' of VG '%s'",
2300 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2301 """Check the node bridges.
2303 @type ninfo: L{objects.Node}
2304 @param ninfo: the node to check
2305 @param nresult: the remote results for the node
2306 @param bridges: the expected list of bridges
2313 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2315 missing = nresult.get(constants.NV_BRIDGES, None)
2316 test = not isinstance(missing, list)
2317 _ErrorIf(test, constants.CV_ENODENET, node,
2318 "did not return valid bridge information")
2320 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2321 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2323 def _VerifyNodeUserScripts(self, ninfo, nresult):
2324 """Check the results of user scripts presence and executability on the node
2326 @type ninfo: L{objects.Node}
2327 @param ninfo: the node to check
2328 @param nresult: the remote results for the node
test = constants.NV_USERSCRIPTS not in nresult
2334 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2335 "did not return user scripts information")
2337 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2339 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2340 "user scripts not present or not executable: %s" %
2341 utils.CommaJoin(sorted(broken_scripts)))
2343 def _VerifyNodeNetwork(self, ninfo, nresult):
2344 """Check the node network connectivity results.
2346 @type ninfo: L{objects.Node}
2347 @param ninfo: the node to check
2348 @param nresult: the remote results for the node
2352 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2354 test = constants.NV_NODELIST not in nresult
2355 _ErrorIf(test, constants.CV_ENODESSH, node,
2356 "node hasn't returned node ssh connectivity data")
2358 if nresult[constants.NV_NODELIST]:
2359 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2360 _ErrorIf(True, constants.CV_ENODESSH, node,
2361 "ssh communication with node '%s': %s", a_node, a_msg)
2363 test = constants.NV_NODENETTEST not in nresult
2364 _ErrorIf(test, constants.CV_ENODENET, node,
2365 "node hasn't returned node tcp connectivity data")
2367 if nresult[constants.NV_NODENETTEST]:
2368 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2370 _ErrorIf(True, constants.CV_ENODENET, node,
2371 "tcp communication with node '%s': %s",
2372 anode, nresult[constants.NV_NODENETTEST][anode])
2374 test = constants.NV_MASTERIP not in nresult
2375 _ErrorIf(test, constants.CV_ENODENET, node,
2376 "node hasn't returned node master IP reachability data")
2378 if not nresult[constants.NV_MASTERIP]:
2379 if node == self.master_node:
2380 msg = "the master node cannot reach the master IP (not configured?)"
2382 msg = "cannot reach the master IP"
2383 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2385 def _VerifyInstance(self, instance, instanceconfig, node_image,
2387 """Verify an instance.
This function checks whether the required block devices are
2390 available on the instance's node.
2393 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2394 node_current = instanceconfig.primary_node
2396 node_vol_should = {}
2397 instanceconfig.MapLVsByNode(node_vol_should)
2399 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2400 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2401 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2403 for node in node_vol_should:
2404 n_img = node_image[node]
2405 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2406 # ignore missing volumes on offline or broken nodes
2408 for volume in node_vol_should[node]:
2409 test = volume not in n_img.volumes
2410 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2411 "volume %s missing on node %s", volume, node)
2413 if instanceconfig.admin_state == constants.ADMINST_UP:
2414 pri_img = node_image[node_current]
2415 test = instance not in pri_img.instances and not pri_img.offline
2416 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2417 "instance not running on its primary node %s",
2420 diskdata = [(nname, success, status, idx)
2421 for (nname, disks) in diskstatus.items()
2422 for idx, (success, status) in enumerate(disks)]
2424 for nname, success, bdev_status, idx in diskdata:
2425 # the 'ghost node' construction in Exec() ensures that we have a
2427 snode = node_image[nname]
2428 bad_snode = snode.ghost or snode.offline
2429 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2430 not success and not bad_snode,
2431 constants.CV_EINSTANCEFAULTYDISK, instance,
2432 "couldn't retrieve status for disk/%s on %s: %s",
2433 idx, nname, bdev_status)
2434 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2435 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2436 constants.CV_EINSTANCEFAULTYDISK, instance,
2437 "disk/%s on %s is faulty", idx, nname)
2439 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2440 """Verify if there are any unknown volumes in the cluster.
2442 The .os, .swap and backup volumes are ignored. All other volumes are
2443 reported as unknown.
2445 @type reserved: L{ganeti.utils.FieldSet}
2446 @param reserved: a FieldSet of reserved volume names
2449 for node, n_img in node_image.items():
2450 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2451 self.all_node_info[node].group != self.group_uuid):
2452 # skip non-healthy nodes
2454 for volume in n_img.volumes:
2455 test = ((node not in node_vol_should or
2456 volume not in node_vol_should[node]) and
2457 not reserved.Matches(volume))
2458 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2459 "volume %s is unknown", volume)
2461 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2462 """Verify N+1 Memory Resilience.
2464 Check that if one single node dies we can still start all the
2465 instances it was primary for.
2468 cluster_info = self.cfg.GetClusterInfo()
2469 for node, n_img in node_image.items():
2470 # This code checks that every node which is now listed as
2471 # secondary has enough memory to host all instances it is
# supposed to take over should a single other node in the cluster fail.
2473 # FIXME: not ready for failover to an arbitrary node
2474 # FIXME: does not support file-backed instances
2475 # WARNING: we currently take into account down instances as well
2476 # as up ones, considering that even if they're down someone
2477 # might want to start them even in the event of a node failure.
2478 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2479 # we're skipping nodes marked offline and nodes in other groups from
2480 # the N+1 warning, since most likely we don't have good memory
# information from them; we already list instances living on such
2482 # nodes, and that's enough warning
2484 #TODO(dynmem): also consider ballooning out other instances
2485 for prinode, instances in n_img.sbp.items():
2487 for instance in instances:
2488 bep = cluster_info.FillBE(instance_cfg[instance])
2489 if bep[constants.BE_AUTO_BALANCE]:
2490 needed_mem += bep[constants.BE_MINMEM]
2491 test = n_img.mfree < needed_mem
2492 self._ErrorIf(test, constants.CV_ENODEN1, node,
2493 "not enough memory to accomodate instance failovers"
2494 " should node %s fail (%dMiB needed, %dMiB available)",
2495 prinode, needed_mem, n_img.mfree)
2498 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2499 (files_all, files_opt, files_mc, files_vm)):
2500 """Verifies file checksums collected from all nodes.
2502 @param errorif: Callback for reporting errors
2503 @param nodeinfo: List of L{objects.Node} objects
2504 @param master_node: Name of master node
2505 @param all_nvinfo: RPC results
2508 # Define functions determining which nodes to consider for a file
2511 (files_mc, lambda node: (node.master_candidate or
2512 node.name == master_node)),
2513 (files_vm, lambda node: node.vm_capable),
2516 # Build mapping from filename to list of nodes which should have the file
2518 for (files, fn) in files2nodefn:
2520 filenodes = nodeinfo
2522 filenodes = filter(fn, nodeinfo)
2523 nodefiles.update((filename,
2524 frozenset(map(operator.attrgetter("name"), filenodes)))
2525 for filename in files)
2527 assert set(nodefiles) == (files_all | files_mc | files_vm)
2529 fileinfo = dict((filename, {}) for filename in nodefiles)
2530 ignore_nodes = set()
2532 for node in nodeinfo:
2534 ignore_nodes.add(node.name)
2537 nresult = all_nvinfo[node.name]
2539 if nresult.fail_msg or not nresult.payload:
2542 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2544 test = not (node_files and isinstance(node_files, dict))
2545 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2546 "Node did not return file checksum data")
2548 ignore_nodes.add(node.name)
2551 # Build per-checksum mapping from filename to nodes having it
2552 for (filename, checksum) in node_files.items():
2553 assert filename in nodefiles
2554 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2556 for (filename, checksums) in fileinfo.items():
2557 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2559 # Nodes having the file
2560 with_file = frozenset(node_name
2561 for nodes in fileinfo[filename].values()
2562 for node_name in nodes) - ignore_nodes
2564 expected_nodes = nodefiles[filename] - ignore_nodes
2566 # Nodes missing file
2567 missing_file = expected_nodes - with_file
2569 if filename in files_opt:
2571 errorif(missing_file and missing_file != expected_nodes,
2572 constants.CV_ECLUSTERFILECHECK, None,
2573 "File %s is optional, but it must exist on all or no"
2574 " nodes (not found on %s)",
2575 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2577 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2578 "File %s is missing from node(s) %s", filename,
2579 utils.CommaJoin(utils.NiceSort(missing_file)))
2581 # Warn if a node has a file it shouldn't
2582 unexpected = with_file - expected_nodes
2584 constants.CV_ECLUSTERFILECHECK, None,
2585 "File %s should not exist on node(s) %s",
2586 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2588 # See if there are multiple versions of the file
2589 test = len(checksums) > 1
2591 variants = ["variant %s on %s" %
2592 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2593 for (idx, (checksum, nodes)) in
2594 enumerate(sorted(checksums.items()))]
2598 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2599 "File %s found with %s different checksums (%s)",
2600 filename, len(checksums), "; ".join(variants))
2602 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2604 """Verifies and the node DRBD status.
2606 @type ninfo: L{objects.Node}
2607 @param ninfo: the node to check
2608 @param nresult: the remote results for the node
2609 @param instanceinfo: the dict of instances
2610 @param drbd_helper: the configured DRBD usermode helper
2611 @param drbd_map: the DRBD map as returned by
2612 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2616 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2619 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
test = (helper_result is None)
2621 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2622 "no drbd usermode helper returned")
2624 status, payload = helper_result
2626 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2627 "drbd usermode helper check unsuccessful: %s", payload)
2628 test = status and (payload != drbd_helper)
2629 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2630 "wrong drbd usermode helper: %s", payload)
2632 # compute the DRBD minors
2634 for minor, instance in drbd_map[node].items():
2635 test = instance not in instanceinfo
2636 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2637 "ghost instance '%s' in temporary DRBD map", instance)
2638 # ghost instance should not be running, but otherwise we
2639 # don't give double warnings (both ghost instance and
2640 # unallocated minor in use)
2642 node_drbd[minor] = (instance, False)
2644 instance = instanceinfo[instance]
2645 node_drbd[minor] = (instance.name,
2646 instance.admin_state == constants.ADMINST_UP)
2648 # and now check them
2649 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2650 test = not isinstance(used_minors, (tuple, list))
2651 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2652 "cannot parse drbd status file: %s", str(used_minors))
2654 # we cannot check drbd status
2657 for minor, (iname, must_exist) in node_drbd.items():
2658 test = minor not in used_minors and must_exist
2659 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2660 "drbd minor %d of instance %s is not active", minor, iname)
2661 for minor in used_minors:
2662 test = minor not in node_drbd
2663 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2664 "unallocated drbd minor %d is in use", minor)
2666 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2667 """Builds the node OS structures.
2669 @type ninfo: L{objects.Node}
2670 @param ninfo: the node to check
2671 @param nresult: the remote results for the node
2672 @param nimg: the node image object
2676 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2678 remote_os = nresult.get(constants.NV_OSLIST, None)
2679 test = (not isinstance(remote_os, list) or
2680 not compat.all(isinstance(v, list) and len(v) == 7
2681 for v in remote_os))
2683 _ErrorIf(test, constants.CV_ENODEOS, node,
2684 "node hasn't returned valid OS data")
2693 for (name, os_path, status, diagnose,
2694 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2696 if name not in os_dict:
2699 # parameters is a list of lists instead of list of tuples due to
2700 # JSON lacking a real tuple type, fix it:
2701 parameters = [tuple(v) for v in parameters]
2702 os_dict[name].append((os_path, status, diagnose,
2703 set(variants), set(parameters), set(api_ver)))
2705 nimg.oslist = os_dict
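# Resulting structure (illustrative): nimg.oslist maps each OS name to a
# list of (path, status, diagnose, set(variants), set(parameters),
# set(api_versions)) tuples, one entry per path the OS was found under.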
2707 def _VerifyNodeOS(self, ninfo, nimg, base):
2708 """Verifies the node OS list.
2710 @type ninfo: L{objects.Node}
2711 @param ninfo: the node to check
2712 @param nimg: the node image object
2713 @param base: the 'template' node we match against (e.g. from the master)
2717 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2719 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2721 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2722 for os_name, os_data in nimg.oslist.items():
2723 assert os_data, "Empty OS status for OS %s?!" % os_name
2724 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2725 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2726 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2727 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2728 "OS '%s' has multiple entries (first one shadows the rest): %s",
2729 os_name, utils.CommaJoin([v[0] for v in os_data]))
2730 # comparisons with the 'base' image
2731 test = os_name not in base.oslist
2732 _ErrorIf(test, constants.CV_ENODEOS, node,
2733 "Extra OS %s not present on reference node (%s)",
2737 assert base.oslist[os_name], "Base node has empty OS status?"
2738 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2740 # base OS is invalid, skipping
2742 for kind, a, b in [("API version", f_api, b_api),
2743 ("variants list", f_var, b_var),
2744 ("parameters", beautify_params(f_param),
2745 beautify_params(b_param))]:
2746 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2747 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2748 kind, os_name, base.name,
2749 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2751 # check any missing OSes
2752 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2753 _ErrorIf(missing, constants.CV_ENODEOS, node,
2754 "OSes present on reference node %s but missing on this node: %s",
2755 base.name, utils.CommaJoin(missing))
2757 def _VerifyOob(self, ninfo, nresult):
2758 """Verifies out of band functionality of a node.
2760 @type ninfo: L{objects.Node}
2761 @param ninfo: the node to check
2762 @param nresult: the remote results for the node
2766 # We just have to verify the paths on master and/or master candidates
2767 # as the oob helper is invoked on the master
2768 if ((ninfo.master_candidate or ninfo.master_capable) and
2769 constants.NV_OOB_PATHS in nresult):
2770 for path_result in nresult[constants.NV_OOB_PATHS]:
2771 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2773 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2774 """Verifies and updates the node volume data.
2776 This function will update a L{NodeImage}'s internal structures
2777 with data from the remote call.
2779 @type ninfo: L{objects.Node}
2780 @param ninfo: the node to check
2781 @param nresult: the remote results for the node
2782 @param nimg: the node image object
2783 @param vg_name: the configured VG name
2787 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2789 nimg.lvm_fail = True
2790 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2793 elif isinstance(lvdata, basestring):
2794 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2795 utils.SafeEncode(lvdata))
2796 elif not isinstance(lvdata, dict):
2797 _ErrorIf(True, constants.CV_ENODELVM, node,
2798 "rpc call to node failed (lvlist)")
2800 nimg.volumes = lvdata
2801 nimg.lvm_fail = False
2803 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2804 """Verifies and updates the node instance list.
2806 If the listing was successful, then updates this node's instance
list. Otherwise, it marks the RPC call as failed for the instance list.
2810 @type ninfo: L{objects.Node}
2811 @param ninfo: the node to check
2812 @param nresult: the remote results for the node
2813 @param nimg: the node image object
2816 idata = nresult.get(constants.NV_INSTANCELIST, None)
2817 test = not isinstance(idata, list)
2818 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2819 "rpc call to node failed (instancelist): %s",
2820 utils.SafeEncode(str(idata)))
2822 nimg.hyp_fail = True
2824 nimg.instances = idata
2826 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2827 """Verifies and computes a node information map
2829 @type ninfo: L{objects.Node}
2830 @param ninfo: the node to check
2831 @param nresult: the remote results for the node
2832 @param nimg: the node image object
2833 @param vg_name: the configured VG name
2837 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2839 # try to read free memory (from the hypervisor)
2840 hv_info = nresult.get(constants.NV_HVINFO, None)
2841 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2842 _ErrorIf(test, constants.CV_ENODEHV, node,
2843 "rpc call to node failed (hvinfo)")
2846 nimg.mfree = int(hv_info["memory_free"])
2847 except (ValueError, TypeError):
2848 _ErrorIf(True, constants.CV_ENODERPC, node,
2849 "node returned invalid nodeinfo, check hypervisor")
2851 # FIXME: devise a free space model for file based instances as well
2852 if vg_name is not None:
2853 test = (constants.NV_VGLIST not in nresult or
2854 vg_name not in nresult[constants.NV_VGLIST])
2855 _ErrorIf(test, constants.CV_ENODELVM, node,
2856 "node didn't return data for the volume group '%s'"
2857 " - it is either missing or broken", vg_name)
2860 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2861 except (ValueError, TypeError):
2862 _ErrorIf(True, constants.CV_ENODERPC, node,
2863 "node returned invalid LVM info, check LVM status")
2865 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2866 """Gets per-disk status information for all instances.
2868 @type nodelist: list of strings
2869 @param nodelist: Node names
2870 @type node_image: dict of (name, L{objects.Node})
2871 @param node_image: Node objects
2872 @type instanceinfo: dict of (name, L{objects.Instance})
2873 @param instanceinfo: Instance objects
@rtype: {instance: {node: [(success, payload)]}}
2875 @return: a dictionary of per-instance dictionaries with nodes as
2876 keys and disk information as values; the disk information is a
2877 list of tuples (success, payload)
2880 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2883 node_disks_devonly = {}
2884 diskless_instances = set()
2885 diskless = constants.DT_DISKLESS
2887 for nname in nodelist:
2888 node_instances = list(itertools.chain(node_image[nname].pinst,
2889 node_image[nname].sinst))
2890 diskless_instances.update(inst for inst in node_instances
2891 if instanceinfo[inst].disk_template == diskless)
2892 disks = [(inst, disk)
2893 for inst in node_instances
2894 for disk in instanceinfo[inst].disks]
2897 # No need to collect data
2900 node_disks[nname] = disks
2902 # Creating copies as SetDiskID below will modify the objects and that can
2903 # lead to incorrect data returned from nodes
2904 devonly = [dev.Copy() for (_, dev) in disks]
2907 self.cfg.SetDiskID(dev, nname)
2909 node_disks_devonly[nname] = devonly
2911 assert len(node_disks) == len(node_disks_devonly)
2913 # Collect data from all nodes with disks
2914 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2917 assert len(result) == len(node_disks)
2921 for (nname, nres) in result.items():
2922 disks = node_disks[nname]
2925 # No data from this node
2926 data = len(disks) * [(False, "node offline")]
2929 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2930 "while getting disk information: %s", msg)
2932 # No data from this node
2933 data = len(disks) * [(False, msg)]
2936 for idx, i in enumerate(nres.payload):
2937 if isinstance(i, (tuple, list)) and len(i) == 2:
2940 logging.warning("Invalid result from node %s, entry %d: %s",
2942 data.append((False, "Invalid result from the remote node"))
2944 for ((inst, _), status) in zip(disks, data):
2945 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2947 # Add empty entries for diskless instances.
2948 for inst in diskless_instances:
2949 assert inst not in instdisk
2952 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2953 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2954 compat.all(isinstance(s, (tuple, list)) and
2955 len(s) == 2 for s in statuses)
2956 for inst, nnames in instdisk.items()
2957 for nname, statuses in nnames.items())
2958 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
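# Illustrative instdisk entry (hypothetical names):
#   {"inst1": {"node1": [(True, <status of disk/0>), (False, "degraded")]}}
# matching the (success, payload) pairs documented in the docstring above.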
2963 def _SshNodeSelector(group_uuid, all_nodes):
2964 """Create endless iterators for all potential SSH check hosts.
2967 nodes = [node for node in all_nodes
2968 if (node.group != group_uuid and
2970 keyfunc = operator.attrgetter("group")
2972 return map(itertools.cycle,
2973 [sorted(map(operator.attrgetter("name"), names))
2974 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2978 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2979 """Choose which nodes should talk to which other nodes.
We will make nodes contact all nodes in their group, and one node from every other node group.
2984 @warning: This algorithm has a known issue if one node group is much
2985 smaller than others (e.g. just one node). In such a case all other
2986 nodes will talk to the single node.
2989 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2990 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2992 return (online_nodes,
2993 dict((name, sorted([i.next() for i in sel]))
2994 for name in online_nodes))
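# Taken together with _SshNodeSelector, this makes every online node of the
# group probe one member of each *other* group, with the per-group cycling
# iterators handing out different remote nodes to consecutive callers so
# the checking load is spread out.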
2996 def BuildHooksEnv(self):
Cluster-Verify hooks run only in the post phase; if they fail, their
output is logged in the verify output and the verification fails.
3004 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3007 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3008 for node in self.my_node_info.values())
3012 def BuildHooksNodes(self):
3013 """Build hooks nodes.
3016 return ([], self.my_node_names)
3018 def Exec(self, feedback_fn):
3019 """Verify integrity of the node group, performing various test on nodes.
3022 # This method has too many local variables. pylint: disable=R0914
3023 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3025 if not self.my_node_names:
3027 feedback_fn("* Empty node group, skipping verification")
3031 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3032 verbose = self.op.verbose
3033 self._feedback_fn = feedback_fn
3035 vg_name = self.cfg.GetVGName()
3036 drbd_helper = self.cfg.GetDRBDHelper()
3037 cluster = self.cfg.GetClusterInfo()
3038 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3039 hypervisors = cluster.enabled_hypervisors
3040 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3042 i_non_redundant = [] # Non redundant instances
3043 i_non_a_balanced = [] # Non auto-balanced instances
3044 i_offline = 0 # Count of offline instances
3045 n_offline = 0 # Count of offline nodes
3046 n_drained = 0 # Count of nodes being drained
3047 node_vol_should = {}
3049 # FIXME: verify OS list
3052 filemap = _ComputeAncillaryFiles(cluster, False)
3054 # do local checksums
3055 master_node = self.master_node = self.cfg.GetMasterNode()
3056 master_ip = self.cfg.GetMasterIP()
3058 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3061 if self.cfg.GetUseExternalMipScript():
3062 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3064 node_verify_param = {
3065 constants.NV_FILELIST:
3066 utils.UniqueSequence(filename
3067 for files in filemap
3068 for filename in files),
3069 constants.NV_NODELIST:
3070 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3071 self.all_node_info.values()),
3072 constants.NV_HYPERVISOR: hypervisors,
3073 constants.NV_HVPARAMS:
3074 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3075 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3076 for node in node_data_list
3077 if not node.offline],
3078 constants.NV_INSTANCELIST: hypervisors,
3079 constants.NV_VERSION: None,
3080 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3081 constants.NV_NODESETUP: None,
3082 constants.NV_TIME: None,
3083 constants.NV_MASTERIP: (master_node, master_ip),
3084 constants.NV_OSLIST: None,
3085 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3086 constants.NV_USERSCRIPTS: user_scripts,
3089 if vg_name is not None:
3090 node_verify_param[constants.NV_VGLIST] = None
3091 node_verify_param[constants.NV_LVLIST] = vg_name
3092 node_verify_param[constants.NV_PVLIST] = [vg_name]
3093 node_verify_param[constants.NV_DRBDLIST] = None
3096 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3099 # FIXME: this needs to be changed per node-group, not cluster-wide
3101 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3102 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3103 bridges.add(default_nicpp[constants.NIC_LINK])
3104 for instance in self.my_inst_info.values():
3105 for nic in instance.nics:
3106 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3107 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3108 bridges.add(full_nic[constants.NIC_LINK])
3111 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3113 # Build our expected cluster state
3114 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3116 vm_capable=node.vm_capable))
3117 for node in node_data_list)
3121 for node in self.all_node_info.values():
3122 path = _SupportsOob(self.cfg, node)
3123 if path and path not in oob_paths:
3124 oob_paths.append(path)
3127 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3129 for instance in self.my_inst_names:
3130 inst_config = self.my_inst_info[instance]
3132 for nname in inst_config.all_nodes:
3133 if nname not in node_image:
3134 gnode = self.NodeImage(name=nname)
3135 gnode.ghost = (nname not in self.all_node_info)
3136 node_image[nname] = gnode
3138 inst_config.MapLVsByNode(node_vol_should)
3140 pnode = inst_config.primary_node
3141 node_image[pnode].pinst.append(instance)
3143 for snode in inst_config.secondary_nodes:
3144 nimg = node_image[snode]
3145 nimg.sinst.append(instance)
3146 if pnode not in nimg.sbp:
3147 nimg.sbp[pnode] = []
3148 nimg.sbp[pnode].append(instance)
3150 # At this point, we have the in-memory data structures complete,
3151 # except for the runtime information, which we'll gather next
3153 # Due to the way our RPC system works, exact response times cannot be
3154 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
# time before and after executing the request, we can at least have a time
# window against which each node's reported clock can be checked.
3157 nvinfo_starttime = time.time()
3158 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3160 self.cfg.GetClusterName())
3161 nvinfo_endtime = time.time()
3163 if self.extra_lv_nodes and vg_name is not None:
3165 self.rpc.call_node_verify(self.extra_lv_nodes,
3166 {constants.NV_LVLIST: vg_name},
3167 self.cfg.GetClusterName())
3169 extra_lv_nvinfo = {}
3171 all_drbd_map = self.cfg.ComputeDRBDMap()
feedback_fn("* Gathering disk information (%d nodes)" %
3174 len(self.my_node_names))
3175 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3178 feedback_fn("* Verifying configuration file consistency")
3180 # If not all nodes are being checked, we need to make sure the master node
3181 # and a non-checked vm_capable node are in the list.
3182 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3184 vf_nvinfo = all_nvinfo.copy()
3185 vf_node_info = list(self.my_node_info.values())
3186 additional_nodes = []
3187 if master_node not in self.my_node_info:
3188 additional_nodes.append(master_node)
3189 vf_node_info.append(self.all_node_info[master_node])
3190 # Add the first vm_capable node we find which is not included
3191 for node in absent_nodes:
3192 nodeinfo = self.all_node_info[node]
3193 if nodeinfo.vm_capable and not nodeinfo.offline:
3194 additional_nodes.append(node)
3195 vf_node_info.append(self.all_node_info[node])
3197 key = constants.NV_FILELIST
3198 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3199 {key: node_verify_param[key]},
3200 self.cfg.GetClusterName()))
3202 vf_nvinfo = all_nvinfo
3203 vf_node_info = self.my_node_info.values()
3205 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3207 feedback_fn("* Verifying node status")
3211 for node_i in node_data_list:
3213 nimg = node_image[node]
3217 feedback_fn("* Skipping offline node %s" % (node,))
3221 if node == master_node:
3223 elif node_i.master_candidate:
3224 ntype = "master candidate"
3225 elif node_i.drained:
3231 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3233 msg = all_nvinfo[node].fail_msg
3234 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3237 nimg.rpc_fail = True
3240 nresult = all_nvinfo[node].payload
3242 nimg.call_ok = self._VerifyNode(node_i, nresult)
3243 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3244 self._VerifyNodeNetwork(node_i, nresult)
3245 self._VerifyNodeUserScripts(node_i, nresult)
3246 self._VerifyOob(node_i, nresult)
3249 self._VerifyNodeLVM(node_i, nresult, vg_name)
3250 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3253 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3254 self._UpdateNodeInstances(node_i, nresult, nimg)
3255 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3256 self._UpdateNodeOS(node_i, nresult, nimg)
3258 if not nimg.os_fail:
3259 if refos_img is None:
3261 self._VerifyNodeOS(node_i, nimg, refos_img)
3262 self._VerifyNodeBridges(node_i, nresult, bridges)
# Check whether all running instances are primary for the node. (This
3265 # can no longer be done from _VerifyInstance below, since some of the
3266 # wrong instances could be from other node groups.)
3267 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3269 for inst in non_primary_inst:
3270 # FIXME: investigate best way to handle offline insts
3271 if inst.admin_state == constants.ADMINST_OFFLINE:
3273 feedback_fn("* Skipping offline instance %s" % inst.name)
3276 test = inst in self.all_inst_info
3277 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3278 "instance should not run on node %s", node_i.name)
3279 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3280 "node is running unknown instance %s", inst)
3282 for node, result in extra_lv_nvinfo.items():
3283 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3284 node_image[node], vg_name)
3286 feedback_fn("* Verifying instance status")
3287 for instance in self.my_inst_names:
3289 feedback_fn("* Verifying instance %s" % instance)
3290 inst_config = self.my_inst_info[instance]
3291 self._VerifyInstance(instance, inst_config, node_image,
3293 inst_nodes_offline = []
3295 pnode = inst_config.primary_node
3296 pnode_img = node_image[pnode]
3297 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3298 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3299 " primary node failed", instance)
3301 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3303 constants.CV_EINSTANCEBADNODE, instance,
3304 "instance is marked as running and lives on offline node %s",
3305 inst_config.primary_node)
3307 # If the instance is non-redundant we cannot survive losing its primary
3308 # node, so we are not N+1 compliant. On the other hand we have no disk
3309 # templates with more than one secondary so that situation is not well
3311 # FIXME: does not support file-backed instances
3312 if not inst_config.secondary_nodes:
3313 i_non_redundant.append(instance)
3315 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3316 constants.CV_EINSTANCELAYOUT,
3317 instance, "instance has multiple secondary nodes: %s",
3318 utils.CommaJoin(inst_config.secondary_nodes),
3319 code=self.ETYPE_WARNING)
3321 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3322 pnode = inst_config.primary_node
3323 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3324 instance_groups = {}
3326 for node in instance_nodes:
3327 instance_groups.setdefault(self.all_node_info[node].group,
3331 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3332 # Sort so that we always list the primary node first.
3333 for group, nodes in sorted(instance_groups.items(),
3334 key=lambda (_, nodes): pnode in nodes,
3337 self._ErrorIf(len(instance_groups) > 1,
3338 constants.CV_EINSTANCESPLITGROUPS,
3339 instance, "instance has primary and secondary nodes in"
3340 " different groups: %s", utils.CommaJoin(pretty_list),
3341 code=self.ETYPE_WARNING)
3343 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3344 i_non_a_balanced.append(instance)
3346 for snode in inst_config.secondary_nodes:
3347 s_img = node_image[snode]
3348 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3349 snode, "instance %s, connection to secondary node failed",
3353 inst_nodes_offline.append(snode)
3355 # warn that the instance lives on offline nodes
3356 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3357 "instance has offline secondary node(s) %s",
3358 utils.CommaJoin(inst_nodes_offline))
3359 # ... or ghost/non-vm_capable nodes
3360 for node in inst_config.all_nodes:
3361 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3362 instance, "instance lives on ghost node %s", node)
3363 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3364 instance, "instance lives on non-vm_capable node %s", node)
3366 feedback_fn("* Verifying orphan volumes")
3367 reserved = utils.FieldSet(*cluster.reserved_lvs)
3369 # We will get spurious "unknown volume" warnings if any node of this group
3370 # is secondary for an instance whose primary is in another group. To avoid
3371 # them, we find these instances and add their volumes to node_vol_should.
3372 for inst in self.all_inst_info.values():
3373 for secondary in inst.secondary_nodes:
3374 if (secondary in self.my_node_info
3375 and inst.name not in self.my_inst_info):
3376 inst.MapLVsByNode(node_vol_should)
3379 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3381 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3382 feedback_fn("* Verifying N+1 Memory redundancy")
3383 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3385 feedback_fn("* Other Notes")
3387 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3388 % len(i_non_redundant))
3390 if i_non_a_balanced:
3391 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3392 % len(i_non_a_balanced))
3395 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3398 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3401 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3405 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3406 """Analyze the post-hooks' result
3408 This method analyses the hook result, handles it, and sends some
3409 nicely-formatted feedback back to the user.
3411 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3412 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3413 @param hooks_results: the results of the multi-node hooks rpc call
@param feedback_fn: function used to send feedback back to the caller
3415 @param lu_result: previous Exec result
3416 @return: the new Exec result, based on the previous result
3420 # We only really run POST phase hooks, only for non-empty groups,
3421 # and are only interested in their results
3422 if not self.my_node_names:
3425 elif phase == constants.HOOKS_PHASE_POST:
3426 # Used to change hooks' output to proper indentation
3427 feedback_fn("* Hooks Results")
3428 assert hooks_results, "invalid result from hooks"
3430 for node_name in hooks_results:
3431 res = hooks_results[node_name]
3433 test = msg and not res.offline
3434 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3435 "Communication failure in hooks execution: %s", msg)
3436 if res.offline or msg:
3437 # No need to investigate payload if node is offline or gave
3440 for script, hkr, output in res.payload:
3441 test = hkr == constants.HKR_FAIL
3442 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3443 "Script %s failed, output:", script)
3445 output = self._HOOKS_INDENT_RE.sub(" ", output)
3446 feedback_fn("%s" % output)
3452 class LUClusterVerifyDisks(NoHooksLU):
3453 """Verifies the cluster disks status.
3458 def ExpandNames(self):
3459 self.share_locks = _ShareAll()
3460 self.needed_locks = {
3461 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3464 def Exec(self, feedback_fn):
3465 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3467 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3468 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3469 for group in group_names])
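# One single-opcode job per node group: the per-group verifications are
# independent of each other, so the job queue is free to run them in
# parallel, subject only to the usual locking.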
3472 class LUGroupVerifyDisks(NoHooksLU):
3473 """Verifies the status of all disks in a node group.
3478 def ExpandNames(self):
3479 # Raises errors.OpPrereqError on its own if group can't be found
3480 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3482 self.share_locks = _ShareAll()
3483 self.needed_locks = {
3484 locking.LEVEL_INSTANCE: [],
3485 locking.LEVEL_NODEGROUP: [],
3486 locking.LEVEL_NODE: [],
3489 def DeclareLocks(self, level):
3490 if level == locking.LEVEL_INSTANCE:
3491 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3493 # Lock instances optimistically, needs verification once node and group
3494 # locks have been acquired
3495 self.needed_locks[locking.LEVEL_INSTANCE] = \
3496 self.cfg.GetNodeGroupInstances(self.group_uuid)
3498 elif level == locking.LEVEL_NODEGROUP:
3499 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3501 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3502 set([self.group_uuid] +
3503 # Lock all groups used by instances optimistically; this requires
3504 # going via the node before it's locked, requiring verification
3507 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3508 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3510 elif level == locking.LEVEL_NODE:
3511 # This will only lock the nodes in the group to be verified which contain
3513 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3514 self._LockInstancesNodes()
3516 # Lock all nodes in group to be verified
3517 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3518 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3519 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3521 def CheckPrereq(self):
3522 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3523 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3524 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3526 assert self.group_uuid in owned_groups
3528 # Check if locked instances are still correct
3529 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3531 # Get instance information
3532 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3534 # Check if node groups for locked instances are still correct
3535 _CheckInstancesNodeGroups(self.cfg, self.instances,
3536 owned_groups, owned_nodes, self.group_uuid)
3538 def Exec(self, feedback_fn):
3539 """Verify integrity of cluster disks.
3541 @rtype: tuple of three items
3542 @return: a tuple of (dict of node-to-node_error, list of instances
which need activate-disks, dict of instance: (node, volume) for missing volumes)
3548 res_instances = set()
3551 nv_dict = _MapInstanceDisksToNodes([inst
3552 for inst in self.instances.values()
3553 if inst.admin_state == constants.ADMINST_UP])
3556 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3557 set(self.cfg.GetVmCapableNodeList()))
3559 node_lvs = self.rpc.call_lv_list(nodes, [])
3561 for (node, node_res) in node_lvs.items():
3562 if node_res.offline:
3565 msg = node_res.fail_msg
3567 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3568 res_nodes[node] = msg
3571 for lv_name, (_, _, lv_online) in node_res.payload.items():
3572 inst = nv_dict.pop((node, lv_name), None)
3573 if not (lv_online or inst is None):
3574 res_instances.add(inst)
3576 # any leftover items in nv_dict are missing LVs, let's arrange the data
3578 for key, inst in nv_dict.iteritems():
3579 res_missing.setdefault(inst, []).append(list(key))
3581 return (res_nodes, list(res_instances), res_missing)
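# Illustrative return value (hypothetical names):
#   ({"node3": "rpc error"}, ["inst1"], {"inst2": [["node1", "xenvg/disk0"]]})
# i.e. nodes that could not be queried, instances needing activate-disks,
# and instances with logical volumes missing from their nodes.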
3584 class LUClusterRepairDiskSizes(NoHooksLU):
3585 """Verifies the cluster disks sizes.
3590 def ExpandNames(self):
3591 if self.op.instances:
3592 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3593 self.needed_locks = {
3594 locking.LEVEL_NODE_RES: [],
3595 locking.LEVEL_INSTANCE: self.wanted_names,
3597 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3599 self.wanted_names = None
3600 self.needed_locks = {
3601 locking.LEVEL_NODE_RES: locking.ALL_SET,
3602 locking.LEVEL_INSTANCE: locking.ALL_SET,
3604 self.share_locks = {
3605 locking.LEVEL_NODE_RES: 1,
3606 locking.LEVEL_INSTANCE: 0,
3609 def DeclareLocks(self, level):
3610 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3611 self._LockInstancesNodes(primary_only=True, level=level)
3613 def CheckPrereq(self):
3614 """Check prerequisites.
3616 This only checks the optional instance list against the existing names.
3619 if self.wanted_names is None:
3620 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3622 self.wanted_instances = \
3623 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3625 def _EnsureChildSizes(self, disk):
3626 """Ensure children of the disk have the needed disk size.
3628 This is valid mainly for DRBD8 and fixes an issue where the
3629 children have smaller disk size.
3631 @param disk: an L{ganeti.objects.Disk} object
3634 if disk.dev_type == constants.LD_DRBD8:
3635 assert disk.children, "Empty children for DRBD8?"
3636 fchild = disk.children[0]
3637 mismatch = fchild.size < disk.size
3639 self.LogInfo("Child disk has size %d, parent %d, fixing",
3640 fchild.size, disk.size)
3641 fchild.size = disk.size
3643 # and we recurse on this child only, not on the metadev
3644 return self._EnsureChildSizes(fchild) or mismatch
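# Only the data child (children[0]) is recursed into; the DRBD metadata
# device deliberately keeps its own size. The boolean result tells Exec()
# whether something was resized and the configuration needs to be written.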
3648 def Exec(self, feedback_fn):
3649 """Verify the size of cluster disks.
3652 # TODO: check child disks too
3653 # TODO: check differences in size between primary/secondary nodes
3655 for instance in self.wanted_instances:
3656 pnode = instance.primary_node
3657 if pnode not in per_node_disks:
3658 per_node_disks[pnode] = []
3659 for idx, disk in enumerate(instance.disks):
3660 per_node_disks[pnode].append((instance, idx, disk))
3662 assert not (frozenset(per_node_disks.keys()) -
3663 self.owned_locks(locking.LEVEL_NODE_RES)), \
3664 "Not owning correct locks"
3665 assert not self.owned_locks(locking.LEVEL_NODE)
3668 for node, dskl in per_node_disks.items():
3669 newl = [v[2].Copy() for v in dskl]
3671 self.cfg.SetDiskID(dsk, node)
3672 result = self.rpc.call_blockdev_getsize(node, newl)
3674 self.LogWarning("Failure in blockdev_getsize call to node"
3675 " %s, ignoring", node)
3677 if len(result.payload) != len(dskl):
logging.warning("Invalid result from node %s: len(dskl)=%d,"
3679 " result.payload=%s", node, len(dskl), result.payload)
3680 self.LogWarning("Invalid result from node %s, ignoring node results",
3683 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3685 self.LogWarning("Disk %d of instance %s did not return size"
3686 " information, ignoring", idx, instance.name)
3688 if not isinstance(size, (int, long)):
3689 self.LogWarning("Disk %d of instance %s did not return valid"
3690 " size information, ignoring", idx, instance.name)
3693 if size != disk.size:
3694 self.LogInfo("Disk %d of instance %s has mismatched size,"
3695 " correcting: recorded %d, actual %d", idx,
3696 instance.name, disk.size, size)
3698 self.cfg.Update(instance, feedback_fn)
3699 changed.append((instance.name, idx, size))
3700 if self._EnsureChildSizes(disk):
3701 self.cfg.Update(instance, feedback_fn)
3702 changed.append((instance.name, idx, disk.size))
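# Illustrative return value (hypothetical names and sizes): the Exec method
# above yields one (instance_name, disk_index, new_size_in_MiB) tuple per
# corrected disk, e.g. [("inst1.example.com", 0, 10240)].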
3706 class LUClusterRename(LogicalUnit):
3707 """Rename the cluster.
3709 """
3710 HPATH = "cluster-rename"
3711 HTYPE = constants.HTYPE_CLUSTER
3713 def BuildHooksEnv(self):
3714 """Build hooks env.
3716 """
3717 return {
3718 "OP_TARGET": self.cfg.GetClusterName(),
3719 "NEW_NAME": self.op.name,
3720 }
3722 def BuildHooksNodes(self):
3723 """Build hooks nodes.
3725 """
3726 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3728 def CheckPrereq(self):
3729 """Verify that the passed name is a valid one.
3732 hostname = netutils.GetHostname(name=self.op.name,
3733 family=self.cfg.GetPrimaryIPFamily())
3735 new_name = hostname.name
3736 self.ip = new_ip = hostname.ip
3737 old_name = self.cfg.GetClusterName()
3738 old_ip = self.cfg.GetMasterIP()
3739 if new_name == old_name and new_ip == old_ip:
3740 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3741 " cluster has changed",
3743 if new_ip != old_ip:
3744 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3745 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3746 " reachable on the network" %
3747 new_ip, errors.ECODE_NOTUNIQUE)
3749 self.op.name = new_name
3751 def Exec(self, feedback_fn):
3752 """Rename the cluster.
3754 """
3755 clustername = self.op.name
3756 new_ip = self.ip
3758 # shutdown the master IP
3759 master_params = self.cfg.GetMasterNetworkParameters()
3760 ems = self.cfg.GetUseExternalMipScript()
3761 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3762 master_params, ems)
3763 result.Raise("Could not disable the master role")
3765 try:
3766 cluster = self.cfg.GetClusterInfo()
3767 cluster.cluster_name = clustername
3768 cluster.master_ip = new_ip
3769 self.cfg.Update(cluster, feedback_fn)
3771 # update the known hosts file
3772 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3773 node_list = self.cfg.GetOnlineNodeList()
3774 try:
3775 node_list.remove(master_params.name)
3776 except ValueError:
3777 pass
3778 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3779 finally:
3780 master_params.ip = new_ip
3781 result = self.rpc.call_node_activate_master_ip(master_params.name,
3782 master_params, ems)
3783 msg = result.fail_msg
3784 if msg:
3785 self.LogWarning("Could not re-enable the master role on"
3786 " the master, please restart manually: %s", msg)
3788 return clustername
3791 def _ValidateNetmask(cfg, netmask):
3792 """Checks if a netmask is valid.
3794 @type cfg: L{config.ConfigWriter}
3795 @param cfg: The cluster configuration
3796 @type netmask: int
3797 @param netmask: the netmask to be verified
3798 @raise errors.OpPrereqError: if the validation fails
3800 """
3801 ip_family = cfg.GetPrimaryIPFamily()
3802 try:
3803 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3804 except errors.ProgrammerError:
3805 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3806 ip_family, errors.ECODE_INVAL)
3807 if not ipcls.ValidateNetmask(netmask):
3808 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3809 (netmask), errors.ECODE_INVAL)
3812 class LUClusterSetParams(LogicalUnit):
3813 """Change the parameters of the cluster.
3815 """
3816 HPATH = "cluster-modify"
3817 HTYPE = constants.HTYPE_CLUSTER
3818 REQ_BGL = False
3820 def CheckArguments(self):
3821 """Check parameters
3823 """
3824 if self.op.uid_pool:
3825 uidpool.CheckUidPool(self.op.uid_pool)
3827 if self.op.add_uids:
3828 uidpool.CheckUidPool(self.op.add_uids)
3830 if self.op.remove_uids:
3831 uidpool.CheckUidPool(self.op.remove_uids)
3833 if self.op.master_netmask is not None:
3834 _ValidateNetmask(self.cfg, self.op.master_netmask)
3836 if self.op.diskparams:
3837 for dt_params in self.op.diskparams.values():
3838 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3840 def ExpandNames(self):
3841 # FIXME: in the future maybe other cluster params won't require checking on
3842 # all nodes to be modified.
3843 self.needed_locks = {
3844 locking.LEVEL_NODE: locking.ALL_SET,
3845 locking.LEVEL_INSTANCE: locking.ALL_SET,
3846 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3847 }
3848 self.share_locks = {
3849 locking.LEVEL_NODE: 1,
3850 locking.LEVEL_INSTANCE: 1,
3851 locking.LEVEL_NODEGROUP: 1,
3852 }
3854 def BuildHooksEnv(self):
3855 """Build hooks env.
3857 """
3858 return {
3859 "OP_TARGET": self.cfg.GetClusterName(),
3860 "NEW_VG_NAME": self.op.vg_name,
3861 }
3863 def BuildHooksNodes(self):
3864 """Build hooks nodes.
3866 """
3867 mn = self.cfg.GetMasterNode()
3868 return ([mn], [mn])
3870 def CheckPrereq(self):
3871 """Check prerequisites.
3873 This checks whether the given params don't conflict and
3874 if the given volume group is valid.
3876 """
3877 if self.op.vg_name is not None and not self.op.vg_name:
3878 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3879 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3880 " instances exist", errors.ECODE_INVAL)
3882 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3883 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3884 raise errors.OpPrereqError("Cannot disable drbd helper while"
3885 " drbd-based instances exist",
3888 node_list = self.owned_locks(locking.LEVEL_NODE)
3890 # if vg_name not None, checks given volume group on all nodes
3891 if self.op.vg_name:
3892 vglist = self.rpc.call_vg_list(node_list)
3893 for node in node_list:
3894 msg = vglist[node].fail_msg
3895 if msg:
3896 # ignoring down node
3897 self.LogWarning("Error while gathering data on node %s"
3898 " (ignoring node): %s", node, msg)
3899 continue
3900 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3901 self.op.vg_name,
3902 constants.MIN_VG_SIZE)
3903 if vgstatus:
3904 raise errors.OpPrereqError("Error on node '%s': %s" %
3905 (node, vgstatus), errors.ECODE_ENVIRON)
3907 if self.op.drbd_helper:
3908 # checks given drbd helper on all nodes
3909 helpers = self.rpc.call_drbd_helper(node_list)
3910 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3911 if ninfo.offline:
3912 self.LogInfo("Not checking drbd helper on offline node %s", node)
3913 continue
3914 msg = helpers[node].fail_msg
3915 if msg:
3916 raise errors.OpPrereqError("Error checking drbd helper on node"
3917 " '%s': %s" % (node, msg),
3918 errors.ECODE_ENVIRON)
3919 node_helper = helpers[node].payload
3920 if node_helper != self.op.drbd_helper:
3921 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3922 (node, node_helper), errors.ECODE_ENVIRON)
3924 self.cluster = cluster = self.cfg.GetClusterInfo()
3925 # validate params changes
3926 if self.op.beparams:
3927 objects.UpgradeBeParams(self.op.beparams)
3928 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3929 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3931 if self.op.ndparams:
3932 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3933 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3935 # TODO: we need a more general way to handle resetting
3936 # cluster-level parameters to default values
3937 if self.new_ndparams["oob_program"] == "":
3938 self.new_ndparams["oob_program"] = \
3939 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3941 if self.op.hv_state:
3942 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3943 self.cluster.hv_state_static)
3944 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3945 for hv, values in new_hv_state.items())
3947 if self.op.disk_state:
3948 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3949 self.cluster.disk_state_static)
3950 self.new_disk_state = \
3951 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3952 for name, values in svalues.items()))
3953 for storage, svalues in new_disk_state.items())
3955 if self.op.ipolicy:
3956 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3957 group_policy=False)
3959 all_instances = self.cfg.GetAllInstancesInfo().values()
3960 violations = set()
3961 for group in self.cfg.GetAllNodeGroupsInfo().values():
3962 instances = frozenset([inst for inst in all_instances
3963 if compat.any(node in group.members
3964 for node in inst.all_nodes)])
3965 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3966 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3967 group),
3968 new_ipolicy, instances)
3970 violations.update(new)
3972 if violations:
3973 self.LogWarning("After the ipolicy change the following instances"
3974 " violate them: %s",
3975 utils.CommaJoin(violations))
3977 if self.op.nicparams:
3978 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3979 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3980 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3981 nic_errors = []
3983 # check all instances for consistency
3984 for instance in self.cfg.GetAllInstancesInfo().values():
3985 for nic_idx, nic in enumerate(instance.nics):
3986 params_copy = copy.deepcopy(nic.nicparams)
3987 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3989 # check parameter syntax
3990 try:
3991 objects.NIC.CheckParameterSyntax(params_filled)
3992 except errors.ConfigurationError, err:
3993 nic_errors.append("Instance %s, nic/%d: %s" %
3994 (instance.name, nic_idx, err))
3996 # if we're moving instances to routed, check that they have an ip
3997 target_mode = params_filled[constants.NIC_MODE]
3998 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3999 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4000 " address" % (instance.name, nic_idx))
4001 if nic_errors:
4002 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4003 "\n".join(nic_errors))
4005 # hypervisor list/parameters
4006 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4007 if self.op.hvparams:
4008 for hv_name, hv_dict in self.op.hvparams.items():
4009 if hv_name not in self.new_hvparams:
4010 self.new_hvparams[hv_name] = hv_dict
4011 else:
4012 self.new_hvparams[hv_name].update(hv_dict)
4014 # disk template parameters
4015 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4016 if self.op.diskparams:
4017 for dt_name, dt_params in self.op.diskparams.items():
4018 if dt_name not in self.new_diskparams:
4019 self.new_diskparams[dt_name] = dt_params
4020 else:
4021 self.new_diskparams[dt_name].update(dt_params)
4023 # os hypervisor parameters
4024 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4025 if self.op.os_hvp:
4026 for os_name, hvs in self.op.os_hvp.items():
4027 if os_name not in self.new_os_hvp:
4028 self.new_os_hvp[os_name] = hvs
4029 else:
4030 for hv_name, hv_dict in hvs.items():
4031 if hv_name not in self.new_os_hvp[os_name]:
4032 self.new_os_hvp[os_name][hv_name] = hv_dict
4033 else:
4034 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4036 # os parameters
4037 self.new_osp = objects.FillDict(cluster.osparams, {})
4038 if self.op.osparams:
4039 for os_name, osp in self.op.osparams.items():
4040 if os_name not in self.new_osp:
4041 self.new_osp[os_name] = {}
4043 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4044 use_none=True)
4046 if not self.new_osp[os_name]:
4047 # we removed all parameters
4048 del self.new_osp[os_name]
4049 else:
4050 # check the parameter validity (remote check)
4051 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4052 os_name, self.new_osp[os_name])
4054 # changes to the hypervisor list
4055 if self.op.enabled_hypervisors is not None:
4056 self.hv_list = self.op.enabled_hypervisors
4057 for hv in self.hv_list:
4058 # if the hypervisor doesn't already exist in the cluster
4059 # hvparams, we initialize it to empty, and then (in both
4060 # cases) we make sure to fill the defaults, as we might not
4061 # have a complete defaults list if the hypervisor wasn't
4062 # enabled before
4063 if hv not in new_hvp:
4064 new_hvp[hv] = {}
4065 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4066 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4067 else:
4068 self.hv_list = cluster.enabled_hypervisors
4070 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4071 # either the enabled list has changed, or the parameters have, validate
4072 for hv_name, hv_params in self.new_hvparams.items():
4073 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4074 (self.op.enabled_hypervisors and
4075 hv_name in self.op.enabled_hypervisors)):
4076 # either this is a new hypervisor, or its parameters have changed
4077 hv_class = hypervisor.GetHypervisor(hv_name)
4078 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4079 hv_class.CheckParameterSyntax(hv_params)
4080 _CheckHVParams(self, node_list, hv_name, hv_params)
4082 if self.op.os_hvp:
4083 # no need to check any newly-enabled hypervisors, since the
4084 # defaults have already been checked in the above code-block
4085 for os_name, os_hvp in self.new_os_hvp.items():
4086 for hv_name, hv_params in os_hvp.items():
4087 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4088 # we need to fill in the new os_hvp on top of the actual hv_p
4089 cluster_defaults = self.new_hvparams.get(hv_name, {})
4090 new_osp = objects.FillDict(cluster_defaults, hv_params)
4091 hv_class = hypervisor.GetHypervisor(hv_name)
4092 hv_class.CheckParameterSyntax(new_osp)
4093 _CheckHVParams(self, node_list, hv_name, new_osp)
4095 if self.op.default_iallocator:
4096 alloc_script = utils.FindFile(self.op.default_iallocator,
4097 constants.IALLOCATOR_SEARCH_PATH,
4098 os.path.isfile)
4099 if alloc_script is None:
4100 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4101 " specified" % self.op.default_iallocator,
4102 errors.ECODE_INVAL)
4104 def Exec(self, feedback_fn):
4105 """Change the parameters of the cluster.
4107 """
4108 if self.op.vg_name is not None:
4109 new_volume = self.op.vg_name
4110 if not new_volume:
4111 new_volume = None
4112 if new_volume != self.cfg.GetVGName():
4113 self.cfg.SetVGName(new_volume)
4114 else:
4115 feedback_fn("Cluster LVM configuration already in desired"
4116 " state, not changing")
4117 if self.op.drbd_helper is not None:
4118 new_helper = self.op.drbd_helper
4119 if not new_helper:
4120 new_helper = None
4121 if new_helper != self.cfg.GetDRBDHelper():
4122 self.cfg.SetDRBDHelper(new_helper)
4123 else:
4124 feedback_fn("Cluster DRBD helper already in desired state,"
4125 " not changing")
4126 if self.op.hvparams:
4127 self.cluster.hvparams = self.new_hvparams
4128 if self.op.os_hvp:
4129 self.cluster.os_hvp = self.new_os_hvp
4130 if self.op.enabled_hypervisors is not None:
4131 self.cluster.hvparams = self.new_hvparams
4132 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4133 if self.op.beparams:
4134 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4135 if self.op.nicparams:
4136 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4137 if self.op.ipolicy:
4138 self.cluster.ipolicy = self.new_ipolicy
4139 if self.op.osparams:
4140 self.cluster.osparams = self.new_osp
4141 if self.op.ndparams:
4142 self.cluster.ndparams = self.new_ndparams
4143 if self.op.diskparams:
4144 self.cluster.diskparams = self.new_diskparams
4145 if self.op.hv_state:
4146 self.cluster.hv_state_static = self.new_hv_state
4147 if self.op.disk_state:
4148 self.cluster.disk_state_static = self.new_disk_state
4150 if self.op.candidate_pool_size is not None:
4151 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4152 # we need to update the pool size here, otherwise the save will fail
4153 _AdjustCandidatePool(self, [])
4155 if self.op.maintain_node_health is not None:
4156 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4157 feedback_fn("Note: CONFD was disabled at build time, node health"
4158 " maintenance is not useful (still enabling it)")
4159 self.cluster.maintain_node_health = self.op.maintain_node_health
4161 if self.op.prealloc_wipe_disks is not None:
4162 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4164 if self.op.add_uids is not None:
4165 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4167 if self.op.remove_uids is not None:
4168 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4170 if self.op.uid_pool is not None:
4171 self.cluster.uid_pool = self.op.uid_pool
4173 if self.op.default_iallocator is not None:
4174 self.cluster.default_iallocator = self.op.default_iallocator
4176 if self.op.reserved_lvs is not None:
4177 self.cluster.reserved_lvs = self.op.reserved_lvs
4179 if self.op.use_external_mip_script is not None:
4180 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4182 def helper_os(aname, mods, desc):
4183 desc += " OS list"
4184 lst = getattr(self.cluster, aname)
4185 for key, val in mods:
4186 if key == constants.DDM_ADD:
4187 if val in lst:
4188 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4189 else:
4190 lst.append(val)
4191 elif key == constants.DDM_REMOVE:
4192 if val in lst:
4193 lst.remove(val)
4194 else:
4195 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4196 else:
4197 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4199 if self.op.hidden_os:
4200 helper_os("hidden_os", self.op.hidden_os, "hidden")
4202 if self.op.blacklisted_os:
4203 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4205 if self.op.master_netdev:
4206 master_params = self.cfg.GetMasterNetworkParameters()
4207 ems = self.cfg.GetUseExternalMipScript()
4208 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4209 self.cluster.master_netdev)
4210 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4211 master_params, ems)
4212 result.Raise("Could not disable the master ip")
4213 feedback_fn("Changing master_netdev from %s to %s" %
4214 (master_params.netdev, self.op.master_netdev))
4215 self.cluster.master_netdev = self.op.master_netdev
4217 if self.op.master_netmask:
4218 master_params = self.cfg.GetMasterNetworkParameters()
4219 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4220 result = self.rpc.call_node_change_master_netmask(master_params.name,
4221 master_params.netmask,
4222 self.op.master_netmask,
4223 master_params.ip,
4224 master_params.netdev)
4225 if result.fail_msg:
4226 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4227 feedback_fn(msg)
4229 self.cluster.master_netmask = self.op.master_netmask
4231 self.cfg.Update(self.cluster, feedback_fn)
4233 if self.op.master_netdev:
4234 master_params = self.cfg.GetMasterNetworkParameters()
4235 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4236 self.op.master_netdev)
4237 ems = self.cfg.GetUseExternalMipScript()
4238 result = self.rpc.call_node_activate_master_ip(master_params.name,
4239 master_params, ems)
4240 if result.fail_msg:
4241 self.LogWarning("Could not re-enable the master ip on"
4242 " the master, please restart manually: %s",
4243 result.fail_msg)
4246 def _UploadHelper(lu, nodes, fname):
4247 """Helper for uploading a file and showing warnings.
4250 if os.path.exists(fname):
4251 result = lu.rpc.call_upload_file(nodes, fname)
4252 for to_node, to_result in result.items():
4253 msg = to_result.fail_msg
4255 msg = ("Copy of file %s to node %s failed: %s" %
4256 (fname, to_node, msg))
4257 lu.proc.LogWarning(msg)
4260 def _ComputeAncillaryFiles(cluster, redist):
4261 """Compute files external to Ganeti which need to be consistent.
4263 @type redist: boolean
4264 @param redist: Whether to include files which need to be redistributed
4266 """
4267 # Compute files for all nodes
4268 files_all = set([
4269 constants.SSH_KNOWN_HOSTS_FILE,
4270 constants.CONFD_HMAC_KEY,
4271 constants.CLUSTER_DOMAIN_SECRET_FILE,
4272 constants.SPICE_CERT_FILE,
4273 constants.SPICE_CACERT_FILE,
4274 constants.RAPI_USERS_FILE,
4275 ])
4277 if not redist:
4278 files_all.update(constants.ALL_CERT_FILES)
4279 files_all.update(ssconf.SimpleStore().GetFileList())
4280 else:
4281 # we need to ship at least the RAPI certificate
4282 files_all.add(constants.RAPI_CERT_FILE)
4284 if cluster.modify_etc_hosts:
4285 files_all.add(constants.ETC_HOSTS)
4287 # Files which are optional, these must:
4288 # - be present in one other category as well
4289 # - either exist or not exist on all nodes of that category (mc, vm all)
4290 files_opt = set([
4291 constants.RAPI_USERS_FILE,
4292 ])
4294 # Files which should only be on master candidates
4295 files_mc = set()
4297 if not redist:
4298 files_mc.add(constants.CLUSTER_CONF_FILE)
4300 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4301 # replication
4302 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4304 # Files which should only be on VM-capable nodes
4305 files_vm = set(filename
4306 for hv_name in cluster.enabled_hypervisors
4307 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4309 files_opt |= set(filename
4310 for hv_name in cluster.enabled_hypervisors
4311 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4313 # Filenames in each category must be unique
4314 all_files_set = files_all | files_mc | files_vm
4315 assert (len(all_files_set) ==
4316 sum(map(len, [files_all, files_mc, files_vm]))), \
4317 "Found file listed in more than one file list"
4319 # Optional files must be present in one other category
4320 assert all_files_set.issuperset(files_opt), \
4321 "Optional file not in a different required list"
4323 return (files_all, files_opt, files_mc, files_vm)
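# Usage sketch (assumed values): callers unpack the four categories, e.g.
#   (files_all, files_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(cluster, True)
# For redistribution (redist=True) files_mc stays empty, since the cluster
# configuration file is only added when redist is False.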
4326 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4327 """Distribute additional files which are part of the cluster configuration.
4329 ConfigWriter takes care of distributing the config and ssconf files, but
4330 there are more files which should be distributed to all nodes. This function
4331 makes sure those are copied.
4333 @param lu: calling logical unit
4334 @param additional_nodes: list of nodes not in the config to distribute to
4335 @type additional_vm: boolean
4336 @param additional_vm: whether the additional nodes are vm-capable or not
4338 """
4339 # Gather target nodes
4340 cluster = lu.cfg.GetClusterInfo()
4341 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4343 online_nodes = lu.cfg.GetOnlineNodeList()
4344 vm_nodes = lu.cfg.GetVmCapableNodeList()
4346 if additional_nodes is not None:
4347 online_nodes.extend(additional_nodes)
4348 if additional_vm:
4349 vm_nodes.extend(additional_nodes)
4351 # Never distribute to master node
4352 for nodelist in [online_nodes, vm_nodes]:
4353 if master_info.name in nodelist:
4354 nodelist.remove(master_info.name)
4356 # Gather file lists
4357 (files_all, _, files_mc, files_vm) = \
4358 _ComputeAncillaryFiles(cluster, True)
4360 # Never re-distribute configuration file from here
4361 assert not (constants.CLUSTER_CONF_FILE in files_all or
4362 constants.CLUSTER_CONF_FILE in files_vm)
4363 assert not files_mc, "Master candidates not handled in this function"
4365 filemap = [
4366 (online_nodes, files_all),
4367 (vm_nodes, files_vm),
4368 ]
4370 # Upload the files
4371 for (node_list, files) in filemap:
4372 for fname in files:
4373 _UploadHelper(lu, node_list, fname)
4376 class LUClusterRedistConf(NoHooksLU):
4377 """Force the redistribution of cluster configuration.
4379 This is a very simple LU.
4381 """
4382 REQ_BGL = False
4384 def ExpandNames(self):
4385 self.needed_locks = {
4386 locking.LEVEL_NODE: locking.ALL_SET,
4387 }
4388 self.share_locks[locking.LEVEL_NODE] = 1
4390 def Exec(self, feedback_fn):
4391 """Redistribute the configuration.
4394 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4395 _RedistributeAncillaryFiles(self)
4398 class LUClusterActivateMasterIp(NoHooksLU):
4399 """Activate the master IP on the master node.
4402 def Exec(self, feedback_fn):
4403 """Activate the master IP.
4406 master_params = self.cfg.GetMasterNetworkParameters()
4407 ems = self.cfg.GetUseExternalMipScript()
4408 result = self.rpc.call_node_activate_master_ip(master_params.name,
4410 result.Raise("Could not activate the master IP")
4413 class LUClusterDeactivateMasterIp(NoHooksLU):
4414 """Deactivate the master IP on the master node.
4417 def Exec(self, feedback_fn):
4418 """Deactivate the master IP.
4421 master_params = self.cfg.GetMasterNetworkParameters()
4422 ems = self.cfg.GetUseExternalMipScript()
4423 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4425 result.Raise("Could not deactivate the master IP")
4428 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4429 """Sleep and poll for an instance's disk to sync.
4431 """
4432 if not instance.disks or disks is not None and not disks:
4433 return True
4435 disks = _ExpandCheckDisks(instance, disks)
4437 if not oneshot:
4438 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4440 node = instance.primary_node
4442 for dev in disks:
4443 lu.cfg.SetDiskID(dev, node)
4445 # TODO: Convert to utils.Retry
4447 retries = 0
4448 degr_retries = 10 # in seconds, as we sleep 1 second each time
4449 while True:
4450 max_time = 0
4451 done = True
4452 cumul_degraded = False
4453 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4454 msg = rstats.fail_msg
4455 if msg:
4456 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4457 retries += 1
4458 if retries >= 10:
4459 raise errors.RemoteError("Can't contact node %s for mirror data,"
4460 " aborting." % node)
4461 time.sleep(6)
4462 continue
4463 rstats = rstats.payload
4464 retries = 0
4465 for i, mstat in enumerate(rstats):
4466 if mstat is None:
4467 lu.LogWarning("Can't compute data for node %s/%s",
4468 node, disks[i].iv_name)
4469 continue
4471 cumul_degraded = (cumul_degraded or
4472 (mstat.is_degraded and mstat.sync_percent is None))
4473 if mstat.sync_percent is not None:
4474 done = False
4475 if mstat.estimated_time is not None:
4476 rem_time = ("%s remaining (estimated)" %
4477 utils.FormatSeconds(mstat.estimated_time))
4478 max_time = mstat.estimated_time
4479 else:
4480 rem_time = "no time estimate"
4481 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4482 (disks[i].iv_name, mstat.sync_percent, rem_time))
4484 # if we're done but degraded, let's do a few small retries, to
4485 # make sure we see a stable and not transient situation; therefore
4486 # we force restart of the loop
4487 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4488 logging.info("Degraded disks found, %d retries left", degr_retries)
4489 degr_retries -= 1
4490 time.sleep(1)
4491 continue
4493 if done or oneshot:
4494 break
4496 time.sleep(min(60, max_time))
4498 if done:
4499 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4500 return not cumul_degraded
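# Typical call pattern (a sketch; actual call sites live elsewhere in this
# module): after creating or activating mirrored disks an LU waits for the
# sync to settle and treats a False result as "still degraded":
#   if not _WaitForSync(self, instance):
#     raise errors.OpExecError("Disks are degraded after initial sync")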
4503 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4504 """Check that mirrors are not degraded.
4506 The ldisk parameter, if True, will change the test from the
4507 is_degraded attribute (which represents overall non-ok status for
4508 the device(s)) to the ldisk (representing the local storage status).
4510 """
4511 lu.cfg.SetDiskID(dev, node)
4513 result = True
4515 if on_primary or dev.AssembleOnSecondary():
4516 rstats = lu.rpc.call_blockdev_find(node, dev)
4517 msg = rstats.fail_msg
4518 if msg:
4519 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4520 result = False
4521 elif not rstats.payload:
4522 lu.LogWarning("Can't find disk on node %s", node)
4523 result = False
4524 else:
4525 if ldisk:
4526 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4527 else:
4528 result = result and not rstats.payload.is_degraded
4530 if dev.children:
4531 for child in dev.children:
4532 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4534 return result
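# Usage sketch (illustrative): ldisk=True narrows the check to the local
# storage status of a DRBD device, which is what e.g. replace-disks cares
# about:
#   if not _CheckDiskConsistency(self, dev, node, False, ldisk=True):
#     raise errors.OpExecError("Disk %s is degraded on %s" %
#                              (dev.iv_name, node))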
4537 class LUOobCommand(NoHooksLU):
4538 """Logical unit for OOB handling.
4540 """
4541 REQ_BGL = False
4542 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4544 def ExpandNames(self):
4545 """Gather locks we need.
4547 """
4548 if self.op.node_names:
4549 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4550 lock_names = self.op.node_names
4551 else:
4552 lock_names = locking.ALL_SET
4554 self.needed_locks = {
4555 locking.LEVEL_NODE: lock_names,
4556 }
4558 def CheckPrereq(self):
4559 """Check prerequisites.
4561 This checks:
4562 - the node exists in the configuration
4563 - OOB is supported
4565 Any errors are signaled by raising errors.OpPrereqError.
4567 """
4568 self.nodes = []
4569 self.master_node = self.cfg.GetMasterNode()
4571 assert self.op.power_delay >= 0.0
4573 if self.op.node_names:
4574 if (self.op.command in self._SKIP_MASTER and
4575 self.master_node in self.op.node_names):
4576 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4577 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4579 if master_oob_handler:
4580 additional_text = ("run '%s %s %s' if you want to operate on the"
4581 " master regardless") % (master_oob_handler,
4582 self.op.command,
4583 self.master_node)
4584 else:
4585 additional_text = "it does not support out-of-band operations"
4587 raise errors.OpPrereqError(("Operating on the master node %s is not"
4588 " allowed for %s; %s") %
4589 (self.master_node, self.op.command,
4590 additional_text), errors.ECODE_INVAL)
4591 else:
4592 self.op.node_names = self.cfg.GetNodeList()
4593 if self.op.command in self._SKIP_MASTER:
4594 self.op.node_names.remove(self.master_node)
4596 if self.op.command in self._SKIP_MASTER:
4597 assert self.master_node not in self.op.node_names
4599 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4600 if node is None:
4601 raise errors.OpPrereqError("Node %s not found" % node_name,
4602 errors.ECODE_NOENT)
4603 else:
4604 self.nodes.append(node)
4606 if (not self.op.ignore_status and
4607 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4608 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4609 " not marked offline") % node_name,
4610 errors.ECODE_STATE)
4612 def Exec(self, feedback_fn):
4613 """Execute OOB and return result if we expect any.
4615 """
4616 master_node = self.master_node
4617 ret = []
4619 for idx, node in enumerate(utils.NiceSort(self.nodes,
4620 key=lambda node: node.name)):
4621 node_entry = [(constants.RS_NORMAL, node.name)]
4622 ret.append(node_entry)
4624 oob_program = _SupportsOob(self.cfg, node)
4626 if not oob_program:
4627 node_entry.append((constants.RS_UNAVAIL, None))
4628 continue
4630 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4631 self.op.command, oob_program, node.name)
4632 result = self.rpc.call_run_oob(master_node, oob_program,
4633 self.op.command, node.name,
4634 self.op.timeout)
4636 if result.fail_msg:
4637 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4638 node.name, result.fail_msg)
4639 node_entry.append((constants.RS_NODATA, None))
4640 else:
4641 try:
4642 self._CheckPayload(result)
4643 except errors.OpExecError, err:
4644 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4645 node.name, err)
4646 node_entry.append((constants.RS_NODATA, None))
4647 else:
4648 if self.op.command == constants.OOB_HEALTH:
4649 # For health we should log important events
4650 for item, status in result.payload:
4651 if status in [constants.OOB_STATUS_WARNING,
4652 constants.OOB_STATUS_CRITICAL]:
4653 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4654 item, node.name, status)
4656 if self.op.command == constants.OOB_POWER_ON:
4657 node.powered = True
4658 elif self.op.command == constants.OOB_POWER_OFF:
4659 node.powered = False
4660 elif self.op.command == constants.OOB_POWER_STATUS:
4661 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4662 if powered != node.powered:
4663 logging.warning(("Recorded power state (%s) of node '%s' does not"
4664 " match actual power state (%s)"), node.powered,
4665 node.name, powered)
4667 # For configuration changing commands we should update the node
4668 if self.op.command in (constants.OOB_POWER_ON,
4669 constants.OOB_POWER_OFF):
4670 self.cfg.Update(node, feedback_fn)
4672 node_entry.append((constants.RS_NORMAL, result.payload))
4674 if (self.op.command == constants.OOB_POWER_ON and
4675 idx < len(self.nodes) - 1):
4676 time.sleep(self.op.power_delay)
4678 return ret
4680 def _CheckPayload(self, result):
4681 """Checks if the payload is valid.
4683 @param result: RPC result
4684 @raises errors.OpExecError: If payload is not valid
4686 """
4687 errs = []
4688 if self.op.command == constants.OOB_HEALTH:
4689 if not isinstance(result.payload, list):
4690 errs.append("command 'health' is expected to return a list but got %s" %
4691 type(result.payload))
4692 else:
4693 for item, status in result.payload:
4694 if status not in constants.OOB_STATUSES:
4695 errs.append("health item '%s' has invalid status '%s'" %
4696 (item, status))
4698 if self.op.command == constants.OOB_POWER_STATUS:
4699 if not isinstance(result.payload, dict):
4700 errs.append("power-status is expected to return a dict but got %s" %
4701 type(result.payload))
4703 if self.op.command in [
4704 constants.OOB_POWER_ON,
4705 constants.OOB_POWER_OFF,
4706 constants.OOB_POWER_CYCLE,
4707 ]:
4708 if result.payload is not None:
4709 errs.append("%s is expected to not return payload but got '%s'" %
4710 (self.op.command, result.payload))
4712 if errs:
4713 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4714 utils.CommaJoin(errs))
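# Payload shapes accepted above (illustrative examples, not actual RPC
# output): OOB_HEALTH returns a list like [("disk0", "OK")], OOB_POWER_STATUS
# a dict like {constants.OOB_POWER_STATUS_POWERED: True}, and the power
# on/off/cycle commands return no payload at all (None).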
4717 class _OsQuery(_QueryBase):
4718 FIELDS = query.OS_FIELDS
4720 def ExpandNames(self, lu):
4721 # Lock all nodes in shared mode
4722 # Temporary removal of locks, should be reverted later
4723 # TODO: reintroduce locks when they are lighter-weight
4724 lu.needed_locks = {}
4725 #self.share_locks[locking.LEVEL_NODE] = 1
4726 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4728 # The following variables interact with _QueryBase._GetNames
4729 if self.names:
4730 self.wanted = self.names
4731 else:
4732 self.wanted = locking.ALL_SET
4734 self.do_locking = self.use_locking
4736 def DeclareLocks(self, lu, level):
4737 pass
4739 @staticmethod
4740 def _DiagnoseByOS(rlist):
4741 """Remaps a per-node return list into an a per-os per-node dictionary
4743 @param rlist: a map with node names as keys and OS objects as values
4746 @return: a dictionary with osnames as keys and as value another
4747 map, with nodes as keys and tuples of (path, status, diagnose,
4748 variants, parameters, api_versions) as values, eg::
4750 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4751 (/srv/..., False, "invalid api")],
4752 "node2": [(/srv/..., True, "", [], [])]}
4757 # we build here the list of nodes that didn't fail the RPC (at RPC
4758 # level), so that nodes with a non-responding node daemon don't
4759 # make all OSes invalid
4760 good_nodes = [node_name for node_name in rlist
4761 if not rlist[node_name].fail_msg]
4762 for node_name, nr in rlist.items():
4763 if nr.fail_msg or not nr.payload:
4765 for (name, path, status, diagnose, variants,
4766 params, api_versions) in nr.payload:
4767 if name not in all_os:
4768 # build a list of nodes for this os containing empty lists
4769 # for each node in node_list
4771 for nname in good_nodes:
4772 all_os[name][nname] = []
4773 # convert params from [name, help] to (name, help)
4774 params = [tuple(v) for v in params]
4775 all_os[name][node_name].append((path, status, diagnose,
4776 variants, params, api_versions))
4779 def _GetQueryData(self, lu):
4780 """Computes the list of nodes and their attributes.
4783 # Locking is not used
4784 assert not (compat.any(lu.glm.is_owned(level)
4785 for level in locking.LEVELS
4786 if level != locking.LEVEL_CLUSTER) or
4787 self.do_locking or self.use_locking)
4789 valid_nodes = [node.name
4790 for node in lu.cfg.GetAllNodesInfo().values()
4791 if not node.offline and node.vm_capable]
4792 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4793 cluster = lu.cfg.GetClusterInfo()
4797 for (os_name, os_data) in pol.items():
4798 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4799 hidden=(os_name in cluster.hidden_os),
4800 blacklisted=(os_name in cluster.blacklisted_os))
4804 api_versions = set()
4806 for idx, osl in enumerate(os_data.values()):
4807 info.valid = bool(info.valid and osl and osl[0][1])
4811 (node_variants, node_params, node_api) = osl[0][3:6]
4814 variants.update(node_variants)
4815 parameters.update(node_params)
4816 api_versions.update(node_api)
4818 # Filter out inconsistent values
4819 variants.intersection_update(node_variants)
4820 parameters.intersection_update(node_params)
4821 api_versions.intersection_update(node_api)
4823 info.variants = list(variants)
4824 info.parameters = list(parameters)
4825 info.api_versions = list(api_versions)
4827 data[os_name] = info
4829 # Prepare data in requested order
4830 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4834 class LUOsDiagnose(NoHooksLU):
4835 """Logical unit for OS diagnose/query.
4837 """
4838 REQ_BGL = False
4840 @staticmethod
4841 def _BuildFilter(fields, names):
4842 """Builds a filter for querying OSes.
4844 """
4845 name_filter = qlang.MakeSimpleFilter("name", names)
4847 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4848 # respective field is not requested
4849 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4850 for fname in ["hidden", "blacklisted"]
4851 if fname not in fields]
4852 if "valid" not in fields:
4853 status_filter.append([qlang.OP_TRUE, "valid"])
4855 if status_filter:
4856 status_filter.insert(0, qlang.OP_AND)
4857 else:
4858 status_filter = None
4860 if name_filter and status_filter:
4861 return [qlang.OP_AND, name_filter, status_filter]
4862 elif name_filter:
4863 return name_filter
4864 else:
4865 return status_filter
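# Example of a generated filter (derived from the code above): with
# fields=["name"] and names=None, name_filter is None and the result is
#   [qlang.OP_AND, [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#    [qlang.OP_TRUE, "valid"]]
# i.e. hidden, blacklisted and invalid OSes are filtered out by default.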
4867 def CheckArguments(self):
4868 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4869 self.op.output_fields, False)
4871 def ExpandNames(self):
4872 self.oq.ExpandNames(self)
4874 def Exec(self, feedback_fn):
4875 return self.oq.OldStyleQuery(self)
4878 class LUNodeRemove(LogicalUnit):
4879 """Logical unit for removing a node.
4881 """
4882 HPATH = "node-remove"
4883 HTYPE = constants.HTYPE_NODE
4885 def BuildHooksEnv(self):
4886 """Build hooks env.
4888 """
4889 return {
4890 "OP_TARGET": self.op.node_name,
4891 "NODE_NAME": self.op.node_name,
4892 }
4894 def BuildHooksNodes(self):
4895 """Build hooks nodes.
4897 This doesn't run on the target node in the pre phase as a failed
4898 node would then be impossible to remove.
4900 """
4901 all_nodes = self.cfg.GetNodeList()
4902 try:
4903 all_nodes.remove(self.op.node_name)
4904 except ValueError:
4905 pass
4906 return (all_nodes, all_nodes)
4908 def CheckPrereq(self):
4909 """Check prerequisites.
4912 - the node exists in the configuration
4913 - it does not have primary or secondary instances
4914 - it's not the master
4916 Any errors are signaled by raising errors.OpPrereqError.
4918 """
4919 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4920 node = self.cfg.GetNodeInfo(self.op.node_name)
4921 assert node is not None
4923 masternode = self.cfg.GetMasterNode()
4924 if node.name == masternode:
4925 raise errors.OpPrereqError("Node is the master node, failover to another"
4926 " node is required", errors.ECODE_INVAL)
4928 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4929 if node.name in instance.all_nodes:
4930 raise errors.OpPrereqError("Instance %s is still running on the node,"
4931 " please remove first" % instance_name,
4933 self.op.node_name = node.name
4936 def Exec(self, feedback_fn):
4937 """Removes the node from the cluster.
4941 logging.info("Stopping the node daemon and removing configs from node %s",
4944 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4946 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4949 # Promote nodes to master candidate as needed
4950 _AdjustCandidatePool(self, exceptions=[node.name])
4951 self.context.RemoveNode(node.name)
4953 # Run post hooks on the node before it's removed
4954 _RunPostHook(self, node.name)
4956 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4957 msg = result.fail_msg
4959 self.LogWarning("Errors encountered on the remote node while leaving"
4960 " the cluster: %s", msg)
4962 # Remove node from our /etc/hosts
4963 if self.cfg.GetClusterInfo().modify_etc_hosts:
4964 master_node = self.cfg.GetMasterNode()
4965 result = self.rpc.call_etc_hosts_modify(master_node,
4966 constants.ETC_HOSTS_REMOVE,
4968 result.Raise("Can't update hosts file with new host data")
4969 _RedistributeAncillaryFiles(self)
4972 class _NodeQuery(_QueryBase):
4973 FIELDS = query.NODE_FIELDS
4975 def ExpandNames(self, lu):
4976 lu.needed_locks = {}
4977 lu.share_locks = _ShareAll()
4979 if self.names:
4980 self.wanted = _GetWantedNodes(lu, self.names)
4981 else:
4982 self.wanted = locking.ALL_SET
4984 self.do_locking = (self.use_locking and
4985 query.NQ_LIVE in self.requested_data)
4987 if self.do_locking:
4988 # If any non-static field is requested we need to lock the nodes
4989 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4991 def DeclareLocks(self, lu, level):
4992 pass
4994 def _GetQueryData(self, lu):
4995 """Computes the list of nodes and their attributes.
4998 all_info = lu.cfg.GetAllNodesInfo()
5000 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5002 # Gather data as requested
5003 if query.NQ_LIVE in self.requested_data:
5004 # filter out non-vm_capable nodes
5005 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5007 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5008 [lu.cfg.GetHypervisorType()])
5009 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5010 for (name, nresult) in node_data.items()
5011 if not nresult.fail_msg and nresult.payload)
5012 else:
5013 live_data = None
5015 if query.NQ_INST in self.requested_data:
5016 node_to_primary = dict([(name, set()) for name in nodenames])
5017 node_to_secondary = dict([(name, set()) for name in nodenames])
5019 inst_data = lu.cfg.GetAllInstancesInfo()
5021 for inst in inst_data.values():
5022 if inst.primary_node in node_to_primary:
5023 node_to_primary[inst.primary_node].add(inst.name)
5024 for secnode in inst.secondary_nodes:
5025 if secnode in node_to_secondary:
5026 node_to_secondary[secnode].add(inst.name)
5027 else:
5028 node_to_primary = None
5029 node_to_secondary = None
5031 if query.NQ_OOB in self.requested_data:
5032 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5033 for name, node in all_info.iteritems())
5034 else:
5035 oob_support = None
5037 if query.NQ_GROUP in self.requested_data:
5038 groups = lu.cfg.GetAllNodeGroupsInfo()
5039 else:
5040 groups = {}
5042 return query.NodeQueryData([all_info[name] for name in nodenames],
5043 live_data, lu.cfg.GetMasterNode(),
5044 node_to_primary, node_to_secondary, groups,
5045 oob_support, lu.cfg.GetClusterInfo())
5048 class LUNodeQuery(NoHooksLU):
5049 """Logical unit for querying nodes.
5052 # pylint: disable=W0142
5055 def CheckArguments(self):
5056 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5057 self.op.output_fields, self.op.use_locking)
5059 def ExpandNames(self):
5060 self.nq.ExpandNames(self)
5062 def DeclareLocks(self, level):
5063 self.nq.DeclareLocks(self, level)
5065 def Exec(self, feedback_fn):
5066 return self.nq.OldStyleQuery(self)
5069 class LUNodeQueryvols(NoHooksLU):
5070 """Logical unit for getting volumes on node(s).
5074 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5075 _FIELDS_STATIC = utils.FieldSet("node")
5077 def CheckArguments(self):
5078 _CheckOutputFields(static=self._FIELDS_STATIC,
5079 dynamic=self._FIELDS_DYNAMIC,
5080 selected=self.op.output_fields)
5082 def ExpandNames(self):
5083 self.share_locks = _ShareAll()
5084 self.needed_locks = {}
5086 if not self.op.nodes:
5087 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5088 else:
5089 self.needed_locks[locking.LEVEL_NODE] = \
5090 _GetWantedNodes(self, self.op.nodes)
5092 def Exec(self, feedback_fn):
5093 """Computes the list of nodes and their attributes.
5096 nodenames = self.owned_locks(locking.LEVEL_NODE)
5097 volumes = self.rpc.call_node_volumes(nodenames)
5099 ilist = self.cfg.GetAllInstancesInfo()
5100 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5103 for node in nodenames:
5104 nresult = volumes[node]
5107 msg = nresult.fail_msg
5109 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5112 node_vols = sorted(nresult.payload,
5113 key=operator.itemgetter("dev"))
5115 for vol in node_vols:
5117 for field in self.op.output_fields:
5120 elif field == "phys":
5124 elif field == "name":
5126 elif field == "size":
5127 val = int(float(vol["size"]))
5128 elif field == "instance":
5129 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5131 raise errors.ParameterError(field)
5132 node_output.append(str(val))
5134 output.append(node_output)
5139 class LUNodeQueryStorage(NoHooksLU):
5140 """Logical unit for getting information on storage units on node(s).
5143 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5146 def CheckArguments(self):
5147 _CheckOutputFields(static=self._FIELDS_STATIC,
5148 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5149 selected=self.op.output_fields)
5151 def ExpandNames(self):
5152 self.share_locks = _ShareAll()
5153 self.needed_locks = {}
5155 if self.op.nodes:
5156 self.needed_locks[locking.LEVEL_NODE] = \
5157 _GetWantedNodes(self, self.op.nodes)
5158 else:
5159 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5161 def Exec(self, feedback_fn):
5162 """Computes the list of nodes and their attributes.
5165 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5167 # Always get name to sort by
5168 if constants.SF_NAME in self.op.output_fields:
5169 fields = self.op.output_fields[:]
5171 fields = [constants.SF_NAME] + self.op.output_fields
5173 # Never ask for node or type as it's only known to the LU
5174 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5175 while extra in fields:
5176 fields.remove(extra)
5178 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5179 name_idx = field_idx[constants.SF_NAME]
5181 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5182 data = self.rpc.call_storage_list(self.nodes,
5183 self.op.storage_type, st_args,
5184 self.op.name, fields)
5188 for node in utils.NiceSort(self.nodes):
5189 nresult = data[node]
5193 msg = nresult.fail_msg
5195 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5198 rows = dict([(row[name_idx], row) for row in nresult.payload])
5200 for name in utils.NiceSort(rows.keys()):
5205 for field in self.op.output_fields:
5206 if field == constants.SF_NODE:
5208 elif field == constants.SF_TYPE:
5209 val = self.op.storage_type
5210 elif field in field_idx:
5211 val = row[field_idx[field]]
5213 raise errors.ParameterError(field)
5222 class _InstanceQuery(_QueryBase):
5223 FIELDS = query.INSTANCE_FIELDS
5225 def ExpandNames(self, lu):
5226 lu.needed_locks = {}
5227 lu.share_locks = _ShareAll()
5229 if self.names:
5230 self.wanted = _GetWantedInstances(lu, self.names)
5231 else:
5232 self.wanted = locking.ALL_SET
5234 self.do_locking = (self.use_locking and
5235 query.IQ_LIVE in self.requested_data)
5236 if self.do_locking:
5237 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5238 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5239 lu.needed_locks[locking.LEVEL_NODE] = []
5240 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5242 self.do_grouplocks = (self.do_locking and
5243 query.IQ_NODES in self.requested_data)
5245 def DeclareLocks(self, lu, level):
5246 if self.do_locking:
5247 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5248 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5250 # Lock all groups used by instances optimistically; this requires going
5251 # via the node before it's locked, requiring verification later on
5252 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5253 set(group_uuid
5254 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5255 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5256 elif level == locking.LEVEL_NODE:
5257 lu._LockInstancesNodes() # pylint: disable=W0212
5259 @staticmethod
5260 def _CheckGroupLocks(lu):
5261 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5262 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5264 # Check if node groups for locked instances are still correct
5265 for instance_name in owned_instances:
5266 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5268 def _GetQueryData(self, lu):
5269 """Computes the list of instances and their attributes.
5272 if self.do_grouplocks:
5273 self._CheckGroupLocks(lu)
5275 cluster = lu.cfg.GetClusterInfo()
5276 all_info = lu.cfg.GetAllInstancesInfo()
5278 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5280 instance_list = [all_info[name] for name in instance_names]
5281 nodes = frozenset(itertools.chain(*(inst.all_nodes
5282 for inst in instance_list)))
5283 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5284 bad_nodes = []
5285 offline_nodes = []
5286 wrongnode_inst = set()
5288 # Gather data as requested
5289 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5290 live_data = {}
5291 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5292 for name in nodes:
5293 result = node_data[name]
5294 if result.offline:
5295 # offline nodes will be in both lists
5296 assert result.fail_msg
5297 offline_nodes.append(name)
5298 if result.fail_msg:
5299 bad_nodes.append(name)
5300 elif result.payload:
5301 for inst in result.payload:
5302 if inst in all_info:
5303 if all_info[inst].primary_node == name:
5304 live_data.update(result.payload)
5305 else:
5306 wrongnode_inst.add(inst)
5307 else:
5308 # orphan instance; we don't list it here as we don't
5309 # handle this case yet in the output of instance listing
5310 logging.warning("Orphan instance '%s' found on node %s",
5311 inst, name)
5312 # else no instance is alive
5313 else:
5314 live_data = {}
5316 if query.IQ_DISKUSAGE in self.requested_data:
5317 disk_usage = dict((inst.name,
5318 _ComputeDiskSize(inst.disk_template,
5319 [{constants.IDISK_SIZE: disk.size}
5320 for disk in inst.disks]))
5321 for inst in instance_list)
5322 else:
5323 disk_usage = None
5325 if query.IQ_CONSOLE in self.requested_data:
5326 consinfo = {}
5327 for inst in instance_list:
5328 if inst.name in live_data:
5329 # Instance is running
5330 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5331 else:
5332 consinfo[inst.name] = None
5333 assert set(consinfo.keys()) == set(instance_names)
5334 else:
5335 consinfo = None
5337 if query.IQ_NODES in self.requested_data:
5338 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5340 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5341 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5342 for uuid in set(map(operator.attrgetter("group"),
5348 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5349 disk_usage, offline_nodes, bad_nodes,
5350 live_data, wrongnode_inst, consinfo,
5354 class LUQuery(NoHooksLU):
5355 """Query for resources/items of a certain kind.
5358 # pylint: disable=W0142
5361 def CheckArguments(self):
5362 qcls = _GetQueryImplementation(self.op.what)
5364 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5366 def ExpandNames(self):
5367 self.impl.ExpandNames(self)
5369 def DeclareLocks(self, level):
5370 self.impl.DeclareLocks(self, level)
5372 def Exec(self, feedback_fn):
5373 return self.impl.NewStyleQuery(self)
5376 class LUQueryFields(NoHooksLU):
5377 """Query for resources/items of a certain kind.
5380 # pylint: disable=W0142
5383 def CheckArguments(self):
5384 self.qcls = _GetQueryImplementation(self.op.what)
5386 def ExpandNames(self):
5387 self.needed_locks = {}
5389 def Exec(self, feedback_fn):
5390 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5393 class LUNodeModifyStorage(NoHooksLU):
5394 """Logical unit for modifying a storage volume on a node.
5399 def CheckArguments(self):
5400 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5402 storage_type = self.op.storage_type
5405 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5407 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5408 " modified" % storage_type,
5411 diff = set(self.op.changes.keys()) - modifiable
5413 raise errors.OpPrereqError("The following fields can not be modified for"
5414 " storage units of type '%s': %r" %
5415 (storage_type, list(diff)),
5418 def ExpandNames(self):
5419 self.needed_locks = {
5420 locking.LEVEL_NODE: self.op.node_name,
5423 def Exec(self, feedback_fn):
5424 """Computes the list of nodes and their attributes.
5427 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5428 result = self.rpc.call_storage_modify(self.op.node_name,
5429 self.op.storage_type, st_args,
5430 self.op.name, self.op.changes)
5431 result.Raise("Failed to modify storage unit '%s' on %s" %
5432 (self.op.name, self.op.node_name))
5435 class LUNodeAdd(LogicalUnit):
5436 """Logical unit for adding node to the cluster.
5440 HTYPE = constants.HTYPE_NODE
5441 _NFLAGS = ["master_capable", "vm_capable"]
5443 def CheckArguments(self):
5444 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5445 # validate/normalize the node name
5446 self.hostname = netutils.GetHostname(name=self.op.node_name,
5447 family=self.primary_ip_family)
5448 self.op.node_name = self.hostname.name
5450 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5451 raise errors.OpPrereqError("Cannot readd the master node",
5454 if self.op.readd and self.op.group:
5455 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5456 " being readded", errors.ECODE_INVAL)
5458 def BuildHooksEnv(self):
5459 """Build hooks env.
5461 This will run on all nodes before, and on all nodes + the new node after.
5463 """
5464 return {
5465 "OP_TARGET": self.op.node_name,
5466 "NODE_NAME": self.op.node_name,
5467 "NODE_PIP": self.op.primary_ip,
5468 "NODE_SIP": self.op.secondary_ip,
5469 "MASTER_CAPABLE": str(self.op.master_capable),
5470 "VM_CAPABLE": str(self.op.vm_capable),
5471 }
5473 def BuildHooksNodes(self):
5474 """Build hooks nodes.
5477 # Exclude added node
5478 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5479 post_nodes = pre_nodes + [self.op.node_name, ]
5481 return (pre_nodes, post_nodes)
5483 def CheckPrereq(self):
5484 """Check prerequisites.
5487 - the new node is not already in the config
5489 - its parameters (single/dual homed) matches the cluster
5491 Any errors are signaled by raising errors.OpPrereqError.
5495 hostname = self.hostname
5496 node = hostname.name
5497 primary_ip = self.op.primary_ip = hostname.ip
5498 if self.op.secondary_ip is None:
5499 if self.primary_ip_family == netutils.IP6Address.family:
5500 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5501 " IPv4 address must be given as secondary",
5503 self.op.secondary_ip = primary_ip
5505 secondary_ip = self.op.secondary_ip
5506 if not netutils.IP4Address.IsValid(secondary_ip):
5507 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5508 " address" % secondary_ip, errors.ECODE_INVAL)
5510 node_list = cfg.GetNodeList()
5511 if not self.op.readd and node in node_list:
5512 raise errors.OpPrereqError("Node %s is already in the configuration" %
5513 node, errors.ECODE_EXISTS)
5514 elif self.op.readd and node not in node_list:
5515 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5518 self.changed_primary_ip = False
5520 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5521 if self.op.readd and node == existing_node_name:
5522 if existing_node.secondary_ip != secondary_ip:
5523 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5524 " address configuration as before",
5526 if existing_node.primary_ip != primary_ip:
5527 self.changed_primary_ip = True
5531 if (existing_node.primary_ip == primary_ip or
5532 existing_node.secondary_ip == primary_ip or
5533 existing_node.primary_ip == secondary_ip or
5534 existing_node.secondary_ip == secondary_ip):
5535 raise errors.OpPrereqError("New node ip address(es) conflict with"
5536 " existing node %s" % existing_node.name,
5537 errors.ECODE_NOTUNIQUE)
5539 # After this 'if' block, None is no longer a valid value for the
5540 # _capable op attributes
5541 if self.op.readd:
5542 old_node = self.cfg.GetNodeInfo(node)
5543 assert old_node is not None, "Can't retrieve locked node %s" % node
5544 for attr in self._NFLAGS:
5545 if getattr(self.op, attr) is None:
5546 setattr(self.op, attr, getattr(old_node, attr))
5547 else:
5548 for attr in self._NFLAGS:
5549 if getattr(self.op, attr) is None:
5550 setattr(self.op, attr, True)
5552 if self.op.readd and not self.op.vm_capable:
5553 pri, sec = cfg.GetNodeInstances(node)
5554 if pri or sec:
5555 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5556 " flag set to false, but it already holds"
5557 " instances" % node,
5558 errors.ECODE_STATE)
5560 # check that the type of the node (single versus dual homed) is the
5561 # same as for the master
5562 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5563 master_singlehomed = myself.secondary_ip == myself.primary_ip
5564 newbie_singlehomed = secondary_ip == primary_ip
5565 if master_singlehomed != newbie_singlehomed:
5566 if master_singlehomed:
5567 raise errors.OpPrereqError("The master has no secondary ip but the"
5568 " new node has one",
5571 raise errors.OpPrereqError("The master has a secondary ip but the"
5572 " new node doesn't have one",
5575 # checks reachability
5576 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5577 raise errors.OpPrereqError("Node not reachable by ping",
5578 errors.ECODE_ENVIRON)
5580 if not newbie_singlehomed:
5581 # check reachability from my secondary ip to newbie's secondary ip
5582 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5583 source=myself.secondary_ip):
5584 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5585 " based ping to node daemon port",
5586 errors.ECODE_ENVIRON)
5588 if self.op.readd:
5589 exceptions = [node]
5590 else:
5591 exceptions = []
5593 if self.op.master_capable:
5594 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5596 self.master_candidate = False
5598 if self.op.readd:
5599 self.new_node = old_node
5600 else:
5601 node_group = cfg.LookupNodeGroup(self.op.group)
5602 self.new_node = objects.Node(name=node,
5603 primary_ip=primary_ip,
5604 secondary_ip=secondary_ip,
5605 master_candidate=self.master_candidate,
5606 offline=False, drained=False,
5607 group=node_group)
5609 if self.op.ndparams:
5610 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5612 if self.op.hv_state:
5613 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5615 if self.op.disk_state:
5616 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5618 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5619 # it a property on the base class.
5620 result = rpc.DnsOnlyRunner().call_version([node])[node]
5621 result.Raise("Can't get version information from node %s" % node)
5622 if constants.PROTOCOL_VERSION == result.payload:
5623 logging.info("Communication to node %s fine, sw version %s match",
5624 node, result.payload)
5625 else:
5626 raise errors.OpPrereqError("Version mismatch master version %s,"
5627 " node version %s" %
5628 (constants.PROTOCOL_VERSION, result.payload),
5629 errors.ECODE_ENVIRON)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # We're adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    if self.op.hv_state:
      new_node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      new_node.disk_state_static = self.new_disk_state

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
      }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
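

# Editor's note: the flag-copying loop in LUNodeAdd.Exec relies on the opcode
# and the node object sharing attribute names, so the same list of flag names
# drives both sides. A minimal, self-contained sketch of that pattern (plain
# Python; the objects and flag list below are hypothetical stand-ins, not
# part of the Ganeti API):
def _SketchCopyDeclaredFlags(src, dst, flags):
  """Copy each named attribute from src to dst, returning what changed."""
  changed = []
  for attr in flags:
    val = getattr(src, attr)
    if getattr(dst, attr, None) != val:
      setattr(dst, attr, val)
      changed.append((attr, val))
  return changed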


class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
                self.op.disk_state]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def _InstanceFilter(self, instance):
    """Filter for getting affected instances.

    """
    return (instance.disk_template in constants.DTS_INT_MIRROR and
            self.op.node_name in instance.all_nodes)

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    # Since modifying a node can have severe effects on currently running
    # operations the resource lock is at least acquired in shared mode
    self.needed_locks[locking.LEVEL_NODE_RES] = \
      self.needed_locks[locking.LEVEL_NODE]

    # Get node resource and instance locks in shared mode; they are not used
    # for anything but read-only access
    self.share_locks[locking.LEVEL_NODE_RES] = 1
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if self.lock_instances:
      affected_instances = \
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)

      # Verify instance locks
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
      wanted_instances = frozenset(affected_instances.keys())
      if wanted_instances - owned_instances:
        raise errors.OpPrereqError("Instances affected by changing node %s's"
                                   " secondary IP address have changed since"
                                   " locks were acquired, wanted '%s', have"
                                   " '%s'; retry the operation" %
                                   (self.op.node_name,
                                    utils.CommaJoin(wanted_instances),
                                    utils.CommaJoin(owned_instances)),
                                   errors.ECODE_STATE)
    else:
      affected_instances = None

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
        self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip != node.primary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      assert not (frozenset(affected_instances) -
                  self.owned_locks(locking.LEVEL_INSTANCE))

      if node.offline:
        if affected_instances:
          raise errors.OpPrereqError("Cannot change secondary IP address:"
                                     " offline node has instances (%s)"
                                     " configured to use it" %
                                     utils.CommaJoin(affected_instances.keys()))
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in affected_instances.values():
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
                              msg="cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.node.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.node.disk_state_static)

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    if self.op.hv_state:
      node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      node.disk_state_static = self.new_disk_state

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
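

# Editor's note: LUNodeSetParams encodes the (master_candidate, drained,
# offline) flag tuple as a single role and derives the reverse mapping by
# inverting the dictionary, which is safe because _F2R is bijective. A
# standalone sketch of the same technique (illustrative only, with
# hypothetical role names):
_SKETCH_F2R = {
  (True, False, False): "candidate",
  (False, True, False): "drained",
  (False, False, True): "offline",
  (False, False, False): "regular",
  }
_SKETCH_R2F = dict((role, flags) for flags, role in _SKETCH_F2R.items())


def _SketchRoleRoundTrip(flags):
  """Map a flag tuple to its role and verify the inverse mapping agrees."""
  role = _SKETCH_F2R[flags]
  assert _SKETCH_R2F[role] == flags
  return role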


class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": runtime.GetArchInfo(),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.primary_hypervisor,
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "ipolicy": cluster.ipolicy,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "use_external_mip_script": cluster.use_external_mip_script,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.cq = _ClusterQuery(None, self.op.output_fields, False)

  def ExpandNames(self):
    self.cq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.cq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    result = self.cq.OldStyleQuery(self)

    assert len(result) == 1

    return result[0]


class _ClusterQuery(_QueryBase):
  FIELDS = query.CLUSTER_FIELDS

  #: Do not sort (there is only one item)
  SORT_FIELD = None

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    self.wanted = locking.ALL_SET
    self.do_locking = self.use_locking

    if self.do_locking:
      raise errors.OpPrereqError("Can not use locking for cluster queries",
                                 errors.ECODE_INVAL)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    if query.CQ_CONFIG in self.requested_data:
      cluster = lu.cfg.GetClusterInfo()
    else:
      cluster = NotImplemented

    if query.CQ_QUEUE_DRAINED in self.requested_data:
      drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
    else:
      drain_flag = NotImplemented

    if query.CQ_WATCHER_PAUSE in self.requested_data:
      watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
    else:
      watcher_pause = NotImplemented

    return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
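

# Editor's note: _ClusterQuery._GetQueryData only gathers the data groups
# that were actually requested and fills the rest with the NotImplemented
# sentinel, so the query layer can tell "not requested" apart from "no
# data". A hedged, generic sketch of that gating pattern (hypothetical
# keys and producer callbacks, not the real query API):
def _SketchGatherRequested(requested, producers):
  """Run only the producers whose key was requested; mark the rest."""
  return dict((key, fn() if key in requested else NotImplemented)
              for key, fn in producers.items())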


class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
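

# Editor's note: the two-pass activation above narrows (but does not close)
# the window in which DRBD could be promoted to primary before the peers
# have handshaked. The control flow, reduced to its skeleton (illustrative
# model only; "activate" is a hypothetical callback, not a Ganeti RPC):
def _SketchTwoPassAssemble(disks, primary, nodes, activate):
  """Activate every disk on all nodes as secondary, then on the primary."""
  ok = True
  for disk in disks:                  # pass 1: everything in secondary mode
    for node in nodes:
      ok = activate(node, disk, False) and ok
  for disk in disks:                  # pass 2: only the primary, as primary
    ok = activate(primary, disk, True) and ok
  return ok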


def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If the ignore_primary is false, errors on the primary node are
  reported as a failure (the function returns False); otherwise they
  are ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False

  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats

  @return: node current free memory
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  (_, _, (hv_info, )) = nodeinfo[node].payload

  free_mem = hv_info.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
  return free_mem
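

# Editor's note: _CheckNodeFreeMemory type-checks the RPC payload before
# comparing it, because a failed hypervisor query may hand back None or an
# error string instead of an integer. The same defensive pattern in
# isolation (plain Python; a hypothetical helper, not a Ganeti function):
def _SketchRequireInt(value, what):
  """Return value if it is an integer, else raise ValueError."""
  if not isinstance(value, int):
    raise ValueError("%s: expected an integer, got %r" % (what, value))
  return value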


def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
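

# Editor's note: req_sizes maps volume-group names to the space that must be
# free in each, so the check above runs once per group. A minimal
# stand-alone model of that loop (illustrative data, not real cluster
# state); e.g. _SketchCheckFreeSpace({"xenvg": 2048}, {"xenvg": 1024})
# returns an empty list:
def _SketchCheckFreeSpace(free_by_vg, req_sizes):
  """Return the list of VGs that cannot satisfy their requested MiB."""
  return [vg for vg, req in req_sizes.items()
          if free_by_vg.get(vg, 0) < req]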


def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    (_, (vg_info, ), _) = info.payload
    vg_free = vg_info.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Checks if nodes have enough physical CPUs

  This function checks if all given nodes have the needed number of
  physical CPUs. In case any node has less CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    (_, _, (hv_info, )) = info.payload
    num_cpus = hv_info.get("cpu_total", None)
    if not isinstance(num_cpus, int):
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
                                 " on node %s, result was '%s'" %
                                 (node, num_cpus), errors.ECODE_ENVIRON)
    if requested > num_cpus:
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
                                 "required" % (node, num_cpus, requested),
                                 errors.ECODE_NORES)


class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      objects.UpgradeBeParams(self.op.beparams)
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE_RES:
      self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)
      bep.update(self.op.beparams)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MINMEM], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current,
                                            (instance, self.op.hvparams,
                                             self.op.beparams),
                                            self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)
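

# Editor's note: LUInstanceStartup.Exec assembles the disks first and tears
# them down again if the actual start fails, so no half-activated disks are
# left behind. The same rollback shape, abstracted into a sketch (the real
# code branches on fail_msg rather than exceptions; the three callbacks are
# hypothetical):
def _SketchStartWithRollback(start_disks, start_instance, stop_disks):
  """Run the two start steps, undoing the first if the second fails."""
  start_disks()
  try:
    start_instance()
  except Exception:
    stop_disks()
    raise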


class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)
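

# Editor's note: the reboot logic above folds into a small decision: soft
# and hard reboots of a running instance are delegated to the node, and
# every other combination becomes a full stop/start cycle. As a pure
# function (illustrative only; the string constants are hypothetical
# stand-ins for the INSTANCE_REBOOT_* values):
def _SketchRebootPlan(reboot_type, running):
  """Return "delegate" for an in-place reboot, else "stop-start"."""
  if running and reboot_type in ("soft", "hard"):
    return "delegate"
  return "stop-start"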


class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node,
                                             (inst, self.os_inst), True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  _MODIFYABLE = frozenset([
    constants.IDISK_SIZE,
    constants.IDISK_MODE,
    ])

  # New or changed disk parameters may have different semantics
  assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
    constants.IDISK_ADOPT,

    # TODO: Implement support changing VG while recreating
    constants.IDISK_VG,
    constants.IDISK_METAVG,
    ]))

  def CheckArguments(self):
    if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
      # Normalize and convert deprecated list of disk indices
      self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]

    duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
    if duplicates:
      raise errors.OpPrereqError("Some disks have been specified more than"
                                 " once: %s" % utils.CommaJoin(duplicates),
                                 errors.ECODE_INVAL)

    for (idx, params) in self.op.disks:
      utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
      unsupported = frozenset(params.keys()) - self._MODIFYABLE
      if unsupported:
        raise errors.OpPrereqError("Parameters for disk %s try to change"
                                   " unmodifyable parameter(s): %s" %
                                   (idx, utils.CommaJoin(unsupported)),
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # if we replace the nodes, we only need to lock the old primary,
      # otherwise we need to lock all nodes for disk re-creation
      primary_only = bool(self.op.nodes)
      self._LockInstancesNodes(primary_only=primary_only)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
             len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
             len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)

    # if we replace nodes *and* the old primary is offline, we don't
    # check the instance state
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not (self.op.nodes and old_pnode.offline):
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                          msg="cannot recreate disks")

    if self.op.disks:
      self.disks = dict(self.op.disks)
    else:
      self.disks = dict((idx, {}) for idx in range(len(instance.disks)))

    maxidx = max(self.disks.keys())
    if maxidx >= len(instance.disks):
      raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
                                 errors.ECODE_PARAMS)

    if (self.op.nodes and
        sorted(self.disks.keys()) != range(len(instance.disks))):
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    instance = self.instance

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    to_skip = []
    mods = [] # keeps track of needed changes

    for idx, disk in enumerate(instance.disks):
      try:
        changes = self.disks[idx]
      except KeyError:
        # Disk should not be recreated
        to_skip.append(idx)
        continue

      # update secondaries for disks, if needed
      if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
        # need to update the nodes and minors
        assert len(self.op.nodes) == 2
        assert len(disk.logical_id) == 6 # otherwise disk internals
                                         # have changed
        (_, _, old_port, _, _, old_secret) = disk.logical_id
        new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
        new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                  new_minors[0], new_minors[1], old_secret)
        assert len(disk.logical_id) == len(new_id)
      else:
        new_id = None

      mods.append((idx, new_id, changes))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id, changes in mods:
      disk = instance.disks[idx]
      if new_id is not None:
        assert disk.dev_type == constants.LD_DRBD8
        disk.logical_id = new_id
      if changes:
        disk.Update(size=changes.get(constants.IDISK_SIZE, None),
                    mode=changes.get(constants.IDISK_MODE, None))

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=to_skip)
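

# Editor's note: when moving a DRBD disk to new nodes, only the node names
# and minors in the 6-tuple logical_id change; the port and the shared
# secret are preserved. The tuple surgery in isolation (an illustrative
# sketch, not the Ganeti helper itself):
def _SketchRewriteDrbdId(logical_id, new_nodes, new_minors):
  """Rebuild a (nodeA, nodeB, port, minorA, minorB, secret) tuple."""
  (_, _, old_port, _, _, old_secret) = logical_id
  return (new_nodes[0], new_nodes[1], old_port,
          new_minors[0], new_minors[1], old_secret)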


class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                        msg="cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      if hostname.name != new_name:
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                     hostname.name)
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                   (hostname.name, self.op.new_name),
                                   errors.ECODE_INVAL)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return (nl, nl_post)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))
    assert not (set(instance.all_nodes) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Not owning correct locks"

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
                             self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.iq.OldStyleQuery(self)


class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout,
                                       ignore_ipolicy=self.op.ignore_ipolicy)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])
7506 class LUInstanceMigrate(LogicalUnit):
7507 """Migrate an instance.
7509 This is migration without shutting down, compared to the failover,
7510 which is done with shutdown.
7513 HPATH = "instance-migrate"
7514 HTYPE = constants.HTYPE_INSTANCE
7517 def ExpandNames(self):
7518 self._ExpandAndLockInstance()
7520 if self.op.target_node is not None:
7521 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7523 self.needed_locks[locking.LEVEL_NODE] = []
7524 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7526 self.needed_locks[locking.LEVEL_NODE_RES] = []
7527 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7530 TLMigrateInstance(self, self.op.instance_name,
7531 cleanup=self.op.cleanup,
7533 fallback=self.op.allow_failover,
7534 allow_runtime_changes=self.op.allow_runtime_changes,
7535 ignore_ipolicy=self.op.ignore_ipolicy)
7536 self.tasklets = [self._migrater]
7538 def DeclareLocks(self, level):
7539 if level == locking.LEVEL_NODE:
7540 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7541 if instance.disk_template in constants.DTS_EXT_MIRROR:
7542 if self.op.target_node is None:
7543 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7545 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7546 self.op.target_node]
7547 del self.recalculate_locks[locking.LEVEL_NODE]
7549 self._LockInstancesNodes()
7550 elif level == locking.LEVEL_NODE_RES:
7552 self.needed_locks[locking.LEVEL_NODE_RES] = \
7553 self.needed_locks[locking.LEVEL_NODE][:]
7555 def BuildHooksEnv(self):
7558 This runs on master, primary and secondary nodes of the instance.
7561 instance = self._migrater.instance
7562 source_node = instance.primary_node
7563 target_node = self.op.target_node
7564 env = _BuildInstanceHookEnvByObject(self, instance)
7566 "MIGRATE_LIVE": self._migrater.live,
7567 "MIGRATE_CLEANUP": self.op.cleanup,
7568 "OLD_PRIMARY": source_node,
7569 "NEW_PRIMARY": target_node,
7570 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7573 if instance.disk_template in constants.DTS_INT_MIRROR:
7574 env["OLD_SECONDARY"] = target_node
7575 env["NEW_SECONDARY"] = source_node
7577 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7581 def BuildHooksNodes(self):
7582 """Build hooks nodes.
7585 instance = self._migrater.instance
7586 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7587 return (nl, nl + [instance.primary_node])
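# A minimal sketch of driving this LU through the job queue, assuming a
# luxi client `cl` is available (the client name is illustrative):
#
#   op = opcodes.OpInstanceMigrate(instance_name="inst1.example.com",
#                                  cleanup=False, allow_failover=True)
#   job_id = cl.SubmitJob([op])
#
# With allow_failover=True the tasklet created above may fall back to a
# failover when live migration is not possible (see
# TLMigrateInstance.CheckPrereq).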
7590 class LUInstanceMove(LogicalUnit):
7591 """Move an instance by data-copying.
7594 HPATH = "instance-move"
7595 HTYPE = constants.HTYPE_INSTANCE
7598 def ExpandNames(self):
7599 self._ExpandAndLockInstance()
7600 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7601 self.op.target_node = target_node
7602 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7603 self.needed_locks[locking.LEVEL_NODE_RES] = []
7604 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7606 def DeclareLocks(self, level):
7607 if level == locking.LEVEL_NODE:
7608 self._LockInstancesNodes(primary_only=True)
7609 elif level == locking.LEVEL_NODE_RES:
7611 self.needed_locks[locking.LEVEL_NODE_RES] = \
7612 self.needed_locks[locking.LEVEL_NODE][:]
7614 def BuildHooksEnv(self):
7617 This runs on master, primary and secondary nodes of the instance.
7621 "TARGET_NODE": self.op.target_node,
7622 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7624 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7627 def BuildHooksNodes(self):
7628 """Build hooks nodes.
7632 self.cfg.GetMasterNode(),
7633 self.instance.primary_node,
7634 self.op.target_node,
7638 def CheckPrereq(self):
7639 """Check prerequisites.
7641 This checks that the instance is in the cluster.
7644 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7645 assert self.instance is not None, \
7646 "Cannot retrieve locked instance %s" % self.op.instance_name
7648 node = self.cfg.GetNodeInfo(self.op.target_node)
7649 assert node is not None, \
7650 "Cannot retrieve locked node %s" % self.op.target_node
7652 self.target_node = target_node = node.name
7654 if target_node == instance.primary_node:
7655 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7656 (instance.name, target_node),
7659 bep = self.cfg.GetClusterInfo().FillBE(instance)
7661 for idx, dsk in enumerate(instance.disks):
7662 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7663 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7664 " cannot copy" % idx, errors.ECODE_STATE)
7666 _CheckNodeOnline(self, target_node)
7667 _CheckNodeNotDrained(self, target_node)
7668 _CheckNodeVmCapable(self, target_node)
7669 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7670 self.cfg.GetNodeGroup(node.group))
7671 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7672 ignore=self.op.ignore_ipolicy)
7674 if instance.admin_state == constants.ADMINST_UP:
7675 # check memory requirements on the target node
7676 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7677 instance.name, bep[constants.BE_MAXMEM],
7678 instance.hypervisor)
7680 self.LogInfo("Not checking memory on the secondary node as"
7681 " instance will not be started")
7683 # check bridge existence
7684 _CheckInstanceBridgesExist(self, instance, node=target_node)
7686 def Exec(self, feedback_fn):
7687 """Move an instance.
7689 The move is done by shutting it down on its present node, copying
7690 the data over (slow) and starting it on the new node.
7693 instance = self.instance
7695 source_node = instance.primary_node
7696 target_node = self.target_node
7698 self.LogInfo("Shutting down instance %s on source node %s",
7699 instance.name, source_node)
7701 assert (self.owned_locks(locking.LEVEL_NODE) ==
7702 self.owned_locks(locking.LEVEL_NODE_RES))
7704 result = self.rpc.call_instance_shutdown(source_node, instance,
7705 self.op.shutdown_timeout)
7706 msg = result.fail_msg
7708 if self.op.ignore_consistency:
7709 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7710 " Proceeding anyway. Please make sure node"
7711 " %s is down. Error details: %s",
7712 instance.name, source_node, source_node, msg)
7714 raise errors.OpExecError("Could not shutdown instance %s on"
7716 (instance.name, source_node, msg))
7718 # create the target disks
7720 _CreateDisks(self, instance, target_node=target_node)
7721 except errors.OpExecError:
7722 self.LogWarning("Device creation failed, reverting...")
7724 _RemoveDisks(self, instance, target_node=target_node)
7726 self.cfg.ReleaseDRBDMinors(instance.name)
7729 cluster_name = self.cfg.GetClusterInfo().cluster_name
7732 # activate, get path, copy the data over
7733 for idx, disk in enumerate(instance.disks):
7734 self.LogInfo("Copying data for disk %d", idx)
7735 result = self.rpc.call_blockdev_assemble(target_node, disk,
7736 instance.name, True, idx)
7738 self.LogWarning("Can't assemble newly created disk %d: %s",
7739 idx, result.fail_msg)
7740 errs.append(result.fail_msg)
7742 dev_path = result.payload
7743 result = self.rpc.call_blockdev_export(source_node, disk,
7744 target_node, dev_path,
7747 self.LogWarning("Can't copy data over for disk %d: %s",
7748 idx, result.fail_msg)
7749 errs.append(result.fail_msg)
7753 self.LogWarning("Some disks failed to copy, aborting")
7755 _RemoveDisks(self, instance, target_node=target_node)
7757 self.cfg.ReleaseDRBDMinors(instance.name)
7758 raise errors.OpExecError("Errors during disk copy: %s" %
7761 instance.primary_node = target_node
7762 self.cfg.Update(instance, feedback_fn)
7764 self.LogInfo("Removing the disks on the original node")
7765 _RemoveDisks(self, instance, target_node=source_node)
7767 # Only start the instance if it's marked as up
7768 if instance.admin_state == constants.ADMINST_UP:
7769 self.LogInfo("Starting instance %s on node %s",
7770 instance.name, target_node)
7772 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7773 ignore_secondaries=True)
7775 _ShutdownInstanceDisks(self, instance)
7776 raise errors.OpExecError("Can't activate the instance's disks")
7778 result = self.rpc.call_instance_start(target_node,
7779 (instance, None, None), False)
7780 msg = result.fail_msg
7782 _ShutdownInstanceDisks(self, instance)
7783 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7784 (instance.name, target_node, msg))
7787 class LUNodeMigrate(LogicalUnit):
7788 """Migrate all instances from a node.
7791 HPATH = "node-migrate"
7792 HTYPE = constants.HTYPE_NODE
7795 def CheckArguments(self):
7798 def ExpandNames(self):
7799 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7801 self.share_locks = _ShareAll()
7802 self.needed_locks = {
7803 locking.LEVEL_NODE: [self.op.node_name],
7806 def BuildHooksEnv(self):
7809 This runs on the master, the primary and all the secondaries.
7813 "NODE_NAME": self.op.node_name,
7814 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7817 def BuildHooksNodes(self):
7818 """Build hooks nodes.
7821 nl = [self.cfg.GetMasterNode()]
7824 def CheckPrereq(self):
7827 def Exec(self, feedback_fn):
7828 # Prepare jobs for migration instances
7829 allow_runtime_changes = self.op.allow_runtime_changes
7831 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7834 iallocator=self.op.iallocator,
7835 target_node=self.op.target_node,
7836 allow_runtime_changes=allow_runtime_changes,
7837 ignore_ipolicy=self.op.ignore_ipolicy)]
7838 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7841 # TODO: Run iallocator in this opcode and pass correct placement options to
7842 # OpInstanceMigrate. Since other jobs can modify the cluster between
7843 # running the iallocator and the actual migration, a good consistency model
7844 # will have to be found.
7846 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7847 frozenset([self.op.node_name]))
7849 return ResultWithJobs(jobs)
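# The result wraps one single-opcode job per primary instance, e.g.:
#
#   jobs == [[opcodes.OpInstanceMigrate(instance_name="inst1", ...)],
#            [opcodes.OpInstanceMigrate(instance_name="inst2", ...)]]
#
# so each migration runs as its own job and can succeed, fail or be
# cancelled independently of the others.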
7852 class TLMigrateInstance(Tasklet):
7853 """Tasklet class for instance migration.
7856 @ivar live: whether the migration will be done live or non-live;
7857 this variable is initialized only after CheckPrereq has run
7858 @type cleanup: boolean
7859 @ivar cleanup: Whether we are cleaning up after a failed migration
7860 @type iallocator: string
7861 @ivar iallocator: The iallocator used to determine target_node
7862 @type target_node: string
7863 @ivar target_node: If given, the target_node to reallocate the instance to
7864 @type failover: boolean
7865 @ivar failover: Whether operation results in failover or migration
7866 @type fallback: boolean
7867 @ivar fallback: Whether fallback to failover is allowed if migration not
7869 @type ignore_consistency: boolean
7870 @ivar ignore_consistency: Whether we should ignore consistency between source
7872 @type shutdown_timeout: int
7873 @ivar shutdown_timeout: In case of failover, the timeout to use for the shutdown
7874 @type ignore_ipolicy: bool
7875 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7880 _MIGRATION_POLL_INTERVAL = 1 # seconds
7881 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7883 def __init__(self, lu, instance_name, cleanup=False,
7884 failover=False, fallback=False,
7885 ignore_consistency=False,
7886 allow_runtime_changes=True,
7887 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7888 ignore_ipolicy=False):
7889 """Initializes this class.
7892 Tasklet.__init__(self, lu)
7895 self.instance_name = instance_name
7896 self.cleanup = cleanup
7897 self.live = False # will be overridden later
7898 self.failover = failover
7899 self.fallback = fallback
7900 self.ignore_consistency = ignore_consistency
7901 self.shutdown_timeout = shutdown_timeout
7902 self.ignore_ipolicy = ignore_ipolicy
7903 self.allow_runtime_changes = allow_runtime_changes
7905 def CheckPrereq(self):
7906 """Check prerequisites.
7908 This checks that the instance is in the cluster.
7911 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7912 instance = self.cfg.GetInstanceInfo(instance_name)
7913 assert instance is not None
7914 self.instance = instance
7915 cluster = self.cfg.GetClusterInfo()
7917 if (not self.cleanup and
7918 instance.admin_state != constants.ADMINST_UP and
7919 not self.failover and self.fallback):
7920 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7921 " switching to failover")
7922 self.failover = True
7924 if instance.disk_template not in constants.DTS_MIRRORED:
7929 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7930 " %s" % (instance.disk_template, text),
7933 if instance.disk_template in constants.DTS_EXT_MIRROR:
7934 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7936 if self.lu.op.iallocator:
7937 self._RunAllocator()
7939 # We set self.target_node as it is required by
7941 self.target_node = self.lu.op.target_node
7943 # Check that the target node is correct in terms of instance policy
7944 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7945 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7946 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7947 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7948 ignore=self.ignore_ipolicy)
7950 # self.target_node is already populated, either directly or by the
7952 target_node = self.target_node
7953 if self.target_node == instance.primary_node:
7954 raise errors.OpPrereqError("Cannot migrate instance %s"
7955 " to its primary (%s)" %
7956 (instance.name, instance.primary_node))
7958 if len(self.lu.tasklets) == 1:
7959 # It is safe to release locks only when we're the only tasklet
7961 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7962 keep=[instance.primary_node, self.target_node])
7965 secondary_nodes = instance.secondary_nodes
7966 if not secondary_nodes:
7967 raise errors.ConfigurationError("No secondary node but using"
7968 " %s disk template" %
7969 instance.disk_template)
7970 target_node = secondary_nodes[0]
7971 if self.lu.op.iallocator or (self.lu.op.target_node and
7972 self.lu.op.target_node != target_node):
7974 text = "failed over"
7977 raise errors.OpPrereqError("Instances with disk template %s cannot"
7978 " be %s to arbitrary nodes"
7979 " (neither an iallocator nor a target"
7980 " node can be passed)" %
7981 (instance.disk_template, text),
7983 nodeinfo = self.cfg.GetNodeInfo(target_node)
7984 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7985 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7986 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7987 ignore=self.ignore_ipolicy)
7989 i_be = cluster.FillBE(instance)
7991 # check memory requirements on the target node
7992 if (not self.cleanup and
7993 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
7994 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
7995 "migrating instance %s" %
7997 i_be[constants.BE_MINMEM],
7998 instance.hypervisor)
8000 self.lu.LogInfo("Not checking memory on the secondary node as"
8001 " instance will not be started")
8003 # check if failover must be forced instead of migration
8004 if (not self.cleanup and not self.failover and
8005 i_be[constants.BE_ALWAYS_FAILOVER]):
8007 self.lu.LogInfo("Instance configured to always failover; fallback"
8009 self.failover = True
8011 raise errors.OpPrereqError("This instance has been configured to"
8012 " always failover, please allow failover",
8015 # check bridge existence
8016 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8018 if not self.cleanup:
8019 _CheckNodeNotDrained(self.lu, target_node)
8020 if not self.failover:
8021 result = self.rpc.call_instance_migratable(instance.primary_node,
8023 if result.fail_msg and self.fallback:
8024 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8026 self.failover = True
8028 result.Raise("Can't migrate, please use failover",
8029 prereq=True, ecode=errors.ECODE_STATE)
8031 assert not (self.failover and self.cleanup)
8033 if not self.failover:
8034 if self.lu.op.live is not None and self.lu.op.mode is not None:
8035 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8036 " parameters are accepted",
8038 if self.lu.op.live is not None:
8040 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8042 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8043 # reset the 'live' parameter to None so that repeated
8044 # invocations of CheckPrereq do not raise an exception
8045 self.lu.op.live = None
8046 elif self.lu.op.mode is None:
8047 # read the default value from the hypervisor
8048 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8049 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8051 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8053 # Failover is never live
8056 if not (self.failover or self.cleanup):
8057 remote_info = self.rpc.call_instance_info(instance.primary_node,
8059 instance.hypervisor)
8060 remote_info.Raise("Error checking instance on node %s" %
8061 instance.primary_node)
8062 instance_running = bool(remote_info.payload)
8063 if instance_running:
8064 self.current_mem = int(remote_info.payload["memory"])
8066 def _RunAllocator(self):
8067 """Run the allocator based on input opcode.
8070 # FIXME: add a self.ignore_ipolicy option
8071 ial = IAllocator(self.cfg, self.rpc,
8072 mode=constants.IALLOCATOR_MODE_RELOC,
8073 name=self.instance_name,
8074 relocate_from=[self.instance.primary_node],
8077 ial.Run(self.lu.op.iallocator)
8080 raise errors.OpPrereqError("Can't compute nodes using"
8081 " iallocator '%s': %s" %
8082 (self.lu.op.iallocator, ial.info),
8084 if len(ial.result) != ial.required_nodes:
8085 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8086 " of nodes (%s), required %s" %
8087 (self.lu.op.iallocator, len(ial.result),
8088 ial.required_nodes), errors.ECODE_FAULT)
8089 self.target_node = ial.result[0]
8090 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8091 self.instance_name, self.lu.op.iallocator,
8092 utils.CommaJoin(ial.result))
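# In IALLOCATOR_MODE_RELOC the allocator returns a list of candidate node
# names; relocating an instance needs exactly one new node, so
# ial.required_nodes is 1 and only ial.result[0] is used above.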
8094 def _WaitUntilSync(self):
8095 """Poll with custom rpc for disk sync.
8097 This uses our own step-based rpc call.
8100 self.feedback_fn("* wait until resync is done")
8104 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8106 self.instance.disks)
8108 for node, nres in result.items():
8109 nres.Raise("Cannot resync disks on node %s" % node)
8110 node_done, node_percent = nres.payload
8111 all_done = all_done and node_done
8112 if node_percent is not None:
8113 min_percent = min(min_percent, node_percent)
8115 if min_percent < 100:
8116 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8119 def _EnsureSecondary(self, node):
8120 """Demote a node to secondary.
8123 self.feedback_fn("* switching node %s to secondary mode" % node)
8125 for dev in self.instance.disks:
8126 self.cfg.SetDiskID(dev, node)
8128 result = self.rpc.call_blockdev_close(node, self.instance.name,
8129 self.instance.disks)
8130 result.Raise("Cannot change disk to secondary on node %s" % node)
8132 def _GoStandalone(self):
8133 """Disconnect from the network.
8136 self.feedback_fn("* changing into standalone mode")
8137 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8138 self.instance.disks)
8139 for node, nres in result.items():
8140 nres.Raise("Cannot disconnect disks node %s" % node)
8142 def _GoReconnect(self, multimaster):
8143 """Reconnect to the network.
8149 msg = "single-master"
8150 self.feedback_fn("* changing disks into %s mode" % msg)
8151 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8152 self.instance.disks,
8153 self.instance.name, multimaster)
8154 for node, nres in result.items():
8155 nres.Raise("Cannot change disks config on node %s" % node)
8157 def _ExecCleanup(self):
8158 """Try to cleanup after a failed migration.
8160 The cleanup is done by:
8161 - check that the instance is running only on one node
8162 (and update the config if needed)
8163 - change disks on its secondary node to secondary
8164 - wait until disks are fully synchronized
8165 - disconnect from the network
8166 - change disks into single-master mode
8167 - wait again until disks are fully synchronized
8170 instance = self.instance
8171 target_node = self.target_node
8172 source_node = self.source_node
8174 # check running on only one node
8175 self.feedback_fn("* checking where the instance actually runs"
8176 " (if this hangs, the hypervisor might be in"
8178 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8179 for node, result in ins_l.items():
8180 result.Raise("Can't contact node %s" % node)
8182 runningon_source = instance.name in ins_l[source_node].payload
8183 runningon_target = instance.name in ins_l[target_node].payload
8185 if runningon_source and runningon_target:
8186 raise errors.OpExecError("Instance seems to be running on two nodes,"
8187 " or the hypervisor is confused; you will have"
8188 " to ensure manually that it runs only on one"
8189 " and restart this operation")
8191 if not (runningon_source or runningon_target):
8192 raise errors.OpExecError("Instance does not seem to be running at all;"
8193 " in this case it's safer to repair by"
8194 " running 'gnt-instance stop' to ensure disk"
8195 " shutdown, and then restarting it")
8197 if runningon_target:
8198 # the migration has actually succeeded, we need to update the config
8199 self.feedback_fn("* instance running on secondary node (%s),"
8200 " updating config" % target_node)
8201 instance.primary_node = target_node
8202 self.cfg.Update(instance, self.feedback_fn)
8203 demoted_node = source_node
8205 self.feedback_fn("* instance confirmed to be running on its"
8206 " primary node (%s)" % source_node)
8207 demoted_node = target_node
8209 if instance.disk_template in constants.DTS_INT_MIRROR:
8210 self._EnsureSecondary(demoted_node)
8212 self._WaitUntilSync()
8213 except errors.OpExecError:
8214 # we ignore errors here, since if the device is standalone, it
8215 # won't be able to sync
8217 self._GoStandalone()
8218 self._GoReconnect(False)
8219 self._WaitUntilSync()
8221 self.feedback_fn("* done")
8223 def _RevertDiskStatus(self):
8224 """Try to revert the disk status after a failed migration.
8227 target_node = self.target_node
8228 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8232 self._EnsureSecondary(target_node)
8233 self._GoStandalone()
8234 self._GoReconnect(False)
8235 self._WaitUntilSync()
8236 except errors.OpExecError, err:
8237 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8238 " please try to recover the instance manually;"
8239 " error '%s'" % str(err))
8241 def _AbortMigration(self):
8242 """Call the hypervisor code to abort a started migration.
8245 instance = self.instance
8246 target_node = self.target_node
8247 source_node = self.source_node
8248 migration_info = self.migration_info
8250 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8254 abort_msg = abort_result.fail_msg
8256 logging.error("Aborting migration failed on target node %s: %s",
8257 target_node, abort_msg)
8258 # Don't raise an exception here, as we still have to try to revert the
8259 # disk status, even if this step failed.
8261 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8262 instance, False, self.live)
8263 abort_msg = abort_result.fail_msg
8265 logging.error("Aborting migration failed on source node %s: %s",
8266 source_node, abort_msg)
8268 def _ExecMigration(self):
8269 """Migrate an instance.
8271 The migration is done by:
8272 - change the disks into dual-master mode
8273 - wait until disks are fully synchronized again
8274 - migrate the instance
8275 - change disks on the new secondary node (the old primary) to secondary
8276 - wait until disks are fully synchronized
8277 - change disks into single-master mode
8280 instance = self.instance
8281 target_node = self.target_node
8282 source_node = self.source_node
8284 # Check for hypervisor version mismatch and warn the user.
8285 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8286 None, [self.instance.hypervisor])
8287 for ninfo in nodeinfo.values():
8288 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8290 (_, _, (src_info, )) = nodeinfo[source_node].payload
8291 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8293 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8294 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8295 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8296 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8297 if src_version != dst_version:
8298 self.feedback_fn("* warning: hypervisor version mismatch between"
8299 " source (%s) and target (%s) node" %
8300 (src_version, dst_version))
8302 self.feedback_fn("* checking disk consistency between source and target")
8303 for (idx, dev) in enumerate(instance.disks):
8304 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8305 raise errors.OpExecError("Disk %s is degraded or not fully"
8306 " synchronized on target node,"
8307 " aborting migration" % idx)
8309 if self.current_mem > self.tgt_free_mem:
8310 if not self.allow_runtime_changes:
8311 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8312 " free memory to fit instance %s on target"
8313 " node %s (have %dMB, need %dMB)" %
8314 (instance.name, target_node,
8315 self.tgt_free_mem, self.current_mem))
8316 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8317 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8320 rpcres.Raise("Cannot modify instance runtime memory")
8322 # First get the migration information from the remote node
8323 result = self.rpc.call_migration_info(source_node, instance)
8324 msg = result.fail_msg
8326 log_err = ("Failed fetching source migration information from %s: %s" %
8328 logging.error(log_err)
8329 raise errors.OpExecError(log_err)
8331 self.migration_info = migration_info = result.payload
8333 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8334 # Then switch the disks to master/master mode
8335 self._EnsureSecondary(target_node)
8336 self._GoStandalone()
8337 self._GoReconnect(True)
8338 self._WaitUntilSync()
8340 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8341 result = self.rpc.call_accept_instance(target_node,
8344 self.nodes_ip[target_node])
8346 msg = result.fail_msg
8348 logging.error("Instance pre-migration failed, trying to revert"
8349 " disk status: %s", msg)
8350 self.feedback_fn("Pre-migration failed, aborting")
8351 self._AbortMigration()
8352 self._RevertDiskStatus()
8353 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8354 (instance.name, msg))
8356 self.feedback_fn("* migrating instance to %s" % target_node)
8357 result = self.rpc.call_instance_migrate(source_node, instance,
8358 self.nodes_ip[target_node],
8360 msg = result.fail_msg
8362 logging.error("Instance migration failed, trying to revert"
8363 " disk status: %s", msg)
8364 self.feedback_fn("Migration failed, aborting")
8365 self._AbortMigration()
8366 self._RevertDiskStatus()
8367 raise errors.OpExecError("Could not migrate instance %s: %s" %
8368 (instance.name, msg))
8370 self.feedback_fn("* starting memory transfer")
8371 last_feedback = time.time()
8373 result = self.rpc.call_instance_get_migration_status(source_node,
8375 msg = result.fail_msg
8376 ms = result.payload # MigrationStatus instance
8377 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8378 logging.error("Instance migration failed, trying to revert"
8379 " disk status: %s", msg)
8380 self.feedback_fn("Migration failed, aborting")
8381 self._AbortMigration()
8382 self._RevertDiskStatus()
8383 raise errors.OpExecError("Could not migrate instance %s: %s" %
8384 (instance.name, msg))
8386 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8387 self.feedback_fn("* memory transfer complete")
8390 if (utils.TimeoutExpired(last_feedback,
8391 self._MIGRATION_FEEDBACK_INTERVAL) and
8392 ms.transferred_ram is not None):
8393 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8394 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8395 last_feedback = time.time()
8397 time.sleep(self._MIGRATION_POLL_INTERVAL)
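# The loop above polls the migration status once per second
# (_MIGRATION_POLL_INTERVAL), but rate-limits user feedback to one progress
# line every _MIGRATION_FEEDBACK_INTERVAL seconds; e.g. with 2048 of
# 4096 MB transferred it prints "* memory transfer progress: 50.00 %".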
8399 result = self.rpc.call_instance_finalize_migration_src(source_node,
8403 msg = result.fail_msg
8405 logging.error("Instance migration succeeded, but finalization failed"
8406 " on the source node: %s", msg)
8407 raise errors.OpExecError("Could not finalize instance migration: %s" %
8410 instance.primary_node = target_node
8412 # distribute new instance config to the other nodes
8413 self.cfg.Update(instance, self.feedback_fn)
8415 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8419 msg = result.fail_msg
8421 logging.error("Instance migration succeeded, but finalization failed"
8422 " on the target node: %s", msg)
8423 raise errors.OpExecError("Could not finalize instance migration: %s" %
8426 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8427 self._EnsureSecondary(source_node)
8428 self._WaitUntilSync()
8429 self._GoStandalone()
8430 self._GoReconnect(False)
8431 self._WaitUntilSync()
8433 # If the instance's disk template is `rbd' and there was a successful
8434 # migration, unmap the device from the source node.
8435 if self.instance.disk_template == constants.DT_RBD:
8436 disks = _ExpandCheckDisks(instance, instance.disks)
8437 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8439 result = self.rpc.call_blockdev_shutdown(source_node, disk)
8440 msg = result.fail_msg
8442 logging.error("Migration was successful, but couldn't unmap the"
8443 " block device %s on source node %s: %s",
8444 disk.iv_name, source_node, msg)
8445 logging.error("You need to unmap the device %s manually on %s",
8446 disk.iv_name, source_node)
8448 self.feedback_fn("* done")
8450 def _ExecFailover(self):
8451 """Failover an instance.
8453 The failover is done by shutting it down on its present node and
8454 starting it on the secondary.
8457 instance = self.instance
8458 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8460 source_node = instance.primary_node
8461 target_node = self.target_node
8463 if instance.admin_state == constants.ADMINST_UP:
8464 self.feedback_fn("* checking disk consistency between source and target")
8465 for (idx, dev) in enumerate(instance.disks):
8466 # for drbd, these are drbd over lvm
8467 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8468 if primary_node.offline:
8469 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8471 (primary_node.name, idx, target_node))
8472 elif not self.ignore_consistency:
8473 raise errors.OpExecError("Disk %s is degraded on target node,"
8474 " aborting failover" % idx)
8476 self.feedback_fn("* not checking disk consistency as instance is not"
8479 self.feedback_fn("* shutting down instance on source node")
8480 logging.info("Shutting down instance %s on node %s",
8481 instance.name, source_node)
8483 result = self.rpc.call_instance_shutdown(source_node, instance,
8484 self.shutdown_timeout)
8485 msg = result.fail_msg
8487 if self.ignore_consistency or primary_node.offline:
8488 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8489 " proceeding anyway; please make sure node"
8490 " %s is down; error details: %s",
8491 instance.name, source_node, source_node, msg)
8493 raise errors.OpExecError("Could not shutdown instance %s on"
8495 (instance.name, source_node, msg))
8497 self.feedback_fn("* deactivating the instance's disks on source node")
8498 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8499 raise errors.OpExecError("Can't shut down the instance's disks")
8501 instance.primary_node = target_node
8502 # distribute new instance config to the other nodes
8503 self.cfg.Update(instance, self.feedback_fn)
8505 # Only start the instance if it's marked as up
8506 if instance.admin_state == constants.ADMINST_UP:
8507 self.feedback_fn("* activating the instance's disks on target node %s" %
8509 logging.info("Starting instance %s on node %s",
8510 instance.name, target_node)
8512 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8513 ignore_secondaries=True)
8515 _ShutdownInstanceDisks(self.lu, instance)
8516 raise errors.OpExecError("Can't activate the instance's disks")
8518 self.feedback_fn("* starting the instance on the target node %s" %
8520 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8522 msg = result.fail_msg
8524 _ShutdownInstanceDisks(self.lu, instance)
8525 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8526 (instance.name, target_node, msg))
8528 def Exec(self, feedback_fn):
8529 """Perform the migration.
8532 self.feedback_fn = feedback_fn
8533 self.source_node = self.instance.primary_node
8535 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8536 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8537 self.target_node = self.instance.secondary_nodes[0]
8538 # Otherwise self.target_node has been populated either
8539 # directly, or through an iallocator.
8541 self.all_nodes = [self.source_node, self.target_node]
8542 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8543 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8546 feedback_fn("Failover instance %s" % self.instance.name)
8547 self._ExecFailover()
8549 feedback_fn("Migrating instance %s" % self.instance.name)
8552 return self._ExecCleanup()
8554 return self._ExecMigration()
8557 def _CreateBlockDev(lu, node, instance, device, force_create,
8559 """Create a tree of block devices on a given node.
8561 If this device type has to be created on secondaries, create it and
8564 If not, just recurse to children keeping the same 'force' value.
8566 @param lu: the lu on whose behalf we execute
8567 @param node: the node on which to create the device
8568 @type instance: L{objects.Instance}
8569 @param instance: the instance which owns the device
8570 @type device: L{objects.Disk}
8571 @param device: the device to create
8572 @type force_create: boolean
8573 @param force_create: whether to force creation of this device; this
8574 will be changed to True whenever we find a device which has
8575 the CreateOnSecondary() attribute
8576 @param info: the extra 'metadata' we should attach to the device
8577 (this will be represented as a LVM tag)
8578 @type force_open: boolean
8579 @param force_open: this parameter will be passed to the
8580 L{backend.BlockdevCreate} function where it specifies
8581 whether we run on primary or not, and it affects both
8582 the child assembly and the device's own Open() execution
8585 if device.CreateOnSecondary():
8589 for child in device.children:
8590 _CreateBlockDev(lu, node, instance, child, force_create,
8593 if not force_create:
8596 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8599 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8600 """Create a single block device on a given node.
8602 This will not recurse over children of the device, so they must be
8605 @param lu: the lu on whose behalf we execute
8606 @param node: the node on which to create the device
8607 @type instance: L{objects.Instance}
8608 @param instance: the instance which owns the device
8609 @type device: L{objects.Disk}
8610 @param device: the device to create
8611 @param info: the extra 'metadata' we should attach to the device
8612 (this will be represented as a LVM tag)
8613 @type force_open: boolean
8614 @param force_open: this parameter will be passed to the
8615 L{backend.BlockdevCreate} function where it specifies
8616 whether we run on primary or not, and it affects both
8617 the child assembly and the device's own Open() execution
8620 lu.cfg.SetDiskID(device, node)
8621 result = lu.rpc.call_blockdev_create(node, device, device.size,
8622 instance.name, force_open, info)
8623 result.Raise("Can't create block device %s on"
8624 " node %s for instance %s" % (device, node, instance.name))
8625 if device.physical_id is None:
8626 device.physical_id = result.payload
8629 def _GenerateUniqueNames(lu, exts):
8630 """Generate a suitable LV name.
8632 This will generate a logical volume name for the given instance.
8637 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8638 results.append("%s%s" % (new_id, val))
8642 def _ComputeLDParams(disk_template, disk_params):
8643 """Computes Logical Disk parameters from Disk Template parameters.
8645 @type disk_template: string
8646 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8647 @type disk_params: dict
8648 @param disk_params: disk template parameters; dict(template_name -> parameters
8650 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8651 contains the LD parameters of the node. The tree is flattened in-order.
8654 if disk_template not in constants.DISK_TEMPLATES:
8655 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8658 dt_params = disk_params[disk_template]
8659 if disk_template == constants.DT_DRBD8:
8661 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8662 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8663 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8664 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8665 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8666 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8667 constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8668 constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8669 constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8670 constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8671 constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8672 constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8676 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8679 result.append(drbd_params)
8683 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8686 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8688 result.append(data_params)
8692 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8695 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8697 result.append(meta_params)
8699 elif (disk_template == constants.DT_FILE or
8700 disk_template == constants.DT_SHARED_FILE):
8701 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8703 elif disk_template == constants.DT_PLAIN:
8705 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8708 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8710 result.append(params)
8712 elif disk_template == constants.DT_BLOCK:
8713 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8715 elif disk_template == constants.DT_RBD:
8717 constants.LDP_POOL: dt_params[constants.RBD_POOL]
8720 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_RBD],
8722 result.append(params)
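# The in-order flattening means callers can destructure the result by
# template; e.g. for DT_DRBD8 the returned list is exactly
#
#   [drbd_params, data_params, meta_params]
#
# (the DRBD8 device followed by its data and metadata LV children), which
# is how _GenerateDiskTemplate unpacks it further down.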
8727 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8728 iv_name, p_minor, s_minor, drbd_params, data_params,
8730 """Generate a drbd8 device complete with its children.
8733 assert len(vgnames) == len(names) == 2
8734 port = lu.cfg.AllocatePort()
8735 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8737 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8738 logical_id=(vgnames[0], names[0]),
8740 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8741 logical_id=(vgnames[1], names[1]),
8743 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8744 logical_id=(primary, secondary, port,
8747 children=[dev_data, dev_meta],
8748 iv_name=iv_name, params=drbd_params)
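# For reference, the complete logical_id of an LD_DRBD8 disk (cf.
# objects.Disk) is the 6-tuple (primary, secondary, port, p_minor,
# s_minor, shared_secret): the two node names, the TCP port allocated
# above, the DRBD minor on each node and the peer-authentication secret.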
8752 _DISK_TEMPLATE_NAME_PREFIX = {
8753 constants.DT_PLAIN: "",
8754 constants.DT_RBD: ".rbd",
8758 _DISK_TEMPLATE_DEVICE_TYPE = {
8759 constants.DT_PLAIN: constants.LD_LV,
8760 constants.DT_FILE: constants.LD_FILE,
8761 constants.DT_SHARED_FILE: constants.LD_FILE,
8762 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8763 constants.DT_RBD: constants.LD_RBD,
8767 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8768 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8769 feedback_fn, disk_params,
8770 _req_file_storage=opcodes.RequireFileStorage,
8771 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8772 """Generate the entire disk layout for a given template type.
8775 #TODO: compute space requirements
8777 vgname = lu.cfg.GetVGName()
8778 disk_count = len(disk_info)
8780 ld_params = _ComputeLDParams(template_name, disk_params)
8782 if template_name == constants.DT_DISKLESS:
8784 elif template_name == constants.DT_DRBD8:
8785 drbd_params, data_params, meta_params = ld_params
8786 if len(secondary_nodes) != 1:
8787 raise errors.ProgrammerError("Wrong template configuration")
8788 remote_node = secondary_nodes[0]
8789 minors = lu.cfg.AllocateDRBDMinor(
8790 [primary_node, remote_node] * len(disk_info), instance_name)
8793 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8794 for i in range(disk_count)]):
8795 names.append(lv_prefix + "_data")
8796 names.append(lv_prefix + "_meta")
8797 for idx, disk in enumerate(disk_info):
8798 disk_index = idx + base_index
8799 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8800 data_vg = disk.get(constants.IDISK_VG, vgname)
8801 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8802 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8803 disk[constants.IDISK_SIZE],
8805 names[idx * 2:idx * 2 + 2],
8806 "disk/%d" % disk_index,
8807 minors[idx * 2], minors[idx * 2 + 1],
8808 drbd_params, data_params, meta_params)
8809 disk_dev.mode = disk[constants.IDISK_MODE]
8810 disks.append(disk_dev)
8813 raise errors.ProgrammerError("Wrong template configuration")
8815 if template_name == constants.DT_FILE:
8817 elif template_name == constants.DT_SHARED_FILE:
8818 _req_shr_file_storage()
8820 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8821 if name_prefix is None:
8824 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8825 (name_prefix, base_index + i)
8826 for i in range(disk_count)])
8828 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8830 if template_name == constants.DT_PLAIN:
8831 def logical_id_fn(idx, _, disk):
8832 vg = disk.get(constants.IDISK_VG, vgname)
8833 return (vg, names[idx])
8834 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8836 lambda _, disk_index, disk: (file_driver,
8837 "%s/disk%d" % (file_storage_dir,
8839 elif template_name == constants.DT_BLOCK:
8841 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8842 disk[constants.IDISK_ADOPT])
8843 elif template_name == constants.DT_RBD:
8844 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8846 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8848 for idx, disk in enumerate(disk_info):
8849 disk_index = idx + base_index
8850 size = disk[constants.IDISK_SIZE]
8851 feedback_fn("* disk %s, size %s" %
8852 (disk_index, utils.FormatUnit(size, "h")))
8853 disks.append(objects.Disk(dev_type=dev_type, size=size,
8854 logical_id=logical_id_fn(idx, disk_index, disk),
8855 iv_name="disk/%d" % disk_index,
8856 mode=disk[constants.IDISK_MODE],
8857 params=ld_params[0]))
8862 def _GetInstanceInfoText(instance):
8863 """Compute that text that should be added to the disk's metadata.
8866 return "originstname+%s" % instance.name
8869 def _CalcEta(time_taken, written, total_size):
8870 """Calculates the ETA based on size written and total size.
8872 @param time_taken: The time taken so far
8873 @param written: amount written so far
8874 @param total_size: The total size of data to be written
8875 @return: The remaining time in seconds
8878 avg_time = time_taken / float(written)
8879 return (total_size - written) * avg_time
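# Worked example: if 1000 MB out of 4000 MB were written in 50 seconds,
# avg_time is 50 / 1000.0 = 0.05 s/MB, so
#
#   _CalcEta(50.0, 1000, 4000) == (4000 - 1000) * 0.05 == 150.0  # seconds
#
# Callers must ensure written > 0, since the average divides by it.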
8882 def _WipeDisks(lu, instance):
8883 """Wipes instance disks.
8885 @type lu: L{LogicalUnit}
8886 @param lu: the logical unit on whose behalf we execute
8887 @type instance: L{objects.Instance}
8888 @param instance: the instance whose disks we should wipe
8889 @return: the success of the wipe
8892 node = instance.primary_node
8894 for device in instance.disks:
8895 lu.cfg.SetDiskID(device, node)
8897 logging.info("Pause sync of instance %s disks", instance.name)
8898 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8900 for idx, success in enumerate(result.payload):
8902 logging.warn("pause-sync of instance %s for disks %d failed",
8906 for idx, device in enumerate(instance.disks):
8907 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8908 # MAX_WIPE_CHUNK at max
8909 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8910 constants.MIN_WIPE_CHUNK_PERCENT)
8911 # we _must_ make this an int, otherwise rounding errors will
8913 wipe_chunk_size = int(wipe_chunk_size)
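# Illustrative numbers (assuming MIN_WIPE_CHUNK_PERCENT == 10): a 1024 MB
# disk gives 1024 / 100.0 * 10 = 102.4, truncated to a 102 MB chunk, while
# for very large disks the min() clamps the chunk to MAX_WIPE_CHUNK.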
8915 lu.LogInfo("* Wiping disk %d", idx)
8916 logging.info("Wiping disk %d for instance %s, node %s using"
8917 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8922 start_time = time.time()
8924 while offset < size:
8925 wipe_size = min(wipe_chunk_size, size - offset)
8926 logging.debug("Wiping disk %d, offset %s, chunk %s",
8927 idx, offset, wipe_size)
8928 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8929 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8930 (idx, offset, wipe_size))
8933 if now - last_output >= 60:
8934 eta = _CalcEta(now - start_time, offset, size)
8935 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8936 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8939 logging.info("Resume sync of instance %s disks", instance.name)
8941 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8943 for idx, success in enumerate(result.payload):
8945 lu.LogWarning("Resume sync of disk %d failed, please have a"
8946 " look at the status and troubleshoot the issue", idx)
8947 logging.warn("resume-sync of instance %s for disks %d failed",
8951 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8952 """Create all disks for an instance.
8954 This abstracts away some work from AddInstance.
8956 @type lu: L{LogicalUnit}
8957 @param lu: the logical unit on whose behalf we execute
8958 @type instance: L{objects.Instance}
8959 @param instance: the instance whose disks we should create
8961 @param to_skip: list of indices to skip
8962 @type target_node: string
8963 @param target_node: if passed, overrides the target node for creation
8965 @return: the success of the creation
8968 info = _GetInstanceInfoText(instance)
8969 if target_node is None:
8970 pnode = instance.primary_node
8971 all_nodes = instance.all_nodes
8976 if instance.disk_template in constants.DTS_FILEBASED:
8977 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8978 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8980 result.Raise("Failed to create directory '%s' on"
8981 " node %s" % (file_storage_dir, pnode))
8983 # Note: this needs to be kept in sync with adding of disks in
8984 # LUInstanceSetParams
8985 for idx, device in enumerate(instance.disks):
8986 if to_skip and idx in to_skip:
8988 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
8990 for node in all_nodes:
8991 f_create = node == pnode
8992 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8995 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
8996 """Remove all disks for an instance.
8998 This abstracts away some work from `AddInstance()` and
8999 `RemoveInstance()`. Note that in case some of the devices couldn't
9000 be removed, the removal will continue with the other ones (compare
9001 with `_CreateDisks()`).
9003 @type lu: L{LogicalUnit}
9004 @param lu: the logical unit on whose behalf we execute
9005 @type instance: L{objects.Instance}
9006 @param instance: the instance whose disks we should remove
9007 @type target_node: string
9008 @param target_node: used to override the node on which to remove the disks
9010 @return: the success of the removal
9013 logging.info("Removing block devices for instance %s", instance.name)
9016 ports_to_release = set()
9017 for (idx, device) in enumerate(instance.disks):
9019 edata = [(target_node, device)]
9021 edata = device.ComputeNodeTree(instance.primary_node)
9022 for node, disk in edata:
9023 lu.cfg.SetDiskID(disk, node)
9024 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
9026 lu.LogWarning("Could not remove disk %s on node %s,"
9027 " continuing anyway: %s", idx, node, msg)
9030 # if this is a DRBD disk, return its port to the pool
9031 if device.dev_type in constants.LDS_DRBD:
9032 ports_to_release.add(device.logical_id[2])
9034 if all_result or ignore_failures:
9035 for port in ports_to_release:
9036 lu.cfg.AddTcpUdpPort(port)
9038 if instance.disk_template == constants.DT_FILE:
9039 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9043 tgt = instance.primary_node
9044 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9046 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9047 file_storage_dir, instance.primary_node, result.fail_msg)
9053 def _ComputeDiskSizePerVG(disk_template, disks):
9054 """Compute disk size requirements in the volume group
9057 def _compute(disks, payload):
9058 """Universal algorithm.
9063 vgs[disk[constants.IDISK_VG]] = \
9064 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9068 # Required free disk space as a function of disk and swap space
9070 constants.DT_DISKLESS: {},
9071 constants.DT_PLAIN: _compute(disks, 0),
9072 # 128 MB are added for drbd metadata for each disk
9073 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9074 constants.DT_FILE: {},
9075 constants.DT_SHARED_FILE: {},
9078 if disk_template not in req_size_dict:
9079 raise errors.ProgrammerError("Disk template '%s' size requirement"
9080 " is unknown" % disk_template)
9082 return req_size_dict[disk_template]
9085 def _ComputeDiskSize(disk_template, disks):
9086 """Compute disk size requirements in the volume group
9089 # Required free disk space as a function of disk and swap space
9091 constants.DT_DISKLESS: None,
9092 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9093 # 128 MB are added for drbd metadata for each disk
9095 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9096 constants.DT_FILE: None,
9097 constants.DT_SHARED_FILE: 0,
9098 constants.DT_BLOCK: 0,
9099 constants.DT_RBD: 0,
9102 if disk_template not in req_size_dict:
9103 raise errors.ProgrammerError("Disk template '%s' size requirement"
9104 " is unknown" % disk_template)
9106 return req_size_dict[disk_template]
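# Worked example: two disks of 1024 MB and 2048 MB under DT_DRBD8, with the
# 128 MB of per-disk DRBD metadata mentioned above, require
#
#   _ComputeDiskSize(constants.DT_DRBD8,
#                    [{constants.IDISK_SIZE: 1024},
#                     {constants.IDISK_SIZE: 2048}])
#   == 1024 + 2048 + 2 * DRBD_META_SIZE == 3328  # MB
#
# while DT_FILE needs no volume group space at all and reports None.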
9109 def _FilterVmNodes(lu, nodenames):
9110 """Filters out non-vm_capable nodes from a list.
9112 @type lu: L{LogicalUnit}
9113 @param lu: the logical unit for which we check
9114 @type nodenames: list
9115 @param nodenames: the list of nodes on which we should check
9117 @return: the list of vm-capable nodes
9120 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9121 return [name for name in nodenames if name not in non_vm_nodes]
9124 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9125 """Hypervisor parameter validation.
9127 This function abstracts the hypervisor parameter validation to be
9128 used in both instance create and instance modify.
9130 @type lu: L{LogicalUnit}
9131 @param lu: the logical unit for which we check
9132 @type nodenames: list
9133 @param nodenames: the list of nodes on which we should check
9134 @type hvname: string
9135 @param hvname: the name of the hypervisor we should use
9136 @type hvparams: dict
9137 @param hvparams: the parameters which we need to check
9138 @raise errors.OpPrereqError: if the parameters are not valid
9141 nodenames = _FilterVmNodes(lu, nodenames)
9143 cluster = lu.cfg.GetClusterInfo()
9144 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9146 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9147 for node in nodenames:
9148 info = hvinfo[node]
9151 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9154 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9155 """OS parameters validation.
9157 @type lu: L{LogicalUnit}
9158 @param lu: the logical unit for which we check
9159 @type required: boolean
9160 @param required: whether the validation should fail if the OS is not
9162 @type nodenames: list
9163 @param nodenames: the list of nodes on which we should check
9164 @type osname: string
9165 @param osname: the name of the OS we should use
9166 @type osparams: dict
9167 @param osparams: the parameters which we need to check
9168 @raise errors.OpPrereqError: if the parameters are not valid
9171 nodenames = _FilterVmNodes(lu, nodenames)
9172 result = lu.rpc.call_os_validate(nodenames, required, osname,
9173 [constants.OS_VALIDATE_PARAMETERS],
9175 for node, nres in result.items():
9176 # we don't check for offline cases since this should be run only
9177 # against the master node and/or an instance's nodes
9178 nres.Raise("OS Parameters validation failed on node %s" % node)
9179 if not nres.payload:
9180 lu.LogInfo("OS %s not found on node %s, validation skipped",
9184 class LUInstanceCreate(LogicalUnit):
9185 """Create an instance.
9188 HPATH = "instance-add"
9189 HTYPE = constants.HTYPE_INSTANCE
9192 def CheckArguments(self):
9196 # do not require name_check to ease forward/backward compatibility
9198 if self.op.no_install and self.op.start:
9199 self.LogInfo("No-installation mode selected, disabling startup")
9200 self.op.start = False
9201 # validate/normalize the instance name
9202 self.op.instance_name = \
9203 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9205 if self.op.ip_check and not self.op.name_check:
9206 # TODO: make the ip check more flexible and not depend on the name check
9207 raise errors.OpPrereqError("Cannot do IP address check without a name"
9208 " check", errors.ECODE_INVAL)
9210 # check nics' parameter names
9211 for nic in self.op.nics:
9212 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9214 # check disks. parameter names and consistent adopt/no-adopt strategy
9215 has_adopt = has_no_adopt = False
9216 for disk in self.op.disks:
9217 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9218 if constants.IDISK_ADOPT in disk:
9222 if has_adopt and has_no_adopt:
9223 raise errors.OpPrereqError("Either all disks are adopted or none is",
9226 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9227 raise errors.OpPrereqError("Disk adoption is not supported for the"
9228 " '%s' disk template" %
9229 self.op.disk_template,
9231 if self.op.iallocator is not None:
9232 raise errors.OpPrereqError("Disk adoption not allowed with an"
9233 " iallocator script", errors.ECODE_INVAL)
9234 if self.op.mode == constants.INSTANCE_IMPORT:
9235 raise errors.OpPrereqError("Disk adoption not allowed for"
9236 " instance import", errors.ECODE_INVAL)
9238 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9239 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9240 " but no 'adopt' parameter given" %
9241 self.op.disk_template,
9244 self.adopt_disks = has_adopt
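# An adoption request therefore names the pre-existing volume in every
# disk dictionary, along the lines of (a sketch; keys per constants.py):
#
#   disks=[{constants.IDISK_SIZE: 10240, constants.IDISK_ADOPT: "lv0"}]
#
# where the adopt value identifies the existing volume (for DT_BLOCK it is
# the block device path used as logical_id later on); mixing adopted and
# non-adopted disks is rejected above.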
9246 # instance name verification
9247 if self.op.name_check:
9248 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9249 self.op.instance_name = self.hostname1.name
9250 # used in CheckPrereq for ip ping check
9251 self.check_ip = self.hostname1.ip
9253 self.check_ip = None
9255 # file storage checks
9256 if (self.op.file_driver and
9257 not self.op.file_driver in constants.FILE_DRIVER):
9258 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9259 self.op.file_driver, errors.ECODE_INVAL)
9261 if self.op.disk_template == constants.DT_FILE:
9262 opcodes.RequireFileStorage()
9263 elif self.op.disk_template == constants.DT_SHARED_FILE:
9264 opcodes.RequireSharedFileStorage()
9266 ### Node/iallocator related checks
9267 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9269 if self.op.pnode is not None:
9270 if self.op.disk_template in constants.DTS_INT_MIRROR:
9271 if self.op.snode is None:
9272 raise errors.OpPrereqError("The networked disk templates need"
9273 " a mirror node", errors.ECODE_INVAL)
9275 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9277 self.op.snode = None
9279 self._cds = _GetClusterDomainSecret()
    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_INVAL)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
        netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}
9355 instance_name = self.op.instance_name
9356 # this is just a preventive check, but someone might still add this
9357 # instance in the meantime, and creation will fail at lock-add time
9358 if instance_name in self.cfg.GetInstanceList():
9359 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9360 instance_name, errors.ECODE_EXISTS)
9362 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
    if self.op.iallocator:
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
      # specifying a group on instance creation and then selecting nodes from
      # that group
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidental modification)
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)
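    # Illustrative note (not part of the original source): with a source node
    # given and a relative src_path such as "inst1.example.com", the line
    # above would expand it to e.g. "/srv/ganeti/export/inst1.example.com";
    # the actual prefix depends on how constants.EXPORT_DIR was configured.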
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=self.op.tags,
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MAXMEM],
                     spindle_use=self.be_full[constants.BE_SPINDLE_USE],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]
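    # Illustrative note (not part of the original source): for a DRBD-based
    # instance required_nodes is 2, so a successful run could return e.g.
    # ial.result == ["node1.example.com", "node2.example.com"], the first
    # entry becoming the primary node and the second the secondary.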
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
9451 if self.op.mode == constants.INSTANCE_IMPORT:
9452 env["SRC_NODE"] = self.op.src_node
9453 env["SRC_PATH"] = self.op.src_path
9454 env["SRC_IMAGES"] = self.src_images
9456 env.update(_BuildInstanceHookEnv(
9457 name=self.op.instance_name,
9458 primary_node=self.op.pnode,
9459 secondary_nodes=self.secondaries,
9460 status=self.op.start,
9461 os_type=self.op.os_type,
9462 minmem=self.be_full[constants.BE_MINMEM],
9463 maxmem=self.be_full[constants.BE_MAXMEM],
9464 vcpus=self.be_full[constants.BE_VCPUS],
9465 nics=_NICListToTuple(self, self.nics),
9466 disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
    ))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return nl, nl
  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT
9495 src_node = self.op.src_node
9496 src_path = self.op.src_path
    if src_node is None:
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)
9515 _CheckNodeOnline(self, src_node)
9516 result = self.rpc.call_export_info(src_node, src_path)
9517 result.Raise("No export or invalid export found in dir %s" % src_path)
9519 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9520 if not export_info.has_section(constants.INISECT_EXP):
9521 raise errors.ProgrammerError("Corrupted export config",
9522 errors.ECODE_ENVIRON)
9524 ei_version = export_info.get(constants.INISECT_EXP, "version")
9525 if (int(ei_version) != constants.EXPORT_VERSION):
9526 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9527 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)

    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
        if self.op.disk_template not in constants.DISK_TEMPLATES:
          raise errors.OpPrereqError("Disk template specified in configuration"
                                     " file is not one of the allowed values:"
                                     " %s" % " ".join(constants.DISK_TEMPLATES))
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      disks = []
      # TODO: import the disk iv_name too
      for idx in range(constants.MAX_DISKS):
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
      self.op.disks = disks
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if not self.op.nics:
      nics = []
      for idx in range(constants.MAX_NICS):
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
          ndict = {}
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
            ndict[name] = v
          nics.append(ndict)
        else:
          break
      self.op.nics = nics
9580 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9581 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9583 if (self.op.hypervisor is None and
9584 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9585 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9587 if einfo.has_section(constants.INISECT_HYP):
9588 # use the export parameters but do not override the ones
9589 # specified by the user
9590 for name, value in einfo.items(constants.INISECT_HYP):
9591 if name not in self.op.hvparams:
9592 self.op.hvparams[name] = value
9594 if einfo.has_section(constants.INISECT_BEP):
9595 # use the parameters, without overriding
9596 for name, value in einfo.items(constants.INISECT_BEP):
9597 if name not in self.op.beparams:
9598 self.op.beparams[name] = value
9599 # Compatibility for the old "memory" be param
9600 if name == constants.BE_MEMORY:
9601 if constants.BE_MAXMEM not in self.op.beparams:
9602 self.op.beparams[constants.BE_MAXMEM] = value
9603 if constants.BE_MINMEM not in self.op.beparams:
9604 self.op.beparams[constants.BE_MINMEM] = value
9606 # try to read the parameters old style, from the main section
9607 for name in constants.BES_PARAMETERS:
9608 if (name not in self.op.beparams and
9609 einfo.has_option(constants.INISECT_INS, name)):
9610 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9612 if einfo.has_section(constants.INISECT_OSP):
9613 # use the parameters, without overriding
9614 for name, value in einfo.items(constants.INISECT_OSP):
9615 if name not in self.op.osparams:
9616 self.op.osparams[name] = value
  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]
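    # Illustrative note (not part of the original source): with
    # identify_defaults set, an opcode passing beparams={"vcpus": 1} on a
    # cluster whose default is also 1 ends up storing no explicit value, so
    # the instance keeps following future changes of the cluster-wide default.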
  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined")
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
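  # Illustrative note (not part of the original source): assuming a cluster
  # file storage dir of "/srv/ganeti/file-storage", an opcode-level
  # file_storage_dir of "web" and instance name "inst1.example.com", the
  # PathJoin above yields "/srv/ganeti/file-storage/web/inst1.example.com".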
  def CheckPrereq(self): # pylint: disable=R0914
    """Check prerequisites.

    """
    self._CalculateFileStorageDir()
9678 if self.op.mode == constants.INSTANCE_IMPORT:
9679 export_info = self._ReadExportInfo()
9680 self._ReadExportParams(export_info)
9682 if (not self.cfg.GetVGName() and
9683 self.op.disk_template not in constants.DTS_NOT_LVM):
9684 raise errors.OpPrereqError("Cluster does not support lvm-based"
9685 " instances", errors.ECODE_STATE)
9687 if (self.op.hypervisor is None or
9688 self.op.hypervisor == constants.VALUE_AUTO):
9689 self.op.hypervisor = self.cfg.GetHypervisorType()
9691 cluster = self.cfg.GetClusterInfo()
9692 enabled_hvs = cluster.enabled_hypervisors
9693 if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                                    ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
9700 for tag in self.op.tags:
9701 objects.TaggableObject.ValidateTag(tag)
9703 # check hypervisor parameter syntax (locally)
9704 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9708 hv_type.CheckParameterSyntax(filled_hvp)
9709 self.hv_full = filled_hvp
9710 # check that we don't specify global parameters on an instance
9711 _CheckGlobalHvParams(self.op.hvparams)
9713 # fill and remember the beparams dict
9714 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9715 for param, value in self.op.beparams.iteritems():
9716 if value == constants.VALUE_AUTO:
9717 self.op.beparams[param] = default_beparams[param]
9718 objects.UpgradeBeParams(self.op.beparams)
9719 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9720 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9722 # build os parameters
9723 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
9733 nic_mode_req = nic.get(constants.INIC_MODE, None)
9734 nic_mode = nic_mode_req
9735 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9736 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      # Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      if link == constants.VALUE_AUTO:
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)

      data_vg = disk.get(constants.IDISK_VG, default_vg)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
        }
      if constants.IDISK_METAVG in disk:
        new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)
    if self.op.mode == constants.INSTANCE_IMPORT:
      disk_images = []
      for idx in range(len(self.disks)):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images
9834 old_name = export_info.get(constants.INISECT_INS, "name")
9835 if self.op.instance_name == old_name:
9836 for idx, nic in enumerate(self.nics):
9837 if nic.mac == constants.VALUE_AUTO:
9838 nic_mac_ini = "nic%d_mac" % idx
9839 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9841 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9843 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9844 if self.op.ip_check:
9845 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9846 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9847 (self.check_ip, self.op.instance_name),
9848 errors.ECODE_NOTUNIQUE)
9850 #### mac address generation
9851 # By generating here the mac address both the allocator and the hooks get
9852 # the real final mac address rather than the 'auto' or 'generate' value.
9853 # There is a race condition between the generation and the instance object
9854 # creation, which means that we know the mac is valid now, but we're not
9855 # sure it will be when we actually add the instance. If things go bad
9856 # adding the instance will abort because of a duplicate mac, and the
9857 # creation job will fail.
9858 for nic in self.nics:
9859 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9860 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
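    # Illustrative note (not part of the original source): generated MACs are
    # built from the cluster's configured MAC prefix, so with a prefix of
    # "aa:00:00" a NIC could end up with e.g. "aa:00:00:4f:c3:12".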
9864 if self.op.iallocator is not None:
9865 self._RunAllocator()
    # Release all unneeded node locks
    _ReleaseLocks(self, locking.LEVEL_NODE,
                  keep=filter(None, [self.op.pnode, self.op.snode,
                                     self.op.src_node]))
    _ReleaseLocks(self, locking.LEVEL_NODE_RES,
                  keep=filter(None, [self.op.pnode, self.op.snode,
                                     self.op.src_node]))
9875 #### node related checks
9877 # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
9891 self.secondaries = []
9893 # mirror node verification
9894 if self.op.disk_template in constants.DTS_INT_MIRROR:
9895 if self.op.snode == pnode.name:
9896 raise errors.OpPrereqError("The secondary node cannot be the"
9897 " primary node", errors.ECODE_INVAL)
9898 _CheckNodeOnline(self, self.op.snode)
9899 _CheckNodeNotDrained(self, self.op.snode)
9900 _CheckNodeVmCapable(self, self.op.snode)
9901 self.secondaries.append(self.op.snode)
9903 snode = self.cfg.GetNodeInfo(self.op.snode)
9904 if pnode.group != snode.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"
                        " used")

    nodenames = [pnode.name] + self.secondaries
    # Verify instance specs
    spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
    ispec = {
      constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
      constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
      constants.ISPEC_DISK_COUNT: len(self.disks),
      constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
      constants.ISPEC_NIC_COUNT: len(self.nics),
      constants.ISPEC_SPINDLE_USE: spindle_use,
      }

    group_info = self.cfg.GetNodeGroup(pnode.group)
    ipolicy = _CalculateGroupIPolicy(cluster, group_info)
    res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
    if not self.op.ignore_ipolicy and res:
      raise errors.OpPrereqError(("Instance allocation to group %s violates"
                                  " policy: %s") % (pnode.group,
                                                    utils.CommaJoin(res)),
                                 errors.ECODE_INVAL)
9932 # disk parameters (not customizable at instance or node level)
9933 # just use the primary node parameters, ignoring the secondary.
9934 self.diskparams = group_info.diskparams
9936 if not self.adopt_disks:
9937 if self.op.disk_template == constants.DT_RBD:
9938 # _CheckRADOSFreeSpace() is just a placeholder.
9939 # Any function that checks prerequisites can be placed here.
9940 # Check if there is enough space on the RADOS cluster.
9941 _CheckRADOSFreeSpace()
9943 # Check lv size requirements, if not adopting
9944 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9945 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9947 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9948 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9949 disk[constants.IDISK_ADOPT])
9950 for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)
9963 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9964 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9966 node_lvs = self.rpc.call_lv_list([pnode.name],
9967 vg_names.payload.keys())[pnode.name]
9968 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9969 node_lvs = node_lvs.payload
      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))
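      # Illustrative note (not part of the original source): node_lvs maps
      # "vg/lv" names to tuples whose first element is the size in mebibytes,
      # so adopting a 10 GiB volume would set IDISK_SIZE to 10240 here.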
9987 elif self.op.disk_template == constants.DT_BLOCK:
9988 # Normalize and de-duplicate device paths
9989 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9990 for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
10013 for dsk in self.disks:
10014 dsk[constants.IDISK_SIZE] = \
10015 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10017 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10019 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10020 # check OS parameters (remotely)
10021 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10023 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
    # memory check on primary node
    #TODO(dynmem): use MINMEM for checking
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MAXMEM],
                           self.op.hypervisor)
10033 self.dry_run_result = list(nodenames)
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
10040 pnode_name = self.pnode.name
10042 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10043 self.owned_locks(locking.LEVEL_NODE)), \
10044 "Node locks differ from node resource locks"
10046 ht_kind = self.op.hypervisor
10047 if ht_kind in constants.HTS_REQ_PORT:
10048 network_port = self.cfg.AllocatePort()
10050 network_port = None
    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn,
                                  self.diskparams)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_state=constants.ADMINST_DOWN,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)
    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise
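    # Illustrative note (not part of the original source): for DT_PLAIN
    # adoption the rename call above maps each adopted LV to its new
    # Ganeti-generated name, e.g. ("xenvg", "old-lv") becoming something like
    # ("xenvg", "<uuid>.disk0"), so the volume is taken over without copying.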
10104 feedback_fn("adding instance %s to cluster config" % instance)
10106 self.cfg.AddInstance(iobj, self.proc.GetECId())
10108 # Declare that we don't want to remove the instance lock anymore, as we've
10109 # added the instance to the config
10110 del self.remove_locks[locking.LEVEL_INSTANCE]
    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)
    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")
10149 # Release all node resource locks
10150 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10152 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10153 if self.op.mode == constants.INSTANCE_CREATE:
10154 if not self.op.no_install:
        pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                      not self.op.wait_for_sync)
        if pause_sync:
          feedback_fn("* pausing disk sync to install instance OS")
          result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                            (iobj.disks,
                                                             iobj), True)
          for idx, success in enumerate(result.payload):
            if not success:
              logging.warn("pause-sync of instance %s for disk %d failed",
                           instance, idx)

        feedback_fn("* running the instance OS create scripts...")
        # FIXME: pass debug option from opcode to backend
        os_add_result = \
          self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
                                        self.op.debug_level)
        if pause_sync:
          feedback_fn("* resuming disk sync")
          result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                            (iobj.disks,
                                                             iobj), False)
          for idx, success in enumerate(result.payload):
            if not success:
              logging.warn("resume-sync of instance %s for disk %d failed",
                           instance, idx)

        os_add_result.Raise("Could not add os for instance %s"
                            " on node %s" % (instance, pnode_name))
      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))
10209 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10210 feedback_fn("* preparing remote import...")
10211 # The source cluster will stop the instance before attempting to make a
10212 # connection. In some cases stopping an instance can take a long time,
10213 # hence the shutdown timeout is added to the connection timeout.
10214 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10215 self.op.source_shutdown_timeout)
10216 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
10223 if not compat.all(disk_results):
10224 # TODO: Should the instance still be started, even if some disks
10225 # failed to import (valid for local imports, too)?
10226 self.LogWarning("Some disks for instance %s on node %s were not"
10227 " imported successfully" % (instance, pnode_name))
10229 # Run rename script on newly imported instance
10230 assert iobj.name == instance
10231 feedback_fn("Running rename script for %s" % instance)
10232 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10233 self.source_instance_name,
10234 self.op.debug_level)
10235 if result.fail_msg:
10236 self.LogWarning("Failed to run rename script for %s on node"
10237 " %s: %s" % (instance, pnode_name, result.fail_msg))
10240 # also checked in the prereq part
10241 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10244 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10247 iobj.admin_state = constants.ADMINST_UP
10248 self.cfg.Update(iobj, feedback_fn)
10249 logging.info("Starting instance %s on node %s", instance, pnode_name)
10250 feedback_fn("* starting instance...")
10251 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10253 result.Raise("Could not start instance")
10255 return list(iobj.all_nodes)
def _CheckRADOSFreeSpace():
  """Compute disk size requirements inside the RADOS cluster.

  """
  # For the RADOS cluster we assume there is always enough space.
  pass
class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
10277 self.share_locks = _ShareAll()
10278 self._ExpandAndLockInstance()
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
10286 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10287 assert self.instance is not None, \
10288 "Cannot retrieve locked instance %s" % self.op.instance_name
10289 _CheckNodeOnline(self, self.instance.primary_node)
  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
10296 node = instance.primary_node
10298 node_insts = self.rpc.call_instance_list([node],
10299 [instance.hypervisor])[node]
10300 node_insts.Raise("Can't get node information from %s" % node)
10302 if instance.name not in node_insts.payload:
10303 if instance.admin_state == constants.ADMINST_UP:
10304 state = constants.INSTST_ERRORDOWN
      elif instance.admin_state == constants.ADMINST_DOWN:
        state = constants.INSTST_ADMINDOWN
      else:
        state = constants.INSTST_ADMINOFFLINE
10309 raise errors.OpExecError("Instance %s is not running (state %s)" %
10310 (instance.name, state))
10312 logging.debug("Connecting to console of %s on %s", instance.name, node)
10314 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
10326 # beparams and hvparams are passed separately, to avoid editing the
10327 # instance and then saving the defaults in the instance itself.
10328 hvparams = cluster.FillHV(instance)
10329 beparams = cluster.FillBE(instance)
10330 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10332 assert console.instance == instance.name
10333 assert console.Validate()
10335 return console.ToDict()
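# Illustrative note (not part of the original source): the dictionary
# returned above describes how to reach the console; depending on the
# hypervisor it contains the instance name, the console kind and, e.g. for
# an SSH-based console, the host and command to run in order to attach.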
class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
10347 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10348 self.op.iallocator)
10350 def ExpandNames(self):
10351 self._ExpandAndLockInstance()
10353 assert locking.LEVEL_NODE not in self.needed_locks
10354 assert locking.LEVEL_NODE_RES not in self.needed_locks
10355 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10357 assert self.op.iallocator is None or self.op.remote_node is None, \
10358 "Conflicting options"
    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10373 if self.op.iallocator is not None:
10374 # iallocator will select a new node in the same group
10375 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10377 self.needed_locks[locking.LEVEL_NODE_RES] = []
10379 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10380 self.op.iallocator, self.op.remote_node,
10381 self.op.disks, False, self.op.early_release,
10382 self.op.ignore_ipolicy)
10384 self.tasklets = [self.replacer]
10386 def DeclareLocks(self, level):
10387 if level == locking.LEVEL_NODEGROUP:
10388 assert self.op.remote_node is None
10389 assert self.op.iallocator is not None
10390 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10392 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10393 # Lock all groups used by instance optimistically; this requires going
10394 # via the node before it's locked, requiring verification later on
10395 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10396 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10398 elif level == locking.LEVEL_NODE:
10399 if self.op.iallocator is not None:
10400 assert self.op.remote_node is None
10401 assert not self.needed_locks[locking.LEVEL_NODE]
10403 # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = [node_name
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Reuse node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE]
10412 self.needed_locks[locking.LEVEL_NODE]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]

    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)

    return nl, nl
  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10454 return LogicalUnit.CheckPrereq(self)
class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release, ignore_ipolicy):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release
    self.ignore_ipolicy = ignore_ipolicy

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None
  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)
  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=list(relocate_from))

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name
  def _FindFaultyDisks(self, node_name):
    """Wrapper for L{_FindFaultyInstanceDisks}.

    """
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)
  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    for idx, dev in enumerate(instance.disks):
      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        if result.offline:
          continue
        elif result.fail_msg or not result.payload:
          return False

    return True
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10578 assert instance is not None, \
10579 "Cannot retrieve locked instance %s" % self.instance_name
10581 if instance.disk_template != constants.DT_DRBD8:
10582 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10583 " instances", errors.ECODE_INVAL)
10585 if len(instance.secondary_nodes) != 1:
10586 raise errors.OpPrereqError("The instance has a strange layout,"
10587 " expected one secondary but found %d" %
10588 len(instance.secondary_nodes),
10589 errors.ECODE_FAULT)
10591 if not self.delay_iallocator:
10592 self._CheckPrereq2()
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
        "Remote node '%s' is not locked" % remote_node
10618 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10619 assert self.remote_node_info is not None, \
10620 "Cannot retrieve locked node %s" % remote_node
10622 if remote_node == self.instance.primary_node:
10623 raise errors.OpPrereqError("The specified node is the primary node of"
10624 " the instance", errors.ECODE_INVAL)
10626 if remote_node == secondary_node:
10627 raise errors.OpPrereqError("The specified node is already the"
10628 " secondary node of the instance",
10629 errors.ECODE_INVAL)
10631 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10632 constants.REPLACE_DISK_CHG):
10633 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10634 errors.ECODE_INVAL)
10636 if self.mode == constants.REPLACE_DISK_AUTO:
10637 if not self._CheckDisksActivated(instance):
10638 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10639 " first" % self.instance_name,
10640 errors.ECODE_STATE)
10641 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10642 faulty_secondary = self._FindFaultyDisks(secondary_node)
10644 if faulty_primary and faulty_secondary:
10645 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10646 " one node and can not be repaired"
10647 " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
10666 if self.mode == constants.REPLACE_DISK_PRI:
10667 self.target_node = instance.primary_node
10668 self.other_node = secondary_node
10669 check_nodes = [self.target_node, self.other_node]
10671 elif self.mode == constants.REPLACE_DISK_SEC:
10672 self.target_node = secondary_node
10673 self.other_node = instance.primary_node
10674 check_nodes = [self.target_node, self.other_node]
10676 elif self.mode == constants.REPLACE_DISK_CHG:
10677 self.new_node = remote_node
10678 self.other_node = instance.primary_node
10679 self.target_node = secondary_node
10680 check_nodes = [self.new_node, self.other_node]
10682 _CheckNodeNotDrained(self.lu, remote_node)
10683 _CheckNodeVmCapable(self.lu, remote_node)
10685 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10686 assert old_node_info is not None
10687 if old_node_info.offline and not self.early_release:
10688 # doesn't make sense to delay the release
10689 self.early_release = True
10690 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10691 " early-release mode", secondary_node)
10694 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10697 # If not specified all disks should be replaced
10699 self.disks = range(len(self.instance.disks))
10701 # TODO: This is ugly, but right now we can't distinguish between internal
10702 # submitted opcode and external one. We should fix that.
10703 if self.remote_node_info:
10704 # We change the node, lets verify it still meets instance policy
10705 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
      ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
                                       new_group_info)
10708 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10709 ignore=self.ignore_ipolicy)
10711 # TODO: compute disk parameters
10712 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10713 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10714 if primary_node_info.group != secondary_node_info.group:
10715 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10716 " different node groups; the disk parameters of the"
10717 " primary node's group will be applied.")
10719 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10721 for node in check_nodes:
10722 _CheckNodeOnline(self.lu, node)
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)
10729 # Release unneeded node and node resource locks
10730 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10731 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10733 # Release any owned node group
10734 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10735 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10737 # Check whether disks are valid
10738 for disk_idx in self.disks:
10739 instance.FindDisk(disk_idx)
10741 # Get secondary node IP addresses
10742 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10743 in self.cfg.GetMultiNodeInfo(touched_nodes))
  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()
10755 # Verify owned locks before starting operation
10756 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10757 assert set(owned_nodes) == set(self.node_secondary_ip), \
10758 ("Incorrect node locks, owning %s, expected %s" %
10759 (owned_nodes, self.node_secondary_ip.keys()))
10760 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10761 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10763 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10764 assert list(owned_instances) == [self.instance_name], \
10765 "Instance '%s' not locked" % self.instance_name
    assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
      "Should not own any node group lock at this point"

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    assert not self.lu.owned_locks(locking.LEVEL_NODE)
    if __debug__:
      # Verify owned locks
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
      nodes = frozenset(self.node_secondary_ip)
      assert ((self.early_release and not owned_nodes) or
              (not self.early_release and not (set(owned_nodes) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_nodes, nodes))

    return result
  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))
  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))
  def _CreateNewStorage(self, node_name):
    """Create new storage on the primary or secondary node.

    This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)

      vg_data = dev.children[0].logical_id[0]
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_data, names[0]), params=data_p)
      vg_meta = dev.children[1].logical_id[0]
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                             logical_id=(vg_meta, names[1]), params=meta_p)

      new_lvs = [lv_data, lv_meta]
      old_lvs = [child.Copy() for child in dev.children]
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names
  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaces.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
10953 self.lu.LogStep(1, steps_total, "Check device existence")
10954 self._CheckDisksExistence([self.other_node, self.target_node])
10955 self._CheckVolumeGroup([self.target_node, self.other_node])
10957 # Step: check other node consistency
10958 self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
10964 self.lu.LogStep(3, steps_total, "Allocate new storage")
10965 iv_names = self._CreateNewStorage(self.target_node)
10967 # Step: for each lv, detach+rename*2+attach
10968 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10969 for dev, old_lvs, new_lvs in iv_names.itervalues():
10970 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
10977 #cfg.Update(instance)
10979 # ok, we created the new LVs, so now we know we have the needed
10980 # storage; as such, we proceed on the target node to rename
10981 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10982 # using the assumption that logical_id == physical_id (which in
10983 # turn is the unique_id on that node)
10985 # FIXME(iustin): use a better name for the replaced LVs
10986 temp_suffix = int(time.time())
10987 ren_fn = lambda d, suff: (d.physical_id[0],
10988 d.physical_id[1] + "_replaced-%s" % suff)
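      # Illustrative note (not part of the original source): for an LV with
      # physical_id ("xenvg", "<uuid>.disk0_data") and temp_suffix 1400000000,
      # ren_fn returns ("xenvg", "<uuid>.disk0_data_replaced-1400000000").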
10990 # Build the rename list based on what LVs exist on the node
10991 rename_old_to_new = []
10992 for to_ren in old_lvs:
10993 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10998 self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
11003 # Now we rename the new LVs to the old LVs
11004 self.lu.LogInfo("Renaming the new LVs on the target node")
11005 rename_new_to_old = [(new, old.physical_id)
11006 for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
11011 # Intermediate steps of in memory modifications
11012 for old, new in zip(old_lvs, new_lvs):
11013 new.logical_id = old.logical_id
11014 self.cfg.SetDiskID(new, self.target_node)
11016 # We need to modify old_lvs so that removal later removes the
11017 # right LVs, not the newly added ones; note that old_lvs is a
11019 for disk in old_lvs:
11020 disk.logical_id = ren_fn(disk, temp_suffix)
11021 self.cfg.SetDiskID(disk, self.target_node)
11023 # Now that the new LVs have the old name, we can add them to the device
11024 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11025 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
11027 msg = result.fail_msg
11029 for new_lv in new_lvs:
11030 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11033 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11034 hint=("cleanup manually the unused logical"
11036 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11038 cstep = itertools.count(5)
11040 if self.early_release:
11041 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11042 self._RemoveOldStorage(self.target_node, iv_names)
11043 # TODO: Check if releasing locks early still makes sense
11044 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11046 # Release all resource locks except those used by the instance
11047 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11048 keep=self.node_secondary_ip.keys())
11050 # Release all node locks while waiting for sync
11051 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11053 # TODO: Can the instance lock be downgraded here? Take the optional disk
11054 # shutdown in the caller into consideration.
11057 # This can fail as the old devices are degraded and _WaitForSync
11058 # does a combined result over all disks, so we don't check its return value
11059 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11060 _WaitForSync(self.lu, self.instance)
11062 # Check all devices manually
11063 self._CheckDevices(self.instance.primary_node, iv_names)
11065 # Step: remove old storage
11066 if not self.early_release:
11067 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11068 self._RemoveOldStorage(self.target_node, iv_names)
11070 def _ExecDrbd8Secondary(self, feedback_fn):
11071 """Replace the secondary node for DRBD 8.
11073 The algorithm for replace is quite complicated:
11074 - for all disks of the instance:
11075 - create new LVs on the new node with same names
11076 - shutdown the drbd device on the old secondary
11077 - disconnect the drbd network on the primary
11078 - create the drbd device on the new secondary
11079 - network attach the drbd on the primary, using an artifice:
11080 the drbd code for Attach() will connect to the network if it
11081 finds a device which is connected to the good local disks but
11082 not network enabled
11083 - wait for sync across all devices
11084 - remove all disks from the old secondary
11086 Failures are not very well handled.
11091 pnode = self.instance.primary_node
11093 # Step: check device activation
11094 self.lu.LogStep(1, steps_total, "Check device existence")
11095 self._CheckDisksExistence([self.instance.primary_node])
11096 self._CheckVolumeGroup([self.instance.primary_node])
11098 # Step: check other node consistency
11099 self.lu.LogStep(2, steps_total, "Check peer consistency")
11100 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11102 # Step: create new storage
11103 self.lu.LogStep(3, steps_total, "Allocate new storage")
11104 for idx, dev in enumerate(self.instance.disks):
11105 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11106 (self.new_node, idx))
11107 # we pass force_create=True to force LVM creation
11108 for new_lv in dev.children:
11109 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
11110 _GetInstanceInfoText(self.instance), False)
11112 # Step 4: drbd minors and drbd setup changes
11113 # after this, we must manually remove the drbd minors on both the
11114 # error and the success paths
11115 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11116 minors = self.cfg.AllocateDRBDMinor([self.new_node
11117 for dev in self.instance.disks],
11118 self.instance.name)
11119 logging.debug("Allocated minors %r", minors)
11122 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11123 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11124 (self.new_node, idx))
11125 # create new devices on new_node; note that we create two IDs:
11126 # one without port, so the drbd will be activated without
11127 # networking information on the new node at this stage, and one
11128 # with network, for the later activation in step 4
11129 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11130 if self.instance.primary_node == o_node1:
11133 assert self.instance.primary_node == o_node2, "Three-node instance?"
11136 new_alone_id = (self.instance.primary_node, self.new_node, None,
11137 p_minor, new_minor, o_secret)
11138 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11139 p_minor, new_minor, o_secret)
11141 iv_names[idx] = (dev, dev.children, new_net_id)
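# Hedged illustration (all values invented): with primary node "node1",
# new secondary "node3", DRBD port 11000, primary minor 0 and new minor 2:
#   new_alone_id = ("node1", "node3", None, 0, 2, o_secret)   # no network
#   new_net_id   = ("node1", "node3", 11000, 0, 2, o_secret)  # attach later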
11142 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11144 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
11145 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11146 logical_id=new_alone_id,
11147 children=dev.children,
11149 params=drbd_params)
11151 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
11152 _GetInstanceInfoText(self.instance), False)
11153 except errors.GenericError:
11154 self.cfg.ReleaseDRBDMinors(self.instance.name)
11157 # We have new devices, shutdown the drbd on the old secondary
11158 for idx, dev in enumerate(self.instance.disks):
11159 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11160 self.cfg.SetDiskID(dev, self.target_node)
11161 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
11163 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11164 "node: %s" % (idx, msg),
11165 hint=("Please cleanup this device manually as"
11166 " soon as possible"))
11168 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11169 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11170 self.instance.disks)[pnode]
11172 msg = result.fail_msg
11174 # detaches didn't succeed (unlikely)
11175 self.cfg.ReleaseDRBDMinors(self.instance.name)
11176 raise errors.OpExecError("Can't detach the disks from the network on"
11177 " old node: %s" % (msg,))
11179 # if we managed to detach at least one, we update all the disks of
11180 # the instance to point to the new secondary
11181 self.lu.LogInfo("Updating instance configuration")
11182 for dev, _, new_logical_id in iv_names.itervalues():
11183 dev.logical_id = new_logical_id
11184 self.cfg.SetDiskID(dev, self.instance.primary_node)
11186 self.cfg.Update(self.instance, feedback_fn)
11188 # Release all node locks (the configuration has been updated)
11189 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11191 # and now perform the drbd attach
11192 self.lu.LogInfo("Attaching primary drbds to new secondary"
11193 " (standalone => connected)")
11194 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11196 self.node_secondary_ip,
11197 self.instance.disks,
11198 self.instance.name,
11200 for to_node, to_result in result.items():
11201 msg = to_result.fail_msg
11203 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11205 hint=("please do a gnt-instance info to see the"
11206 " status of disks"))
11208 cstep = itertools.count(5)
11210 if self.early_release:
11211 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11212 self._RemoveOldStorage(self.target_node, iv_names)
11213 # TODO: Check if releasing locks early still makes sense
11214 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11216 # Release all resource locks except those used by the instance
11217 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11218 keep=self.node_secondary_ip.keys())
11220 # TODO: Can the instance lock be downgraded here? Take the optional disk
11221 # shutdown in the caller into consideration.
11224 # This can fail as the old devices are degraded and _WaitForSync
11225 # does a combined result over all disks, so we don't check its return value
11226 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11227 _WaitForSync(self.lu, self.instance)
11229 # Check all devices manually
11230 self._CheckDevices(self.instance.primary_node, iv_names)
11232 # Step: remove old storage
11233 if not self.early_release:
11234 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11235 self._RemoveOldStorage(self.target_node, iv_names)
11238 class LURepairNodeStorage(NoHooksLU):
11239 """Repairs the volume group on a node.
11244 def CheckArguments(self):
11245 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11247 storage_type = self.op.storage_type
11249 if (constants.SO_FIX_CONSISTENCY not in
11250 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11251 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11252 " repaired" % storage_type,
11253 errors.ECODE_INVAL)
11255 def ExpandNames(self):
11256 self.needed_locks = {
11257 locking.LEVEL_NODE: [self.op.node_name],
11260 def _CheckFaultyDisks(self, instance, node_name):
11261 """Ensure faulty disks abort the opcode or at least warn."""
11263 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11265 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11266 " node '%s'" % (instance.name, node_name),
11267 errors.ECODE_STATE)
11268 except errors.OpPrereqError, err:
11269 if self.op.ignore_consistency:
11270 self.proc.LogWarning(str(err.args[0]))
11274 def CheckPrereq(self):
11275 """Check prerequisites.
11278 # Check whether any instance on this node has faulty disks
11279 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11280 if inst.admin_state != constants.ADMINST_UP:
11282 check_nodes = set(inst.all_nodes)
11283 check_nodes.discard(self.op.node_name)
11284 for inst_node_name in check_nodes:
11285 self._CheckFaultyDisks(inst, inst_node_name)
11287 def Exec(self, feedback_fn):
11288 feedback_fn("Repairing storage unit '%s' on %s ..." %
11289 (self.op.name, self.op.node_name))
11291 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11292 result = self.rpc.call_storage_execute(self.op.node_name,
11293 self.op.storage_type, st_args,
11295 constants.SO_FIX_CONSISTENCY)
11296 result.Raise("Failed to repair storage unit '%s' on %s" %
11297 (self.op.name, self.op.node_name))
11300 class LUNodeEvacuate(NoHooksLU):
11301 """Evacuates instances off a list of nodes.
11306 _MODE2IALLOCATOR = {
11307 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11308 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11309 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11311 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11312 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11313 constants.IALLOCATOR_NEVAC_MODES)
11315 def CheckArguments(self):
11316 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11318 def ExpandNames(self):
11319 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11321 if self.op.remote_node is not None:
11322 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11323 assert self.op.remote_node
11325 if self.op.remote_node == self.op.node_name:
11326 raise errors.OpPrereqError("Can not use evacuated node as a new"
11327 " secondary node", errors.ECODE_INVAL)
11329 if self.op.mode != constants.NODE_EVAC_SEC:
11330 raise errors.OpPrereqError("Without the use of an iallocator only"
11331 " secondary instances can be evacuated",
11332 errors.ECODE_INVAL)
11335 self.share_locks = _ShareAll()
11336 self.needed_locks = {
11337 locking.LEVEL_INSTANCE: [],
11338 locking.LEVEL_NODEGROUP: [],
11339 locking.LEVEL_NODE: [],
11342 # Determine nodes (via group) optimistically, needs verification once locks
11343 # have been acquired
11344 self.lock_nodes = self._DetermineNodes()
11346 def _DetermineNodes(self):
11347 """Gets the list of nodes to operate on.
11350 if self.op.remote_node is None:
11351 # Iallocator will choose any node(s) in the same group
11352 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11354 group_nodes = frozenset([self.op.remote_node])
11356 # Determine nodes to be locked
11357 return set([self.op.node_name]) | group_nodes
11359 def _DetermineInstances(self):
11360 """Builds list of instances to operate on.
11363 assert self.op.mode in constants.NODE_EVAC_MODES
11365 if self.op.mode == constants.NODE_EVAC_PRI:
11366 # Primary instances only
11367 inst_fn = _GetNodePrimaryInstances
11368 assert self.op.remote_node is None, \
11369 "Evacuating primary instances requires iallocator"
11370 elif self.op.mode == constants.NODE_EVAC_SEC:
11371 # Secondary instances only
11372 inst_fn = _GetNodeSecondaryInstances
11375 assert self.op.mode == constants.NODE_EVAC_ALL
11376 inst_fn = _GetNodeInstances
11377 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11379 raise errors.OpPrereqError("Due to an issue with the iallocator"
11380 " interface it is not possible to evacuate"
11381 " all instances at once; specify explicitly"
11382 " whether to evacuate primary or secondary"
11384 errors.ECODE_INVAL)
11386 return inst_fn(self.cfg, self.op.node_name)
11388 def DeclareLocks(self, level):
11389 if level == locking.LEVEL_INSTANCE:
11390 # Lock instances optimistically, needs verification once node and group
11391 # locks have been acquired
11392 self.needed_locks[locking.LEVEL_INSTANCE] = \
11393 set(i.name for i in self._DetermineInstances())
11395 elif level == locking.LEVEL_NODEGROUP:
11396 # Lock node groups for all potential target nodes optimistically, needs
11397 # verification once nodes have been acquired
11398 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11399 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11401 elif level == locking.LEVEL_NODE:
11402 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11404 def CheckPrereq(self):
11406 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11407 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11408 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11410 need_nodes = self._DetermineNodes()
11412 if not owned_nodes.issuperset(need_nodes):
11413 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11414 " locks were acquired, current nodes are"
11415 " are '%s', used to be '%s'; retry the"
11417 (self.op.node_name,
11418 utils.CommaJoin(need_nodes),
11419 utils.CommaJoin(owned_nodes)),
11420 errors.ECODE_STATE)
11422 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11423 if owned_groups != wanted_groups:
11424 raise errors.OpExecError("Node groups changed since locks were acquired,"
11425 " current groups are '%s', used to be '%s';"
11426 " retry the operation" %
11427 (utils.CommaJoin(wanted_groups),
11428 utils.CommaJoin(owned_groups)))
11430 # Determine affected instances
11431 self.instances = self._DetermineInstances()
11432 self.instance_names = [i.name for i in self.instances]
11434 if set(self.instance_names) != owned_instances:
11435 raise errors.OpExecError("Instances on node '%s' changed since locks"
11436 " were acquired, current instances are '%s',"
11437 " used to be '%s'; retry the operation" %
11438 (self.op.node_name,
11439 utils.CommaJoin(self.instance_names),
11440 utils.CommaJoin(owned_instances)))
11442 if self.instance_names:
11443 self.LogInfo("Evacuating instances from node '%s': %s",
11445 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11447 self.LogInfo("No instances to evacuate from node '%s'",
11450 if self.op.remote_node is not None:
11451 for i in self.instances:
11452 if i.primary_node == self.op.remote_node:
11453 raise errors.OpPrereqError("Node %s is the primary node of"
11454 " instance %s, cannot use it as"
11456 (self.op.remote_node, i.name),
11457 errors.ECODE_INVAL)
11459 def Exec(self, feedback_fn):
11460 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11462 if not self.instance_names:
11463 # No instances to evacuate
11466 elif self.op.iallocator is not None:
11467 # TODO: Implement relocation to other group
11468 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11469 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11470 instances=list(self.instance_names))
11472 ial.Run(self.op.iallocator)
11474 if not ial.success:
11475 raise errors.OpPrereqError("Can't compute node evacuation using"
11476 " iallocator '%s': %s" %
11477 (self.op.iallocator, ial.info),
11478 errors.ECODE_NORES)
11480 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11482 elif self.op.remote_node is not None:
11483 assert self.op.mode == constants.NODE_EVAC_SEC
11485 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11486 remote_node=self.op.remote_node,
11488 mode=constants.REPLACE_DISK_CHG,
11489 early_release=self.op.early_release)]
11490 for instance_name in self.instance_names
11494 raise errors.ProgrammerError("No iallocator or remote node")
11496 return ResultWithJobs(jobs)
11499 def _SetOpEarlyRelease(early_release, op):
11500 """Sets C{early_release} flag on opcodes if available.
11504 op.early_release = early_release
11505 except AttributeError:
11506 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11511 def _NodeEvacDest(use_nodes, group, nodes):
11512 """Returns group or nodes depending on caller's choice.
11516 return utils.CommaJoin(nodes)
11521 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11522 """Unpacks the result of change-group and node-evacuate iallocator requests.
11524 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11525 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11527 @type lu: L{LogicalUnit}
11528 @param lu: Logical unit instance
11529 @type alloc_result: tuple/list
11530 @param alloc_result: Result from iallocator
11531 @type early_release: bool
11532 @param early_release: Whether to release locks early if possible
11533 @type use_nodes: bool
11534 @param use_nodes: Whether to display node names instead of groups
11537 (moved, failed, jobs) = alloc_result
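# Illustrative shape of alloc_result (hypothetical values):
#   moved  = [("inst1", "group1", ["node2", "node3"])]
#   failed = [("inst2", "not enough memory")]
#   jobs   = [[{"OP_ID": "OP_INSTANCE_MIGRATE", ...}], ...]
# each entry in jobs is one job, given as a list of serialized opcodes
# that are deserialized below via opcodes.OpCode.LoadOpCode.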
11540 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11541 for (name, reason) in failed)
11542 lu.LogWarning("Unable to evacuate instances %s", failreason)
11543 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11546 lu.LogInfo("Instances to be moved: %s",
11547 utils.CommaJoin("%s (to %s)" %
11548 (name, _NodeEvacDest(use_nodes, group, nodes))
11549 for (name, group, nodes) in moved))
11551 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11552 map(opcodes.OpCode.LoadOpCode, ops))
11556 class LUInstanceGrowDisk(LogicalUnit):
11557 """Grow a disk of an instance.
11560 HPATH = "disk-grow"
11561 HTYPE = constants.HTYPE_INSTANCE
11564 def ExpandNames(self):
11565 self._ExpandAndLockInstance()
11566 self.needed_locks[locking.LEVEL_NODE] = []
11567 self.needed_locks[locking.LEVEL_NODE_RES] = []
11568 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11569 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11571 def DeclareLocks(self, level):
11572 if level == locking.LEVEL_NODE:
11573 self._LockInstancesNodes()
11574 elif level == locking.LEVEL_NODE_RES:
11576 self.needed_locks[locking.LEVEL_NODE_RES] = \
11577 self.needed_locks[locking.LEVEL_NODE][:]
11579 def BuildHooksEnv(self):
11580 """Build hooks env.
11582 This runs on the master, the primary and all the secondaries.
11586 "DISK": self.op.disk,
11587 "AMOUNT": self.op.amount,
11588 "ABSOLUTE": self.op.absolute,
11590 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11593 def BuildHooksNodes(self):
11594 """Build hooks nodes.
11597 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11600 def CheckPrereq(self):
11601 """Check prerequisites.
11603 This checks that the instance is in the cluster.
11606 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11607 assert instance is not None, \
11608 "Cannot retrieve locked instance %s" % self.op.instance_name
11609 nodenames = list(instance.all_nodes)
11610 for node in nodenames:
11611 _CheckNodeOnline(self, node)
11613 self.instance = instance
11615 if instance.disk_template not in constants.DTS_GROWABLE:
11616 raise errors.OpPrereqError("Instance's disk layout does not support"
11617 " growing", errors.ECODE_INVAL)
11619 self.disk = instance.FindDisk(self.op.disk)
11621 if self.op.absolute:
11622 self.target = self.op.amount
11623 self.delta = self.target - self.disk.size
11625 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11626 "current disk size (%s)" %
11627 (utils.FormatUnit(self.target, "h"),
11628 utils.FormatUnit(self.disk.size, "h")),
11629 errors.ECODE_STATE)
11631 self.delta = self.op.amount
11632 self.target = self.disk.size + self.delta
11634 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11635 utils.FormatUnit(self.delta, "h"),
11636 errors.ECODE_INVAL)
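# Worked example (numbers invented): for a 10240 MB disk, a relative
# grow with amount=2048 yields delta=2048 and target=12288; an absolute
# grow with amount=12288 yields the same pair. Shrinking (a negative
# delta) is rejected in both modes.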
11638 if instance.disk_template not in (constants.DT_FILE,
11639 constants.DT_SHARED_FILE,
11641 # TODO: check the free disk space for file, when that feature will be implemented
11643 _CheckNodesFreeDiskPerVG(self, nodenames,
11644 self.disk.ComputeGrowth(self.delta))
11646 def Exec(self, feedback_fn):
11647 """Execute disk grow.
11650 instance = self.instance
11653 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11654 assert (self.owned_locks(locking.LEVEL_NODE) ==
11655 self.owned_locks(locking.LEVEL_NODE_RES))
11657 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11659 raise errors.OpExecError("Cannot activate block device to grow")
11661 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11662 (self.op.disk, instance.name,
11663 utils.FormatUnit(self.delta, "h"),
11664 utils.FormatUnit(self.target, "h")))
11666 # First run all grow ops in dry-run mode
11667 for node in instance.all_nodes:
11668 self.cfg.SetDiskID(disk, node)
11669 result = self.rpc.call_blockdev_grow(node, disk, self.delta, True)
11670 result.Raise("Grow request failed to node %s" % node)
11672 # We know that (as far as we can test) operations across different
11673 # nodes will succeed, time to run it for real
11674 for node in instance.all_nodes:
11675 self.cfg.SetDiskID(disk, node)
11676 result = self.rpc.call_blockdev_grow(node, disk, self.delta, False)
11677 result.Raise("Grow request failed to node %s" % node)
11679 # TODO: Rewrite code to work properly
11680 # DRBD goes into sync mode for a short amount of time after executing the
11681 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11682 # calling "resize" in sync mode fails. Sleeping for a short amount of
11683 # time is a work-around.
11686 disk.RecordGrow(self.delta)
11687 self.cfg.Update(instance, feedback_fn)
11689 # Changes have been recorded, release node lock
11690 _ReleaseLocks(self, locking.LEVEL_NODE)
11692 # Downgrade lock while waiting for sync
11693 self.glm.downgrade(locking.LEVEL_INSTANCE)
11695 if self.op.wait_for_sync:
11696 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11698 self.proc.LogWarning("Disk sync-ing has not returned a good"
11699 " status; please check the instance")
11700 if instance.admin_state != constants.ADMINST_UP:
11701 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11702 elif instance.admin_state != constants.ADMINST_UP:
11703 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11704 " not supposed to be running because no wait for"
11705 " sync mode was requested")
11707 assert self.owned_locks(locking.LEVEL_NODE_RES)
11708 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11711 class LUInstanceQueryData(NoHooksLU):
11712 """Query runtime instance data.
11717 def ExpandNames(self):
11718 self.needed_locks = {}
11720 # Use locking if requested or when non-static information is wanted
11721 if not (self.op.static or self.op.use_locking):
11722 self.LogWarning("Non-static data requested, locks need to be acquired")
11723 self.op.use_locking = True
11725 if self.op.instances or not self.op.use_locking:
11726 # Expand instance names right here
11727 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11729 # Will use acquired locks
11730 self.wanted_names = None
11732 if self.op.use_locking:
11733 self.share_locks = _ShareAll()
11735 if self.wanted_names is None:
11736 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11738 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11740 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11741 self.needed_locks[locking.LEVEL_NODE] = []
11742 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11744 def DeclareLocks(self, level):
11745 if self.op.use_locking:
11746 if level == locking.LEVEL_NODEGROUP:
11747 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11749 # Lock all groups used by instances optimistically; this requires going
11750 # via the node before it's locked, requiring verification later on
11751 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11752 frozenset(group_uuid
11753 for instance_name in owned_instances
11755 self.cfg.GetInstanceNodeGroups(instance_name))
11757 elif level == locking.LEVEL_NODE:
11758 self._LockInstancesNodes()
11760 def CheckPrereq(self):
11761 """Check prerequisites.
11763 This only checks the optional instance list against the existing names.
11766 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11767 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11768 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11770 if self.wanted_names is None:
11771 assert self.op.use_locking, "Locking was not used"
11772 self.wanted_names = owned_instances
11774 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11776 if self.op.use_locking:
11777 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11780 assert not (owned_instances or owned_groups or owned_nodes)
11782 self.wanted_instances = instances.values()
11784 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11785 """Returns the status of a block device
11788 if self.op.static or not node:
11791 self.cfg.SetDiskID(dev, node)
11793 result = self.rpc.call_blockdev_find(node, dev)
11797 result.Raise("Can't compute disk status for %s" % instance_name)
11799 status = result.payload
11803 return (status.dev_path, status.major, status.minor,
11804 status.sync_percent, status.estimated_time,
11805 status.is_degraded, status.ldisk_status)
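# Example return value (hypothetical): for a syncing DRBD device this
# might be ("/dev/drbd0", 147, 0, 80.5, 300, False, constants.LDS_OKAY),
# i.e. device path, major/minor numbers, sync percentage, estimated time
# in seconds, degraded flag and local-disk status.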
11807 def _ComputeDiskStatus(self, instance, snode, dev):
11808 """Compute block device status.
11811 if dev.dev_type in constants.LDS_DRBD:
11812 # we change the snode then (otherwise we use the one passed in)
11813 if dev.logical_id[0] == instance.primary_node:
11814 snode = dev.logical_id[1]
11816 snode = dev.logical_id[0]
11818 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11819 instance.name, dev)
11820 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11823 dev_children = map(compat.partial(self._ComputeDiskStatus,
11830 "iv_name": dev.iv_name,
11831 "dev_type": dev.dev_type,
11832 "logical_id": dev.logical_id,
11833 "physical_id": dev.physical_id,
11834 "pstatus": dev_pstatus,
11835 "sstatus": dev_sstatus,
11836 "children": dev_children,
11841 def Exec(self, feedback_fn):
11842 """Gather and return data"""
11845 cluster = self.cfg.GetClusterInfo()
11847 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11848 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
11850 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11851 for node in nodes.values()))
11853 group2name_fn = lambda uuid: groups[uuid].name
11855 for instance in self.wanted_instances:
11856 pnode = nodes[instance.primary_node]
11858 if self.op.static or pnode.offline:
11859 remote_state = None
11861 self.LogWarning("Primary node %s is marked offline, returning static"
11862 " information only for instance %s" %
11863 (pnode.name, instance.name))
11865 remote_info = self.rpc.call_instance_info(instance.primary_node,
11867 instance.hypervisor)
11868 remote_info.Raise("Error checking node %s" % instance.primary_node)
11869 remote_info = remote_info.payload
11870 if remote_info and "state" in remote_info:
11871 remote_state = "up"
11873 if instance.admin_state == constants.ADMINST_UP:
11874 remote_state = "down"
11876 remote_state = instance.admin_state
11878 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11881 snodes_group_uuids = [nodes[snode_name].group
11882 for snode_name in instance.secondary_nodes]
11884 result[instance.name] = {
11885 "name": instance.name,
11886 "config_state": instance.admin_state,
11887 "run_state": remote_state,
11888 "pnode": instance.primary_node,
11889 "pnode_group_uuid": pnode.group,
11890 "pnode_group_name": group2name_fn(pnode.group),
11891 "snodes": instance.secondary_nodes,
11892 "snodes_group_uuids": snodes_group_uuids,
11893 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
11895 # this happens to be the same format used for hooks
11896 "nics": _NICListToTuple(self, instance.nics),
11897 "disk_template": instance.disk_template,
11899 "hypervisor": instance.hypervisor,
11900 "network_port": instance.network_port,
11901 "hv_instance": instance.hvparams,
11902 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11903 "be_instance": instance.beparams,
11904 "be_actual": cluster.FillBE(instance),
11905 "os_instance": instance.osparams,
11906 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11907 "serial_no": instance.serial_no,
11908 "mtime": instance.mtime,
11909 "ctime": instance.ctime,
11910 "uuid": instance.uuid,
11916 def PrepareContainerMods(mods, private_fn):
11917 """Prepares a list of container modifications by adding a private data field.
11919 @type mods: list of tuples; (operation, index, parameters)
11920 @param mods: List of modifications
11921 @type private_fn: callable or None
11922 @param private_fn: Callable for constructing a private data field for a
11927 if private_fn is None:
11932 return [(op, idx, params, fn()) for (op, idx, params) in mods]
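# Minimal usage sketch (assumed values): given
#   mods = [(constants.DDM_ADD, -1, {"size": 1024})]
# PrepareContainerMods(mods, None) returns
#   [(constants.DDM_ADD, -1, {"size": 1024}, None)]
# whereas passing a class as private_fn attaches a fresh instance of that
# class to each modification instead of None.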
11935 #: Type description for changes as returned by L{ApplyContainerMods}'s callbacks
11937 _TApplyContModsCbChanges = \
11938 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
11939 ht.TNonEmptyString,
11944 def ApplyContainerMods(kind, container, chgdesc, mods,
11945 create_fn, modify_fn, remove_fn):
11946 """Applies descriptions in C{mods} to C{container}.
11949 @param kind: One-word item description
11950 @type container: list
11951 @param container: Container to modify
11952 @type chgdesc: None or list
11953 @param chgdesc: List of applied changes
11955 @param mods: Modifications as returned by L{PrepareContainerMods}
11956 @type create_fn: callable
11957 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
11958 receives absolute item index, parameters and private data object as added
11959 by L{PrepareContainerMods}, returns tuple containing new item and changes
11961 @type modify_fn: callable
11962 @param modify_fn: Callback for modifying an existing item
11963 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
11964 and private data object as added by L{PrepareContainerMods}, returns
11966 @type remove_fn: callable
11967 @param remove_fn: Callback on removing item; receives absolute item index,
11968 item and private data object as added by L{PrepareContainerMods}
11971 for (op, idx, params, private) in mods:
11974 absidx = len(container) - 1
11976 raise IndexError("Not accepting negative indices other than -1")
11977 elif idx > len(container):
11978 raise IndexError("Got %s index %s, but there are only %s" %
11979 (kind, idx, len(container)))
11985 if op == constants.DDM_ADD:
11986 # Calculate where item will be added
11988 addidx = len(container)
11992 if create_fn is None:
11995 (item, changes) = create_fn(addidx, params, private)
11998 container.append(item)
12001 assert idx <= len(container)
12002 # list.insert does so before the specified index
12003 container.insert(idx, item)
12005 # Retrieve existing item
12007 item = container[absidx]
12009 raise IndexError("Invalid %s index %s" % (kind, idx))
12011 if op == constants.DDM_REMOVE:
12014 if remove_fn is not None:
12015 remove_fn(absidx, item, private)
12017 changes = [("%s/%s" % (kind, absidx), "remove")]
12019 assert container[absidx] == item
12020 del container[absidx]
12021 elif op == constants.DDM_MODIFY:
12022 if modify_fn is not None:
12023 changes = modify_fn(absidx, item, params, private)
12025 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12027 assert _TApplyContModsCbChanges(changes)
12029 if not (chgdesc is None or changes is None):
12030 chgdesc.extend(changes)
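# Hedged end-to-end sketch with toy callbacks (not the real ones used by
# LUInstanceSetParams):
#   container = ["a"]
#   chgdesc = []
#   mods = PrepareContainerMods([(constants.DDM_ADD, -1, "b")], None)
#   ApplyContainerMods("item", container, chgdesc, mods,
#                      lambda absidx, params, private: (params, []),
#                      None, None)
# afterwards container == ["a", "b"]; modifications and removals work
# analogously through modify_fn and remove_fn.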
12033 def _UpdateIvNames(base_index, disks):
12034 """Updates the C{iv_name} attribute of disks.
12036 @type disks: list of L{objects.Disk}
12039 for (idx, disk) in enumerate(disks):
12040 disk.iv_name = "disk/%s" % (base_index + idx, )
12043 class _InstNicModPrivate:
12044 """Data structure for network interface modifications.
12046 Used by L{LUInstanceSetParams}.
12049 def __init__(self):
12054 class LUInstanceSetParams(LogicalUnit):
12055 """Modifies an instances's parameters.
12058 HPATH = "instance-modify"
12059 HTYPE = constants.HTYPE_INSTANCE
12063 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12064 assert ht.TList(mods)
12065 assert not mods or len(mods[0]) in (2, 3)
12067 if mods and len(mods[0]) == 2:
12071 for op, params in mods:
12072 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12073 result.append((op, -1, params))
12077 raise errors.OpPrereqError("Only one %s add or remove operation is"
12078 " supported at a time" % kind,
12079 errors.ECODE_INVAL)
12081 result.append((constants.DDM_MODIFY, op, params))
12083 assert verify_fn(result)
12090 def _CheckMods(kind, mods, key_types, item_fn):
12091 """Ensures requested disk/NIC modifications are valid.
12094 for (op, _, params) in mods:
12095 assert ht.TDict(params)
12097 utils.ForceDictType(params, key_types)
12099 if op == constants.DDM_REMOVE:
12101 raise errors.OpPrereqError("No settings should be passed when"
12102 " removing a %s" % kind,
12103 errors.ECODE_INVAL)
12104 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12105 item_fn(op, params)
12107 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12110 def _VerifyDiskModification(op, params):
12111 """Verifies a disk modification.
12114 if op == constants.DDM_ADD:
12115 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12116 if mode not in constants.DISK_ACCESS_SET:
12117 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12118 errors.ECODE_INVAL)
12120 size = params.get(constants.IDISK_SIZE, None)
12122 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12123 constants.IDISK_SIZE, errors.ECODE_INVAL)
12127 except (TypeError, ValueError), err:
12128 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12129 errors.ECODE_INVAL)
12131 params[constants.IDISK_SIZE] = size
12133 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12134 raise errors.OpPrereqError("Disk size change not possible, use"
12135 " grow-disk", errors.ECODE_INVAL)
12138 def _VerifyNicModification(op, params):
12139 """Verifies a network interface modification.
12142 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12143 ip = params.get(constants.INIC_IP, None)
12146 elif ip.lower() == constants.VALUE_NONE:
12147 params[constants.INIC_IP] = None
12148 elif not netutils.IPAddress.IsValid(ip):
12149 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12150 errors.ECODE_INVAL)
12152 bridge = params.get("bridge", None)
12153 link = params.get(constants.INIC_LINK, None)
12154 if bridge and link:
12155 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12156 " at the same time", errors.ECODE_INVAL)
12157 elif bridge and bridge.lower() == constants.VALUE_NONE:
12158 params["bridge"] = None
12159 elif link and link.lower() == constants.VALUE_NONE:
12160 params[constants.INIC_LINK] = None
12162 if op == constants.DDM_ADD:
12163 macaddr = params.get(constants.INIC_MAC, None)
12164 if macaddr is None:
12165 params[constants.INIC_MAC] = constants.VALUE_AUTO
12167 if constants.INIC_MAC in params:
12168 macaddr = params[constants.INIC_MAC]
12169 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12170 macaddr = utils.NormalizeAndValidateMac(macaddr)
12172 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12173 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12174 " modifying an existing NIC",
12175 errors.ECODE_INVAL)
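# For illustration (hypothetical values): adding a NIC with
#   (constants.DDM_ADD, -1, {constants.INIC_IP: "none"})
# normalizes the IP to None and defaults the MAC to constants.VALUE_AUTO;
# an explicitly given MAC is validated with utils.NormalizeAndValidateMac
# unless it is "auto" or "generate".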
12177 def CheckArguments(self):
12178 if not (self.op.nics or self.op.disks or self.op.disk_template or
12179 self.op.hvparams or self.op.beparams or self.op.os_name or
12180 self.op.offline is not None or self.op.runtime_mem):
12181 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12183 if self.op.hvparams:
12184 _CheckGlobalHvParams(self.op.hvparams)
12187 self._UpgradeDiskNicMods("disk", self.op.disks,
12188 opcodes.OpInstanceSetParams.TestDiskModifications)
12190 self._UpgradeDiskNicMods("NIC", self.op.nics,
12191 opcodes.OpInstanceSetParams.TestNicModifications)
12193 # Check disk modifications
12194 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12195 self._VerifyDiskModification)
12197 if self.op.disks and self.op.disk_template is not None:
12198 raise errors.OpPrereqError("Disk template conversion and other disk"
12199 " changes not supported at the same time",
12200 errors.ECODE_INVAL)
12202 if (self.op.disk_template and
12203 self.op.disk_template in constants.DTS_INT_MIRROR and
12204 self.op.remote_node is None):
12205 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12206 " one requires specifying a secondary node",
12207 errors.ECODE_INVAL)
12209 # Check NIC modifications
12210 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12211 self._VerifyNicModification)
12213 def ExpandNames(self):
12214 self._ExpandAndLockInstance()
12215 # Can't even acquire node locks in shared mode as upcoming changes in
12216 # Ganeti 2.6 will start to modify the node object on disk conversion
12217 self.needed_locks[locking.LEVEL_NODE] = []
12218 self.needed_locks[locking.LEVEL_NODE_RES] = []
12219 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12221 def DeclareLocks(self, level):
12222 # TODO: Acquire group lock in shared mode (disk parameters)
12223 if level == locking.LEVEL_NODE:
12224 self._LockInstancesNodes()
12225 if self.op.disk_template and self.op.remote_node:
12226 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12227 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12228 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12230 self.needed_locks[locking.LEVEL_NODE_RES] = \
12231 self.needed_locks[locking.LEVEL_NODE][:]
12233 def BuildHooksEnv(self):
12234 """Build hooks env.
12236 This runs on the master, primary and secondaries.
12240 if constants.BE_MINMEM in self.be_new:
12241 args["minmem"] = self.be_new[constants.BE_MINMEM]
12242 if constants.BE_MAXMEM in self.be_new:
12243 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12244 if constants.BE_VCPUS in self.be_new:
12245 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12246 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12247 # information at all.
12249 if self._new_nics is not None:
12252 for nic in self._new_nics:
12253 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12254 mode = nicparams[constants.NIC_MODE]
12255 link = nicparams[constants.NIC_LINK]
12256 nics.append((nic.ip, nic.mac, mode, link))
12258 args["nics"] = nics
12260 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12261 if self.op.disk_template:
12262 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12263 if self.op.runtime_mem:
12264 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12268 def BuildHooksNodes(self):
12269 """Build hooks nodes.
12272 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12275 def _PrepareNicModification(self, params, private, old_ip, old_params,
12277 update_params_dict = dict([(key, params[key])
12278 for key in constants.NICS_PARAMETERS
12281 if "bridge" in params:
12282 update_params_dict[constants.NIC_LINK] = params["bridge"]
12284 new_params = _GetUpdatedParams(old_params, update_params_dict)
12285 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12287 new_filled_params = cluster.SimpleFillNIC(new_params)
12288 objects.NIC.CheckParameterSyntax(new_filled_params)
12290 new_mode = new_filled_params[constants.NIC_MODE]
12291 if new_mode == constants.NIC_MODE_BRIDGED:
12292 bridge = new_filled_params[constants.NIC_LINK]
12293 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12295 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12297 self.warn.append(msg)
12299 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12301 elif new_mode == constants.NIC_MODE_ROUTED:
12302 ip = params.get(constants.INIC_IP, old_ip)
12304 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12305 " on a routed NIC", errors.ECODE_INVAL)
12307 if constants.INIC_MAC in params:
12308 mac = params[constants.INIC_MAC]
12310 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12311 errors.ECODE_INVAL)
12312 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12313 # otherwise generate the MAC address
12314 params[constants.INIC_MAC] = \
12315 self.cfg.GenerateMAC(self.proc.GetECId())
12317 # or validate/reserve the current one
12319 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12320 except errors.ReservationError:
12321 raise errors.OpPrereqError("MAC address '%s' already in use"
12322 " in cluster" % mac,
12323 errors.ECODE_NOTUNIQUE)
12325 private.params = new_params
12326 private.filled = new_filled_params
12328 return (None, None)
12330 def CheckPrereq(self):
12331 """Check prerequisites.
12333 This only checks the instance list against the existing names.
12336 # checking the new params on the primary/secondary nodes
12338 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12339 cluster = self.cluster = self.cfg.GetClusterInfo()
12340 assert self.instance is not None, \
12341 "Cannot retrieve locked instance %s" % self.op.instance_name
12342 pnode = instance.primary_node
12343 nodelist = list(instance.all_nodes)
12344 pnode_info = self.cfg.GetNodeInfo(pnode)
12345 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
12347 # Prepare disk/NIC modifications
12348 self.diskmod = PrepareContainerMods(self.op.disks, None)
12349 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12352 if self.op.os_name and not self.op.force:
12353 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12354 self.op.force_variant)
12355 instance_os = self.op.os_name
12357 instance_os = instance.os
12359 assert not (self.op.disk_template and self.op.disks), \
12360 "Can't modify disk template and apply disk changes at the same time"
12362 if self.op.disk_template:
12363 if instance.disk_template == self.op.disk_template:
12364 raise errors.OpPrereqError("Instance already has disk template %s" %
12365 instance.disk_template, errors.ECODE_INVAL)
12367 if (instance.disk_template,
12368 self.op.disk_template) not in self._DISK_CONVERSIONS:
12369 raise errors.OpPrereqError("Unsupported disk template conversion from"
12370 " %s to %s" % (instance.disk_template,
12371 self.op.disk_template),
12372 errors.ECODE_INVAL)
12373 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12374 msg="cannot change disk template")
12375 if self.op.disk_template in constants.DTS_INT_MIRROR:
12376 if self.op.remote_node == pnode:
12377 raise errors.OpPrereqError("Given new secondary node %s is the same"
12378 " as the primary node of the instance" %
12379 self.op.remote_node, errors.ECODE_STATE)
12380 _CheckNodeOnline(self, self.op.remote_node)
12381 _CheckNodeNotDrained(self, self.op.remote_node)
12382 # FIXME: here we assume that the old instance type is DT_PLAIN
12383 assert instance.disk_template == constants.DT_PLAIN
12384 disks = [{constants.IDISK_SIZE: d.size,
12385 constants.IDISK_VG: d.logical_id[0]}
12386 for d in instance.disks]
12387 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12388 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12390 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12391 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12392 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12393 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12394 ignore=self.op.ignore_ipolicy)
12395 if pnode_info.group != snode_info.group:
12396 self.LogWarning("The primary and secondary nodes are in two"
12397 " different node groups; the disk parameters"
12398 " from the first disk's node group will be"
12401 # hvparams processing
12402 if self.op.hvparams:
12403 hv_type = instance.hypervisor
12404 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12405 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12406 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12409 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12410 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12411 self.hv_proposed = self.hv_new = hv_new # the new actual values
12412 self.hv_inst = i_hvdict # the new dict (without defaults)
12414 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12416 self.hv_new = self.hv_inst = {}
12418 # beparams processing
12419 if self.op.beparams:
12420 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12422 objects.UpgradeBeParams(i_bedict)
12423 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12424 be_new = cluster.SimpleFillBE(i_bedict)
12425 self.be_proposed = self.be_new = be_new # the new actual values
12426 self.be_inst = i_bedict # the new dict (without defaults)
12428 self.be_new = self.be_inst = {}
12429 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12430 be_old = cluster.FillBE(instance)
12432 # CPU param validation -- checking every time a parameter is
12433 # changed to cover all cases where either CPU mask or vcpus have
12435 if (constants.BE_VCPUS in self.be_proposed and
12436 constants.HV_CPU_MASK in self.hv_proposed):
12438 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12439 # Verify mask is consistent with number of vCPUs. Can skip this
12440 # test if only 1 entry in the CPU mask, which means same mask
12441 # is applied to all vCPUs.
12442 if (len(cpu_list) > 1 and
12443 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12444 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12446 (self.be_proposed[constants.BE_VCPUS],
12447 self.hv_proposed[constants.HV_CPU_MASK]),
12448 errors.ECODE_INVAL)
12450 # Only perform this test if a new CPU mask is given
12451 if constants.HV_CPU_MASK in self.hv_new:
12452 # Calculate the largest CPU number requested
12453 max_requested_cpu = max(map(max, cpu_list))
12454 # Check that all of the instance's nodes have enough physical CPUs to
12455 # satisfy the requested CPU mask
12456 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12457 max_requested_cpu + 1, instance.hypervisor)
12459 # osparams processing
12460 if self.op.osparams:
12461 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12462 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12463 self.os_inst = i_osdict # the new dict (without defaults)
12469 #TODO(dynmem): do the appropriate check involving MINMEM
12470 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12471 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12472 mem_check_list = [pnode]
12473 if be_new[constants.BE_AUTO_BALANCE]:
12474 # either we changed auto_balance to yes or it was from before
12475 mem_check_list.extend(instance.secondary_nodes)
12476 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12477 instance.hypervisor)
12478 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12479 [instance.hypervisor])
12480 pninfo = nodeinfo[pnode]
12481 msg = pninfo.fail_msg
12483 # Assume the primary node is unreachable and go ahead
12484 self.warn.append("Can't get info from primary node %s: %s" %
12487 (_, _, (pnhvinfo, )) = pninfo.payload
12488 if not isinstance(pnhvinfo.get("memory_free", None), int):
12489 self.warn.append("Node data from primary node %s doesn't contain"
12490 " free memory information" % pnode)
12491 elif instance_info.fail_msg:
12492 self.warn.append("Can't get instance runtime information: %s" %
12493 instance_info.fail_msg)
12495 if instance_info.payload:
12496 current_mem = int(instance_info.payload["memory"])
12498 # Assume instance not running
12499 # (there is a slight race condition here, but it's not very
12500 # probable, and we have no other way to check)
12501 # TODO: Describe race condition
12503 #TODO(dynmem): do the appropriate check involving MINMEM
12504 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12505 pnhvinfo["memory_free"])
12507 raise errors.OpPrereqError("This change will prevent the instance"
12508 " from starting, due to %d MB of memory"
12509 " missing on its primary node" %
12511 errors.ECODE_NORES)
12513 if be_new[constants.BE_AUTO_BALANCE]:
12514 for node, nres in nodeinfo.items():
12515 if node not in instance.secondary_nodes:
12517 nres.Raise("Can't get info from secondary node %s" % node,
12518 prereq=True, ecode=errors.ECODE_STATE)
12519 (_, _, (nhvinfo, )) = nres.payload
12520 if not isinstance(nhvinfo.get("memory_free", None), int):
12521 raise errors.OpPrereqError("Secondary node %s didn't return free"
12522 " memory information" % node,
12523 errors.ECODE_STATE)
12524 #TODO(dynmem): do the appropriate check involving MINMEM
12525 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12526 raise errors.OpPrereqError("This change will prevent the instance"
12527 " from failover to its secondary node"
12528 " %s, due to not enough memory" % node,
12529 errors.ECODE_STATE)
12531 if self.op.runtime_mem:
12532 remote_info = self.rpc.call_instance_info(instance.primary_node,
12534 instance.hypervisor)
12535 remote_info.Raise("Error checking node %s" % instance.primary_node)
12536 if not remote_info.payload: # not running already
12537 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12538 errors.ECODE_STATE)
12540 current_memory = remote_info.payload["memory"]
12541 if (not self.op.force and
12542 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12543 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12544 raise errors.OpPrereqError("Instance %s must have memory between %d"
12545 " and %d MB of memory unless --force is"
12546 " given" % (instance.name,
12547 self.be_proposed[constants.BE_MINMEM],
12548 self.be_proposed[constants.BE_MAXMEM]),
12549 errors.ECODE_INVAL)
12551 if self.op.runtime_mem > current_memory:
12552 _CheckNodeFreeMemory(self, instance.primary_node,
12553 "ballooning memory for instance %s" %
12555 self.op.runtime_mem - current_memory,
12556 instance.hypervisor)
12558 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12559 raise errors.OpPrereqError("Disk operations not supported for"
12560 " diskless instances",
12561 errors.ECODE_INVAL)
12563 def _PrepareNicCreate(_, params, private):
12564 return self._PrepareNicModification(params, private, None, {},
12567 def _PrepareNicMod(_, nic, params, private):
12568 return self._PrepareNicModification(params, private, nic.ip,
12569 nic.nicparams, cluster, pnode)
12571 # Verify NIC changes (operating on copy)
12572 nics = instance.nics[:]
12573 ApplyContainerMods("NIC", nics, None, self.nicmod,
12574 _PrepareNicCreate, _PrepareNicMod, None)
12575 if len(nics) > constants.MAX_NICS:
12576 raise errors.OpPrereqError("Instance has too many network interfaces"
12577 " (%d), cannot add more" % constants.MAX_NICS,
12578 errors.ECODE_STATE)
12580 # Verify disk changes (operating on a copy)
12581 disks = instance.disks[:]
12582 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12583 if len(disks) > constants.MAX_DISKS:
12584 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12585 " more" % constants.MAX_DISKS,
12586 errors.ECODE_STATE)
12588 if self.op.offline is not None:
12589 if self.op.offline:
12590 msg = "can't change to offline"
12592 msg = "can't change to online"
12593 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12595 # Pre-compute NIC changes (necessary to use result in hooks)
12596 self._nic_chgdesc = []
12598 # Operate on copies as this is still in prereq
12599 nics = [nic.Copy() for nic in instance.nics]
12600 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12601 self._CreateNewNic, self._ApplyNicMods, None)
12602 self._new_nics = nics
12604 self._new_nics = None
12606 def _ConvertPlainToDrbd(self, feedback_fn):
12607 """Converts an instance from plain to drbd.
12610 feedback_fn("Converting template to drbd")
12611 instance = self.instance
12612 pnode = instance.primary_node
12613 snode = self.op.remote_node
12615 assert instance.disk_template == constants.DT_PLAIN
12617 # create a fake disk info for _GenerateDiskTemplate
12618 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12619 constants.IDISK_VG: d.logical_id[0]}
12620 for d in instance.disks]
12621 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12622 instance.name, pnode, [snode],
12623 disk_info, None, None, 0, feedback_fn,
12625 info = _GetInstanceInfoText(instance)
12626 feedback_fn("Creating additional volumes...")
12627 # first, create the missing data and meta devices
12628 for disk in new_disks:
12629 # unfortunately this is... not too nice
12630 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12632 for child in disk.children:
12633 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12634 # at this stage, all new LVs have been created, we can rename the
12636 feedback_fn("Renaming original volumes...")
12637 rename_list = [(o, n.children[0].logical_id)
12638 for (o, n) in zip(instance.disks, new_disks)]
12639 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12640 result.Raise("Failed to rename original LVs")
12642 feedback_fn("Initializing DRBD devices...")
12643 # all child devices are in place, we can now create the DRBD devices
12644 for disk in new_disks:
12645 for node in [pnode, snode]:
12646 f_create = node == pnode
12647 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12649 # at this point, the instance has been modified
12650 instance.disk_template = constants.DT_DRBD8
12651 instance.disks = new_disks
12652 self.cfg.Update(instance, feedback_fn)
12654 # Release node locks while waiting for sync
12655 _ReleaseLocks(self, locking.LEVEL_NODE)
12657 # disks are created, waiting for sync
12658 disk_abort = not _WaitForSync(self, instance,
12659 oneshot=not self.op.wait_for_sync)
12661 raise errors.OpExecError("There are some degraded disks for"
12662 " this instance, please cleanup manually")
12664 # Node resource locks will be released by caller
12666 def _ConvertDrbdToPlain(self, feedback_fn):
12667 """Converts an instance from drbd to plain.
12670 instance = self.instance
12672 assert len(instance.secondary_nodes) == 1
12673 assert instance.disk_template == constants.DT_DRBD8
12675 pnode = instance.primary_node
12676 snode = instance.secondary_nodes[0]
12677 feedback_fn("Converting template to plain")
12679 old_disks = instance.disks
12680 new_disks = [d.children[0] for d in old_disks]
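# Each DRBD8 disk used here has two children: children[0] is the data LV,
# which becomes the new plain disk, and children[1] is the metadata LV,
# which is removed further down; dropping the DRBD layer thus amounts to
# re-parenting the data volume.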
12682 # copy over size and mode
12683 for parent, child in zip(old_disks, new_disks):
12684 child.size = parent.size
12685 child.mode = parent.mode
12687 # this is a DRBD disk, return its port to the pool
12688 # NOTE: this must be done right before the call to cfg.Update!
12689 for disk in old_disks:
12690 tcp_port = disk.logical_id[2]
12691 self.cfg.AddTcpUdpPort(tcp_port)
12693 # update instance structure
12694 instance.disks = new_disks
12695 instance.disk_template = constants.DT_PLAIN
12696 self.cfg.Update(instance, feedback_fn)
12698 # Release locks in case removing disks takes a while
12699 _ReleaseLocks(self, locking.LEVEL_NODE)
12701 feedback_fn("Removing volumes on the secondary node...")
12702 for disk in old_disks:
12703 self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)
12709 feedback_fn("Removing unneeded volumes on the primary node...")
12710 for idx, disk in enumerate(old_disks):
12711 meta = disk.children[1]
12712 self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)
12718 def _CreateNewDisk(self, idx, params, _):
12719 """Creates a new disk.
12722 instance = self.instance
12725 if instance.disk_template in constants.DTS_FILEBASED:
12726 (file_driver, file_path) = instance.disks[0].logical_id
12727 file_path = os.path.dirname(file_path)
12729 file_driver = file_path = None
12732 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12733 instance.primary_node, instance.secondary_nodes,
12734 [params], file_path, file_driver, idx,
12735 self.Log, self.diskparams)[0]
12737 info = _GetInstanceInfoText(instance)
12739 logging.info("Creating volume %s for instance %s",
12740 disk.iv_name, instance.name)
12741 # Note: this needs to be kept in sync with _CreateDisks
    for node in instance.all_nodes:
      f_create = (node == instance.primary_node)
      try:
        _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
      except errors.OpExecError, err:
        self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
                        disk.iv_name, disk, node, err)

    return (disk, [
      ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
      ])
  @staticmethod
  def _ModifyDisk(idx, disk, params, _):
    """Modifies a disk.

    """
    disk.mode = params[constants.IDISK_MODE]

    return [
      ("disk.mode/%d" % idx, disk.mode),
      ]
  def _RemoveDisk(self, idx, root, _):
    """Removes a disk.

    """
12770 for node, disk in root.ComputeNodeTree(self.instance.primary_node):
12771 self.cfg.SetDiskID(disk, node)
      msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove disk/%d on node '%s': %s,"
                        " continuing anyway", idx, node, msg)
12777 # if this is a DRBD disk, return its port to the pool
12778 if root.dev_type in constants.LDS_DRBD:
12779 self.cfg.AddTcpUdpPort(root.logical_id[2])
  @staticmethod
  def _CreateNewNic(idx, params, private):
    """Creates data structure for a new network interface.

    """
    mac = params[constants.INIC_MAC]
    ip = params.get(constants.INIC_IP, None)
    nicparams = private.params

    return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
      ("nic.%d" % idx,
       "add:mac=%s,ip=%s,mode=%s,link=%s" %
       (mac, ip, private.filled[constants.NIC_MODE],
        private.filled[constants.NIC_LINK])),
      ])
  @staticmethod
  def _ApplyNicMods(idx, nic, params, private):
    """Modifies a network interface.

    """
    changes = []

    for key in [constants.INIC_MAC, constants.INIC_IP]:
      if key in params:
        changes.append(("nic.%s/%d" % (key, idx), params[key]))
        setattr(nic, key, params[key])

    if private.params:
      nic.nicparams = private.params

      for (key, val) in params.items():
        changes.append(("nic.%s/%d" % (key, idx), val))

    return changes
12817 def Exec(self, feedback_fn):
12818 """Modifies an instance.
12820 All parameters take effect only at the next restart of the instance.
12823 # Process here the warnings from CheckPrereq, as we don't have a
12824 # feedback_fn there.
12825 # TODO: Replace with self.LogWarning
12826 for warn in self.warn:
12827 feedback_fn("WARNING: %s" % warn)
12829 assert ((self.op.disk_template is None) ^
12830 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12831 "Not owning any node resource locks"
12834 instance = self.instance
12837 if self.op.runtime_mem:
12838 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12840 self.op.runtime_mem)
12841 rpcres.Raise("Cannot modify instance runtime memory")
12842 result.append(("runtime_memory", self.op.runtime_mem))
12844 # Apply disk changes
12845 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12846 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12847 _UpdateIvNames(0, instance.disks)
    if self.op.disk_template:
      if __debug__:
        check_nodes = set(instance.all_nodes)
        if self.op.remote_node:
          check_nodes.add(self.op.remote_node)
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
          owned = self.owned_locks(level)
          assert not (check_nodes - owned), \
            ("Not owning the correct locks, owning %r, expected at least %r" %
             (owned, check_nodes))
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))
12872 assert instance.disk_template == self.op.disk_template, \
12873 ("Expected disk template '%s', found '%s'" %
12874 (self.op.disk_template, instance.disk_template))
12876 # Release node and resource locks if there are any (they might already have
12877 # been released during disk conversion)
12878 _ReleaseLocks(self, locking.LEVEL_NODE)
12879 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12881 # Apply NIC changes
12882 if self._new_nics is not None:
12883 instance.nics = self._new_nics
12884 result.extend(self._nic_chgdesc)
    # hvparams changes
    if self.op.hvparams:
12888 instance.hvparams = self.hv_inst
12889 for key, val in self.op.hvparams.iteritems():
12890 result.append(("hv/%s" % key, val))
    # beparams changes
    if self.op.beparams:
12894 instance.beparams = self.be_inst
12895 for key, val in self.op.beparams.iteritems():
12896 result.append(("be/%s" % key, val))
    # OS change
    if self.op.os_name:
12900 instance.os = self.op.os_name
    # osparams changes
    if self.op.osparams:
12904 instance.osparams = self.os_inst
12905 for key, val in self.op.osparams.iteritems():
12906 result.append(("os/%s" % key, val))
    if self.op.offline is None:
      # Ignore
      pass
    elif self.op.offline:
12912 # Mark instance as offline
12913 self.cfg.MarkInstanceOffline(instance.name)
12914 result.append(("admin_state", constants.ADMINST_OFFLINE))
    else:
      # Mark instance as online, but stopped
12917 self.cfg.MarkInstanceDown(instance.name)
12918 result.append(("admin_state", constants.ADMINST_DOWN))
12920 self.cfg.Update(instance, feedback_fn)
12922 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12923 self.owned_locks(locking.LEVEL_NODE)), \
12924 "All node locks should have been released by now"
12928 _DISK_CONVERSIONS = {
12929 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12930 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
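  # Sketch (annotation, not upstream code): Exec looks up conversions by the
  # (current, requested) template pair, e.g.
  #   self._DISK_CONVERSIONS[(constants.DT_PLAIN, constants.DT_DRBD8)]
  # an unsupported pair raises KeyError inside the try block above, which
  # releases the reserved DRBD minors before re-raising.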
12934 class LUInstanceChangeGroup(LogicalUnit):
12935 HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
12939 def ExpandNames(self):
12940 self.share_locks = _ShareAll()
12941 self.needed_locks = {
12942 locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    self._ExpandAndLockInstance()
12948 if self.op.target_groups:
12949 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12950 self.op.target_groups)
    else:
      self.req_target_uuids = None
12954 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12956 def DeclareLocks(self, level):
12957 if level == locking.LEVEL_NODEGROUP:
12958 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12960 if self.req_target_uuids:
12961 lock_groups = set(self.req_target_uuids)
12963 # Lock all groups used by instance optimistically; this requires going
12964 # via the node before it's locked, requiring verification later on
12965 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET
12971 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12973 elif level == locking.LEVEL_NODE:
12974 if self.req_target_uuids:
12975 # Lock all nodes used by instances
12976 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12977 self._LockInstancesNodes()
12979 # Lock all nodes in all potential target groups
12980 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12981 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12982 member_nodes = [node_name
12983 for group in lock_groups
12984 for node_name in self.cfg.GetNodeGroup(group).members]
12985 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12990 def CheckPrereq(self):
12991 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12992 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12993 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12995 assert (self.req_target_uuids is None or
12996 owned_groups.issuperset(self.req_target_uuids))
12997 assert owned_instances == set([self.op.instance_name])
12999 # Get instance information
13000 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13002 # Check if node groups for locked instance are still correct
13003 assert owned_nodes.issuperset(self.instance.all_nodes), \
13004 ("Instance %s's nodes changed while we kept the lock" %
13005 self.op.instance_name)
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)
13010 if self.req_target_uuids:
13011 # User requested specific target groups
13012 self.target_uuids = frozenset(self.req_target_uuids)
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups
13017 conflicting_groups = self.target_uuids & inst_groups
13018 if conflicting_groups:
13019 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13020 " used by the instance '%s'" %
13021 (utils.CommaJoin(conflicting_groups),
13022 self.op.instance_name),
13023 errors.ECODE_INVAL)
13025 if not self.target_uuids:
13026 raise errors.OpPrereqError("There are no possible target groups",
13027 errors.ECODE_INVAL)
13029 def BuildHooksEnv(self):
13030 """Build hooks env.
13033 assert self.target_uuids
13036 "TARGET_GROUPS": " ".join(self.target_uuids),
13039 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13043 def BuildHooksNodes(self):
13044 """Build hooks nodes.
13047 mn = self.cfg.GetMasterNode()
13048 return ([mn], [mn])
13050 def Exec(self, feedback_fn):
13051 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13053 assert instances == [self.op.instance_name], "Instance not locked"
13055 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13056 instances=instances, target_groups=list(self.target_uuids))
13058 ial.Run(self.op.iallocator)
13060 if not ial.success:
13061 raise errors.OpPrereqError("Can't compute solution for changing group of"
13062 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info),
                                 errors.ECODE_NORES)
13067 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13069 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13070 " instance '%s'", len(jobs), self.op.instance_name)
13072 return ResultWithJobs(jobs)
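# Illustrative note (annotation, not upstream code): in CHG_GROUP mode the
# iallocator receives the instance list and the candidate target groups and
# answers with per-instance job sets, which _LoadNodeEvacResult converts into
# submittable jobs whose IDs are returned to the caller.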
13075 class LUBackupQuery(NoHooksLU):
13076 """Query the exports list
13081 def CheckArguments(self):
13082 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13083 ["node", "export"], self.op.use_locking)
13085 def ExpandNames(self):
13086 self.expq.ExpandNames(self)
13088 def DeclareLocks(self, level):
13089 self.expq.DeclareLocks(self, level)
  def Exec(self, feedback_fn):
    result = {}
13094 for (node, expname) in self.expq.OldStyleQuery(self):
13095 if expname is None:
13096 result[node] = False
      else:
        result.setdefault(node, []).append(expname)

    return result
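# Illustrative result shape (inferred from the code above): a dict mapping
# node name to False when the export query failed, otherwise to a list of
# export names, e.g.
#   {"node1.example.com": ["inst1.example.com"], "node2.example.com": False}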
13103 class _ExportQuery(_QueryBase):
13104 FIELDS = query.EXPORT_FIELDS
13106 #: The node name is not a unique key for this query
13107 SORT_FIELD = "node"
13109 def ExpandNames(self, lu):
13110 lu.needed_locks = {}
    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET
13118 self.do_locking = self.use_locking
13120 if self.do_locking:
13121 lu.share_locks = _ShareAll()
13122 lu.needed_locks = {
        locking.LEVEL_NODE: self.wanted,
        }

  def DeclareLocks(self, lu, level):
    pass
13129 def _GetQueryData(self, lu):
13130 """Computes the list of nodes and their attributes.
13133 # Locking is not used
13135 assert not (compat.any(lu.glm.is_owned(level)
13136 for level in locking.LEVELS
13137 if level != locking.LEVEL_CLUSTER) or
13138 self.do_locking or self.use_locking)
    nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)

    result = []

    for (node, nres) in lu.rpc.call_export_list(nodes).items():
      if nres.fail_msg:
        result.append((node, None))
      else:
        result.extend((node, expname) for expname in nres.payload)

    return result
13153 class LUBackupPrepare(NoHooksLU):
13154 """Prepares an instance for an export and returns useful information.
13159 def ExpandNames(self):
13160 self._ExpandAndLockInstance()
13162 def CheckPrereq(self):
13163 """Check prerequisites.
13166 instance_name = self.op.instance_name
13168 self.instance = self.cfg.GetInstanceInfo(instance_name)
13169 assert self.instance is not None, \
13170 "Cannot retrieve locked instance %s" % self.op.instance_name
13171 _CheckNodeOnline(self, self.instance.primary_node)
13173 self._cds = _GetClusterDomainSecret()
13175 def Exec(self, feedback_fn):
13176 """Prepares an instance for an export.
13179 instance = self.instance
13181 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13182 salt = utils.GenerateSecret(8)
13184 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13185 result = self.rpc.call_x509_cert_create(instance.primary_node,
13186 constants.RIE_CERT_VALIDITY)
13187 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13189 (name, cert_pem) = result.payload
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None
13204 class LUBackupExport(LogicalUnit):
13205 """Export an instance to an image in the cluster.
13208 HPATH = "instance-export"
13209 HTYPE = constants.HTYPE_INSTANCE
13212 def CheckArguments(self):
13213 """Check the arguments.
13216 self.x509_key_name = self.op.x509_key_name
13217 self.dest_x509_ca_pem = self.op.destination_x509_ca
13219 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13220 if not self.x509_key_name:
13221 raise errors.OpPrereqError("Missing X509 key name for encryption",
13222 errors.ECODE_INVAL)
13224 if not self.dest_x509_ca_pem:
13225 raise errors.OpPrereqError("Missing destination X509 CA",
13226 errors.ECODE_INVAL)
13228 def ExpandNames(self):
13229 self._ExpandAndLockInstance()
13231 # Lock all nodes for local exports
13232 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13233 # FIXME: lock only instance primary and destination node
    # Sad but true, for now we have to lock all nodes, as we don't know where
13236 # the previous export might be, and in this LU we search for it and
13237 # remove it from its current node. In the future we could fix this by:
13238 # - making a tasklet to search (share-lock all), then create the
13239 # new one, then one to remove, after
13240 # - removing the removal operation altogether
13241 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13243 def DeclareLocks(self, level):
13244 """Last minute lock declaration."""
13245 # All nodes are locked anyway, so nothing to do here.
13247 def BuildHooksEnv(self):
13248 """Build hooks env.
13250 This will run on the master, primary node and target node.
13254 "EXPORT_MODE": self.op.mode,
13255 "EXPORT_NODE": self.op.target_node,
13256 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13257 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13258 # TODO: Generic function for boolean env variables
13259 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13262 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13266 def BuildHooksNodes(self):
13267 """Build hooks nodes.
13270 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13272 if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return (nl, nl)
13277 def CheckPrereq(self):
13278 """Check prerequisites.
13280 This checks that the instance and node names are valid.
13283 instance_name = self.op.instance_name
13285 self.instance = self.cfg.GetInstanceInfo(instance_name)
13286 assert self.instance is not None, \
13287 "Cannot retrieve locked instance %s" % self.op.instance_name
13288 _CheckNodeOnline(self, self.instance.primary_node)
13290 if (self.op.remove_instance and
13291 self.instance.admin_state == constants.ADMINST_UP and
13292 not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down before", errors.ECODE_STATE)
13296 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13297 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13298 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13299 assert self.dst_node is not None
13301 _CheckNodeOnline(self, self.dst_node.name)
13302 _CheckNodeNotDrained(self, self.dst_node.name)
      self._cds = None
      self.dest_disk_info = None
13306 self.dest_x509_ca = None
13308 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13309 self.dst_node = None
13311 if len(self.op.target_node) != len(self.instance.disks):
13312 raise errors.OpPrereqError(("Received destination information for %s"
13313 " disks, but instance %s has %s disks") %
13314 (len(self.op.target_node), instance_name,
13315 len(self.instance.disks)),
13316 errors.ECODE_INVAL)
13318 cds = _GetClusterDomainSecret()
      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13323 except (TypeError, ValueError), err:
13324 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13326 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13327 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13328 errors.ECODE_INVAL)
      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13333 except OpenSSL.crypto.Error, err:
13334 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13335 (err, ), errors.ECODE_INVAL)
13337 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13338 if errcode is not None:
13339 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13340 (msg, ), errors.ECODE_INVAL)
13342 self.dest_x509_ca = cert
      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13350 except errors.GenericError, err:
13351 raise errors.OpPrereqError("Target info for disk %s: %s" %
13352 (idx, err), errors.ECODE_INVAL)
13354 disk_info.append((host, port, magic))
13356 assert len(disk_info) == len(self.op.target_node)
13357 self.dest_disk_info = disk_info
    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)
13363 # instance disk type verification
13364 # TODO: Implement export support for file-based disks
13365 for disk in self.instance.disks:
13366 if disk.dev_type == constants.LD_FILE:
13367 raise errors.OpPrereqError("Export not supported for instances with"
13368 " file-based disks", errors.ECODE_INVAL)
13370 def _CleanupExports(self, feedback_fn):
13371 """Removes exports of current instance from all other nodes.
13373 If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
13377 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13379 nodelist = self.cfg.GetNodeList()
13380 nodelist.remove(self.dst_node.name)
13382 # on one-node clusters nodelist will be empty after the removal
13383 # if we proceed the backup would be removed because OpBackupQuery
13384 # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)
13398 def Exec(self, feedback_fn):
13399 """Export an instance to an image in the cluster.
13402 assert self.op.mode in constants.EXPORT_MODES
13404 instance = self.instance
13405 src_node = instance.primary_node
13407 if self.op.shutdown:
13408 # shutdown the instance, but not the disks
13409 feedback_fn("Shutting down instance %s" % instance.name)
13410 result = self.rpc.call_instance_shutdown(src_node, instance,
13411 self.op.shutdown_timeout)
13412 # TODO: Maybe ignore failures if ignore_remove_failures is set
13413 result.Raise("Could not shutdown instance %s on"
13414 " node %s" % (instance.name, src_node))
13416 # set the disks ID correctly since call_instance_start needs the
13417 # correct drbd minor to create the symlinks
13418 for disk in instance.disks:
13419 self.cfg.SetDiskID(disk, src_node)
13421 activate_disks = (instance.admin_state != constants.ADMINST_UP)
    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
13425 feedback_fn("Activating disks for %s" % instance.name)
13426 _StartInstanceDisks(self, instance, None)
    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and
13435 instance.admin_state == constants.ADMINST_UP and
13436 not self.op.remove_instance):
13437 assert not activate_disks
13438 feedback_fn("Starting instance %s" % instance.name)
13439 result = self.rpc.call_instance_start(src_node,
13440 (instance, None, None), False)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)
13447 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13448 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13449 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13450 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13451 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
13466 assert len(dresults) == len(instance.disks)
13467 assert compat.all(isinstance(i, bool) for i in dresults), \
13468 "Not all results are boolean: %r" % dresults
13472 feedback_fn("Deactivating disks for %s" % instance.name)
13473 _ShutdownInstanceDisks(self, instance)
    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)
13484 raise errors.OpExecError("Export failed, errors in %s" %
13485 utils.CommaJoin(failures))
13487 # At this point, the export was successful, we can cleanup/finish
13489 # Remove instance if requested
13490 if self.op.remove_instance:
13491 feedback_fn("Removing instance %s" % instance.name)
13492 _RemoveInstance(self, feedback_fn, instance,
13493 self.op.ignore_remove_failures)
13495 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13496 self._CleanupExports(feedback_fn)
13498 return fin_resu, dresults
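# Illustrative note: fin_resu reports export finalization and dresults holds
# one boolean per instance disk; any False entry makes Exec raise OpExecError
# above, so a successful call returns something like (True, [True, True]).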
13501 class LUBackupRemove(NoHooksLU):
13502 """Remove exports related to the named instance.
13507 def ExpandNames(self):
13508 self.needed_locks = {}
13509 # We need all nodes to be locked in order for RemoveExport to work, but we
13510 # don't need to lock the instance itself, as nothing will happen to it (and
13511 # we can remove exports also for a removed instance)
13512 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13514 def Exec(self, feedback_fn):
13515 """Remove any export.
13518 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name
13526 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)
13542 if fqdn_warn and not found:
13543 feedback_fn("Export not found. If trying to remove an export belonging"
13544 " to a deleted instance please use its Fully Qualified"
13548 class LUGroupAdd(LogicalUnit):
13549 """Logical unit for creating node groups.
13552 HPATH = "group-add"
13553 HTYPE = constants.HTYPE_GROUP
13556 def ExpandNames(self):
13557 # We need the new group's UUID here so that we can create and acquire the
13558 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13559 # that it should not check whether the UUID exists in the configuration.
13560 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13561 self.needed_locks = {}
13562 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13564 def CheckPrereq(self):
13565 """Check prerequisites.
13567 This checks that the given group name is not an existing node group
13572 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13573 except errors.OpPrereqError:
13576 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13577 " node group (UUID: %s)" %
13578 (self.op.group_name, existing_uuid),
13579 errors.ECODE_EXISTS)
13581 if self.op.ndparams:
13582 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13584 if self.op.hv_state:
13585 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
    else:
      self.new_hv_state = None
13589 if self.op.disk_state:
13590 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
    else:
      self.new_disk_state = None
13594 if self.op.diskparams:
13595 for templ in constants.DISK_TEMPLATES:
13596 if templ not in self.op.diskparams:
13597 self.op.diskparams[templ] = {}
13598 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
    else:
      self.op.diskparams = self.cfg.GetClusterInfo().diskparams
13602 if self.op.ipolicy:
13603 cluster = self.cfg.GetClusterInfo()
13604 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
      try:
        objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13607 except errors.ConfigurationError, err:
13608 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13609 errors.ECODE_INVAL)
13611 def BuildHooksEnv(self):
13612 """Build hooks env.
13616 "GROUP_NAME": self.op.group_name,
13619 def BuildHooksNodes(self):
13620 """Build hooks nodes.
13623 mn = self.cfg.GetMasterNode()
13624 return ([mn], [mn])
13626 def Exec(self, feedback_fn):
13627 """Add the node group to the cluster.
13630 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13631 uuid=self.group_uuid,
13632 alloc_policy=self.op.alloc_policy,
13633 ndparams=self.op.ndparams,
13634 diskparams=self.op.diskparams,
13635 ipolicy=self.op.ipolicy,
13636 hv_state_static=self.new_hv_state,
13637 disk_state_static=self.new_disk_state)
13639 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13640 del self.remove_locks[locking.LEVEL_NODEGROUP]
13643 class LUGroupAssignNodes(NoHooksLU):
13644 """Logical unit for assigning nodes to groups.
13649 def ExpandNames(self):
13650 # These raise errors.OpPrereqError on their own:
13651 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13652 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13654 # We want to lock all the affected nodes and groups. We have readily
13655 # available the list of nodes, and the *destination* group. To gather the
13656 # list of "source" groups, we need to fetch node information later on.
13657 self.needed_locks = {
13658 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      }
13662 def DeclareLocks(self, level):
13663 if level == locking.LEVEL_NODEGROUP:
13664 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13666 # Try to get all affected nodes' groups without having the group or node
13667 # lock yet. Needs verification later in the code flow.
13668 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13670 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13672 def CheckPrereq(self):
13673 """Check prerequisites.
13676 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13677 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13678 frozenset(self.op.nodes))
13680 expected_locks = (set([self.group_uuid]) |
13681 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13682 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13683 if actual_locks != expected_locks:
13684 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13685 " current groups are '%s', used to be '%s'" %
13686 (utils.CommaJoin(expected_locks),
13687 utils.CommaJoin(actual_locks)))
13689 self.node_data = self.cfg.GetAllNodesInfo()
13690 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13691 instance_data = self.cfg.GetAllInstancesInfo()
13693 if self.group is None:
13694 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13695 (self.op.group_name, self.group_uuid))
13697 (new_splits, previous_splits) = \
13698 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13699 for node in self.op.nodes],
13700 self.node_data, instance_data)
    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
13718 def Exec(self, feedback_fn):
13719 """Assign nodes to a new group.
13722 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13724 self.cfg.AssignGroupNodes(mods)
  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13728 """Check for split instances after a node assignment.
13730 This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
13735 instances that were already split, and remain so after the change.
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.
13740 @type changes: list of (node_name, new_group_uuid) pairs.
13741 @param changes: list of node assignments to consider.
13742 @param node_data: a dict with data for all nodes
13743 @param instance_data: a dict with all instances to consider
13744 @rtype: a two-tuple
13745 @return: a list of instances that were previously okay and result split as a
13746 consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
13750 changed_nodes = dict((node, group) for node, group in changes
13751 if node_data[node].group != group)
13753 all_split_instances = set()
13754 previously_split_instances = set()
13756 def InstanceNodes(instance):
13757 return [instance.primary_node] + list(instance.secondary_nodes)
13759 for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)
13765 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13766 previously_split_instances.add(inst.name)
13768 if len(set(changed_nodes.get(node, node_data[node].group)
13769 for node in instance_nodes)) > 1:
13770 all_split_instances.add(inst.name)
13772 return (list(all_split_instances - previously_split_instances),
13773 list(previously_split_instances & all_split_instances))
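  # Worked example (illustrative): for a DRBD instance on nodes (A, B), both
  # in group g1, changes = [("B", "g2")] returns (["inst"], []), i.e. newly
  # split; if B already was in g2, the same changes return ([], ["inst"]),
  # i.e. the instance remains split across groups.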
13776 class _GroupQuery(_QueryBase):
13777 FIELDS = query.GROUP_FIELDS
13779 def ExpandNames(self, lu):
13780 lu.needed_locks = {}
13782 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13783 self._cluster = lu.cfg.GetClusterInfo()
13784 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())
13795 for name in self.names:
13796 if name in all_uuid:
13797 self.wanted.append(name)
13798 elif name in name_to_uuid:
13799 self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
13805 utils.CommaJoin(missing),
13806 errors.ECODE_NOENT)
  def DeclareLocks(self, lu, level):
    pass
13811 def _GetQueryData(self, lu):
13812 """Computes the list of node groups and their attributes.
13815 do_nodes = query.GQ_NODE in self.requested_data
13816 do_instances = query.GQ_INST in self.requested_data
13818 group_to_nodes = None
13819 group_to_instances = None
13821 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13822 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13823 # latter GetAllInstancesInfo() is not enough, for we have to go through
13824 # instance->node. Hence, we will need to process nodes even if we only need
13825 # instance information.
13826 if do_nodes or do_instances:
13827 all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}
13831 for node in all_nodes.values():
13832 if node.group in group_to_nodes:
13833 group_to_nodes[node.group].append(node.name)
13834 node_to_group[node.name] = node.group
      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)
      if not do_nodes:
        # Do not pass on node information if it was not requested.
        group_to_nodes = None
13849 return query.GroupQueryData(self._cluster,
13850 [self._all_groups[uuid]
13851 for uuid in self.wanted],
13852 group_to_nodes, group_to_instances)
13855 class LUGroupQuery(NoHooksLU):
13856 """Logical unit for querying node groups.
13861 def CheckArguments(self):
13862 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13863 self.op.output_fields, False)
13865 def ExpandNames(self):
13866 self.gq.ExpandNames(self)
13868 def DeclareLocks(self, level):
13869 self.gq.DeclareLocks(self, level)
13871 def Exec(self, feedback_fn):
13872 return self.gq.OldStyleQuery(self)
13875 class LUGroupSetParams(LogicalUnit):
13876 """Modifies the parameters of a node group.
13879 HPATH = "group-modify"
13880 HTYPE = constants.HTYPE_GROUP
13883 def CheckArguments(self):
13886 self.op.diskparams,
13887 self.op.alloc_policy,
13889 self.op.disk_state,
13893 if all_changes.count(None) == len(all_changes):
13894 raise errors.OpPrereqError("Please pass at least one modification",
13895 errors.ECODE_INVAL)
13897 def ExpandNames(self):
13898 # This raises errors.OpPrereqError on its own:
13899 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13901 self.needed_locks = {
13902 locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
13906 self.share_locks[locking.LEVEL_INSTANCE] = 1
13908 def DeclareLocks(self, level):
13909 if level == locking.LEVEL_INSTANCE:
13910 assert not self.needed_locks[locking.LEVEL_INSTANCE]
      # Lock instances optimistically, needs verification once group lock has
      # been acquired
13914 self.needed_locks[locking.LEVEL_INSTANCE] = \
13915 self.cfg.GetNodeGroupInstances(self.group_uuid)
13917 def CheckPrereq(self):
13918 """Check prerequisites.
13921 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13923 # Check if locked instances are still correct
13924 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13926 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13927 cluster = self.cfg.GetClusterInfo()
13929 if self.group is None:
13930 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13931 (self.op.group_name, self.group_uuid))
13933 if self.op.ndparams:
13934 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13935 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13936 self.new_ndparams = new_ndparams
13938 if self.op.diskparams:
13939 self.new_diskparams = dict()
13940 for templ in constants.DISK_TEMPLATES:
13941 if templ not in self.op.diskparams:
13942 self.op.diskparams[templ] = {}
13943 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13944 self.op.diskparams[templ])
13945 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13946 self.new_diskparams[templ] = new_templ_params
13948 if self.op.hv_state:
13949 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13950 self.group.hv_state_static)
13952 if self.op.disk_state:
13953 self.new_disk_state = \
13954 _MergeAndVerifyDiskState(self.op.disk_state,
13955 self.group.disk_state_static)
13957 if self.op.ipolicy:
      self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
                                            self.op.ipolicy,
                                            group_policy=True)
13962 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
13963 inst_filter = lambda inst: inst.name in owned_instances
13964 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
      violations = \
        _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
                                                             self.group),
                                      new_ipolicy, instances)

      if violations:
        self.LogWarning("After the ipolicy change the following instances"
13972 " violate them: %s",
13973 utils.CommaJoin(violations))
13975 def BuildHooksEnv(self):
13976 """Build hooks env.
13980 "GROUP_NAME": self.op.group_name,
13981 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13984 def BuildHooksNodes(self):
13985 """Build hooks nodes.
13988 mn = self.cfg.GetMasterNode()
13989 return ([mn], [mn])
13991 def Exec(self, feedback_fn):
13992 """Modifies the node group.
13997 if self.op.ndparams:
13998 self.group.ndparams = self.new_ndparams
13999 result.append(("ndparams", str(self.group.ndparams)))
14001 if self.op.diskparams:
14002 self.group.diskparams = self.new_diskparams
14003 result.append(("diskparams", str(self.group.diskparams)))
14005 if self.op.alloc_policy:
14006 self.group.alloc_policy = self.op.alloc_policy
14008 if self.op.hv_state:
14009 self.group.hv_state_static = self.new_hv_state
14011 if self.op.disk_state:
14012 self.group.disk_state_static = self.new_disk_state
14014 if self.op.ipolicy:
14015 self.group.ipolicy = self.new_ipolicy
    self.cfg.Update(self.group, feedback_fn)

    return result
14021 class LUGroupRemove(LogicalUnit):
14022 HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False
14026 def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
14028 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14029 self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
14033 def CheckPrereq(self):
14034 """Check prerequisites.
14036 This checks that the given group name exists as a node group, that is
14037 empty (i.e., contains no nodes), and that is not the last group of the
14041 # Verify that the group is empty.
14042 group_nodes = [node.name
14043 for node in self.cfg.GetAllNodesInfo().values()
14044 if node.group == self.group_uuid]
    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
14050 utils.CommaJoin(utils.NiceSort(group_nodes))),
14051 errors.ECODE_STATE)
14053 # Verify the cluster would not be left group-less.
14054 if len(self.cfg.GetNodeGroupList()) == 1:
14055 raise errors.OpPrereqError("Group '%s' is the only group,"
14056 " cannot be removed" %
14057 self.op.group_name,
14058 errors.ECODE_STATE)
14060 def BuildHooksEnv(self):
14061 """Build hooks env.
14065 "GROUP_NAME": self.op.group_name,
14068 def BuildHooksNodes(self):
14069 """Build hooks nodes.
14072 mn = self.cfg.GetMasterNode()
14073 return ([mn], [mn])
14075 def Exec(self, feedback_fn):
14076 """Remove the node group.
14080 self.cfg.RemoveNodeGroup(self.group_uuid)
14081 except errors.ConfigurationError:
14082 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14083 (self.op.group_name, self.group_uuid))
14085 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14088 class LUGroupRename(LogicalUnit):
14089 HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False
14093 def ExpandNames(self):
14094 # This raises errors.OpPrereqError on its own:
14095 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14097 self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
14101 def CheckPrereq(self):
14102 """Check prerequisites.
14104 Ensures requested new name is not yet used.
14108 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14109 except errors.OpPrereqError:
14112 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14113 " node group (UUID: %s)" %
14114 (self.op.new_name, new_name_uuid),
14115 errors.ECODE_EXISTS)
14117 def BuildHooksEnv(self):
14118 """Build hooks env.
14122 "OLD_NAME": self.op.group_name,
14123 "NEW_NAME": self.op.new_name,
14126 def BuildHooksNodes(self):
14127 """Build hooks nodes.
14130 mn = self.cfg.GetMasterNode()
14132 all_nodes = self.cfg.GetAllNodesInfo()
14133 all_nodes.pop(mn, None)
    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
14137 if node.group == self.group_uuid)
14139 return (run_nodes, run_nodes)
14141 def Exec(self, feedback_fn):
14142 """Rename the node group.
14145 group = self.cfg.GetNodeGroup(self.group_uuid)
14148 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14149 (self.op.group_name, self.group_uuid))
14151 group.name = self.op.new_name
14152 self.cfg.Update(group, feedback_fn)
14154 return self.op.new_name
14157 class LUGroupEvacuate(LogicalUnit):
14158 HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False
14162 def ExpandNames(self):
14163 # This raises errors.OpPrereqError on its own:
14164 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14166 if self.op.target_groups:
14167 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14168 self.op.target_groups)
    else:
      self.req_target_uuids = []
14172 if self.group_uuid in self.req_target_uuids:
14173 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14174 " as a target group (targets are %s)" %
14176 utils.CommaJoin(self.req_target_uuids)),
14177 errors.ECODE_INVAL)
14179 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14181 self.share_locks = _ShareAll()
14182 self.needed_locks = {
14183 locking.LEVEL_INSTANCE: [],
14184 locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }
14188 def DeclareLocks(self, level):
14189 if level == locking.LEVEL_INSTANCE:
14190 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14192 # Lock instances optimistically, needs verification once node and group
14193 # locks have been acquired
14194 self.needed_locks[locking.LEVEL_INSTANCE] = \
14195 self.cfg.GetNodeGroupInstances(self.group_uuid)
14197 elif level == locking.LEVEL_NODEGROUP:
14198 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14200 if self.req_target_uuids:
14201 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14203 # Lock all groups used by instances optimistically; this requires going
14204 # via the node before it's locked, requiring verification later on
14205 lock_groups.update(group_uuid
14206 for instance_name in
                           self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET
14214 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14216 elif level == locking.LEVEL_NODE:
14217 # This will only lock the nodes in the group to be evacuated which
14218 # contain actual instances
14219 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14220 self._LockInstancesNodes()
14222 # Lock all nodes in group to be evacuated and target groups
14223 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14224 assert self.group_uuid in owned_groups
14225 member_nodes = [node_name
14226 for group in owned_groups
14227 for node_name in self.cfg.GetNodeGroup(group).members]
14228 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14230 def CheckPrereq(self):
14231 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14232 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14233 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14235 assert owned_groups.issuperset(self.req_target_uuids)
14236 assert self.group_uuid in owned_groups
14238 # Check if locked instances are still correct
14239 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14241 # Get instance information
14242 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14244 # Check if node groups for locked instances are still correct
14245 _CheckInstancesNodeGroups(self.cfg, self.instances,
14246 owned_groups, owned_nodes, self.group_uuid)
14248 if self.req_target_uuids:
14249 # User requested specific target groups
14250 self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]
14256 if not self.target_uuids:
14257 raise errors.OpPrereqError("There are no possible target groups",
14258 errors.ECODE_INVAL)
14260 def BuildHooksEnv(self):
14261 """Build hooks env.
14265 "GROUP_NAME": self.op.group_name,
14266 "TARGET_GROUPS": " ".join(self.target_uuids),
14269 def BuildHooksNodes(self):
14270 """Build hooks nodes.
14273 mn = self.cfg.GetMasterNode()
14275 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14277 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14279 return (run_nodes, run_nodes)
14281 def Exec(self, feedback_fn):
14282 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14284 assert self.group_uuid not in self.target_uuids
14286 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14287 instances=instances, target_groups=self.target_uuids)
14289 ial.Run(self.op.iallocator)
14291 if not ial.success:
14292 raise errors.OpPrereqError("Can't compute group evacuation using"
14293 " iallocator '%s': %s" %
14294 (self.op.iallocator, ial.info),
14295 errors.ECODE_NORES)
14297 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14299 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14300 len(jobs), self.op.group_name)
14302 return ResultWithJobs(jobs)
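# Illustrative note: group evacuation reuses the CHG_GROUP iallocator mode
# for all instances of the group, so a hypothetical "gnt-group evacuate
# group1" request fans out into one change-group job set per instance, as
# reported by the LogInfo message above.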
14305 class TagsLU(NoHooksLU): # pylint: disable=W0223
14306 """Generic tags LU.
  This is an abstract class which is the parent of all the other tags LUs.

  """
14311 def ExpandNames(self):
14312 self.group_uuid = None
14313 self.needed_locks = {}
14315 if self.op.kind == constants.TAG_NODE:
14316 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14317 lock_level = locking.LEVEL_NODE
14318 lock_name = self.op.name
14319 elif self.op.kind == constants.TAG_INSTANCE:
14320 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14321 lock_level = locking.LEVEL_INSTANCE
14322 lock_name = self.op.name
14323 elif self.op.kind == constants.TAG_NODEGROUP:
14324 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14325 lock_level = locking.LEVEL_NODEGROUP
      lock_name = self.group_uuid
    else:
      lock_level = None
      lock_name = None
14331 if lock_level and getattr(self.op, "use_locking", True):
14332 self.needed_locks[lock_level] = lock_name
14334 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14335 # not possible to acquire the BGL based on opcode parameters)
14337 def CheckPrereq(self):
14338 """Check prerequisites.
14341 if self.op.kind == constants.TAG_CLUSTER:
14342 self.target = self.cfg.GetClusterInfo()
14343 elif self.op.kind == constants.TAG_NODE:
14344 self.target = self.cfg.GetNodeInfo(self.op.name)
14345 elif self.op.kind == constants.TAG_INSTANCE:
14346 self.target = self.cfg.GetInstanceInfo(self.op.name)
14347 elif self.op.kind == constants.TAG_NODEGROUP:
14348 self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14351 str(self.op.kind), errors.ECODE_INVAL)
14354 class LUTagsGet(TagsLU):
14355 """Returns the tags of a given object.
14360 def ExpandNames(self):
14361 TagsLU.ExpandNames(self)
14363 # Share locks as this is only a read operation
14364 self.share_locks = _ShareAll()
14366 def Exec(self, feedback_fn):
14367 """Returns the tag list.
14370 return list(self.target.GetTags())
14373 class LUTagsSearch(NoHooksLU):
14374 """Searches the tags for a given pattern.
14379 def ExpandNames(self):
14380 self.needed_locks = {}
14382 def CheckPrereq(self):
14383 """Check prerequisites.
14385 This checks the pattern passed for validity by compiling it.
14389 self.re = re.compile(self.op.pattern)
14390 except re.error, err:
14391 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14392 (self.op.pattern, err), errors.ECODE_INVAL)
14394 def Exec(self, feedback_fn):
14395 """Returns the tag list.
14399 tgts = [("/cluster", cfg.GetClusterInfo())]
14400 ilist = cfg.GetAllInstancesInfo().values()
14401 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14402 nlist = cfg.GetAllNodesInfo().values()
14403 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14404 tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
14408 for tag in target.GetTags():
14409 if self.re.search(tag):
          results.append((path, tag))

    return results
14414 class LUTagsSet(TagsLU):
14415 """Sets a tag on a given object.
14420 def CheckPrereq(self):
14421 """Check prerequisites.
    This checks the type and length of the tag name and value.

    """
14426 TagsLU.CheckPrereq(self)
14427 for tag in self.op.tags:
14428 objects.TaggableObject.ValidateTag(tag)
  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
14435 for tag in self.op.tags:
14436 self.target.AddTag(tag)
14437 except errors.TagError, err:
14438 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14439 self.cfg.Update(self.target, feedback_fn)
14442 class LUTagsDel(TagsLU):
14443 """Delete a list of tags from a given object.
14448 def CheckPrereq(self):
14449 """Check prerequisites.
    This checks that we have the given tag.

    """
14454 TagsLU.CheckPrereq(self)
14455 for tag in self.op.tags:
14456 objects.TaggableObject.ValidateTag(tag)
14457 del_tags = frozenset(self.op.tags)
14458 cur_tags = self.target.GetTags()
    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
14463 raise errors.OpPrereqError("Tag(s) %s not found" %
14464 (utils.CommaJoin(diff_names), ),
14465 errors.ECODE_NOENT)
14467 def Exec(self, feedback_fn):
14468 """Remove the tag from the object.
14471 for tag in self.op.tags:
14472 self.target.RemoveTag(tag)
14473 self.cfg.Update(self.target, feedback_fn)
14476 class LUTestDelay(NoHooksLU):
14477 """Sleep for a specified amount of time.
  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False
14485 def ExpandNames(self):
14486 """Expand names and set required locks.
14488 This expands the node list, if any.
14491 self.needed_locks = {}
14492 if self.op.on_nodes:
14493 # _GetWantedNodes can be used here, but is not always appropriate to use
14494 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14495 # more information.
14496 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14497 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14499 def _TestDelay(self):
14500 """Do the actual sleep.
14503 if self.op.on_master:
14504 if not utils.TestDelay(self.op.duration):
14505 raise errors.OpExecError("Error during master delay test")
14506 if self.op.on_nodes:
14507 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14508 for node, node_result in result.items():
14509 node_result.Raise("Failure during rpc call to node %s" % node)
14511 def Exec(self, feedback_fn):
14512 """Execute the test delay opcode, with the wanted repetitions.
14515 if self.op.repeat == 0:
14518 top_value = self.op.repeat - 1
14519 for i in range(self.op.repeat):
14520 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
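# Example (illustrative): this LU backs the debugging command
#   gnt-debug delay -n node1.example.com 3
# which sleeps three seconds on the master and on the given node, repeating
# when the opcode's repeat parameter is set.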
14524 class LUTestJqueue(NoHooksLU):
14525 """Utility LU to test some aspects of the job queue.
14530 # Must be lower than default timeout for WaitForJobChange to see whether it
14531 # notices changed jobs
14532 _CLIENT_CONNECT_TIMEOUT = 20.0
14533 _CLIENT_CONFIRM_TIMEOUT = 60.0
  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
14537 """Opens a Unix socket and waits for another program to connect.
14540 @param cb: Callback to send socket name to client
14541 @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()
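  # Client-side counterpart (illustrative sketch, not part of this module):
  #   sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  #   sock.connect(sockname)  # unblocks the accept() above
  #   sock.close()            # unblocks the conn.recv(1) above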
14585 def _SendNotification(self, test, arg, sockname):
14586 """Sends a notification to the client.
14589 @param test: Test name
14590 @param arg: Test argument (depends on test)
14591 @type sockname: string
    @param sockname: Socket path

    """
14595 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14597 def _Notify(self, prereq, test, arg):
14598 """Notifies the client of a test.
14601 @param prereq: Whether this is a prereq-phase test
14603 @param test: Test name
14604 @param arg: Test argument (depends on test)
14608 errcls = errors.OpPrereqError
14610 errcls = errors.OpExecError
14612 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14616 def CheckArguments(self):
14617 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14618 self.expandnames_calls = 0
14620 def ExpandNames(self):
14621 checkargs_calls = getattr(self, "checkargs_calls", 0)
14622 if checkargs_calls < 1:
14623 raise errors.ProgrammerError("CheckArguments was not called")
14625 self.expandnames_calls += 1
14627 if self.op.notify_waitlock:
14628 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14630 self.LogInfo("Expanding names")
14632 # Get lock on master node (just to get a lock, not for a particular reason)
14633 self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }
14637 def Exec(self, feedback_fn):
14638 if self.expandnames_calls < 1:
14639 raise errors.ProgrammerError("ExpandNames was not called")
14641 if self.op.notify_exec:
14642 self._Notify(False, constants.JQT_EXEC, None)
14644 self.LogInfo("Executing")
14646 if self.op.log_messages:
14647 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14648 for idx, msg in enumerate(self.op.log_messages):
14649 self.LogInfo("Sending log message %s", idx + 1)
14650 feedback_fn(constants.JQT_MSGPREFIX + msg)
14651 # Report how many test messages have been sent
14652 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14654 if self.op.fail:
14655 raise errors.OpExecError("Opcode failure was requested")
14657 return True
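# Example (annotation, not part of the module): with log_messages=["a", "b"]
# a listening client is first told that two messages follow (JQT_STARTMSG),
# then receives two feedback lines prefixed with constants.JQT_MSGPREFIX, and
# finally a JQT_LOGMSG notification carrying the count 2.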
14660 class IAllocator(object):
14661 """IAllocator framework.
14663 An IAllocator instance has four sets of attributes:
14664 - cfg that is needed to query the cluster
14665 - input data (all members of the _KEYS class attribute are required)
14666 - four buffer attributes (in|out_data|text), that represent the
14667 input (to the external script) in text and data structure format,
14668 and the output from it, again in two formats
14669 - the result variables from the script (success, info, result) for
14670 easy usage
14673 # pylint: disable=R0902
14674 # lots of instance attributes
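# Example (annotation, not part of the module): the typical lifecycle, as
# exercised by LUTestAllocator.Exec below; names and values are illustrative.
#
#   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_RELOC,
#                    name="inst1.example.com", relocate_from=["node2"])
#   ial.Run("hail")            # runs the external script on the master
#   if not ial.success:
#     raise errors.OpExecError("Relocation failed: %s" % ial.info)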
14676 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14677 self.cfg = cfg
14678 self.rpc = rpc_runner
14679 # init buffer variables
14680 self.in_text = self.out_text = self.in_data = self.out_data = None
14681 # init all input fields so that pylint is happy
14682 self.mode = mode
14683 self.memory = self.disks = self.disk_template = self.spindle_use = None
14684 self.os = self.tags = self.nics = self.vcpus = None
14685 self.hypervisor = None
14686 self.relocate_from = None
14687 self.name = None
14688 self.instances = None
14689 self.evac_mode = None
14690 self.target_groups = []
14692 self.required_nodes = None
14693 # init result fields
14694 self.success = self.info = self.result = None
14696 try:
14697 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14698 except KeyError:
14699 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14700 " IAllocator" % self.mode)
14702 keyset = [n for (n, _) in keydata]
14704 for key in kwargs:
14705 if key not in keyset:
14706 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14707 " IAllocator" % key)
14708 setattr(self, key, kwargs[key])
14710 for key in keyset:
14711 if key not in kwargs:
14712 raise errors.ProgrammerError("Missing input parameter '%s' to"
14713 " IAllocator" % key)
14714 self._BuildInputData(compat.partial(fn, self), keydata)
14716 def _ComputeClusterData(self):
14717 """Compute the generic allocator input data.
14719 This is the data that is independent of the actual operation.
14722 cfg = self.cfg
14723 cluster_info = cfg.GetClusterInfo()
14725 data = {
14726 "version": constants.IALLOCATOR_VERSION,
14727 "cluster_name": cfg.GetClusterName(),
14728 "cluster_tags": list(cluster_info.GetTags()),
14729 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14730 "ipolicy": cluster_info.ipolicy,
14731 }
14732 ninfo = cfg.GetAllNodesInfo()
14733 iinfo = cfg.GetAllInstancesInfo().values()
14734 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14737 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14739 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14740 hypervisor_name = self.hypervisor
14741 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14742 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14743 else:
14744 hypervisor_name = cluster_info.primary_hypervisor
14746 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14747 [hypervisor_name])
14748 node_iinfo = \
14749 self.rpc.call_all_instances_info(node_list,
14750 cluster_info.enabled_hypervisors)
14752 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14754 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14755 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14756 i_list, config_ndata)
14757 assert len(data["nodes"]) == len(ninfo), \
14758 "Incomplete node data computed"
14760 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14762 self.in_data = data
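# Example (annotation, not part of the module): after _ComputeClusterData,
# self.in_data has this top-level shape (values illustrative):
#
#   {"version": constants.IALLOCATOR_VERSION, "cluster_name": "...",
#    "cluster_tags": [...], "enabled_hypervisors": [...], "ipolicy": {...},
#    "nodegroups": {...}, "nodes": {...}, "instances": {...}}
#
# _BuildInputData later adds the per-mode "request" key before serializing.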
14764 @staticmethod
14765 def _ComputeNodeGroupData(cfg):
14766 """Compute node groups data.
14769 cluster = cfg.GetClusterInfo()
14770 ng = dict((guuid, {
14771 "name": gdata.name,
14772 "alloc_policy": gdata.alloc_policy,
14773 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14774 })
14775 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14777 return ng
14779 @staticmethod
14780 def _ComputeBasicNodeData(cfg, node_cfg):
14781 """Compute global node data.
14783 @rtype: dict
14784 @returns: a dict mapping each node name to its config-derived data dict
14787 # fill in static (config-based) values
14788 node_results = dict((ninfo.name, {
14789 "tags": list(ninfo.GetTags()),
14790 "primary_ip": ninfo.primary_ip,
14791 "secondary_ip": ninfo.secondary_ip,
14792 "offline": ninfo.offline,
14793 "drained": ninfo.drained,
14794 "master_candidate": ninfo.master_candidate,
14795 "group": ninfo.group,
14796 "master_capable": ninfo.master_capable,
14797 "vm_capable": ninfo.vm_capable,
14798 "ndparams": cfg.GetNdParams(ninfo),
14799 })
14800 for ninfo in node_cfg.values())
14802 return node_results
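# Example (annotation, not part of the module): a single entry of the
# mapping returned above, with illustrative values:
#
#   "node1.example.com": {"tags": [], "primary_ip": "192.0.2.1",
#    "secondary_ip": "198.51.100.1", "offline": False, "drained": False,
#    "master_candidate": True, "group": "<group uuid>",
#    "master_capable": True, "vm_capable": True, "ndparams": {...}}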
14804 @staticmethod
14805 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14806 node_results):
14807 """Compute global node data.
14809 @param node_results: the basic node structures as filled from the config
14812 #TODO(dynmem): compute the right data on MAX and MIN memory
14813 # make a copy of the current dict
14814 node_results = dict(node_results)
14815 for nname, nresult in node_data.items():
14816 assert nname in node_results, "Missing basic data for node %s" % nname
14817 ninfo = node_cfg[nname]
14819 if not (ninfo.offline or ninfo.drained):
14820 nresult.Raise("Can't get data for node %s" % nname)
14821 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14822 nname)
14823 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14825 for attr in ["memory_total", "memory_free", "memory_dom0",
14826 "vg_size", "vg_free", "cpu_total"]:
14827 if attr not in remote_info:
14828 raise errors.OpExecError("Node '%s' didn't return attribute"
14829 " '%s'" % (nname, attr))
14830 if not isinstance(remote_info[attr], int):
14831 raise errors.OpExecError("Node '%s' returned invalid value"
14832 " for '%s': %s" %
14833 (nname, attr, remote_info[attr]))
14834 # compute memory used by primary instances
14835 i_p_mem = i_p_up_mem = 0
14836 for iinfo, beinfo in i_list:
14837 if iinfo.primary_node == nname:
14838 i_p_mem += beinfo[constants.BE_MAXMEM]
14839 if iinfo.name not in node_iinfo[nname].payload:
14840 i_used_mem = 0
14841 else:
14842 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14843 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14844 remote_info["memory_free"] -= max(0, i_mem_diff)
14846 if iinfo.admin_state == constants.ADMINST_UP:
14847 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14849 # compute memory used by instances
14850 pnr_dyn = {
14851 "total_memory": remote_info["memory_total"],
14852 "reserved_memory": remote_info["memory_dom0"],
14853 "free_memory": remote_info["memory_free"],
14854 "total_disk": remote_info["vg_size"],
14855 "free_disk": remote_info["vg_free"],
14856 "total_cpus": remote_info["cpu_total"],
14857 "i_pri_memory": i_p_mem,
14858 "i_pri_up_memory": i_p_up_mem,
14859 }
14860 pnr_dyn.update(node_results[nname])
14861 node_results[nname] = pnr_dyn
14863 return node_results
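# Example (annotation, not part of the module): the free-memory correction
# above sizes nodes against each instance's potential footprint: an instance
# with maxmem 1024 that currently uses 768 on its node causes
# max(0, 1024 - 768) = 256 to be subtracted from the node's reported free
# memory.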
14865 @staticmethod
14866 def _ComputeInstanceData(cluster_info, i_list):
14867 """Compute global instance data.
14870 instance_data = {}
14871 for iinfo, beinfo in i_list:
14872 nic_data = []
14873 for nic in iinfo.nics:
14874 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14875 nic_dict = {
14876 "mac": nic.mac,
14877 "ip": nic.ip,
14878 "mode": filled_params[constants.NIC_MODE],
14879 "link": filled_params[constants.NIC_LINK],
14880 }
14881 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14882 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14883 nic_data.append(nic_dict)
14884 pir = {
14885 "tags": list(iinfo.GetTags()),
14886 "admin_state": iinfo.admin_state,
14887 "vcpus": beinfo[constants.BE_VCPUS],
14888 "memory": beinfo[constants.BE_MAXMEM],
14889 "spindle_use": beinfo[constants.BE_SPINDLE_USE],
14890 "os": iinfo.os,
14891 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14892 "nics": nic_data,
14893 "disks": [{constants.IDISK_SIZE: dsk.size,
14894 constants.IDISK_MODE: dsk.mode}
14895 for dsk in iinfo.disks],
14896 "disk_template": iinfo.disk_template,
14897 "hypervisor": iinfo.hypervisor,
14898 }
14899 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14900 pir["disks"])
14901 instance_data[iinfo.name] = pir
14903 return instance_data
14905 def _AddNewInstance(self):
14906 """Add new instance data to allocator structure.
14908 This in combination with _ComputeClusterData will create the
14909 correct structure needed as input for the allocator.
14911 The checks for the completeness of the opcode must have already been
14912 done.
14915 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14917 if self.disk_template in constants.DTS_INT_MIRROR:
14918 self.required_nodes = 2
14919 else:
14920 self.required_nodes = 1
14922 request = {
14923 "name": self.name,
14924 "disk_template": self.disk_template,
14925 "tags": self.tags,
14926 "os": self.os,
14927 "vcpus": self.vcpus,
14928 "memory": self.memory,
14929 "spindle_use": self.spindle_use,
14930 "disks": self.disks,
14931 "disk_space_total": disk_space,
14932 "nics": self.nics,
14933 "required_nodes": self.required_nodes,
14934 "hypervisor": self.hypervisor,
14935 }
14937 return request
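# Example (annotation, not part of the module): the request assembled above
# for a toy DRBD instance; all values are illustrative, and disk_space_total
# includes the DRBD metadata overhead added by _ComputeDiskSize.
#
#   {"name": "inst1.example.com", "disk_template": "drbd", "tags": [],
#    "os": "debian-image", "vcpus": 1, "memory": 512, "spindle_use": 1,
#    "disks": [{"size": 1024, "mode": "rw"}], "disk_space_total": 1152,
#    "nics": [], "required_nodes": 2, "hypervisor": "xen-pvm"}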
14939 def _AddRelocateInstance(self):
14940 """Add relocate instance data to allocator structure.
14942 This in combination with _ComputeClusterData will create the
14943 correct structure needed as input for the allocator.
14945 The checks for the completeness of the opcode must have already been
14946 done.
14949 instance = self.cfg.GetInstanceInfo(self.name)
14950 if instance is None:
14951 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14952 " IAllocator" % self.name)
14954 if instance.disk_template not in constants.DTS_MIRRORED:
14955 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14956 errors.ECODE_INVAL)
14958 if instance.disk_template in constants.DTS_INT_MIRROR and \
14959 len(instance.secondary_nodes) != 1:
14960 raise errors.OpPrereqError("Instance has not exactly one secondary node",
14961 errors.ECODE_STATE)
14963 self.required_nodes = 1
14964 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14965 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14967 request = {
14968 "name": self.name,
14969 "disk_space_total": disk_space,
14970 "required_nodes": self.required_nodes,
14971 "relocate_from": self.relocate_from,
14972 }
14973 return request
14975 def _AddNodeEvacuate(self):
14976 """Get data for node-evacuate requests.
14979 return {
14980 "instances": self.instances,
14981 "evac_mode": self.evac_mode,
14982 }
14984 def _AddChangeGroup(self):
14985 """Get data for group change requests.
14988 return {
14989 "instances": self.instances,
14990 "target_groups": self.target_groups,
14991 }
14993 def _BuildInputData(self, fn, keydata):
14994 """Build input data structures.
14997 self._ComputeClusterData()
14999 request = fn()
15000 request["type"] = self.mode
15001 for keyname, keytype in keydata:
15002 if keyname not in request:
15003 raise errors.ProgrammerError("Request parameter %s is missing" %
15004 keyname)
15005 val = request[keyname]
15006 if not keytype(val):
15007 raise errors.ProgrammerError("Request parameter %s doesn't pass"
15008 " validation, value %s, expected"
15009 " type %s" % (keyname, val, keytype))
15010 self.in_data["request"] = request
15012 self.in_text = serializer.Dump(self.in_data)
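# Example (annotation, not part of the module): the keydata validation above
# boils down to this pattern; plain lambdas stand in for the ht.T* validators
# of _MODE_DATA and all names are illustrative.
#
#   def _ExampleCheckRequest(request, keydata):
#     for keyname, keytype in keydata:
#       assert keyname in request and keytype(request[keyname])
#
#   _ExampleCheckRequest({"name": "inst1", "memory": 512},
#                        [("name", lambda v: isinstance(v, str)),
#                         ("memory", lambda v: isinstance(v, int))])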
15014 _STRING_LIST = ht.TListOf(ht.TString)
15015 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
15016 # pylint: disable=E1101
15017 # Class '...' has no 'OP_ID' member
15018 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
15019 opcodes.OpInstanceMigrate.OP_ID,
15020 opcodes.OpInstanceReplaceDisks.OP_ID])
15021 })))
15023 _NEVAC_MOVED = \
15024 ht.TListOf(ht.TAnd(ht.TIsLength(3),
15025 ht.TItems([ht.TNonEmptyString,
15026 ht.TNonEmptyString,
15027 ht.TListOf(ht.TNonEmptyString),
15028 ])))
15029 _NEVAC_FAILED = \
15030 ht.TListOf(ht.TAnd(ht.TIsLength(2),
15031 ht.TItems([ht.TNonEmptyString,
15032 ht.TMaybeString,
15033 ])))
15034 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
15035 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
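# Example (annotation, not part of the module): a well-formed node-evacuation
# result matching _NEVAC_RESULT; values are illustrative. It is a triple of
# moved instances, failed instances and the jobs to submit:
#
#   ([["inst1.example.com", "group2", ["node3.example.com"]]],
#    [["inst2.example.com", "instance has no mirrored disks"]],
#    [[{"OP_ID": "OP_INSTANCE_MIGRATE", ...}]])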
15037 _MODE_DATA = {
15038 constants.IALLOCATOR_MODE_ALLOC:
15039 (_AddNewInstance, [
15041 ("name", ht.TString),
15042 ("memory", ht.TInt),
15043 ("spindle_use", ht.TInt),
15044 ("disks", ht.TListOf(ht.TDict)),
15045 ("disk_template", ht.TString),
15046 ("os", ht.TString),
15047 ("tags", _STRING_LIST),
15048 ("nics", ht.TListOf(ht.TDict)),
15049 ("vcpus", ht.TInt),
15050 ("hypervisor", ht.TString),
15051 ], ht.TList),
15052 constants.IALLOCATOR_MODE_RELOC:
15053 (_AddRelocateInstance,
15054 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
15055 ht.TList),
15056 constants.IALLOCATOR_MODE_NODE_EVAC:
15057 (_AddNodeEvacuate, [
15058 ("instances", _STRING_LIST),
15059 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
15060 ], _NEVAC_RESULT),
15061 constants.IALLOCATOR_MODE_CHG_GROUP:
15062 (_AddChangeGroup, [
15063 ("instances", _STRING_LIST),
15064 ("target_groups", _STRING_LIST),
15065 ], _NEVAC_RESULT),
15066 }
15068 def Run(self, name, validate=True, call_fn=None):
15069 """Run an instance allocator and return the results.
15072 if call_fn is None:
15073 call_fn = self.rpc.call_iallocator_runner
15075 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
15076 result.Raise("Failure while running the iallocator script")
15078 self.out_text = result.payload
15079 if validate:
15080 self._ValidateResult()
15082 def _ValidateResult(self):
15083 """Process the allocator results.
15085 This will process and if successful save the result in
15086 self.out_data and the other parameters.
15089 try:
15090 rdict = serializer.Load(self.out_text)
15091 except Exception, err:
15092 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
15094 if not isinstance(rdict, dict):
15095 raise errors.OpExecError("Can't parse iallocator results: not a dict")
15097 # TODO: remove backwards compatibility in later versions
15098 if "nodes" in rdict and "result" not in rdict:
15099 rdict["result"] = rdict["nodes"]
15100 del rdict["nodes"]
15102 for key in "success", "info", "result":
15103 if key not in rdict:
15104 raise errors.OpExecError("Can't parse iallocator results:"
15105 " missing key '%s'" % key)
15106 setattr(self, key, rdict[key])
15108 if not self._result_check(self.result):
15109 raise errors.OpExecError("Iallocator returned invalid result,"
15110 " expected %s, got %s" %
15111 (self._result_check, self.result))
15114 if self.mode == constants.IALLOCATOR_MODE_RELOC:
15115 assert self.relocate_from is not None
15116 assert self.required_nodes == 1
15118 node2group = dict((name, ndata["group"])
15119 for (name, ndata) in self.in_data["nodes"].items())
15121 fn = compat.partial(self._NodesToGroups, node2group,
15122 self.in_data["nodegroups"])
15124 instance = self.cfg.GetInstanceInfo(self.name)
15125 request_groups = fn(self.relocate_from + [instance.primary_node])
15126 result_groups = fn(rdict["result"] + [instance.primary_node])
15128 if self.success and not set(result_groups).issubset(request_groups):
15129 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15130 " differ from original groups (%s)" %
15131 (utils.CommaJoin(result_groups),
15132 utils.CommaJoin(request_groups)))
15134 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15135 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15137 self.out_data = rdict
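# Example (annotation, not part of the module): the smallest serialized reply
# that passes the checks above for an allocation request (values
# illustrative); "result" must additionally satisfy the mode's _result_check:
#
#   {"success": true, "info": "allocation successful",
#    "result": ["node2.example.com"]}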
15139 @staticmethod
15140 def _NodesToGroups(node2group, groups, nodes):
15141 """Returns a list of unique group names for a list of nodes.
15143 @type node2group: dict
15144 @param node2group: Map from node name to group UUID
15145 @type groups: dict
15146 @param groups: Group information
15147 @type nodes: list of strings
15148 @param nodes: Node names
15151 result = set()
15153 for node in nodes:
15154 try:
15155 group_uuid = node2group[node]
15156 except KeyError:
15157 # Ignore unknown node
15158 pass
15159 else:
15160 try:
15161 group = groups[group_uuid]
15162 except KeyError:
15163 # Can't find group, let's use UUID
15164 group_name = group_uuid
15165 else:
15166 group_name = group["name"]
15168 result.add(group_name)
15170 return sorted(result)
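# Example (annotation, not part of the module): _NodesToGroups resolves node
# names to unique, sorted group names; unknown nodes are skipped and a group
# missing from the mapping falls back to its UUID:
#
#   IAllocator._NodesToGroups({"node1": "uuid-a", "node2": "uuid-b"},
#                             {"uuid-a": {"name": "default"}},
#                             ["node1", "node2", "unknown"])
#   => ["default", "uuid-b"]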
15173 class LUTestAllocator(NoHooksLU):
15174 """Run allocator tests.
15176 This LU runs the allocator tests.
15179 def CheckPrereq(self):
15180 """Check prerequisites.
15182 This checks the opcode parameters depending on the direction and mode of the test.
15185 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15186 for attr in ["memory", "disks", "disk_template",
15187 "os", "tags", "nics", "vcpus"]:
15188 if not hasattr(self.op, attr):
15189 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15190 attr, errors.ECODE_INVAL)
15191 iname = self.cfg.ExpandInstanceName(self.op.name)
15192 if iname is not None:
15193 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15194 iname, errors.ECODE_EXISTS)
15195 if not isinstance(self.op.nics, list):
15196 raise errors.OpPrereqError("Invalid parameter 'nics'",
15197 errors.ECODE_INVAL)
15198 if not isinstance(self.op.disks, list):
15199 raise errors.OpPrereqError("Invalid parameter 'disks'",
15200 errors.ECODE_INVAL)
15201 for row in self.op.disks:
15202 if (not isinstance(row, dict) or
15203 constants.IDISK_SIZE not in row or
15204 not isinstance(row[constants.IDISK_SIZE], int) or
15205 constants.IDISK_MODE not in row or
15206 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15207 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15208 " parameter", errors.ECODE_INVAL)
15209 if self.op.hypervisor is None:
15210 self.op.hypervisor = self.cfg.GetHypervisorType()
15211 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15212 fname = _ExpandInstanceName(self.cfg, self.op.name)
15213 self.op.name = fname
15214 self.relocate_from = \
15215 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15216 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15217 constants.IALLOCATOR_MODE_NODE_EVAC):
15218 if not self.op.instances:
15219 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15220 self.op.instances = _GetWantedInstances(self, self.op.instances)
15221 else:
15222 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15223 self.op.mode, errors.ECODE_INVAL)
15225 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15226 if self.op.allocator is None:
15227 raise errors.OpPrereqError("Missing allocator name",
15228 errors.ECODE_INVAL)
15229 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15230 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15231 self.op.direction, errors.ECODE_INVAL)
15233 def Exec(self, feedback_fn):
15234 """Run the allocator test.
15237 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15238 ial = IAllocator(self.cfg, self.rpc,
15239 mode=self.op.mode,
15240 name=self.op.name,
15241 memory=self.op.memory,
15242 disks=self.op.disks,
15243 disk_template=self.op.disk_template,
15244 os=self.op.os,
15245 tags=self.op.tags,
15246 nics=self.op.nics,
15247 vcpus=self.op.vcpus,
15248 hypervisor=self.op.hypervisor,
15249 spindle_use=self.op.spindle_use)
15250 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15251 ial = IAllocator(self.cfg, self.rpc,
15252 mode=self.op.mode,
15253 name=self.op.name,
15254 relocate_from=list(self.relocate_from),
15255 )
15256 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15257 ial = IAllocator(self.cfg, self.rpc,
15258 mode=self.op.mode,
15259 instances=self.op.instances,
15260 target_groups=self.op.target_groups)
15261 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15262 ial = IAllocator(self.cfg, self.rpc,
15263 mode=self.op.mode,
15264 instances=self.op.instances,
15265 evac_mode=self.op.evac_mode)
15266 else:
15267 raise errors.ProgrammerError("Unhandled mode '%s' in"
15268 " LUTestAllocator.Exec" % self.op.mode)
15270 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15271 result = ial.in_text
15272 else:
15273 ial.Run(self.op.allocator, validate=False)
15274 result = ial.out_text
15276 return result
15278 #: Query type implementations
15279 _QUERY_IMPL = {
15280 constants.QR_CLUSTER: _ClusterQuery,
15281 constants.QR_INSTANCE: _InstanceQuery,
15282 constants.QR_NODE: _NodeQuery,
15283 constants.QR_GROUP: _GroupQuery,
15284 constants.QR_OS: _OsQuery,
15285 constants.QR_EXPORT: _ExportQuery,
15286 }
15288 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15291 def _GetQueryImplementation(name):
15292 """Returns the implementation for a query type.
15294 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15297 try:
15298 return _QUERY_IMPL[name]
15299 except KeyError:
15300 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15301 errors.ECODE_INVAL)
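# Example (annotation, not part of the module): resolving a query
# implementation; an unknown resource name surfaces as OpPrereqError rather
# than a bare KeyError:
#
#   _GetQueryImplementation(constants.QR_NODE)  => _NodeQuery
#   _GetQueryImplementation("no-such-resource") => raises OpPrereqError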