# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
# Standard library imports needed by the code below
import copy
import itertools
import logging
import re

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import rpc
from ganeti import runtime

import ganeti.masterd.instance # pylint: disable=W0611
#: Size of DRBD meta block device
_DRBD_META_SIZE = 128

INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcode.OpCode}
96 @param jobs: A list of lists of opcode objects
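

# A minimal usage sketch (hypothetical LU, for illustration only): an LU's
# Exec can hand follow-up jobs to the processor by returning an instance of
# this class, e.g.
#
#   def Exec(self, feedback_fn):  # in some LogicalUnit subclass
#     ops = [opcodes.OpClusterVerifyConfig()]
#     return ResultWithJobs([ops])  # one job consisting of one opcode
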
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()
  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        after waiting for them)

    The function is allowed to change the self.op attribute so that
    later methods need no longer worry about missing parameters.

    """
  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError
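
  # An illustrative (hypothetical) implementation for a concurrent LU,
  # locking one instance exclusively while sharing the node locks:
  #
  #   def ExpandNames(self):
  #     self.op.instance_name = _ExpandInstanceName(self.cfg,
  #                                                 self.op.instance_name)
  #     self.needed_locks = {
  #       locking.LEVEL_INSTANCE: [self.op.instance_name],
  #       locking.LEVEL_NODE: locking.ALL_SET,
  #       }
  #     self.share_locks[locking.LEVEL_NODE] = 1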
  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """
  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError
  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError
  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"; that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError
  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. "No nodes" should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
    # pylint: disable=W0613,R0201
    return lu_result
  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]
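
  # A typical caller pairs ExpandNames with DeclareLocks along these lines
  # (illustrative; LOCKS_REPLACE is one of the two recalculation modes
  # handled above):
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()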
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU,
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
463 """Check prerequisites for this tasklets.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or
486 raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
499 def __init__(self, qfilter, fields, use_locking):
500 """Initializes this class.
503 self.use_locking = use_locking
505 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
506 namefield=self.SORT_FIELD)
507 self.requested_data = self.query.RequestedData()
508 self.names = self.query.RequestedNames()
510 # Sort only if no names were requested
511 self.sort_by_name = not self.names
513 self.do_locking = None
  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted
  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)
582 """Returns a dict declaring all lock levels shared.
585 return dict.fromkeys(locking.LEVELS, 1)
def _MakeLegacyNodeInfo(data):
  """Formats the data returned by L{rpc.RpcRunner.call_node_info}.

  Converts the data into a single dictionary. This is fine for most use cases,
  but some require information from more than one volume group or hypervisor.

  """
  (bootid, (vg_info, ), (hv_info, )) = data

  return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
    "bootid": bootid,
    })
def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of its children!)
  @param cfg: The config object
  @return: The annotated disk copies
  @see L{rpc.AnnotateDiskParams}

  """
  return rpc.AnnotateDiskParams(instance.disk_template, devs,
                                cfg.GetInstanceDiskParams(instance))
def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
                              cur_group_uuid):
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  """
  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups"
                               " are '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances
def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())
def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted
def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
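
# A small worked example (invented values): with
# old_params={"a": 1, "b": 2} and update_dict={"a": constants.VALUE_DEFAULT,
# "c": 3}, the default-aware merge above drops "a" and yields
# {"b": 2, "c": 3}.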
def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  """
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
                                 errors.ECODE_INVAL)
    if key in constants.IPOLICY_ISPECS:
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_none=use_none,
                                       use_default=use_default)
    else:
      if not value or value == [constants.VALUE_DEFAULT]:
        if group_policy:
          del ipolicy[key]
        else:
          raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster" % key,
                                     errors.ECODE_INVAL)
      else:
        if key in constants.IPOLICY_PARAMETERS:
          # FIXME: we assume all such values are float
          try:
            ipolicy[key] = float(value)
          except (TypeError, ValueError), err:
            raise errors.OpPrereqError("Invalid value for attribute"
                                       " '%s': '%s', error: %s" %
                                       (key, value, err), errors.ECODE_INVAL)
        else:
          # FIXME: we assume all others are lists; this should be redone
          ipolicy[key] = list(value)
  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy)
  except errors.ConfigurationError, err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                               errors.ECODE_INVAL)
  return ipolicy
def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @return: A new dict with updated and verified values

  """
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret
def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with that of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None
def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with that of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None
def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
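
# Illustrative calls (variable names invented): keep only the locks that are
# still needed,
#   _ReleaseLocks(lu, locking.LEVEL_NODE, keep=[primary_node, secondary_node])
# or release an explicit set of lock names,
#   _ReleaseLocks(lu, locking.LEVEL_NODE, names=unused_node_names)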
def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name
      as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)
def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node does not support the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)
def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
    ins_l.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)

    if instance.name in ins_l.payload:
      raise errors.OpPrereqError("Instance %s is running, %s" %
                                 (instance.name, msg), errors.ECODE_STATE)
def _ComputeMinMaxSpec(name, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or an element not meeting the criteria

  """
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    return ("%s value %s is not in range [%s, %s]" %
            (name, value, min_v, max_v))
  return None
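
# A worked example (invented policy values): if
# ipolicy[constants.ISPECS_MIN][constants.ISPEC_MEM_SIZE] is 128 and the
# corresponding ISPECS_MAX entry is 32768, then value=65536 yields the
# "not in range [128, 32768]" message, while value=1024 (or
# constants.VALUE_AUTO) passes and returns None.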
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, mem_size),
    (constants.ISPEC_CPU_COUNT, cpu_count),
    (constants.ISPEC_DISK_COUNT, disk_count),
    (constants.ISPEC_NIC_COUNT, nic_count),
    (constants.ISPEC_SPINDLE_USE, spindle_use),
    ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)

  return filter(None,
                (_compute_fn(name, ipolicy, value)
                 for (name, value) in test_settings))
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)
def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
                                         _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)
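
# An illustrative spec dict for the helper above (keys are the ISPEC_*
# constants, values invented):
#
#   spec = {
#     constants.ISPEC_MEM_SIZE: 2048,
#     constants.ISPEC_CPU_COUNT: 2,
#     constants.ISPEC_DISK_COUNT: 1,
#     constants.ISPEC_DISK_SIZE: [10240],
#     constants.ISPEC_NIC_COUNT: 1,
#     constants.ISPEC_SPINDLE_USE: 1,
#     }
#   violations = _ComputeIPolicyInstanceSpecViolation(ipolicy, spec)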
def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)
def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violate the new ipolicy but
      did not before

  """
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))
def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name
def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
    }
  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if tags is None:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
    }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))
def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
def _CalculateGroupIPolicy(cluster, group):
  """Calculate instance policy for group.

  """
  return cluster.SimpleFillIPolicy(group.ipolicy)
def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances that violate the given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both an iallocator and a node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)
def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     ems)
    if result.fail_msg:
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name
def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
      apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data
class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"
  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
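
  # For illustration (invented values): with error_codes set, an entry is
  # rendered machine-parseable, e.g.
  #   "ERROR:ECLUSTERCFG:cluster::invalid entry"
  # while the default human-readable form of the same error is
  #   "ERROR: cluster: invalid entry"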
  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error
    # to a warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors: # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if cond:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              ignore_errors=self.op.ignore_errors,
                                              depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = False

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
    self.share_locks = _ShareAll()
1952 def CheckPrereq(self):
1953 """Check prerequisites.
1956 # Retrieve all information
1957 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1958 self.all_node_info = self.cfg.GetAllNodesInfo()
1959 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1961 def Exec(self, feedback_fn):
1962 """Verify integrity of cluster, performing various test on nodes.
1966 self._feedback_fn = feedback_fn
1968 feedback_fn("* Verifying cluster config")
1970 for msg in self.cfg.VerifyConfig():
1971 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1973 feedback_fn("* Verifying cluster certificate files")
1975 for cert_filename in constants.ALL_CERT_FILES:
1976 (errcode, msg) = _VerifyCertificate(cert_filename)
1977 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1979 feedback_fn("* Verifying hypervisor parameters")
1981 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1982 self.all_inst_info.values()))
1984 feedback_fn("* Verifying all nodes belong to an existing group")
1986 # We do this verification here because, should this bogus circumstance
1987 # occur, it would never be caught by VerifyGroup, which only acts on
1988 # nodes/instances reachable from existing node groups.
1990 dangling_nodes = set(node.name for node in self.all_node_info.values()
1991 if node.group not in self.all_group_info)
1993 dangling_instances = {}
1994 no_node_instances = []
1996 for inst in self.all_inst_info.values():
1997 if inst.primary_node in dangling_nodes:
1998 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1999 elif inst.primary_node not in self.all_node_info:
2000 no_node_instances.append(inst.name)
    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad
2022 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2023 """Verifies the status of a node group.
2026 HPATH = "cluster-verify"
2027 HTYPE = constants.HTYPE_CLUSTER
2030 _HOOKS_INDENT_RE = re.compile("^", re.M)
2032 class NodeImage(object):
2033 """A class representing the logical and physical status of a node.
2036 @ivar name: the node name to which this object refers
2037 @ivar volumes: a structure as returned from
2038 L{ganeti.backend.GetVolumeList} (runtime)
2039 @ivar instances: a list of running instances (runtime)
2040 @ivar pinst: list of configured primary instances (config)
2041 @ivar sinst: list of configured secondary instances (config)
2042 @ivar sbp: dictionary of {primary-node: list of instances} for all
2043 instances for which this node is secondary (config)
2044 @ivar mfree: free memory, as reported by hypervisor (runtime)
2045 @ivar dfree: free disk, as reported by the node (runtime)
2046 @ivar offline: the offline status (config)
2047 @type rpc_fail: boolean
    @ivar rpc_fail: whether the overall RPC verify call failed (not whether
        the individual keys were correct) (runtime)
2050 @type lvm_fail: boolean
2051 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2052 @type hyp_fail: boolean
2053 @ivar hyp_fail: whether the RPC call didn't return the instance list
2054 @type ghost: boolean
2055 @ivar ghost: whether this is a known node or not (config)
2056 @type os_fail: boolean
2057 @ivar os_fail: whether the RPC call didn't return valid OS data
2059 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2060 @type vm_capable: boolean
2061 @ivar vm_capable: whether the node can host instances
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}
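    # Illustrative sketch, not part of the original module: Exec() below
    # creates one NodeImage per node, fills the config-derived fields while
    # building the expected cluster state, and the runtime fields once the
    # node_verify RPC results are in, e.g. (values made up):
    #
    #   nimg = self.NodeImage(offline=False, name="node1", vm_capable=True)
    #   nimg.pinst.append("instance1.example.com")  # from the configuration
    #   nimg.mfree = 2048                           # from the hypervisor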
2082 def ExpandNames(self):
2083 # This raises errors.OpPrereqError on its own:
2084 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2086 # Get instances in node group; this is unsafe and needs verification later
2088 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2090 self.needed_locks = {
2091 locking.LEVEL_INSTANCE: inst_names,
2092 locking.LEVEL_NODEGROUP: [self.group_uuid],
2093 locking.LEVEL_NODE: [],
2096 self.share_locks = _ShareAll()
2098 def DeclareLocks(self, level):
2099 if level == locking.LEVEL_NODE:
2100 # Get members of node group; this is unsafe and needs verification later
2101 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2103 all_inst_info = self.cfg.GetAllInstancesInfo()
2105 # In Exec(), we warn about mirrored instances that have primary and
2106 # secondary living in separate node groups. To fully verify that
2107 # volumes for these instances are healthy, we will need to do an
2108 # extra call to their secondaries. We ensure here those nodes will
2110 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2111 # Important: access only the instances whose lock is owned
2112 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2113 nodes.update(all_inst_info[inst].secondary_nodes)
2115 self.needed_locks[locking.LEVEL_NODE] = nodes
2117 def CheckPrereq(self):
2118 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2119 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2121 group_nodes = set(self.group_info.members)
2123 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2126 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2128 unlocked_instances = \
2129 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2132 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2133 utils.CommaJoin(unlocked_nodes),
2136 if unlocked_instances:
2137 raise errors.OpPrereqError("Missing lock for instances: %s" %
2138 utils.CommaJoin(unlocked_instances),
2141 self.all_node_info = self.cfg.GetAllNodesInfo()
2142 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2144 self.my_node_names = utils.NiceSort(group_nodes)
2145 self.my_inst_names = utils.NiceSort(group_instances)
2147 self.my_node_info = dict((name, self.all_node_info[name])
2148 for name in self.my_node_names)
2150 self.my_inst_info = dict((name, self.all_inst_info[name])
2151 for name in self.my_inst_names)
2153 # We detect here the nodes that will need the extra RPC calls for verifying
2154 # split LV volumes; they should be locked.
2155 extra_lv_nodes = set()
2157 for inst in self.my_inst_info.values():
2158 if inst.disk_template in constants.DTS_INT_MIRROR:
2159 for nname in inst.all_nodes:
2160 if self.all_node_info[nname].group != self.group_uuid:
2161 extra_lv_nodes.add(nname)
2163 unlocked_lv_nodes = \
2164 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2166 if unlocked_lv_nodes:
2167 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2168 utils.CommaJoin(unlocked_lv_nodes),
2170 self.extra_lv_nodes = list(extra_lv_nodes)
2172 def _VerifyNode(self, ninfo, nresult):
2173 """Perform some basic validation on data returned from a node.
2175 - check the result data structure is well formed and has all the
2177 - check ganeti version
2179 @type ninfo: L{objects.Node}
2180 @param ninfo: the node to check
2181 @param nresult: the results from the node
2183 @return: whether overall this call was successful (and we can expect
        reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2190 # main result, nresult should be a non-empty dict
2191 test = not nresult or not isinstance(nresult, dict)
2192 _ErrorIf(test, constants.CV_ENODERPC, node,
2193 "unable to verify node: no data returned")
2197 # compares ganeti version
2198 local_version = constants.PROTOCOL_VERSION
2199 remote_version = nresult.get("version", None)
2200 test = not (remote_version and
2201 isinstance(remote_version, (list, tuple)) and
2202 len(remote_version) == 2)
2203 _ErrorIf(test, constants.CV_ENODERPC, node,
2204 "connection to node returned invalid data")
2208 test = local_version != remote_version[0]
2209 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2210 "incompatible protocol versions: master %s,"
2211 " node %s", local_version, remote_version[0])
2215 # node seems compatible, we can actually try to look into its results
2217 # full package version
2218 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2219 constants.CV_ENODEVERSION, node,
2220 "software version mismatch: master %s, node %s",
2221 constants.RELEASE_VERSION, remote_version[1],
2222 code=self.ETYPE_WARNING)
2224 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2225 if ninfo.vm_capable and isinstance(hyp_result, dict):
2226 for hv_name, hv_result in hyp_result.iteritems():
2227 test = hv_result is not None
2228 _ErrorIf(test, constants.CV_ENODEHV, node,
2229 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2231 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2232 if ninfo.vm_capable and isinstance(hvp_result, list):
2233 for item, hv_name, hv_result in hvp_result:
2234 _ErrorIf(True, constants.CV_ENODEHV, node,
2235 "hypervisor %s parameter verify failure (source %s): %s",
2236 hv_name, item, hv_result)
    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True
2245 def _VerifyNodeTime(self, ninfo, nresult,
2246 nvinfo_starttime, nvinfo_endtime):
2247 """Check the node time.
2249 @type ninfo: L{objects.Node}
2250 @param ninfo: the node to check
2251 @param nresult: the remote results for the node
2252 @param nvinfo_starttime: the start time of the RPC call
2253 @param nvinfo_endtime: the end time of the RPC call
    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, constants.CV_ENODETIME, node,
               "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
2277 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2278 """Check the node LVM results.
2280 @type ninfo: L{objects.Node}
2281 @param ninfo: the node to check
2282 @param nresult: the remote results for the node
2283 @param vg_name: the configured VG name
    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, constants.CV_ENODELVM, node,
             "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, constants.CV_ENODELVM, node,
                 "Invalid character ':' in PV '%s' of VG '%s'",
                 pvname, owner_vg)
2315 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2316 """Check the node bridges.
2318 @type ninfo: L{objects.Node}
2319 @param ninfo: the node to check
2320 @param nresult: the remote results for the node
2321 @param bridges: the expected list of bridges
    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, constants.CV_ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
  def _VerifyNodeUserScripts(self, ninfo, nresult):
    """Check the results of user scripts presence and executability on the
    node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name

    test = constants.NV_USERSCRIPTS not in nresult
    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
                  "did not return user scripts information")

    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
    if not test:
      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
                    "user scripts not present or not executable: %s" %
                    utils.CommaJoin(sorted(broken_scripts)))
2358 def _VerifyNodeNetwork(self, ninfo, nresult):
2359 """Check the node network connectivity results.
2361 @type ninfo: L{objects.Node}
2362 @param ninfo: the node to check
2363 @param nresult: the remote results for the node
    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, constants.CV_ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, constants.CV_ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, constants.CV_ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, constants.CV_ENODENET, node, msg)
  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.
2404 This function checks to see if the required block devices are
2405 available on the instance's node.
2408 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2409 node_current = instanceconfig.primary_node
2411 node_vol_should = {}
2412 instanceconfig.MapLVsByNode(node_vol_should)
2414 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2415 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2416 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2418 for node in node_vol_should:
2419 n_img = node_image[node]
2420 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
2424 test = volume not in n_img.volumes
2425 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2426 "volume %s missing on node %s", volume, node)
2428 if instanceconfig.admin_state == constants.ADMINST_UP:
2429 pri_img = node_image[node_current]
2430 test = instance not in pri_img.instances and not pri_img.offline
2431 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2432 "instance not running on its primary node %s",
2435 diskdata = [(nname, success, status, idx)
2436 for (nname, disks) in diskstatus.items()
2437 for idx, (success, status) in enumerate(disks)]
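    # Illustrative sketch, not part of the original module: the comprehension
    # above flattens the per-node mapping into one tuple per disk, e.g.
    #
    #   {"node1": [(True, st0), (False, "err")]}
    #     -> [("node1", True, st0, 0), ("node1", False, "err", 1)]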
2439 for nname, success, bdev_status, idx in diskdata:
2440 # the 'ghost node' construction in Exec() ensures that we have a
2442 snode = node_image[nname]
2443 bad_snode = snode.ghost or snode.offline
2444 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2445 not success and not bad_snode,
2446 constants.CV_EINSTANCEFAULTYDISK, instance,
2447 "couldn't retrieve status for disk/%s on %s: %s",
2448 idx, nname, bdev_status)
2449 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2450 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2451 constants.CV_EINSTANCEFAULTYDISK, instance,
2452 "disk/%s on %s is faulty", idx, nname)
2454 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2455 """Verify if there are any unknown volumes in the cluster.
2457 The .os, .swap and backup volumes are ignored. All other volumes are
2458 reported as unknown.
2460 @type reserved: L{ganeti.utils.FieldSet}
2461 @param reserved: a FieldSet of reserved volume names
2464 for node, n_img in node_image.items():
2465 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2466 self.all_node_info[node].group != self.group_uuid):
2467 # skip non-healthy nodes
2469 for volume in n_img.volumes:
2470 test = ((node not in node_vol_should or
2471 volume not in node_vol_should[node]) and
2472 not reserved.Matches(volume))
2473 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2474 "volume %s is unknown", volume)
2476 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2477 """Verify N+1 Memory Resilience.
2479 Check that if one single node dies we can still start all the
2480 instances it was primary for.
2483 cluster_info = self.cfg.GetClusterInfo()
2484 for node, n_img in node_image.items():
2485 # This code checks that every node which is now listed as
2486 # secondary has enough memory to host all instances it is
2487 # supposed to should a single other node in the cluster fail.
2488 # FIXME: not ready for failover to an arbitrary node
2489 # FIXME: does not support file-backed instances
2490 # WARNING: we currently take into account down instances as well
2491 # as up ones, considering that even if they're down someone
2492 # might want to start them even in the event of a node failure.
2493 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2494 # we're skipping nodes marked offline and nodes in other groups from
2495 # the N+1 warning, since most likely we don't have good memory
      # information from them; we already list instances living on such
2497 # nodes, and that's enough warning
2499 #TODO(dynmem): also consider ballooning out other instances
2500 for prinode, instances in n_img.sbp.items():
2502 for instance in instances:
2503 bep = cluster_info.FillBE(instance_cfg[instance])
2504 if bep[constants.BE_AUTO_BALANCE]:
2505 needed_mem += bep[constants.BE_MINMEM]
2506 test = n_img.mfree < needed_mem
2507 self._ErrorIf(test, constants.CV_ENODEN1, node,
2508 "not enough memory to accomodate instance failovers"
2509 " should node %s fail (%dMiB needed, %dMiB available)",
2510 prinode, needed_mem, n_img.mfree)
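    # Illustrative sketch, not part of the original module: if node2 is
    # secondary for two instances whose primary is node1, with BE_MINMEM of
    # 1024 and 512 MiB and auto_balance enabled, node2 needs
    # 1024 + 512 = 1536 MiB free to absorb a failover of node1; an mfree
    # below that threshold is reported as CV_ENODEN1.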
  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_opt, files_mc, files_vm)):
2515 """Verifies file checksums collected from all nodes.
2517 @param errorif: Callback for reporting errors
2518 @param nodeinfo: List of L{objects.Node} objects
2519 @param master_node: Name of master node
2520 @param all_nvinfo: RPC results
    # Define functions determining which nodes to consider for a file
    files2nodefn = [
      (files_all, None),
      (files_mc, lambda node: (node.master_candidate or
                               node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),
      ]

    # Build mapping from filename to list of nodes which should have the file
    nodefiles = {}
    for (files, fn) in files2nodefn:
      if fn is None:
        filenodes = nodeinfo
      else:
        filenodes = filter(fn, nodeinfo)
2538 nodefiles.update((filename,
2539 frozenset(map(operator.attrgetter("name"), filenodes)))
2540 for filename in files)
2542 assert set(nodefiles) == (files_all | files_mc | files_vm)
2544 fileinfo = dict((filename, {}) for filename in nodefiles)
2545 ignore_nodes = set()
    for node in nodeinfo:
      if node.offline:
        ignore_nodes.add(node.name)
        continue

      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        ignore_nodes.add(node.name)
        continue
2566 # Build per-checksum mapping from filename to nodes having it
2567 for (filename, checksum) in node_files.items():
2568 assert filename in nodefiles
2569 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2571 for (filename, checksums) in fileinfo.items():
2572 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2574 # Nodes having the file
2575 with_file = frozenset(node_name
2576 for nodes in fileinfo[filename].values()
2577 for node_name in nodes) - ignore_nodes
2579 expected_nodes = nodefiles[filename] - ignore_nodes
2581 # Nodes missing file
2582 missing_file = expected_nodes - with_file
2584 if filename in files_opt:
2586 errorif(missing_file and missing_file != expected_nodes,
2587 constants.CV_ECLUSTERFILECHECK, None,
2588 "File %s is optional, but it must exist on all or no"
2589 " nodes (not found on %s)",
2590 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2592 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2593 "File %s is missing from node(s) %s", filename,
2594 utils.CommaJoin(utils.NiceSort(missing_file)))
      # Warn if a node has a file it shouldn't
      unexpected = with_file - expected_nodes
      errorif(unexpected,
              constants.CV_ECLUSTERFILECHECK, None,
              "File %s should not exist on node(s) %s",
              filename, utils.CommaJoin(utils.NiceSort(unexpected)))
      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                      enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.
2621 @type ninfo: L{objects.Node}
2622 @param ninfo: the node to check
2623 @param nresult: the remote results for the node
2624 @param instanceinfo: the dict of instances
2625 @param drbd_helper: the configured DRBD usermode helper
2626 @param drbd_map: the DRBD map as returned by
2627 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)
    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name,
                            instance.admin_state == constants.ADMINST_UP)
2663 # and now check them
2664 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2665 test = not isinstance(used_minors, (tuple, list))
2666 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2667 "cannot parse drbd status file: %s", str(used_minors))
2669 # we cannot check drbd status
2672 for minor, (iname, must_exist) in node_drbd.items():
2673 test = minor not in used_minors and must_exist
2674 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2675 "drbd minor %d of instance %s is not active", minor, iname)
2676 for minor in used_minors:
2677 test = minor not in node_drbd
2678 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2679 "unallocated drbd minor %d is in use", minor)
2681 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2682 """Builds the node OS structures.
2684 @type ninfo: L{objects.Node}
2685 @param ninfo: the node to check
2686 @param nresult: the remote results for the node
2687 @param nimg: the node image object
2691 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2693 remote_os = nresult.get(constants.NV_OSLIST, None)
2694 test = (not isinstance(remote_os, list) or
2695 not compat.all(isinstance(v, list) and len(v) == 7
2696 for v in remote_os))
2698 _ErrorIf(test, constants.CV_ENODEOS, node,
2699 "node hasn't returned valid OS data")
2708 for (name, os_path, status, diagnose,
2709 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2711 if name not in os_dict:
2714 # parameters is a list of lists instead of list of tuples due to
2715 # JSON lacking a real tuple type, fix it:
2716 parameters = [tuple(v) for v in parameters]
2717 os_dict[name].append((os_path, status, diagnose,
2718 set(variants), set(parameters), set(api_ver)))
2720 nimg.oslist = os_dict
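    # Illustrative sketch, not part of the original module: each oslist entry
    # keeps one tuple per occurrence of the OS on the node, e.g. (made up)
    #
    #   nimg.oslist = {
    #     "debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
    #                      set(["default"]), set(), set([20]))],
    #   }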
2722 def _VerifyNodeOS(self, ninfo, nimg, base):
2723 """Verifies the node OS list.
2725 @type ninfo: L{objects.Node}
2726 @param ninfo: the node to check
2727 @param nimg: the node image object
2728 @param base: the 'template' node we match against (e.g. from the master)
2732 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2734 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2736 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2737 for os_name, os_data in nimg.oslist.items():
2738 assert os_data, "Empty OS status for OS %s?!" % os_name
2739 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2740 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2741 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2742 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2743 "OS '%s' has multiple entries (first one shadows the rest): %s",
2744 os_name, utils.CommaJoin([v[0] for v in os_data]))
2745 # comparisons with the 'base' image
2746 test = os_name not in base.oslist
2747 _ErrorIf(test, constants.CV_ENODEOS, node,
2748 "Extra OS %s not present on reference node (%s)",
2752 assert base.oslist[os_name], "Base node has empty OS status?"
2753 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2755 # base OS is invalid, skipping
2757 for kind, a, b in [("API version", f_api, b_api),
2758 ("variants list", f_var, b_var),
2759 ("parameters", beautify_params(f_param),
2760 beautify_params(b_param))]:
2761 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2762 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2763 kind, os_name, base.name,
2764 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2766 # check any missing OSes
2767 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2768 _ErrorIf(missing, constants.CV_ENODEOS, node,
2769 "OSes present on reference node %s but missing on this node: %s",
2770 base.name, utils.CommaJoin(missing))
2772 def _VerifyOob(self, ninfo, nresult):
2773 """Verifies out of band functionality of a node.
2775 @type ninfo: L{objects.Node}
2776 @param ninfo: the node to check
2777 @param nresult: the remote results for the node
2781 # We just have to verify the paths on master and/or master candidates
2782 # as the oob helper is invoked on the master
2783 if ((ninfo.master_candidate or ninfo.master_capable) and
2784 constants.NV_OOB_PATHS in nresult):
2785 for path_result in nresult[constants.NV_OOB_PATHS]:
2786 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2788 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2789 """Verifies and updates the node volume data.
2791 This function will update a L{NodeImage}'s internal structures
2792 with data from the remote call.
2794 @type ninfo: L{objects.Node}
2795 @param ninfo: the node to check
2796 @param nresult: the remote results for the node
2797 @param nimg: the node image object
2798 @param vg_name: the configured VG name
    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, constants.CV_ENODELVM, node,
               "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False
2818 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2819 """Verifies and updates the node instance list.
2821 If the listing was successful, then updates this node's instance
2822 list. Otherwise, it marks the RPC call as failed for the instance
2825 @type ninfo: L{objects.Node}
2826 @param ninfo: the node to check
2827 @param nresult: the remote results for the node
2828 @param nimg: the node image object
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
                  "rpc call to node failed (instancelist): %s",
                  utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata
2841 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2842 """Verifies and computes a node information map
2844 @type ninfo: L{objects.Node}
2845 @param ninfo: the node to check
2846 @param nresult: the remote results for the node
2847 @param nimg: the node image object
2848 @param vg_name: the configured VG name
    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, constants.CV_ENODEHV, node,
             "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, constants.CV_ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, constants.CV_ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, constants.CV_ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")
2880 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2881 """Gets per-disk status information for all instances.
2883 @type nodelist: list of strings
2884 @param nodelist: Node names
2885 @type node_image: dict of (name, L{objects.Node})
2886 @param node_image: Node objects
2887 @type instanceinfo: dict of (name, L{objects.Instance})
2888 @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
2890 @return: a dictionary of per-instance dictionaries with nodes as
2891 keys and disk information as values; the disk information is a
2892 list of tuples (success, payload)
2895 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2898 node_disks_devonly = {}
2899 diskless_instances = set()
2900 diskless = constants.DT_DISKLESS
2902 for nname in nodelist:
2903 node_instances = list(itertools.chain(node_image[nname].pinst,
2904 node_image[nname].sinst))
2905 diskless_instances.update(inst for inst in node_instances
2906 if instanceinfo[inst].disk_template == diskless)
2907 disks = [(inst, disk)
2908 for inst in node_instances
2909 for disk in instanceinfo[inst].disks]
2912 # No need to collect data
2915 node_disks[nname] = disks
2917 # Creating copies as SetDiskID below will modify the objects and that can
2918 # lead to incorrect data returned from nodes
2919 devonly = [dev.Copy() for (_, dev) in disks]
2922 self.cfg.SetDiskID(dev, nname)
2924 node_disks_devonly[nname] = devonly
2926 assert len(node_disks) == len(node_disks_devonly)
    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2962 # Add empty entries for diskless instances.
2963 for inst in diskless_instances:
2964 assert inst not in instdisk
2967 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2968 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2969 compat.all(isinstance(s, (tuple, list)) and
2970 len(s) == 2 for s in statuses)
2971 for inst, nnames in instdisk.items()
2972 for nname, statuses in nnames.items())
2973 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
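    # Illustrative sketch, not part of the original module: the resulting
    # mapping carries one status list per instance and node, e.g. (made up)
    #
    #   {"instance1.example.com": {
    #      "node1": [(True, status_disk0), (True, status_disk1)],
    #      "node2": [(False, "node offline"), (False, "node offline")]}}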
  @staticmethod
  def _SshNodeSelector(group_uuid, all_nodes):
    """Create endless iterators for all potential SSH check hosts.

    """
    nodes = [node for node in all_nodes
             if (node.group != group_uuid and
                 not node.offline)]
    keyfunc = operator.attrgetter("group")

    return map(itertools.cycle,
               [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
                                                  keyfunc)])
  @classmethod
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2994 """Choose which nodes should talk to which other nodes.
2996 We will make nodes contact all nodes in their group, and one node from
2999 @warning: This algorithm has a known issue if one node group is much
3000 smaller than others (e.g. just one node). In such a case all other
3001 nodes will talk to the single node.
3004 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3005 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3007 return (online_nodes,
3008 dict((name, sorted([i.next() for i in sel]))
3009 for name in online_nodes))
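    # Illustrative sketch, not part of the original module: for groups
    # g1 = {node1, node2} and g2 = {node3}, verifying g1 yields
    #
    #   (["node1", "node2"], {"node1": ["node3"], "node2": ["node3"]})
    #
    # i.e. every online node in the group is told to contact one member of
    # each other group, cycling through that group's sorted members.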
3011 def BuildHooksEnv(self):
    Cluster-Verify hooks are run in the post phase only; their failure causes
    their output to be logged in the verify output and the verification to
    fail.
3019 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3022 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3023 for node in self.my_node_info.values())
3027 def BuildHooksNodes(self):
3028 """Build hooks nodes.
3031 return ([], self.my_node_names)
3033 def Exec(self, feedback_fn):
3034 """Verify integrity of the node group, performing various test on nodes.
3037 # This method has too many local variables. pylint: disable=R0914
3038 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3040 if not self.my_node_names:
3042 feedback_fn("* Empty node group, skipping verification")
3046 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3047 verbose = self.op.verbose
3048 self._feedback_fn = feedback_fn
3050 vg_name = self.cfg.GetVGName()
3051 drbd_helper = self.cfg.GetDRBDHelper()
3052 cluster = self.cfg.GetClusterInfo()
3053 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3054 hypervisors = cluster.enabled_hypervisors
3055 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3057 i_non_redundant = [] # Non redundant instances
3058 i_non_a_balanced = [] # Non auto-balanced instances
3059 i_offline = 0 # Count of offline instances
3060 n_offline = 0 # Count of offline nodes
3061 n_drained = 0 # Count of nodes being drained
3062 node_vol_should = {}
3064 # FIXME: verify OS list
3067 filemap = _ComputeAncillaryFiles(cluster, False)
3069 # do local checksums
3070 master_node = self.master_node = self.cfg.GetMasterNode()
3071 master_ip = self.cfg.GetMasterIP()
3073 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3076 if self.cfg.GetUseExternalMipScript():
3077 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3079 node_verify_param = {
3080 constants.NV_FILELIST:
3081 utils.UniqueSequence(filename
3082 for files in filemap
3083 for filename in files),
3084 constants.NV_NODELIST:
3085 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3086 self.all_node_info.values()),
3087 constants.NV_HYPERVISOR: hypervisors,
3088 constants.NV_HVPARAMS:
3089 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3090 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3091 for node in node_data_list
3092 if not node.offline],
3093 constants.NV_INSTANCELIST: hypervisors,
3094 constants.NV_VERSION: None,
3095 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3096 constants.NV_NODESETUP: None,
3097 constants.NV_TIME: None,
3098 constants.NV_MASTERIP: (master_node, master_ip),
3099 constants.NV_OSLIST: None,
3100 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3101 constants.NV_USERSCRIPTS: user_scripts,
3104 if vg_name is not None:
3105 node_verify_param[constants.NV_VGLIST] = None
3106 node_verify_param[constants.NV_LVLIST] = vg_name
3107 node_verify_param[constants.NV_PVLIST] = [vg_name]
3108 node_verify_param[constants.NV_DRBDLIST] = None
3111 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3114 # FIXME: this needs to be changed per node-group, not cluster-wide
3116 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3117 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3118 bridges.add(default_nicpp[constants.NIC_LINK])
3119 for instance in self.my_inst_info.values():
3120 for nic in instance.nics:
3121 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3122 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3123 bridges.add(full_nic[constants.NIC_LINK])
3126 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3128 # Build our expected cluster state
3129 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3131 vm_capable=node.vm_capable))
3132 for node in node_data_list)
3136 for node in self.all_node_info.values():
3137 path = _SupportsOob(self.cfg, node)
3138 if path and path not in oob_paths:
3139 oob_paths.append(path)
3142 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3144 for instance in self.my_inst_names:
3145 inst_config = self.my_inst_info[instance]
3147 for nname in inst_config.all_nodes:
3148 if nname not in node_image:
3149 gnode = self.NodeImage(name=nname)
3150 gnode.ghost = (nname not in self.all_node_info)
3151 node_image[nname] = gnode
3153 inst_config.MapLVsByNode(node_vol_should)
3155 pnode = inst_config.primary_node
3156 node_image[pnode].pinst.append(instance)
3158 for snode in inst_config.secondary_nodes:
3159 nimg = node_image[snode]
3160 nimg.sinst.append(instance)
3161 if pnode not in nimg.sbp:
3162 nimg.sbp[pnode] = []
3163 nimg.sbp[pnode].append(instance)
3165 # At this point, we have the in-memory data structures complete,
3166 # except for the runtime information, which we'll gather next
3168 # Due to the way our RPC system works, exact response times cannot be
3169 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3170 # time before and after executing the request, we can at least have a time
3172 nvinfo_starttime = time.time()
3173 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3175 self.cfg.GetClusterName())
3176 nvinfo_endtime = time.time()
3178 if self.extra_lv_nodes and vg_name is not None:
3180 self.rpc.call_node_verify(self.extra_lv_nodes,
3181 {constants.NV_LVLIST: vg_name},
3182 self.cfg.GetClusterName())
3184 extra_lv_nvinfo = {}
3186 all_drbd_map = self.cfg.ComputeDRBDMap()
3188 feedback_fn("* Gathering disk information (%s nodes)" %
3189 len(self.my_node_names))
3190 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3193 feedback_fn("* Verifying configuration file consistency")
3195 # If not all nodes are being checked, we need to make sure the master node
3196 # and a non-checked vm_capable node are in the list.
3197 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3199 vf_nvinfo = all_nvinfo.copy()
3200 vf_node_info = list(self.my_node_info.values())
3201 additional_nodes = []
3202 if master_node not in self.my_node_info:
3203 additional_nodes.append(master_node)
3204 vf_node_info.append(self.all_node_info[master_node])
3205 # Add the first vm_capable node we find which is not included
3206 for node in absent_nodes:
3207 nodeinfo = self.all_node_info[node]
3208 if nodeinfo.vm_capable and not nodeinfo.offline:
3209 additional_nodes.append(node)
3210 vf_node_info.append(self.all_node_info[node])
3212 key = constants.NV_FILELIST
3213 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3214 {key: node_verify_param[key]},
3215 self.cfg.GetClusterName()))
3217 vf_nvinfo = all_nvinfo
3218 vf_node_info = self.my_node_info.values()
3220 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3222 feedback_fn("* Verifying node status")
    refos_img = None
    for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
               msg)
      if msg:
        nimg.rpc_fail = True
        continue
3255 nresult = all_nvinfo[node].payload
3257 nimg.call_ok = self._VerifyNode(node_i, nresult)
3258 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3259 self._VerifyNodeNetwork(node_i, nresult)
3260 self._VerifyNodeUserScripts(node_i, nresult)
3261 self._VerifyOob(node_i, nresult)
      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             all_drbd_map)
3268 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3269 self._UpdateNodeInstances(node_i, nresult, nimg)
3270 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3271 self._UpdateNodeOS(node_i, nresult, nimg)
3273 if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
3277 self._VerifyNodeBridges(node_i, nresult, bridges)
        # Check whether all running instances are primary for the node. (This
3280 # can no longer be done from _VerifyInstance below, since some of the
3281 # wrong instances could be from other node groups.)
3282 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3284 for inst in non_primary_inst:
3285 # FIXME: investigate best way to handle offline insts
3286 if inst.admin_state == constants.ADMINST_OFFLINE:
3288 feedback_fn("* Skipping offline instance %s" % inst.name)
3291 test = inst in self.all_inst_info
3292 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3293 "instance should not run on node %s", node_i.name)
3294 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3295 "node is running unknown instance %s", inst)
3297 for node, result in extra_lv_nvinfo.items():
3298 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3299 node_image[node], vg_name)
3301 feedback_fn("* Verifying instance status")
    for instance in self.my_inst_names:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
3308 inst_nodes_offline = []
3310 pnode = inst_config.primary_node
3311 pnode_img = node_image[pnode]
3312 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3313 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3314 " primary node failed", instance)
      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
               pnode_img.offline,
               constants.CV_EINSTANCEBADNODE, instance,
               "instance is marked as running and lives on offline node %s",
               inst_config.primary_node)
3322 # If the instance is non-redundant we cannot survive losing its primary
3323 # node, so we are not N+1 compliant. On the other hand we have no disk
3324 # templates with more than one secondary so that situation is not well
3326 # FIXME: does not support file-backed instances
3327 if not inst_config.secondary_nodes:
3328 i_non_redundant.append(instance)
3330 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3331 constants.CV_EINSTANCELAYOUT,
3332 instance, "instance has multiple secondary nodes: %s",
3333 utils.CommaJoin(inst_config.secondary_nodes),
3334 code=self.ETYPE_WARNING)
3336 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3337 pnode = inst_config.primary_node
3338 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3339 instance_groups = {}
        for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]
3352 self._ErrorIf(len(instance_groups) > 1,
3353 constants.CV_EINSTANCESPLITGROUPS,
3354 instance, "instance has primary and secondary nodes in"
3355 " different groups: %s", utils.CommaJoin(pretty_list),
3356 code=self.ETYPE_WARNING)
3358 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3359 i_non_a_balanced.append(instance)
      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
                 snode, "instance %s, connection to secondary node failed",
                 instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)
3370 # warn that the instance lives on offline nodes
3371 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3372 "instance has offline secondary node(s) %s",
3373 utils.CommaJoin(inst_nodes_offline))
3374 # ... or ghost/non-vm_capable nodes
3375 for node in inst_config.all_nodes:
3376 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3377 instance, "instance lives on ghost node %s", node)
3378 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3379 instance, "instance lives on non-vm_capable node %s", node)
3381 feedback_fn("* Verifying orphan volumes")
3382 reserved = utils.FieldSet(*cluster.reserved_lvs)
3384 # We will get spurious "unknown volume" warnings if any node of this group
3385 # is secondary for an instance whose primary is in another group. To avoid
3386 # them, we find these instances and add their volumes to node_vol_should.
3387 for inst in self.all_inst_info.values():
3388 for secondary in inst.secondary_nodes:
3389 if (secondary in self.my_node_info
3390 and inst.name not in self.my_inst_info):
3391 inst.MapLVsByNode(node_vol_should)
3394 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3396 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3397 feedback_fn("* Verifying N+1 Memory redundancy")
3398 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3400 feedback_fn("* Other Notes")
3402 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3403 % len(i_non_redundant))
3405 if i_non_a_balanced:
3406 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3407 % len(i_non_a_balanced))
    if i_offline:
      feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad
3420 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3421 """Analyze the post-hooks' result
3423 This method analyses the hook result, handles it, and sends some
3424 nicely-formatted feedback back to the user.
3426 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3427 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3428 @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
3430 @param lu_result: previous Exec result
3431 @return: the new Exec result, based on the previous result
3435 # We only really run POST phase hooks, only for non-empty groups,
3436 # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
3441 # Used to change hooks' output to proper indentation
3442 feedback_fn("* Hooks Results")
3443 assert hooks_results, "invalid result from hooks"
3445 for node_name in hooks_results:
3446 res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
3449 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3450 "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error.
          continue

        for script, hkr, output in res.payload:
3456 test = hkr == constants.HKR_FAIL
3457 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3458 "Script %s failed, output:", script)
3460 output = self._HOOKS_INDENT_RE.sub(" ", output)
3461 feedback_fn("%s" % output)
3467 class LUClusterVerifyDisks(NoHooksLU):
3468 """Verifies the cluster disks status.
3473 def ExpandNames(self):
3474 self.share_locks = _ShareAll()
3475 self.needed_locks = {
3476 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3479 def Exec(self, feedback_fn):
3480 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3482 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3483 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3484 for group in group_names])
3487 class LUGroupVerifyDisks(NoHooksLU):
3488 """Verifies the status of all disks in a node group.
3493 def ExpandNames(self):
3494 # Raises errors.OpPrereqError on its own if group can't be found
3495 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3497 self.share_locks = _ShareAll()
3498 self.needed_locks = {
3499 locking.LEVEL_INSTANCE: [],
3500 locking.LEVEL_NODEGROUP: [],
3501 locking.LEVEL_NODE: [],
3504 def DeclareLocks(self, level):
3505 if level == locking.LEVEL_INSTANCE:
3506 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3508 # Lock instances optimistically, needs verification once node and group
3509 # locks have been acquired
3510 self.needed_locks[locking.LEVEL_INSTANCE] = \
3511 self.cfg.GetNodeGroupInstances(self.group_uuid)
3513 elif level == locking.LEVEL_NODEGROUP:
3514 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3516 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3517 set([self.group_uuid] +
3518 # Lock all groups used by instances optimistically; this requires
3519 # going via the node before it's locked, requiring verification
3522 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3523 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3525 elif level == locking.LEVEL_NODE:
3526 # This will only lock the nodes in the group to be verified which contain
3528 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3529 self._LockInstancesNodes()
3531 # Lock all nodes in group to be verified
3532 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3533 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3534 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3536 def CheckPrereq(self):
3537 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3538 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3539 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3541 assert self.group_uuid in owned_groups
3543 # Check if locked instances are still correct
3544 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3546 # Get instance information
3547 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3549 # Check if node groups for locked instances are still correct
3550 _CheckInstancesNodeGroups(self.cfg, self.instances,
3551 owned_groups, owned_nodes, self.group_uuid)
3553 def Exec(self, feedback_fn):
3554 """Verify integrity of cluster disks.
3556 @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}
3566 nv_dict = _MapInstanceDisksToNodes([inst
3567 for inst in self.instances.values()
3568 if inst.admin_state == constants.ADMINST_UP])
3571 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3572 set(self.cfg.GetVmCapableNodeList()))
3574 node_lvs = self.rpc.call_lv_list(nodes, [])
    for (node, node_res) in node_lvs.items():
      if node_res.offline:
        continue

      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue
3586 for lv_name, (_, _, lv_online) in node_res.payload.items():
3587 inst = nv_dict.pop((node, lv_name), None)
3588 if not (lv_online or inst is None):
3589 res_instances.add(inst)
3591 # any leftover items in nv_dict are missing LVs, let's arrange the data
3593 for key, inst in nv_dict.iteritems():
3594 res_missing.setdefault(inst, []).append(list(key))
3596 return (res_nodes, list(res_instances), res_missing)
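    # Illustrative sketch, not part of the original module: a possible return
    # value (all names made up) would be
    #
    #   ({"node1": "Error while connecting"},         # per-node errors
    #    ["instance1.example.com"],                   # need activate-disks
    #    {"instance2.example.com": [["node2", "vol1"]]})  # missing LVs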
3599 class LUClusterRepairDiskSizes(NoHooksLU):
3600 """Verifies the cluster disks sizes.
3605 def ExpandNames(self):
3606 if self.op.instances:
3607 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3608 self.needed_locks = {
3609 locking.LEVEL_NODE_RES: [],
3610 locking.LEVEL_INSTANCE: self.wanted_names,
3612 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3614 self.wanted_names = None
3615 self.needed_locks = {
3616 locking.LEVEL_NODE_RES: locking.ALL_SET,
3617 locking.LEVEL_INSTANCE: locking.ALL_SET,
3619 self.share_locks = {
3620 locking.LEVEL_NODE_RES: 1,
3621 locking.LEVEL_INSTANCE: 0,
3624 def DeclareLocks(self, level):
3625 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3626 self._LockInstancesNodes(primary_only=True, level=level)
3628 def CheckPrereq(self):
3629 """Check prerequisites.
3631 This only checks the optional instance list against the existing names.
3634 if self.wanted_names is None:
3635 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3637 self.wanted_instances = \
3638 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3640 def _EnsureChildSizes(self, disk):
3641 """Ensure children of the disk have the needed disk size.
3643 This is valid mainly for DRBD8 and fixes an issue where the
3644 children have smaller disk size.
3646 @param disk: an L{ganeti.objects.Disk} object
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False
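    # Illustrative note, not part of the original module: for a DRBD8 disk of
    # 10240 MiB whose data child (an LV) is recorded at 10112 MiB, the child
    # is bumped to 10240 MiB and True is returned, telling the caller that
    # the instance configuration must be written back.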
3663 def Exec(self, feedback_fn):
3664 """Verify the size of cluster disks.
3667 # TODO: check child disks too
3668 # TODO: check differences in size between primary/secondary nodes
3670 for instance in self.wanted_instances:
3671 pnode = instance.primary_node
3672 if pnode not in per_node_disks:
3673 per_node_disks[pnode] = []
3674 for idx, disk in enumerate(instance.disks):
3675 per_node_disks[pnode].append((instance, idx, disk))
3677 assert not (frozenset(per_node_disks.keys()) -
3678 self.owned_locks(locking.LEVEL_NODE_RES)), \
3679 "Not owning correct locks"
3680 assert not self.owned_locks(locking.LEVEL_NODE)
    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
3708 if size != disk.size:
3709 self.LogInfo("Disk %d of instance %s has mismatched size,"
3710 " correcting: recorded %d, actual %d", idx,
3711 instance.name, disk.size, size)
3713 self.cfg.Update(instance, feedback_fn)
3714 changed.append((instance.name, idx, size))
3715 if self._EnsureChildSizes(disk):
3716 self.cfg.Update(instance, feedback_fn)
3717 changed.append((instance.name, idx, disk.size))
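# Result-shape sketch (hypothetical values): "changed" collects one
# (instance name, disk index, new size) tuple per corrected record, e.g.
#   [("inst1.example.com", 0, 1024)]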
3721 class LUClusterRename(LogicalUnit):
3722 """Rename the cluster.
3725 HPATH = "cluster-rename"
3726 HTYPE = constants.HTYPE_CLUSTER
3728 def BuildHooksEnv(self):
3733 "OP_TARGET": self.cfg.GetClusterName(),
3734 "NEW_NAME": self.op.name,
3737 def BuildHooksNodes(self):
3738 """Build hooks nodes.
3741 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3743 def CheckPrereq(self):
3744 """Verify that the passed name is a valid one.
3747 hostname = netutils.GetHostname(name=self.op.name,
3748 family=self.cfg.GetPrimaryIPFamily())
3750 new_name = hostname.name
3751 self.ip = new_ip = hostname.ip
3752 old_name = self.cfg.GetClusterName()
3753 old_ip = self.cfg.GetMasterIP()
3754 if new_name == old_name and new_ip == old_ip:
3755 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3756 " cluster has changed",
3758 if new_ip != old_ip:
3759 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3760 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3761 " reachable on the network" %
3762 new_ip, errors.ECODE_NOTUNIQUE)
3764 self.op.name = new_name
3766 def Exec(self, feedback_fn):
3767 """Rename the cluster.
3770 clustername = self.op.name
3773 # shutdown the master IP
3774 master_params = self.cfg.GetMasterNetworkParameters()
3775 ems = self.cfg.GetUseExternalMipScript()
result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                 master_params, ems)
3778 result.Raise("Could not disable the master role")
3781 cluster = self.cfg.GetClusterInfo()
3782 cluster.cluster_name = clustername
3783 cluster.master_ip = new_ip
3784 self.cfg.Update(cluster, feedback_fn)
3786 # update the known hosts file
3787 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3788 node_list = self.cfg.GetOnlineNodeList()
3790 node_list.remove(master_params.name)
3793 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3795 master_params.ip = new_ip
result = self.rpc.call_node_activate_master_ip(master_params.name,
                                               master_params, ems)
3798 msg = result.fail_msg
3800 self.LogWarning("Could not re-enable the master role on"
3801 " the master, please restart manually: %s", msg)
3806 def _ValidateNetmask(cfg, netmask):
3807 """Checks if a netmask is valid.
3809 @type cfg: L{config.ConfigWriter}
3810 @param cfg: The cluster configuration
@type netmask: int
@param netmask: the netmask to be verified
3813 @raise errors.OpPrereqError: if the validation fails
3816 ip_family = cfg.GetPrimaryIPFamily()
3818 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3819 except errors.ProgrammerError:
raise errors.OpPrereqError("Invalid primary ip family: %s." %
                           ip_family, errors.ECODE_INVAL)
3822 if not ipcls.ValidateNetmask(netmask):
raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                           netmask, errors.ECODE_INVAL)
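# Usage sketch (hypothetical value): the master netmask is a CIDR prefix
# length, so on an IPv4 cluster a call such as
#   _ValidateNetmask(self.cfg, 24)
# passes, while an out-of-range prefix like 33 raises errors.OpPrereqError.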
3827 class LUClusterSetParams(LogicalUnit):
3828 """Change the parameters of the cluster.
3831 HPATH = "cluster-modify"
3832 HTYPE = constants.HTYPE_CLUSTER
3835 def CheckArguments(self):
3839 if self.op.uid_pool:
3840 uidpool.CheckUidPool(self.op.uid_pool)
3842 if self.op.add_uids:
3843 uidpool.CheckUidPool(self.op.add_uids)
3845 if self.op.remove_uids:
3846 uidpool.CheckUidPool(self.op.remove_uids)
3848 if self.op.master_netmask is not None:
3849 _ValidateNetmask(self.cfg, self.op.master_netmask)
3851 if self.op.diskparams:
3852 for dt_params in self.op.diskparams.values():
3853 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3855 def ExpandNames(self):
# FIXME: in the future, maybe other cluster params won't require checking on
# all nodes in order to be modified.
3858 self.needed_locks = {
3859 locking.LEVEL_NODE: locking.ALL_SET,
3860 locking.LEVEL_INSTANCE: locking.ALL_SET,
3861 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3863 self.share_locks = {
3864 locking.LEVEL_NODE: 1,
3865 locking.LEVEL_INSTANCE: 1,
3866 locking.LEVEL_NODEGROUP: 1,
3869 def BuildHooksEnv(self):
3874 "OP_TARGET": self.cfg.GetClusterName(),
3875 "NEW_VG_NAME": self.op.vg_name,
3878 def BuildHooksNodes(self):
3879 """Build hooks nodes.
3882 mn = self.cfg.GetMasterNode()
3885 def CheckPrereq(self):
3886 """Check prerequisites.
This checks that the given parameters do not conflict and
that the given volume group is valid.
3892 if self.op.vg_name is not None and not self.op.vg_name:
3893 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3894 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3895 " instances exist", errors.ECODE_INVAL)
3897 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3898 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3899 raise errors.OpPrereqError("Cannot disable drbd helper while"
3900 " drbd-based instances exist",
3903 node_list = self.owned_locks(locking.LEVEL_NODE)
# if vg_name is not None, check the given volume group on all nodes
3907 vglist = self.rpc.call_vg_list(node_list)
3908 for node in node_list:
3909 msg = vglist[node].fail_msg
3911 # ignoring down node
3912 self.LogWarning("Error while gathering data on node %s"
3913 " (ignoring node): %s", node, msg)
3915 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3917 constants.MIN_VG_SIZE)
3919 raise errors.OpPrereqError("Error on node '%s': %s" %
3920 (node, vgstatus), errors.ECODE_ENVIRON)
3922 if self.op.drbd_helper:
# check the given DRBD helper on all nodes
3924 helpers = self.rpc.call_drbd_helper(node_list)
3925 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3927 self.LogInfo("Not checking drbd helper on offline node %s", node)
3929 msg = helpers[node].fail_msg
3931 raise errors.OpPrereqError("Error checking drbd helper on node"
3932 " '%s': %s" % (node, msg),
3933 errors.ECODE_ENVIRON)
3934 node_helper = helpers[node].payload
3935 if node_helper != self.op.drbd_helper:
3936 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3937 (node, node_helper), errors.ECODE_ENVIRON)
3939 self.cluster = cluster = self.cfg.GetClusterInfo()
3940 # validate params changes
3941 if self.op.beparams:
3942 objects.UpgradeBeParams(self.op.beparams)
3943 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3944 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3946 if self.op.ndparams:
3947 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3948 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3950 # TODO: we need a more general way to handle resetting
3951 # cluster-level parameters to default values
3952 if self.new_ndparams["oob_program"] == "":
3953 self.new_ndparams["oob_program"] = \
3954 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3956 if self.op.hv_state:
3957 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3958 self.cluster.hv_state_static)
3959 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3960 for hv, values in new_hv_state.items())
3962 if self.op.disk_state:
3963 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3964 self.cluster.disk_state_static)
3965 self.new_disk_state = \
3966 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3967 for name, values in svalues.items()))
3968 for storage, svalues in new_disk_state.items())
3971 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3974 all_instances = self.cfg.GetAllInstancesInfo().values()
3976 for group in self.cfg.GetAllNodeGroupsInfo().values():
3977 instances = frozenset([inst for inst in all_instances
3978 if compat.any(node in group.members
3979 for node in inst.all_nodes)])
3980 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster, group),
                                    new_ipolicy, instances)
3985 violations.update(new)
self.LogWarning("After the ipolicy change the following instances"
                " violate it: %s",
                utils.CommaJoin(violations))
3992 if self.op.nicparams:
3993 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3994 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3995 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3998 # check all instances for consistency
3999 for instance in self.cfg.GetAllInstancesInfo().values():
4000 for nic_idx, nic in enumerate(instance.nics):
4001 params_copy = copy.deepcopy(nic.nicparams)
4002 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4004 # check parameter syntax
4006 objects.NIC.CheckParameterSyntax(params_filled)
4007 except errors.ConfigurationError, err:
4008 nic_errors.append("Instance %s, nic/%d: %s" %
4009 (instance.name, nic_idx, err))
4011 # if we're moving instances to routed, check that they have an ip
4012 target_mode = params_filled[constants.NIC_MODE]
4013 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4014 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4015 " address" % (instance.name, nic_idx))
4017 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4018 "\n".join(nic_errors))
4020 # hypervisor list/parameters
4021 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4022 if self.op.hvparams:
4023 for hv_name, hv_dict in self.op.hvparams.items():
4024 if hv_name not in self.new_hvparams:
4025 self.new_hvparams[hv_name] = hv_dict
4027 self.new_hvparams[hv_name].update(hv_dict)
4029 # disk template parameters
4030 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4031 if self.op.diskparams:
4032 for dt_name, dt_params in self.op.diskparams.items():
if dt_name not in self.new_diskparams:
4034 self.new_diskparams[dt_name] = dt_params
4036 self.new_diskparams[dt_name].update(dt_params)
4038 # os hypervisor parameters
4039 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4041 for os_name, hvs in self.op.os_hvp.items():
4042 if os_name not in self.new_os_hvp:
4043 self.new_os_hvp[os_name] = hvs
4045 for hv_name, hv_dict in hvs.items():
4046 if hv_name not in self.new_os_hvp[os_name]:
4047 self.new_os_hvp[os_name][hv_name] = hv_dict
4049 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4052 self.new_osp = objects.FillDict(cluster.osparams, {})
4053 if self.op.osparams:
4054 for os_name, osp in self.op.osparams.items():
4055 if os_name not in self.new_osp:
4056 self.new_osp[os_name] = {}
4058 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4061 if not self.new_osp[os_name]:
4062 # we removed all parameters
4063 del self.new_osp[os_name]
4065 # check the parameter validity (remote check)
4066 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4067 os_name, self.new_osp[os_name])
4069 # changes to the hypervisor list
4070 if self.op.enabled_hypervisors is not None:
4071 self.hv_list = self.op.enabled_hypervisors
4072 for hv in self.hv_list:
4073 # if the hypervisor doesn't already exist in the cluster
4074 # hvparams, we initialize it to empty, and then (in both
4075 # cases) we make sure to fill the defaults, as we might not
# have a complete defaults list if the hypervisor wasn't enabled before
4078 if hv not in new_hvp:
new_hvp[hv] = {}
new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4081 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
else:
self.hv_list = cluster.enabled_hypervisors
4085 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4086 # either the enabled list has changed, or the parameters have, validate
4087 for hv_name, hv_params in self.new_hvparams.items():
4088 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4089 (self.op.enabled_hypervisors and
4090 hv_name in self.op.enabled_hypervisors)):
4091 # either this is a new hypervisor, or its parameters have changed
4092 hv_class = hypervisor.GetHypervisor(hv_name)
4093 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4094 hv_class.CheckParameterSyntax(hv_params)
4095 _CheckHVParams(self, node_list, hv_name, hv_params)
4098 # no need to check any newly-enabled hypervisors, since the
4099 # defaults have already been checked in the above code-block
4100 for os_name, os_hvp in self.new_os_hvp.items():
4101 for hv_name, hv_params in os_hvp.items():
4102 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4103 # we need to fill in the new os_hvp on top of the actual hv_p
4104 cluster_defaults = self.new_hvparams.get(hv_name, {})
4105 new_osp = objects.FillDict(cluster_defaults, hv_params)
4106 hv_class = hypervisor.GetHypervisor(hv_name)
4107 hv_class.CheckParameterSyntax(new_osp)
4108 _CheckHVParams(self, node_list, hv_name, new_osp)
4110 if self.op.default_iallocator:
4111 alloc_script = utils.FindFile(self.op.default_iallocator,
4112 constants.IALLOCATOR_SEARCH_PATH,
4114 if alloc_script is None:
4115 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4116 " specified" % self.op.default_iallocator,
4119 def Exec(self, feedback_fn):
4120 """Change the parameters of the cluster.
4123 if self.op.vg_name is not None:
4124 new_volume = self.op.vg_name
4127 if new_volume != self.cfg.GetVGName():
4128 self.cfg.SetVGName(new_volume)
else:
feedback_fn("Cluster LVM configuration already in desired"
4131 " state, not changing")
4132 if self.op.drbd_helper is not None:
4133 new_helper = self.op.drbd_helper
4136 if new_helper != self.cfg.GetDRBDHelper():
4137 self.cfg.SetDRBDHelper(new_helper)
else:
feedback_fn("Cluster DRBD helper already in desired state,"
            " not changing")
4141 if self.op.hvparams:
4142 self.cluster.hvparams = self.new_hvparams
if self.op.os_hvp:
self.cluster.os_hvp = self.new_os_hvp
4145 if self.op.enabled_hypervisors is not None:
4146 self.cluster.hvparams = self.new_hvparams
4147 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4148 if self.op.beparams:
4149 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4150 if self.op.nicparams:
4151 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4153 self.cluster.ipolicy = self.new_ipolicy
4154 if self.op.osparams:
4155 self.cluster.osparams = self.new_osp
4156 if self.op.ndparams:
4157 self.cluster.ndparams = self.new_ndparams
4158 if self.op.diskparams:
4159 self.cluster.diskparams = self.new_diskparams
4160 if self.op.hv_state:
4161 self.cluster.hv_state_static = self.new_hv_state
4162 if self.op.disk_state:
4163 self.cluster.disk_state_static = self.new_disk_state
4165 if self.op.candidate_pool_size is not None:
4166 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4167 # we need to update the pool size here, otherwise the save will fail
4168 _AdjustCandidatePool(self, [])
4170 if self.op.maintain_node_health is not None:
4171 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4172 feedback_fn("Note: CONFD was disabled at build time, node health"
4173 " maintenance is not useful (still enabling it)")
4174 self.cluster.maintain_node_health = self.op.maintain_node_health
4176 if self.op.prealloc_wipe_disks is not None:
4177 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4179 if self.op.add_uids is not None:
4180 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4182 if self.op.remove_uids is not None:
4183 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4185 if self.op.uid_pool is not None:
4186 self.cluster.uid_pool = self.op.uid_pool
4188 if self.op.default_iallocator is not None:
4189 self.cluster.default_iallocator = self.op.default_iallocator
4191 if self.op.reserved_lvs is not None:
4192 self.cluster.reserved_lvs = self.op.reserved_lvs
4194 if self.op.use_external_mip_script is not None:
4195 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4197 def helper_os(aname, mods, desc):
4199 lst = getattr(self.cluster, aname)
4200 for key, val in mods:
4201 if key == constants.DDM_ADD:
4203 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4206 elif key == constants.DDM_REMOVE:
4210 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4212 raise errors.ProgrammerError("Invalid modification '%s'" % key)
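# Call sketch (hypothetical OS name): "mods" is a list of (action, value)
# pairs, so something like
#   helper_os("hidden_os", [(constants.DDM_ADD, "lenny-image")], "hidden")
# appends "lenny-image" to cluster.hidden_os unless it is already listed.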
4214 if self.op.hidden_os:
4215 helper_os("hidden_os", self.op.hidden_os, "hidden")
4217 if self.op.blacklisted_os:
4218 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4220 if self.op.master_netdev:
4221 master_params = self.cfg.GetMasterNetworkParameters()
4222 ems = self.cfg.GetUseExternalMipScript()
4223 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4224 self.cluster.master_netdev)
result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                 master_params, ems)
4227 result.Raise("Could not disable the master ip")
4228 feedback_fn("Changing master_netdev from %s to %s" %
4229 (master_params.netdev, self.op.master_netdev))
4230 self.cluster.master_netdev = self.op.master_netdev
4232 if self.op.master_netmask:
4233 master_params = self.cfg.GetMasterNetworkParameters()
4234 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4235 result = self.rpc.call_node_change_master_netmask(master_params.name,
4236 master_params.netmask,
4237 self.op.master_netmask,
master_params.ip,
master_params.netdev)
4241 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4244 self.cluster.master_netmask = self.op.master_netmask
4246 self.cfg.Update(self.cluster, feedback_fn)
4248 if self.op.master_netdev:
4249 master_params = self.cfg.GetMasterNetworkParameters()
4250 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4251 self.op.master_netdev)
4252 ems = self.cfg.GetUseExternalMipScript()
result = self.rpc.call_node_activate_master_ip(master_params.name,
                                               master_params, ems)
4256 self.LogWarning("Could not re-enable the master ip on"
4257 " the master, please restart manually: %s",
4261 def _UploadHelper(lu, nodes, fname):
4262 """Helper for uploading a file and showing warnings.
4265 if os.path.exists(fname):
4266 result = lu.rpc.call_upload_file(nodes, fname)
4267 for to_node, to_result in result.items():
4268 msg = to_result.fail_msg
4270 msg = ("Copy of file %s to node %s failed: %s" %
4271 (fname, to_node, msg))
4272 lu.proc.LogWarning(msg)
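# Usage sketch: push a single file to a set of nodes, logging (but
# tolerating) per-node failures, e.g.
#   _UploadHelper(self, self.cfg.GetOnlineNodeList(), constants.ETC_HOSTS)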
4275 def _ComputeAncillaryFiles(cluster, redist):
4276 """Compute files external to Ganeti which need to be consistent.
4278 @type redist: boolean
4279 @param redist: Whether to include files which need to be redistributed
4282 # Compute files for all nodes
4284 constants.SSH_KNOWN_HOSTS_FILE,
4285 constants.CONFD_HMAC_KEY,
4286 constants.CLUSTER_DOMAIN_SECRET_FILE,
4287 constants.SPICE_CERT_FILE,
4288 constants.SPICE_CACERT_FILE,
4289 constants.RAPI_USERS_FILE,
4293 files_all.update(constants.ALL_CERT_FILES)
4294 files_all.update(ssconf.SimpleStore().GetFileList())
4296 # we need to ship at least the RAPI certificate
4297 files_all.add(constants.RAPI_CERT_FILE)
4299 if cluster.modify_etc_hosts:
4300 files_all.add(constants.ETC_HOSTS)
4302 # Files which are optional, these must:
4303 # - be present in one other category as well
4304 # - either exist or not exist on all nodes of that category (mc, vm all)
4306 constants.RAPI_USERS_FILE,
4309 # Files which should only be on master candidates
4313 files_mc.add(constants.CLUSTER_CONF_FILE)
# FIXME: this should also be replicated but Ganeti doesn't support files_mc replication
4317 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4319 # Files which should only be on VM-capable nodes
4320 files_vm = set(filename
4321 for hv_name in cluster.enabled_hypervisors
4322 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4324 files_opt |= set(filename
4325 for hv_name in cluster.enabled_hypervisors
4326 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4328 # Filenames in each category must be unique
4329 all_files_set = files_all | files_mc | files_vm
4330 assert (len(all_files_set) ==
4331 sum(map(len, [files_all, files_mc, files_vm]))), \
4332 "Found file listed in more than one file list"
4334 # Optional files must be present in one other category
4335 assert all_files_set.issuperset(files_opt), \
4336 "Optional file not in a different required list"
4338 return (files_all, files_opt, files_mc, files_vm)
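# Return-shape sketch (illustrative subset):
#   files_all -- e.g. {constants.SSH_KNOWN_HOSTS_FILE, constants.CONFD_HMAC_KEY, ...}
#   files_opt -- files that may legitimately be absent, e.g. RAPI_USERS_FILE
#   files_mc  -- master-candidate-only files (empty when redist is True)
#   files_vm  -- hypervisor-specific files for VM-capable nodes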
4341 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4342 """Distribute additional files which are part of the cluster configuration.
4344 ConfigWriter takes care of distributing the config and ssconf files, but
4345 there are more files which should be distributed to all nodes. This function
4346 makes sure those are copied.
4348 @param lu: calling logical unit
4349 @param additional_nodes: list of nodes not in the config to distribute to
4350 @type additional_vm: boolean
4351 @param additional_vm: whether the additional nodes are vm-capable or not
4354 # Gather target nodes
4355 cluster = lu.cfg.GetClusterInfo()
4356 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4358 online_nodes = lu.cfg.GetOnlineNodeList()
4359 vm_nodes = lu.cfg.GetVmCapableNodeList()
4361 if additional_nodes is not None:
4362 online_nodes.extend(additional_nodes)
4364 vm_nodes.extend(additional_nodes)
4366 # Never distribute to master node
4367 for nodelist in [online_nodes, vm_nodes]:
4368 if master_info.name in nodelist:
4369 nodelist.remove(master_info.name)
4372 (files_all, _, files_mc, files_vm) = \
4373 _ComputeAncillaryFiles(cluster, True)
4375 # Never re-distribute configuration file from here
4376 assert not (constants.CLUSTER_CONF_FILE in files_all or
4377 constants.CLUSTER_CONF_FILE in files_vm)
4378 assert not files_mc, "Master candidates not handled in this function"
filemap = [
  (online_nodes, files_all),
  (vm_nodes, files_vm),
]
4386 for (node_list, files) in filemap:
4388 _UploadHelper(lu, node_list, fname)
4391 class LUClusterRedistConf(NoHooksLU):
4392 """Force the redistribution of cluster configuration.
4394 This is a very simple LU.
4399 def ExpandNames(self):
4400 self.needed_locks = {
4401 locking.LEVEL_NODE: locking.ALL_SET,
4403 self.share_locks[locking.LEVEL_NODE] = 1
4405 def Exec(self, feedback_fn):
4406 """Redistribute the configuration.
4409 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4410 _RedistributeAncillaryFiles(self)
4413 class LUClusterActivateMasterIp(NoHooksLU):
4414 """Activate the master IP on the master node.
4417 def Exec(self, feedback_fn):
4418 """Activate the master IP.
4421 master_params = self.cfg.GetMasterNetworkParameters()
4422 ems = self.cfg.GetUseExternalMipScript()
result = self.rpc.call_node_activate_master_ip(master_params.name,
                                               master_params, ems)
4425 result.Raise("Could not activate the master IP")
4428 class LUClusterDeactivateMasterIp(NoHooksLU):
4429 """Deactivate the master IP on the master node.
4432 def Exec(self, feedback_fn):
4433 """Deactivate the master IP.
4436 master_params = self.cfg.GetMasterNetworkParameters()
4437 ems = self.cfg.GetUseExternalMipScript()
result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                 master_params, ems)
4440 result.Raise("Could not deactivate the master IP")
4443 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4444 """Sleep and poll for an instance's disk to sync.
if not instance.disks or (disks is not None and not disks):
4450 disks = _ExpandCheckDisks(instance, disks)
4453 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4455 node = instance.primary_node
4458 lu.cfg.SetDiskID(dev, node)
4460 # TODO: Convert to utils.Retry
4463 degr_retries = 10 # in seconds, as we sleep 1 second each time
4467 cumul_degraded = False
4468 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4469 msg = rstats.fail_msg
4471 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4474 raise errors.RemoteError("Can't contact node %s for mirror data,"
4475 " aborting." % node)
4478 rstats = rstats.payload
4480 for i, mstat in enumerate(rstats):
4482 lu.LogWarning("Can't compute data for node %s/%s",
4483 node, disks[i].iv_name)
4486 cumul_degraded = (cumul_degraded or
4487 (mstat.is_degraded and mstat.sync_percent is None))
4488 if mstat.sync_percent is not None:
4490 if mstat.estimated_time is not None:
4491 rem_time = ("%s remaining (estimated)" %
4492 utils.FormatSeconds(mstat.estimated_time))
4493 max_time = mstat.estimated_time
4495 rem_time = "no time estimate"
4496 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4497 (disks[i].iv_name, mstat.sync_percent, rem_time))
4499 # if we're done but degraded, let's do a few small retries, to
4500 # make sure we see a stable and not transient situation; therefore
4501 # we force restart of the loop
4502 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4503 logging.info("Degraded disks found, %d retries left", degr_retries)
4511 time.sleep(min(60, max_time))
4514 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4515 return not cumul_degraded
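# Usage sketch (hypothetical call sites): block until all of an instance's
# disks are clean,
#   disk_ok = _WaitForSync(self, instance)
# or poll once without waiting for a full resync,
#   _WaitForSync(self, instance, oneshot=True)
# the return value is True iff no disk is left degraded.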
4518 def _BlockdevFind(lu, node, dev, instance):
4519 """Wrapper around call_blockdev_find to annotate diskparams.
4521 @param lu: A reference to the lu object
@param node: The node to call out to
4523 @param dev: The device to find
4524 @param instance: The instance object the device belongs to
4525 @returns The result of the rpc call
4528 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4529 return lu.rpc.call_blockdev_find(node, disk)
4532 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4533 """Wrapper around L{_CheckDiskConsistencyInner}.
4536 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4537 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4541 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4543 """Check that mirrors are not degraded.
4545 @attention: The device has to be annotated already.
4547 The ldisk parameter, if True, will change the test from the
4548 is_degraded attribute (which represents overall non-ok status for
4549 the device(s)) to the ldisk (representing the local storage status).
4552 lu.cfg.SetDiskID(dev, node)
4556 if on_primary or dev.AssembleOnSecondary():
4557 rstats = lu.rpc.call_blockdev_find(node, dev)
4558 msg = rstats.fail_msg
4560 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4562 elif not rstats.payload:
4563 lu.LogWarning("Can't find disk on node %s", node)
4567 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4569 result = result and not rstats.payload.is_degraded
4572 for child in dev.children:
4573 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4579 class LUOobCommand(NoHooksLU):
4580 """Logical unit for OOB handling.
4584 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4586 def ExpandNames(self):
4587 """Gather locks we need.
4590 if self.op.node_names:
4591 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4592 lock_names = self.op.node_names
4594 lock_names = locking.ALL_SET
4596 self.needed_locks = {
4597 locking.LEVEL_NODE: lock_names,
4600 def CheckPrereq(self):
4601 """Check prerequisites.
4604 - the node exists in the configuration
4607 Any errors are signaled by raising errors.OpPrereqError.
4611 self.master_node = self.cfg.GetMasterNode()
4613 assert self.op.power_delay >= 0.0
4615 if self.op.node_names:
4616 if (self.op.command in self._SKIP_MASTER and
4617 self.master_node in self.op.node_names):
4618 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4619 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4621 if master_oob_handler:
4622 additional_text = ("run '%s %s %s' if you want to operate on the"
4623 " master regardless") % (master_oob_handler,
4627 additional_text = "it does not support out-of-band operations"
4629 raise errors.OpPrereqError(("Operating on the master node %s is not"
4630 " allowed for %s; %s") %
4631 (self.master_node, self.op.command,
4632 additional_text), errors.ECODE_INVAL)
4634 self.op.node_names = self.cfg.GetNodeList()
4635 if self.op.command in self._SKIP_MASTER:
4636 self.op.node_names.remove(self.master_node)
4638 if self.op.command in self._SKIP_MASTER:
4639 assert self.master_node not in self.op.node_names
4641 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4643 raise errors.OpPrereqError("Node %s not found" % node_name,
4646 self.nodes.append(node)
4648 if (not self.op.ignore_status and
4649 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4650 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4651 " not marked offline") % node_name,
4654 def Exec(self, feedback_fn):
4655 """Execute OOB and return result if we expect any.
4658 master_node = self.master_node
4661 for idx, node in enumerate(utils.NiceSort(self.nodes,
4662 key=lambda node: node.name)):
4663 node_entry = [(constants.RS_NORMAL, node.name)]
4664 ret.append(node_entry)
4666 oob_program = _SupportsOob(self.cfg, node)
4669 node_entry.append((constants.RS_UNAVAIL, None))
4672 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4673 self.op.command, oob_program, node.name)
4674 result = self.rpc.call_run_oob(master_node, oob_program,
4675 self.op.command, node.name,
4679 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4680 node.name, result.fail_msg)
4681 node_entry.append((constants.RS_NODATA, None))
4684 self._CheckPayload(result)
4685 except errors.OpExecError, err:
4686 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4688 node_entry.append((constants.RS_NODATA, None))
4690 if self.op.command == constants.OOB_HEALTH:
4691 # For health we should log important events
4692 for item, status in result.payload:
4693 if status in [constants.OOB_STATUS_WARNING,
4694 constants.OOB_STATUS_CRITICAL]:
4695 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4696 item, node.name, status)
4698 if self.op.command == constants.OOB_POWER_ON:
node.powered = True
elif self.op.command == constants.OOB_POWER_OFF:
4701 node.powered = False
4702 elif self.op.command == constants.OOB_POWER_STATUS:
4703 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4704 if powered != node.powered:
4705 logging.warning(("Recorded power state (%s) of node '%s' does not"
4706 " match actual power state (%s)"), node.powered,
4709 # For configuration changing commands we should update the node
4710 if self.op.command in (constants.OOB_POWER_ON,
4711 constants.OOB_POWER_OFF):
4712 self.cfg.Update(node, feedback_fn)
4714 node_entry.append((constants.RS_NORMAL, result.payload))
4716 if (self.op.command == constants.OOB_POWER_ON and
4717 idx < len(self.nodes) - 1):
4718 time.sleep(self.op.power_delay)
4722 def _CheckPayload(self, result):
4723 """Checks if the payload is valid.
4725 @param result: RPC result
4726 @raises errors.OpExecError: If payload is not valid
4730 if self.op.command == constants.OOB_HEALTH:
4731 if not isinstance(result.payload, list):
4732 errs.append("command 'health' is expected to return a list but got %s" %
4733 type(result.payload))
4735 for item, status in result.payload:
4736 if status not in constants.OOB_STATUSES:
4737 errs.append("health item '%s' has invalid status '%s'" %
4740 if self.op.command == constants.OOB_POWER_STATUS:
4741 if not isinstance(result.payload, dict):
4742 errs.append("power-status is expected to return a dict but got %s" %
4743 type(result.payload))
4745 if self.op.command in [
4746 constants.OOB_POWER_ON,
4747 constants.OOB_POWER_OFF,
4748 constants.OOB_POWER_CYCLE,
4750 if result.payload is not None:
4751 errs.append("%s is expected to not return payload but got '%s'" %
4752 (self.op.command, result.payload))
4755 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4756 utils.CommaJoin(errs))
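# Payload-shape sketch (hypothetical data) for the checks above:
#   OOB_HEALTH       -- list of (item, status) pairs, e.g. [("psu", "CRITICAL")]
#   OOB_POWER_STATUS -- {constants.OOB_POWER_STATUS_POWERED: True}
#   OOB_POWER_ON/OFF/CYCLE -- None (no payload expected)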
4759 class _OsQuery(_QueryBase):
4760 FIELDS = query.OS_FIELDS
4762 def ExpandNames(self, lu):
4763 # Lock all nodes in shared mode
4764 # Temporary removal of locks, should be reverted later
4765 # TODO: reintroduce locks when they are lighter-weight
4766 lu.needed_locks = {}
4767 #self.share_locks[locking.LEVEL_NODE] = 1
4768 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4770 # The following variables interact with _QueryBase._GetNames
if self.names:
self.wanted = self.names
else:
self.wanted = locking.ALL_SET
4776 self.do_locking = self.use_locking
4778 def DeclareLocks(self, lu, level):
4782 def _DiagnoseByOS(rlist):
4783 """Remaps a per-node return list into an a per-os per-node dictionary
4785 @param rlist: a map with node names as keys and OS objects as values
4788 @return: a dictionary with osnames as keys and as value another
4789 map, with nodes as keys and tuples of (path, status, diagnose,
4790 variants, parameters, api_versions) as values, eg::
4792 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4793 (/srv/..., False, "invalid api")],
4794 "node2": [(/srv/..., True, "", [], [])]}
4799 # we build here the list of nodes that didn't fail the RPC (at RPC
4800 # level), so that nodes with a non-responding node daemon don't
4801 # make all OSes invalid
4802 good_nodes = [node_name for node_name in rlist
4803 if not rlist[node_name].fail_msg]
4804 for node_name, nr in rlist.items():
4805 if nr.fail_msg or not nr.payload:
4807 for (name, path, status, diagnose, variants,
4808 params, api_versions) in nr.payload:
4809 if name not in all_os:
4810 # build a list of nodes for this os containing empty lists
4811 # for each node in node_list
4813 for nname in good_nodes:
4814 all_os[name][nname] = []
4815 # convert params from [name, help] to (name, help)
4816 params = [tuple(v) for v in params]
4817 all_os[name][node_name].append((path, status, diagnose,
4818 variants, params, api_versions))
4821 def _GetQueryData(self, lu):
4822 """Computes the list of nodes and their attributes.
4825 # Locking is not used
4826 assert not (compat.any(lu.glm.is_owned(level)
4827 for level in locking.LEVELS
4828 if level != locking.LEVEL_CLUSTER) or
4829 self.do_locking or self.use_locking)
4831 valid_nodes = [node.name
4832 for node in lu.cfg.GetAllNodesInfo().values()
4833 if not node.offline and node.vm_capable]
4834 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4835 cluster = lu.cfg.GetClusterInfo()
4839 for (os_name, os_data) in pol.items():
4840 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4841 hidden=(os_name in cluster.hidden_os),
4842 blacklisted=(os_name in cluster.blacklisted_os))
4846 api_versions = set()
4848 for idx, osl in enumerate(os_data.values()):
4849 info.valid = bool(info.valid and osl and osl[0][1])
4853 (node_variants, node_params, node_api) = osl[0][3:6]
4856 variants.update(node_variants)
4857 parameters.update(node_params)
4858 api_versions.update(node_api)
4860 # Filter out inconsistent values
4861 variants.intersection_update(node_variants)
4862 parameters.intersection_update(node_params)
4863 api_versions.intersection_update(node_api)
4865 info.variants = list(variants)
4866 info.parameters = list(parameters)
4867 info.api_versions = list(api_versions)
4869 data[os_name] = info
4871 # Prepare data in requested order
4872 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4876 class LUOsDiagnose(NoHooksLU):
4877 """Logical unit for OS diagnose/query.
4883 def _BuildFilter(fields, names):
4884 """Builds a filter for querying OSes.
4887 name_filter = qlang.MakeSimpleFilter("name", names)
4889 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4890 # respective field is not requested
4891 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4892 for fname in ["hidden", "blacklisted"]
4893 if fname not in fields]
4894 if "valid" not in fields:
4895 status_filter.append([qlang.OP_TRUE, "valid"])
4898 status_filter.insert(0, qlang.OP_AND)
4900 status_filter = None
4902 if name_filter and status_filter:
4903 return [qlang.OP_AND, name_filter, status_filter]
4907 return status_filter
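# Filter sketch (hypothetical query): with no status fields requested, a
# query for names=["lenny-image"] builds roughly
#   [OP_AND, <name filter>, [OP_AND, [OP_NOT, [OP_TRUE, "hidden"]],
#                                    [OP_NOT, [OP_TRUE, "blacklisted"]],
#                                    [OP_TRUE, "valid"]]]
# so hidden, blacklisted and invalid OSes stay out unless asked for.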
4909 def CheckArguments(self):
4910 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4911 self.op.output_fields, False)
4913 def ExpandNames(self):
4914 self.oq.ExpandNames(self)
4916 def Exec(self, feedback_fn):
4917 return self.oq.OldStyleQuery(self)
4920 class LUNodeRemove(LogicalUnit):
4921 """Logical unit for removing a node.
4924 HPATH = "node-remove"
4925 HTYPE = constants.HTYPE_NODE
4927 def BuildHooksEnv(self):
4932 "OP_TARGET": self.op.node_name,
4933 "NODE_NAME": self.op.node_name,
4936 def BuildHooksNodes(self):
4937 """Build hooks nodes.
4939 This doesn't run on the target node in the pre phase as a failed
4940 node would then be impossible to remove.
4943 all_nodes = self.cfg.GetNodeList()
4945 all_nodes.remove(self.op.node_name)
4948 return (all_nodes, all_nodes)
4950 def CheckPrereq(self):
4951 """Check prerequisites.
4954 - the node exists in the configuration
4955 - it does not have primary or secondary instances
4956 - it's not the master
4958 Any errors are signaled by raising errors.OpPrereqError.
4961 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4962 node = self.cfg.GetNodeInfo(self.op.node_name)
4963 assert node is not None
4965 masternode = self.cfg.GetMasterNode()
4966 if node.name == masternode:
4967 raise errors.OpPrereqError("Node is the master node, failover to another"
4968 " node is required", errors.ECODE_INVAL)
4970 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4971 if node.name in instance.all_nodes:
4972 raise errors.OpPrereqError("Instance %s is still running on the node,"
4973 " please remove first" % instance_name,
4975 self.op.node_name = node.name
4978 def Exec(self, feedback_fn):
4979 """Removes the node from the cluster.
4983 logging.info("Stopping the node daemon and removing configs from node %s",
4986 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4988 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4991 # Promote nodes to master candidate as needed
4992 _AdjustCandidatePool(self, exceptions=[node.name])
4993 self.context.RemoveNode(node.name)
4995 # Run post hooks on the node before it's removed
4996 _RunPostHook(self, node.name)
4998 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4999 msg = result.fail_msg
5001 self.LogWarning("Errors encountered on the remote node while leaving"
5002 " the cluster: %s", msg)
5004 # Remove node from our /etc/hosts
5005 if self.cfg.GetClusterInfo().modify_etc_hosts:
5006 master_node = self.cfg.GetMasterNode()
5007 result = self.rpc.call_etc_hosts_modify(master_node,
5008 constants.ETC_HOSTS_REMOVE,
5010 result.Raise("Can't update hosts file with new host data")
5011 _RedistributeAncillaryFiles(self)
5014 class _NodeQuery(_QueryBase):
5015 FIELDS = query.NODE_FIELDS
5017 def ExpandNames(self, lu):
5018 lu.needed_locks = {}
5019 lu.share_locks = _ShareAll()
if self.names:
self.wanted = _GetWantedNodes(lu, self.names)
else:
self.wanted = locking.ALL_SET
5026 self.do_locking = (self.use_locking and
5027 query.NQ_LIVE in self.requested_data)
5030 # If any non-static field is requested we need to lock the nodes
5031 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5033 def DeclareLocks(self, lu, level):
5036 def _GetQueryData(self, lu):
5037 """Computes the list of nodes and their attributes.
5040 all_info = lu.cfg.GetAllNodesInfo()
5042 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5044 # Gather data as requested
5045 if query.NQ_LIVE in self.requested_data:
5046 # filter out non-vm_capable nodes
5047 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5049 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5050 [lu.cfg.GetHypervisorType()])
5051 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5052 for (name, nresult) in node_data.items()
5053 if not nresult.fail_msg and nresult.payload)
5057 if query.NQ_INST in self.requested_data:
5058 node_to_primary = dict([(name, set()) for name in nodenames])
5059 node_to_secondary = dict([(name, set()) for name in nodenames])
5061 inst_data = lu.cfg.GetAllInstancesInfo()
5063 for inst in inst_data.values():
5064 if inst.primary_node in node_to_primary:
5065 node_to_primary[inst.primary_node].add(inst.name)
5066 for secnode in inst.secondary_nodes:
5067 if secnode in node_to_secondary:
5068 node_to_secondary[secnode].add(inst.name)
else:
node_to_primary = None
5071 node_to_secondary = None
5073 if query.NQ_OOB in self.requested_data:
5074 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5075 for name, node in all_info.iteritems())
5079 if query.NQ_GROUP in self.requested_data:
5080 groups = lu.cfg.GetAllNodeGroupsInfo()
5084 return query.NodeQueryData([all_info[name] for name in nodenames],
5085 live_data, lu.cfg.GetMasterNode(),
5086 node_to_primary, node_to_secondary, groups,
5087 oob_support, lu.cfg.GetClusterInfo())
5090 class LUNodeQuery(NoHooksLU):
5091 """Logical unit for querying nodes.
5094 # pylint: disable=W0142
5097 def CheckArguments(self):
5098 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5099 self.op.output_fields, self.op.use_locking)
5101 def ExpandNames(self):
5102 self.nq.ExpandNames(self)
5104 def DeclareLocks(self, level):
5105 self.nq.DeclareLocks(self, level)
5107 def Exec(self, feedback_fn):
5108 return self.nq.OldStyleQuery(self)
5111 class LUNodeQueryvols(NoHooksLU):
5112 """Logical unit for getting volumes on node(s).
5116 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5117 _FIELDS_STATIC = utils.FieldSet("node")
5119 def CheckArguments(self):
5120 _CheckOutputFields(static=self._FIELDS_STATIC,
5121 dynamic=self._FIELDS_DYNAMIC,
5122 selected=self.op.output_fields)
5124 def ExpandNames(self):
5125 self.share_locks = _ShareAll()
5126 self.needed_locks = {}
5128 if not self.op.nodes:
5129 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
else:
self.needed_locks[locking.LEVEL_NODE] = \
5132 _GetWantedNodes(self, self.op.nodes)
5134 def Exec(self, feedback_fn):
5135 """Computes the list of nodes and their attributes.
5138 nodenames = self.owned_locks(locking.LEVEL_NODE)
5139 volumes = self.rpc.call_node_volumes(nodenames)
5141 ilist = self.cfg.GetAllInstancesInfo()
5142 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5145 for node in nodenames:
5146 nresult = volumes[node]
5149 msg = nresult.fail_msg
5151 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5154 node_vols = sorted(nresult.payload,
5155 key=operator.itemgetter("dev"))
5157 for vol in node_vols:
5159 for field in self.op.output_fields:
5162 elif field == "phys":
5166 elif field == "name":
5168 elif field == "size":
5169 val = int(float(vol["size"]))
5170 elif field == "instance":
5171 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5173 raise errors.ParameterError(field)
5174 node_output.append(str(val))
5176 output.append(node_output)
5181 class LUNodeQueryStorage(NoHooksLU):
5182 """Logical unit for getting information on storage units on node(s).
5185 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5188 def CheckArguments(self):
5189 _CheckOutputFields(static=self._FIELDS_STATIC,
5190 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5191 selected=self.op.output_fields)
5193 def ExpandNames(self):
5194 self.share_locks = _ShareAll()
5195 self.needed_locks = {}
if self.op.nodes:
self.needed_locks[locking.LEVEL_NODE] = \
5199 _GetWantedNodes(self, self.op.nodes)
else:
self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5203 def Exec(self, feedback_fn):
5204 """Computes the list of nodes and their attributes.
5207 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5209 # Always get name to sort by
5210 if constants.SF_NAME in self.op.output_fields:
5211 fields = self.op.output_fields[:]
else:
fields = [constants.SF_NAME] + self.op.output_fields
5215 # Never ask for node or type as it's only known to the LU
5216 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5217 while extra in fields:
5218 fields.remove(extra)
5220 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5221 name_idx = field_idx[constants.SF_NAME]
5223 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5224 data = self.rpc.call_storage_list(self.nodes,
5225 self.op.storage_type, st_args,
5226 self.op.name, fields)
5230 for node in utils.NiceSort(self.nodes):
5231 nresult = data[node]
5235 msg = nresult.fail_msg
5237 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5240 rows = dict([(row[name_idx], row) for row in nresult.payload])
5242 for name in utils.NiceSort(rows.keys()):
5247 for field in self.op.output_fields:
5248 if field == constants.SF_NODE:
5250 elif field == constants.SF_TYPE:
5251 val = self.op.storage_type
5252 elif field in field_idx:
5253 val = row[field_idx[field]]
5255 raise errors.ParameterError(field)
5264 class _InstanceQuery(_QueryBase):
5265 FIELDS = query.INSTANCE_FIELDS
5267 def ExpandNames(self, lu):
5268 lu.needed_locks = {}
5269 lu.share_locks = _ShareAll()
if self.names:
self.wanted = _GetWantedInstances(lu, self.names)
else:
self.wanted = locking.ALL_SET
5276 self.do_locking = (self.use_locking and
5277 query.IQ_LIVE in self.requested_data)
5279 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5280 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5281 lu.needed_locks[locking.LEVEL_NODE] = []
5282 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5284 self.do_grouplocks = (self.do_locking and
5285 query.IQ_NODES in self.requested_data)
5287 def DeclareLocks(self, lu, level):
5289 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5290 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5292 # Lock all groups used by instances optimistically; this requires going
5293 # via the node before it's locked, requiring verification later on
5294 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5296 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5297 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5298 elif level == locking.LEVEL_NODE:
5299 lu._LockInstancesNodes() # pylint: disable=W0212
5302 def _CheckGroupLocks(lu):
5303 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5304 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5306 # Check if node groups for locked instances are still correct
5307 for instance_name in owned_instances:
5308 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5310 def _GetQueryData(self, lu):
5311 """Computes the list of instances and their attributes.
5314 if self.do_grouplocks:
5315 self._CheckGroupLocks(lu)
5317 cluster = lu.cfg.GetClusterInfo()
5318 all_info = lu.cfg.GetAllInstancesInfo()
5320 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5322 instance_list = [all_info[name] for name in instance_names]
5323 nodes = frozenset(itertools.chain(*(inst.all_nodes
5324 for inst in instance_list)))
5325 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5328 wrongnode_inst = set()
5330 # Gather data as requested
5331 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5333 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5335 result = node_data[name]
5337 # offline nodes will be in both lists
5338 assert result.fail_msg
5339 offline_nodes.append(name)
if result.fail_msg:
bad_nodes.append(name)
5342 elif result.payload:
5343 for inst in result.payload:
5344 if inst in all_info:
5345 if all_info[inst].primary_node == name:
5346 live_data.update(result.payload)
5348 wrongnode_inst.add(inst)
5350 # orphan instance; we don't list it here as we don't
5351 # handle this case yet in the output of instance listing
5352 logging.warning("Orphan instance '%s' found on node %s",
5354 # else no instance is alive
5358 if query.IQ_DISKUSAGE in self.requested_data:
5359 disk_usage = dict((inst.name,
5360 _ComputeDiskSize(inst.disk_template,
5361 [{constants.IDISK_SIZE: disk.size}
5362 for disk in inst.disks]))
5363 for inst in instance_list)
5367 if query.IQ_CONSOLE in self.requested_data:
5369 for inst in instance_list:
5370 if inst.name in live_data:
5371 # Instance is running
5372 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
else:
consinfo[inst.name] = None
5375 assert set(consinfo.keys()) == set(instance_names)
5379 if query.IQ_NODES in self.requested_data:
5380 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5382 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5383 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5384 for uuid in set(map(operator.attrgetter("group"),
5390 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5391 disk_usage, offline_nodes, bad_nodes,
5392 live_data, wrongnode_inst, consinfo,
5396 class LUQuery(NoHooksLU):
5397 """Query for resources/items of a certain kind.
5400 # pylint: disable=W0142
5403 def CheckArguments(self):
5404 qcls = _GetQueryImplementation(self.op.what)
5406 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5408 def ExpandNames(self):
5409 self.impl.ExpandNames(self)
5411 def DeclareLocks(self, level):
5412 self.impl.DeclareLocks(self, level)
5414 def Exec(self, feedback_fn):
5415 return self.impl.NewStyleQuery(self)
5418 class LUQueryFields(NoHooksLU):
5419 """Query for resources/items of a certain kind.
5422 # pylint: disable=W0142
5425 def CheckArguments(self):
5426 self.qcls = _GetQueryImplementation(self.op.what)
5428 def ExpandNames(self):
5429 self.needed_locks = {}
5431 def Exec(self, feedback_fn):
5432 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5435 class LUNodeModifyStorage(NoHooksLU):
5436 """Logical unit for modifying a storage volume on a node.
5441 def CheckArguments(self):
5442 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5444 storage_type = self.op.storage_type
5447 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5449 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5450 " modified" % storage_type,
5453 diff = set(self.op.changes.keys()) - modifiable
5455 raise errors.OpPrereqError("The following fields can not be modified for"
5456 " storage units of type '%s': %r" %
5457 (storage_type, list(diff)),
5460 def ExpandNames(self):
5461 self.needed_locks = {
5462 locking.LEVEL_NODE: self.op.node_name,
5465 def Exec(self, feedback_fn):
5466 """Computes the list of nodes and their attributes.
5469 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5470 result = self.rpc.call_storage_modify(self.op.node_name,
5471 self.op.storage_type, st_args,
5472 self.op.name, self.op.changes)
5473 result.Raise("Failed to modify storage unit '%s' on %s" %
5474 (self.op.name, self.op.node_name))
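# Op sketch (hypothetical values): for LVM physical volumes the modifiable
# field is allocatability, so an opcode carrying
#   storage_type=constants.ST_LVM_PV, changes={constants.SF_ALLOCATABLE: False}
# marks the given PV as unallocatable on the node.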
5477 class LUNodeAdd(LogicalUnit):
5478 """Logical unit for adding node to the cluster.
5482 HTYPE = constants.HTYPE_NODE
5483 _NFLAGS = ["master_capable", "vm_capable"]
5485 def CheckArguments(self):
5486 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5487 # validate/normalize the node name
5488 self.hostname = netutils.GetHostname(name=self.op.node_name,
5489 family=self.primary_ip_family)
5490 self.op.node_name = self.hostname.name
5492 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5493 raise errors.OpPrereqError("Cannot readd the master node",
5496 if self.op.readd and self.op.group:
5497 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5498 " being readded", errors.ECODE_INVAL)
5500 def BuildHooksEnv(self):
5503 This will run on all nodes before, and on all nodes + the new node after.
5507 "OP_TARGET": self.op.node_name,
5508 "NODE_NAME": self.op.node_name,
5509 "NODE_PIP": self.op.primary_ip,
5510 "NODE_SIP": self.op.secondary_ip,
5511 "MASTER_CAPABLE": str(self.op.master_capable),
5512 "VM_CAPABLE": str(self.op.vm_capable),
5515 def BuildHooksNodes(self):
5516 """Build hooks nodes.
5519 # Exclude added node
5520 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5521 post_nodes = pre_nodes + [self.op.node_name, ]
5523 return (pre_nodes, post_nodes)
5525 def CheckPrereq(self):
5526 """Check prerequisites.
5529 - the new node is not already in the config
- its parameters (single/dual homed) match the cluster
5533 Any errors are signaled by raising errors.OpPrereqError.
5537 hostname = self.hostname
5538 node = hostname.name
5539 primary_ip = self.op.primary_ip = hostname.ip
5540 if self.op.secondary_ip is None:
5541 if self.primary_ip_family == netutils.IP6Address.family:
raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5543 " IPv4 address must be given as secondary",
5545 self.op.secondary_ip = primary_ip
5547 secondary_ip = self.op.secondary_ip
5548 if not netutils.IP4Address.IsValid(secondary_ip):
5549 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5550 " address" % secondary_ip, errors.ECODE_INVAL)
5552 node_list = cfg.GetNodeList()
5553 if not self.op.readd and node in node_list:
5554 raise errors.OpPrereqError("Node %s is already in the configuration" %
5555 node, errors.ECODE_EXISTS)
5556 elif self.op.readd and node not in node_list:
5557 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5560 self.changed_primary_ip = False
5562 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5563 if self.op.readd and node == existing_node_name:
5564 if existing_node.secondary_ip != secondary_ip:
5565 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5566 " address configuration as before",
5568 if existing_node.primary_ip != primary_ip:
5569 self.changed_primary_ip = True
5573 if (existing_node.primary_ip == primary_ip or
5574 existing_node.secondary_ip == primary_ip or
5575 existing_node.primary_ip == secondary_ip or
5576 existing_node.secondary_ip == secondary_ip):
5577 raise errors.OpPrereqError("New node ip address(es) conflict with"
5578 " existing node %s" % existing_node.name,
5579 errors.ECODE_NOTUNIQUE)
5581 # After this 'if' block, None is no longer a valid value for the
5582 # _capable op attributes
if self.op.readd:
old_node = self.cfg.GetNodeInfo(node)
5585 assert old_node is not None, "Can't retrieve locked node %s" % node
5586 for attr in self._NFLAGS:
5587 if getattr(self.op, attr) is None:
5588 setattr(self.op, attr, getattr(old_node, attr))
else:
for attr in self._NFLAGS:
5591 if getattr(self.op, attr) is None:
5592 setattr(self.op, attr, True)
5594 if self.op.readd and not self.op.vm_capable:
5595 pri, sec = cfg.GetNodeInstances(node)
if pri or sec:
raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5598 " flag set to false, but it already holds"
5599 " instances" % node,
5602 # check that the type of the node (single versus dual homed) is the
5603 # same as for the master
5604 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5605 master_singlehomed = myself.secondary_ip == myself.primary_ip
5606 newbie_singlehomed = secondary_ip == primary_ip
5607 if master_singlehomed != newbie_singlehomed:
5608 if master_singlehomed:
5609 raise errors.OpPrereqError("The master has no secondary ip but the"
5610 " new node has one",
5613 raise errors.OpPrereqError("The master has a secondary ip but the"
5614 " new node doesn't have one",
5617 # checks reachability
5618 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5619 raise errors.OpPrereqError("Node not reachable by ping",
5620 errors.ECODE_ENVIRON)
5622 if not newbie_singlehomed:
5623 # check reachability from my secondary ip to newbie's secondary ip
5624 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5625 source=myself.secondary_ip):
5626 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5627 " based ping to node daemon port",
5628 errors.ECODE_ENVIRON)
5635 if self.op.master_capable:
5636 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5638 self.master_candidate = False
if self.op.readd:
self.new_node = old_node
else:
node_group = cfg.LookupNodeGroup(self.op.group)
5644 self.new_node = objects.Node(name=node,
5645 primary_ip=primary_ip,
5646 secondary_ip=secondary_ip,
5647 master_candidate=self.master_candidate,
5648 offline=False, drained=False,
5651 if self.op.ndparams:
5652 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5654 if self.op.hv_state:
5655 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5657 if self.op.disk_state:
5658 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5660 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5661 # it a property on the base class.
5662 result = rpc.DnsOnlyRunner().call_version([node])[node]
5663 result.Raise("Can't get version information from node %s" % node)
5664 if constants.PROTOCOL_VERSION == result.payload:
5665 logging.info("Communication to node %s fine, sw version %s match",
5666 node, result.payload)
5668 raise errors.OpPrereqError("Version mismatch master version %s,"
5669 " node version %s" %
5670 (constants.PROTOCOL_VERSION, result.payload),
5671 errors.ECODE_ENVIRON)
5673 def Exec(self, feedback_fn):
5674 """Adds the new node to the cluster.
5677 new_node = self.new_node
5678 node = new_node.name
5680 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5681 "Not owning BGL"
5683 # We are adding a new node, so we assume it is powered
5684 new_node.powered = True
5686 # for re-adds, reset the offline/drained/master-candidate flags;
5687 # we need to reset here, otherwise offline would prevent RPC calls
5688 # later in the procedure; this also means that if the re-add
5689 # fails, we are left with a non-offlined, broken node
5690 if self.op.readd:
5691 new_node.drained = new_node.offline = False # pylint: disable=W0201
5692 self.LogInfo("Readding a node, the offline/drained flags were reset")
5693 # if we demote the node, we do cleanup later in the procedure
5694 new_node.master_candidate = self.master_candidate
5695 if self.changed_primary_ip:
5696 new_node.primary_ip = self.op.primary_ip
5698 # copy the master/vm_capable flags
5699 for attr in self._NFLAGS:
5700 setattr(new_node, attr, getattr(self.op, attr))
5702 # notify the user about any possible mc promotion
5703 if new_node.master_candidate:
5704 self.LogInfo("Node will be a master candidate")
5706 if self.op.ndparams:
5707 new_node.ndparams = self.op.ndparams
5708 else:
5709 new_node.ndparams = {}
5711 if self.op.hv_state:
5712 new_node.hv_state_static = self.new_hv_state
5714 if self.op.disk_state:
5715 new_node.disk_state_static = self.new_disk_state
5717 # Add node to our /etc/hosts, and add key to known_hosts
5718 if self.cfg.GetClusterInfo().modify_etc_hosts:
5719 master_node = self.cfg.GetMasterNode()
5720 result = self.rpc.call_etc_hosts_modify(master_node,
5721 constants.ETC_HOSTS_ADD,
5722 self.hostname.name,
5723 self.hostname.ip)
5724 result.Raise("Can't update hosts file with new host data")
5726 if new_node.secondary_ip != new_node.primary_ip:
5727 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5728 False)
5730 node_verify_list = [self.cfg.GetMasterNode()]
5731 node_verify_param = {
5732 constants.NV_NODELIST: ([node], {}),
5733 # TODO: do a node-net-test as well?
5734 }
5736 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5737 self.cfg.GetClusterName())
5738 for verifier in node_verify_list:
5739 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5740 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5741 if nl_payload:
5742 for failed in nl_payload:
5743 feedback_fn("ssh/hostname verification failed"
5744 " (checking from %s): %s" %
5745 (verifier, nl_payload[failed]))
5746 raise errors.OpExecError("ssh/hostname verification failed")
5748 if self.op.readd:
5749 _RedistributeAncillaryFiles(self)
5750 self.context.ReaddNode(new_node)
5751 # make sure we redistribute the config
5752 self.cfg.Update(new_node, feedback_fn)
5753 # and make sure the new node will not have old files around
5754 if not new_node.master_candidate:
5755 result = self.rpc.call_node_demote_from_mc(new_node.name)
5756 msg = result.fail_msg
5757 if msg:
5758 self.LogWarning("Node failed to demote itself from master"
5759 " candidate status: %s" % msg)
5760 else:
5761 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5762 additional_vm=self.op.vm_capable)
5763 self.context.AddNode(new_node, self.proc.GetECId())
5766 class LUNodeSetParams(LogicalUnit):
5767 """Modifies the parameters of a node.
5769 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5770 to the node role (as _ROLE_*)
5771 @cvar _R2F: a dictionary from node role to tuples of flags
5772 @cvar _FLAGS: a list of attribute names corresponding to the flags
5774 """
5775 HPATH = "node-modify"
5776 HTYPE = constants.HTYPE_NODE
5777 REQ_BGL = False
5778 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5779 _F2R = {
5780 (True, False, False): _ROLE_CANDIDATE,
5781 (False, True, False): _ROLE_DRAINED,
5782 (False, False, True): _ROLE_OFFLINE,
5783 (False, False, False): _ROLE_REGULAR,
5784 }
5785 _R2F = dict((v, k) for k, v in _F2R.items())
5786 _FLAGS = ["master_candidate", "drained", "offline"]
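# Example of the mapping: a node with flags (master_candidate=True,
# drained=False, offline=False) has role _ROLE_CANDIDATE; _R2F is simply the
# inverse of _F2R and recovers the flag tuple from a role.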
5788 def CheckArguments(self):
5789 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5790 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5791 self.op.master_capable, self.op.vm_capable,
5792 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5793 self.op.disk_state]
5794 if all_mods.count(None) == len(all_mods):
5795 raise errors.OpPrereqError("Please pass at least one modification",
5796 errors.ECODE_INVAL)
5797 if all_mods.count(True) > 1:
5798 raise errors.OpPrereqError("Can't set the node into more than one"
5799 " state at the same time",
5800 errors.ECODE_INVAL)
5802 # Boolean value that tells us whether we might be demoting from MC
5803 self.might_demote = (self.op.master_candidate == False or
5804 self.op.offline == True or
5805 self.op.drained == True or
5806 self.op.master_capable == False)
5808 if self.op.secondary_ip:
5809 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5810 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5811 " address" % self.op.secondary_ip,
5814 self.lock_all = self.op.auto_promote and self.might_demote
5815 self.lock_instances = self.op.secondary_ip is not None
5817 def _InstanceFilter(self, instance):
5818 """Filter for getting affected instances.
5821 return (instance.disk_template in constants.DTS_INT_MIRROR and
5822 self.op.node_name in instance.all_nodes)
5824 def ExpandNames(self):
5825 if self.lock_all:
5826 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5827 else:
5828 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5830 # Since modifying a node can have severe effects on currently running
5831 # operations the resource lock is at least acquired in shared mode
5832 self.needed_locks[locking.LEVEL_NODE_RES] = \
5833 self.needed_locks[locking.LEVEL_NODE]
5835 # Get node resource and instance locks in shared mode; they are not used
5836 # for anything but read-only access
5837 self.share_locks[locking.LEVEL_NODE_RES] = 1
5838 self.share_locks[locking.LEVEL_INSTANCE] = 1
5840 if self.lock_instances:
5841 self.needed_locks[locking.LEVEL_INSTANCE] = \
5842 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
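# Changing the secondary IP only matters for internally mirrored (DRBD)
# instances using this node, so exactly those instance locks are acquired;
# CheckPrereq re-checks that this set has not changed in the meantime.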
5844 def BuildHooksEnv(self):
5845 """Build hooks env.
5847 This runs on the master node.
5849 """
5850 return {
5851 "OP_TARGET": self.op.node_name,
5852 "MASTER_CANDIDATE": str(self.op.master_candidate),
5853 "OFFLINE": str(self.op.offline),
5854 "DRAINED": str(self.op.drained),
5855 "MASTER_CAPABLE": str(self.op.master_capable),
5856 "VM_CAPABLE": str(self.op.vm_capable),
5857 }
5859 def BuildHooksNodes(self):
5860 """Build hooks nodes.
5863 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5866 def CheckPrereq(self):
5867 """Check prerequisites.
5869 This only checks the instance list against the existing names.
5871 """
5872 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5874 if self.lock_instances:
5875 affected_instances = \
5876 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5878 # Verify instance locks
5879 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5880 wanted_instances = frozenset(affected_instances.keys())
5881 if wanted_instances - owned_instances:
5882 raise errors.OpPrereqError("Instances affected by changing node %s's"
5883 " secondary IP address have changed since"
5884 " locks were acquired, wanted '%s', have"
5885 " '%s'; retry the operation" %
5887 utils.CommaJoin(wanted_instances),
5888 utils.CommaJoin(owned_instances)),
5891 affected_instances = None
5893 if (self.op.master_candidate is not None or
5894 self.op.drained is not None or
5895 self.op.offline is not None):
5896 # we can't change the master's node flags
5897 if self.op.node_name == self.cfg.GetMasterNode():
5898 raise errors.OpPrereqError("The master role can be changed"
5899 " only via master-failover",
5902 if self.op.master_candidate and not node.master_capable:
5903 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5904 " it a master candidate" % node.name,
5907 if self.op.vm_capable == False:
5908 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5909 if ipri or isec:
5910 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5911 " the vm_capable flag" % node.name,
5912 errors.ECODE_STATE)
5914 if node.master_candidate and self.might_demote and not self.lock_all:
5915 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5916 # check if after removing the current node, we're missing master
5917 # candidates
5918 (mc_remaining, mc_should, _) = \
5919 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5920 if mc_remaining < mc_should:
5921 raise errors.OpPrereqError("Not enough master candidates, please"
5922 " pass auto promote option to allow"
5923 " promotion", errors.ECODE_STATE)
5925 self.old_flags = old_flags = (node.master_candidate,
5926 node.drained, node.offline)
5927 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5928 self.old_role = old_role = self._F2R[old_flags]
5930 # Check for ineffective changes
5931 for attr in self._FLAGS:
5932 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5933 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5934 setattr(self.op, attr, None)
5936 # Past this point, any flag change to False means a transition
5937 # away from the respective state, as only real changes are kept
5939 # TODO: We might query the real power state if it supports OOB
5940 if _SupportsOob(self.cfg, node):
5941 if self.op.offline is False and not (node.powered or
5942 self.op.powered == True):
5943 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5944 " offline status can be reset") %
5946 elif self.op.powered is not None:
5947 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5948 " as it does not support out-of-band"
5949 " handling") % self.op.node_name)
5951 # If we're being deofflined/drained, we'll MC ourself if needed
5952 if (self.op.drained == False or self.op.offline == False or
5953 (self.op.master_capable and not node.master_capable)):
5954 if _DecideSelfPromotion(self):
5955 self.op.master_candidate = True
5956 self.LogInfo("Auto-promoting node to master candidate")
5958 # If we're no longer master capable, we'll demote ourselves from MC
5959 if self.op.master_capable == False and node.master_candidate:
5960 self.LogInfo("Demoting from master candidate")
5961 self.op.master_candidate = False
5963 # Compute new role
5964 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5965 if self.op.master_candidate:
5966 new_role = self._ROLE_CANDIDATE
5967 elif self.op.drained:
5968 new_role = self._ROLE_DRAINED
5969 elif self.op.offline:
5970 new_role = self._ROLE_OFFLINE
5971 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5972 # False is still in new flags, which means we're un-setting (the
5973 # offline/drained) state
5974 new_role = self._ROLE_REGULAR
5975 else: # no new flags, nothing, keep old role
5976 new_role = old_role
5978 self.new_role = new_role
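# Example: requesting offline=True on a node that is currently a master
# candidate yields old_role=_ROLE_CANDIDATE and new_role=_ROLE_OFFLINE; the
# flag tuple written back in Exec is then (False, False, True).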
5980 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5981 # Trying to transition out of offline status
5982 result = self.rpc.call_version([node.name])[node.name]
5983 if result.fail_msg:
5984 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5985 " to report its version: %s" %
5986 (node.name, result.fail_msg),
5987 errors.ECODE_STATE)
5988 else:
5989 self.LogWarning("Transitioning node from offline to online state"
5990 " without using re-add. Please make sure the node"
5991 " is healthy!")
5993 if self.op.secondary_ip:
5994 # Ok even without locking, because this can't be changed by any LU
5995 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5996 master_singlehomed = master.secondary_ip == master.primary_ip
5997 if master_singlehomed and self.op.secondary_ip:
5998 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5999 " homed cluster", errors.ECODE_INVAL)
6001 assert not (frozenset(affected_instances) -
6002 self.owned_locks(locking.LEVEL_INSTANCE))
6004 if node.offline:
6005 if affected_instances:
6006 raise errors.OpPrereqError("Cannot change secondary IP address:"
6007 " offline node has instances (%s)"
6008 " configured to use it" %
6009 utils.CommaJoin(affected_instances.keys()))
6010 else:
6011 # On online nodes, check that no instances are running, and that
6012 # the node has the new ip and we can reach it.
6013 for instance in affected_instances.values():
6014 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6015 msg="cannot change secondary ip")
6017 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6018 if master.name != node.name:
6019 # check reachability from master secondary ip to new secondary ip
6020 if not netutils.TcpPing(self.op.secondary_ip,
6021 constants.DEFAULT_NODED_PORT,
6022 source=master.secondary_ip):
6023 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6024 " based ping to node daemon port",
6025 errors.ECODE_ENVIRON)
6027 if self.op.ndparams:
6028 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6029 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6030 self.new_ndparams = new_ndparams
6032 if self.op.hv_state:
6033 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6034 self.node.hv_state_static)
6036 if self.op.disk_state:
6037 self.new_disk_state = \
6038 _MergeAndVerifyDiskState(self.op.disk_state,
6039 self.node.disk_state_static)
6041 def Exec(self, feedback_fn):
6042 """Modifies a node.
6044 """
6045 node = self.node
6046 old_role = self.old_role
6047 new_role = self.new_role
6049 result = []
6051 if self.op.ndparams:
6052 node.ndparams = self.new_ndparams
6053 result.append(("ndparams", str(self.new_ndparams)))
6054 if self.op.powered is not None:
6055 node.powered = self.op.powered
6056 result.append(("powered", str(self.op.powered)))
6057 if self.op.hv_state:
6058 node.hv_state_static = self.new_hv_state
6060 if self.op.disk_state:
6061 node.disk_state_static = self.new_disk_state
6063 for attr in ["master_capable", "vm_capable"]:
6064 val = getattr(self.op, attr)
6065 if val is not None:
6066 setattr(node, attr, val)
6067 result.append((attr, str(val)))
6069 if new_role != old_role:
6070 # Tell the node to demote itself, if no longer MC and not offline
6071 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6072 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6073 if msg:
6074 self.LogWarning("Node failed to demote itself: %s", msg)
6076 new_flags = self._R2F[new_role]
6077 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6078 if of != nf:
6079 result.append((desc, str(nf)))
6080 (node.master_candidate, node.drained, node.offline) = new_flags
6082 # we locked all nodes, we adjust the CP before updating this node
6083 if self.lock_all:
6084 _AdjustCandidatePool(self, [node.name])
6086 if self.op.secondary_ip:
6087 node.secondary_ip = self.op.secondary_ip
6088 result.append(("secondary_ip", self.op.secondary_ip))
6090 # this will trigger configuration file update, if needed
6091 self.cfg.Update(node, feedback_fn)
6093 # this will trigger job queue propagation or cleanup if the mc
6094 # flag changed
6095 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6096 self.context.ReaddNode(node)
6098 return result
6101 class LUNodePowercycle(NoHooksLU):
6102 """Powercycles a node.
6107 def CheckArguments(self):
6108 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6109 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6110 raise errors.OpPrereqError("The node is the master and the force"
6111 " parameter was not set",
6114 def ExpandNames(self):
6115 """Locking for PowercycleNode.
6117 This is a last-resort option and shouldn't block on other
6118 jobs. Therefore, we grab no locks.
6120 """
6121 self.needed_locks = {}
6123 def Exec(self, feedback_fn):
6124 """Reboots a node.
6126 """
6127 result = self.rpc.call_node_powercycle(self.op.node_name,
6128 self.cfg.GetHypervisorType())
6129 result.Raise("Failed to schedule the reboot")
6130 return result.payload
6133 class LUClusterQuery(NoHooksLU):
6134 """Query cluster configuration.
6139 def ExpandNames(self):
6140 self.needed_locks = {}
6142 def Exec(self, feedback_fn):
6143 """Return cluster config.
6146 cluster = self.cfg.GetClusterInfo()
6149 # Filter just for enabled hypervisors
6150 for os_name, hv_dict in cluster.os_hvp.items():
6151 os_hvp[os_name] = {}
6152 for hv_name, hv_params in hv_dict.items():
6153 if hv_name in cluster.enabled_hypervisors:
6154 os_hvp[os_name][hv_name] = hv_params
6156 # Convert ip_family to ip_version
6157 primary_ip_version = constants.IP4_VERSION
6158 if cluster.primary_ip_family == netutils.IP6Address.family:
6159 primary_ip_version = constants.IP6_VERSION
6162 "software_version": constants.RELEASE_VERSION,
6163 "protocol_version": constants.PROTOCOL_VERSION,
6164 "config_version": constants.CONFIG_VERSION,
6165 "os_api_version": max(constants.OS_API_VERSIONS),
6166 "export_version": constants.EXPORT_VERSION,
6167 "architecture": runtime.GetArchInfo(),
6168 "name": cluster.cluster_name,
6169 "master": cluster.master_node,
6170 "default_hypervisor": cluster.primary_hypervisor,
6171 "enabled_hypervisors": cluster.enabled_hypervisors,
6172 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6173 for hypervisor_name in cluster.enabled_hypervisors]),
6174 "os_hvp": os_hvp,
6175 "beparams": cluster.beparams,
6176 "osparams": cluster.osparams,
6177 "ipolicy": cluster.ipolicy,
6178 "nicparams": cluster.nicparams,
6179 "ndparams": cluster.ndparams,
6180 "diskparams": cluster.diskparams,
6181 "candidate_pool_size": cluster.candidate_pool_size,
6182 "master_netdev": cluster.master_netdev,
6183 "master_netmask": cluster.master_netmask,
6184 "use_external_mip_script": cluster.use_external_mip_script,
6185 "volume_group_name": cluster.volume_group_name,
6186 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6187 "file_storage_dir": cluster.file_storage_dir,
6188 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6189 "maintain_node_health": cluster.maintain_node_health,
6190 "ctime": cluster.ctime,
6191 "mtime": cluster.mtime,
6192 "uuid": cluster.uuid,
6193 "tags": list(cluster.GetTags()),
6194 "uid_pool": cluster.uid_pool,
6195 "default_iallocator": cluster.default_iallocator,
6196 "reserved_lvs": cluster.reserved_lvs,
6197 "primary_ip_version": primary_ip_version,
6198 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6199 "hidden_os": cluster.hidden_os,
6200 "blacklisted_os": cluster.blacklisted_os,
6206 class LUClusterConfigQuery(NoHooksLU):
6207 """Return configuration values.
6212 def CheckArguments(self):
6213 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6215 def ExpandNames(self):
6216 self.cq.ExpandNames(self)
6218 def DeclareLocks(self, level):
6219 self.cq.DeclareLocks(self, level)
6221 def Exec(self, feedback_fn):
6222 result = self.cq.OldStyleQuery(self)
6224 assert len(result) == 1
6226 return result[0]
6229 class _ClusterQuery(_QueryBase):
6230 FIELDS = query.CLUSTER_FIELDS
6232 #: Do not sort (there is only one item)
6233 SORT_FIELD = None
6235 def ExpandNames(self, lu):
6236 lu.needed_locks = {}
6238 # The following variables interact with _QueryBase._GetNames
6239 self.wanted = locking.ALL_SET
6240 self.do_locking = self.use_locking
6243 raise errors.OpPrereqError("Can not use locking for cluster queries",
6246 def DeclareLocks(self, lu, level):
6247 pass
6249 def _GetQueryData(self, lu):
6250 """Computes the list of nodes and their attributes.
6253 # Locking is not used
6254 assert not (compat.any(lu.glm.is_owned(level)
6255 for level in locking.LEVELS
6256 if level != locking.LEVEL_CLUSTER) or
6257 self.do_locking or self.use_locking)
6259 if query.CQ_CONFIG in self.requested_data:
6260 cluster = lu.cfg.GetClusterInfo()
6261 else:
6262 cluster = NotImplemented
6264 if query.CQ_QUEUE_DRAINED in self.requested_data:
6265 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6266 else:
6267 drain_flag = NotImplemented
6269 if query.CQ_WATCHER_PAUSE in self.requested_data:
6270 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6271 else:
6272 watcher_pause = NotImplemented
6274 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6277 class LUInstanceActivateDisks(NoHooksLU):
6278 """Bring up an instance's disks.
6283 def ExpandNames(self):
6284 self._ExpandAndLockInstance()
6285 self.needed_locks[locking.LEVEL_NODE] = []
6286 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6288 def DeclareLocks(self, level):
6289 if level == locking.LEVEL_NODE:
6290 self._LockInstancesNodes()
6292 def CheckPrereq(self):
6293 """Check prerequisites.
6295 This checks that the instance is in the cluster.
6297 """
6298 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6299 assert self.instance is not None, \
6300 "Cannot retrieve locked instance %s" % self.op.instance_name
6301 _CheckNodeOnline(self, self.instance.primary_node)
6303 def Exec(self, feedback_fn):
6304 """Activate the disks.
6307 disks_ok, disks_info = \
6308 _AssembleInstanceDisks(self, self.instance,
6309 ignore_size=self.op.ignore_size)
6311 raise errors.OpExecError("Cannot activate block devices")
6316 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6317 ignore_size=False):
6318 """Prepare the block devices for an instance.
6320 This sets up the block devices on all nodes.
6322 @type lu: L{LogicalUnit}
6323 @param lu: the logical unit on whose behalf we execute
6324 @type instance: L{objects.Instance}
6325 @param instance: the instance for whose disks we assemble
6326 @type disks: list of L{objects.Disk} or None
6327 @param disks: which disks to assemble (or all, if None)
6328 @type ignore_secondaries: boolean
6329 @param ignore_secondaries: if true, errors on secondary nodes
6330 won't result in an error return from the function
6331 @type ignore_size: boolean
6332 @param ignore_size: if true, the current known size of the disk
6333 will not be used during the disk activation, useful for cases
6334 when the size is wrong
6335 @return: False if the operation failed, otherwise a list of
6336 (host, instance_visible_name, node_visible_name)
6337 with the mapping from node devices to instance devices
6339 """
6340 device_info = []
6341 disks_ok = True
6342 iname = instance.name
6343 disks = _ExpandCheckDisks(instance, disks)
6345 # With the two-pass mechanism we try to reduce the window of
6346 # opportunity for the race condition of switching DRBD to primary
6347 # before handshaking occurred, but we do not eliminate it
6349 # The proper fix would be to wait (with some limits) until the
6350 # connection has been made and drbd transitions from WFConnection
6351 # into any other network-connected state (Connected, SyncTarget,
6352 # SyncSource, etc.)
6354 # 1st pass, assemble on all nodes in secondary mode
6355 for idx, inst_disk in enumerate(disks):
6356 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6357 if ignore_size:
6358 node_disk = node_disk.Copy()
6359 node_disk.UnsetSize()
6360 lu.cfg.SetDiskID(node_disk, node)
6361 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6362 False, idx)
6363 msg = result.fail_msg
6364 if msg:
6365 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6366 " (is_primary=False, pass=1): %s",
6367 inst_disk.iv_name, node, msg)
6368 if not ignore_secondaries:
6369 disks_ok = False
6371 # FIXME: race condition on drbd migration to primary
6373 # 2nd pass, do only the primary node
6374 for idx, inst_disk in enumerate(disks):
6375 dev_path = None
6377 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6378 if node != instance.primary_node:
6379 continue
6380 if ignore_size:
6381 node_disk = node_disk.Copy()
6382 node_disk.UnsetSize()
6383 lu.cfg.SetDiskID(node_disk, node)
6384 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6385 True, idx)
6386 msg = result.fail_msg
6387 if msg:
6388 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6389 " (is_primary=True, pass=2): %s",
6390 inst_disk.iv_name, node, msg)
6391 disks_ok = False
6392 else:
6393 dev_path = result.payload
6395 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6397 # leave the disks configured for the primary node
6398 # this is a workaround that would be fixed better by
6399 # improving the logical/physical id handling
6400 for disk in disks:
6401 lu.cfg.SetDiskID(disk, instance.primary_node)
6403 return disks_ok, device_info
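# _StartInstanceDisks below is a convenience wrapper around
# _AssembleInstanceDisks that tears the disks down again and aborts with
# OpExecError if the assembly was not fully successful.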
6406 def _StartInstanceDisks(lu, instance, force):
6407 """Start the disks of an instance.
6410 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6411 ignore_secondaries=force)
6413 _ShutdownInstanceDisks(lu, instance)
6414 if force is not None and not force:
6415 lu.proc.LogWarning("", hint="If the message above refers to a"
6416 " secondary node,"
6417 " you can retry the operation using '--force'.")
6418 raise errors.OpExecError("Disk consistency error")
6421 class LUInstanceDeactivateDisks(NoHooksLU):
6422 """Shutdown an instance's disks.
6427 def ExpandNames(self):
6428 self._ExpandAndLockInstance()
6429 self.needed_locks[locking.LEVEL_NODE] = []
6430 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6432 def DeclareLocks(self, level):
6433 if level == locking.LEVEL_NODE:
6434 self._LockInstancesNodes()
6436 def CheckPrereq(self):
6437 """Check prerequisites.
6439 This checks that the instance is in the cluster.
6441 """
6442 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6443 assert self.instance is not None, \
6444 "Cannot retrieve locked instance %s" % self.op.instance_name
6446 def Exec(self, feedback_fn):
6447 """Deactivate the disks
6450 instance = self.instance
6452 _ShutdownInstanceDisks(self, instance)
6454 _SafeShutdownInstanceDisks(self, instance)
6457 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6458 """Shutdown block devices of an instance.
6460 This function checks if an instance is running, before calling
6461 _ShutdownInstanceDisks.
6463 """
6464 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6465 _ShutdownInstanceDisks(lu, instance, disks=disks)
6468 def _ExpandCheckDisks(instance, disks):
6469 """Return the instance disks selected by the disks list
6471 @type disks: list of L{objects.Disk} or None
6472 @param disks: selected disks
6473 @rtype: list of L{objects.Disk}
6474 @return: selected instance disks to act on
6476 """
6477 if disks is None:
6478 return instance.disks
6479 else:
6480 if not set(disks).issubset(instance.disks):
6481 raise errors.ProgrammerError("Can only act on disks belonging to the"
6482 " target instance")
6483 return disks
6486 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6487 """Shutdown block devices of an instance.
6489 This does the shutdown on all nodes of the instance.
6491 If ignore_primary is false, errors on the primary node are
6492 ignored.
6494 """
6495 all_result = True
6496 disks = _ExpandCheckDisks(instance, disks)
6498 for disk in disks:
6499 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6500 lu.cfg.SetDiskID(top_disk, node)
6501 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6502 msg = result.fail_msg
6503 if msg:
6504 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6505 disk.iv_name, node, msg)
6506 if ((node == instance.primary_node and not ignore_primary) or
6507 (node != instance.primary_node and not result.offline)):
6508 all_result = False
6509 return all_result
6512 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6513 """Checks if a node has enough free memory.
6515 This function checks if a given node has the needed amount of free
6516 memory. In case the node has less memory or we cannot get the
6517 information from the node, this function raises an OpPrereqError
6518 exception.
6520 @type lu: C{LogicalUnit}
6521 @param lu: a logical unit from which we get configuration data
6522 @type node: C{str}
6523 @param node: the node to check
6524 @type reason: C{str}
6525 @param reason: string to use in the error message
6526 @type requested: C{int}
6527 @param requested: the amount of memory in MiB to check for
6528 @type hypervisor_name: C{str}
6529 @param hypervisor_name: the hypervisor to ask for memory stats
6530 @rtype: C{int}
6531 @return: node current free memory
6532 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6533 we cannot check the node
6535 """
6536 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6537 nodeinfo[node].Raise("Can't get data from node %s" % node,
6538 prereq=True, ecode=errors.ECODE_ENVIRON)
6539 (_, _, (hv_info, )) = nodeinfo[node].payload
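# The payload is a 3-tuple; its last element contains one info dict per
# requested hypervisor, and only a single hypervisor was requested above.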
6541 free_mem = hv_info.get("memory_free", None)
6542 if not isinstance(free_mem, int):
6543 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6544 " was '%s'" % (node, free_mem),
6545 errors.ECODE_ENVIRON)
6546 if requested > free_mem:
6547 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6548 " needed %s MiB, available %s MiB" %
6549 (node, reason, requested, free_mem),
6550 errors.ECODE_NORES)
6551 return free_mem
6554 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6555 """Checks if nodes have enough free disk space in the all VGs.
6557 This function checks if all given nodes have the needed amount of
6558 free disk. In case any node has less disk or we cannot get the
6559 information from the node, this function raises an OpPrereqError
6560 exception.
6562 @type lu: C{LogicalUnit}
6563 @param lu: a logical unit from which we get configuration data
6564 @type nodenames: C{list}
6565 @param nodenames: the list of node names to check
6566 @type req_sizes: C{dict}
6567 @param req_sizes: the hash of vg and corresponding amount of disk in
6568 MiB to check for
6569 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6570 or we cannot check the node
6572 """
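# Example: req_sizes={"xenvg": 1024} requires 1024 MiB of free space in volume
# group "xenvg" on every node in nodenames.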
6573 for vg, req_size in req_sizes.items():
6574 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6577 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6578 """Checks if nodes have enough free disk space in the specified VG.
6580 This function checks if all given nodes have the needed amount of
6581 free disk. In case any node has less disk or we cannot get the
6582 information from the node, this function raises an OpPrereqError
6583 exception.
6585 @type lu: C{LogicalUnit}
6586 @param lu: a logical unit from which we get configuration data
6587 @type nodenames: C{list}
6588 @param nodenames: the list of node names to check
6589 @type vg: C{str}
6590 @param vg: the volume group to check
6591 @type requested: C{int}
6592 @param requested: the amount of disk in MiB to check for
6593 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6594 or we cannot check the node
6597 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6598 for node in nodenames:
6599 info = nodeinfo[node]
6600 info.Raise("Cannot get current information from node %s" % node,
6601 prereq=True, ecode=errors.ECODE_ENVIRON)
6602 (_, (vg_info, ), _) = info.payload
6603 vg_free = vg_info.get("vg_free", None)
6604 if not isinstance(vg_free, int):
6605 raise errors.OpPrereqError("Can't compute free disk space on node"
6606 " %s for vg %s, result was '%s'" %
6607 (node, vg, vg_free), errors.ECODE_ENVIRON)
6608 if requested > vg_free:
6609 raise errors.OpPrereqError("Not enough disk space on target node %s"
6610 " vg %s: required %d MiB, available %d MiB" %
6611 (node, vg, requested, vg_free),
6612 errors.ECODE_NORES)
6615 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6616 """Checks if nodes have enough physical CPUs
6618 This function checks if all given nodes have the needed number of
6619 physical CPUs. In case any node has less CPUs or we cannot get the
6620 information from the node, this function raises an OpPrereqError
6621 exception.
6623 @type lu: C{LogicalUnit}
6624 @param lu: a logical unit from which we get configuration data
6625 @type nodenames: C{list}
6626 @param nodenames: the list of node names to check
6627 @type requested: C{int}
6628 @param requested: the minimum acceptable number of physical CPUs
6629 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6630 or we cannot check the node
6632 """
6633 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6634 for node in nodenames:
6635 info = nodeinfo[node]
6636 info.Raise("Cannot get current information from node %s" % node,
6637 prereq=True, ecode=errors.ECODE_ENVIRON)
6638 (_, _, (hv_info, )) = info.payload
6639 num_cpus = hv_info.get("cpu_total", None)
6640 if not isinstance(num_cpus, int):
6641 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6642 " on node %s, result was '%s'" %
6643 (node, num_cpus), errors.ECODE_ENVIRON)
6644 if requested > num_cpus:
6645 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6646 "required" % (node, num_cpus, requested),
6650 class LUInstanceStartup(LogicalUnit):
6651 """Starts an instance.
6654 HPATH = "instance-start"
6655 HTYPE = constants.HTYPE_INSTANCE
6656 REQ_BGL = False
6658 def CheckArguments(self):
6659 # extra beparams
6660 if self.op.beparams:
6661 # fill the beparams dict
6662 objects.UpgradeBeParams(self.op.beparams)
6663 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6665 def ExpandNames(self):
6666 self._ExpandAndLockInstance()
6667 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6669 def DeclareLocks(self, level):
6670 if level == locking.LEVEL_NODE_RES:
6671 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6673 def BuildHooksEnv(self):
6674 """Build hooks env.
6676 This runs on master, primary and secondary nodes of the instance.
6678 """
6679 env = {
6680 "FORCE": self.op.force,
6681 }
6683 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6685 return env
6687 def BuildHooksNodes(self):
6688 """Build hooks nodes.
6691 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6694 def CheckPrereq(self):
6695 """Check prerequisites.
6697 This checks that the instance is in the cluster.
6699 """
6700 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6701 assert self.instance is not None, \
6702 "Cannot retrieve locked instance %s" % self.op.instance_name
6704 # extra hvparams
6705 if self.op.hvparams:
6706 # check hypervisor parameter syntax (locally)
6707 cluster = self.cfg.GetClusterInfo()
6708 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6709 filled_hvp = cluster.FillHV(instance)
6710 filled_hvp.update(self.op.hvparams)
6711 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6712 hv_type.CheckParameterSyntax(filled_hvp)
6713 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6715 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6717 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6719 if self.primary_offline and self.op.ignore_offline_nodes:
6720 self.proc.LogWarning("Ignoring offline primary node")
6722 if self.op.hvparams or self.op.beparams:
6723 self.proc.LogWarning("Overridden parameters are ignored")
6725 _CheckNodeOnline(self, instance.primary_node)
6727 bep = self.cfg.GetClusterInfo().FillBE(instance)
6728 bep.update(self.op.beparams)
6730 # check bridges existence
6731 _CheckInstanceBridgesExist(self, instance)
6733 remote_info = self.rpc.call_instance_info(instance.primary_node,
6734 instance.name,
6735 instance.hypervisor)
6736 remote_info.Raise("Error checking node %s" % instance.primary_node,
6737 prereq=True, ecode=errors.ECODE_ENVIRON)
6738 if not remote_info.payload: # not running already
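# The free-memory check is only relevant when the instance is not already
# running; a running instance's memory is already accounted for on the node.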
6739 _CheckNodeFreeMemory(self, instance.primary_node,
6740 "starting instance %s" % instance.name,
6741 bep[constants.BE_MINMEM], instance.hypervisor)
6743 def Exec(self, feedback_fn):
6744 """Start the instance.
6747 instance = self.instance
6748 force = self.op.force
6750 if not self.op.no_remember:
6751 self.cfg.MarkInstanceUp(instance.name)
6753 if self.primary_offline:
6754 assert self.op.ignore_offline_nodes
6755 self.proc.LogInfo("Primary node offline, marked instance as started")
6757 node_current = instance.primary_node
6759 _StartInstanceDisks(self, instance, force)
6761 result = \
6762 self.rpc.call_instance_start(node_current,
6763 (instance, self.op.hvparams,
6764 self.op.beparams),
6765 self.op.startup_paused)
6766 msg = result.fail_msg
6767 if msg:
6768 _ShutdownInstanceDisks(self, instance)
6769 raise errors.OpExecError("Could not start instance: %s" % msg)
6772 class LUInstanceReboot(LogicalUnit):
6773 """Reboot an instance.
6776 HPATH = "instance-reboot"
6777 HTYPE = constants.HTYPE_INSTANCE
6780 def ExpandNames(self):
6781 self._ExpandAndLockInstance()
6783 def BuildHooksEnv(self):
6784 """Build hooks env.
6786 This runs on master, primary and secondary nodes of the instance.
6788 """
6789 env = {
6790 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6791 "REBOOT_TYPE": self.op.reboot_type,
6792 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6793 }
6795 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6797 return env
6799 def BuildHooksNodes(self):
6800 """Build hooks nodes.
6803 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6806 def CheckPrereq(self):
6807 """Check prerequisites.
6809 This checks that the instance is in the cluster.
6811 """
6812 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6813 assert self.instance is not None, \
6814 "Cannot retrieve locked instance %s" % self.op.instance_name
6815 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6816 _CheckNodeOnline(self, instance.primary_node)
6818 # check bridges existence
6819 _CheckInstanceBridgesExist(self, instance)
6821 def Exec(self, feedback_fn):
6822 """Reboot the instance.
6825 instance = self.instance
6826 ignore_secondaries = self.op.ignore_secondaries
6827 reboot_type = self.op.reboot_type
6829 remote_info = self.rpc.call_instance_info(instance.primary_node,
6830 instance.name,
6831 instance.hypervisor)
6832 remote_info.Raise("Error checking node %s" % instance.primary_node)
6833 instance_running = bool(remote_info.payload)
6835 node_current = instance.primary_node
6837 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6838 constants.INSTANCE_REBOOT_HARD]:
6839 for disk in instance.disks:
6840 self.cfg.SetDiskID(disk, node_current)
6841 result = self.rpc.call_instance_reboot(node_current, instance,
6842 reboot_type,
6843 self.op.shutdown_timeout)
6844 result.Raise("Could not reboot instance")
6845 else:
6846 if instance_running:
6847 result = self.rpc.call_instance_shutdown(node_current, instance,
6848 self.op.shutdown_timeout)
6849 result.Raise("Could not shutdown instance for full reboot")
6850 _ShutdownInstanceDisks(self, instance)
6852 self.LogInfo("Instance %s was already stopped, starting now",
6854 _StartInstanceDisks(self, instance, ignore_secondaries)
6855 result = self.rpc.call_instance_start(node_current,
6856 (instance, None, None), False)
6857 msg = result.fail_msg
6858 if msg:
6859 _ShutdownInstanceDisks(self, instance)
6860 raise errors.OpExecError("Could not start instance for"
6861 " full reboot: %s" % msg)
6863 self.cfg.MarkInstanceUp(instance.name)
6866 class LUInstanceShutdown(LogicalUnit):
6867 """Shutdown an instance.
6870 HPATH = "instance-stop"
6871 HTYPE = constants.HTYPE_INSTANCE
6874 def ExpandNames(self):
6875 self._ExpandAndLockInstance()
6877 def BuildHooksEnv(self):
6878 """Build hooks env.
6880 This runs on master, primary and secondary nodes of the instance.
6882 """
6883 env = _BuildInstanceHookEnvByObject(self, self.instance)
6884 env["TIMEOUT"] = self.op.timeout
6885 return env
6887 def BuildHooksNodes(self):
6888 """Build hooks nodes.
6891 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6894 def CheckPrereq(self):
6895 """Check prerequisites.
6897 This checks that the instance is in the cluster.
6899 """
6900 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6901 assert self.instance is not None, \
6902 "Cannot retrieve locked instance %s" % self.op.instance_name
6904 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6906 self.primary_offline = \
6907 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6909 if self.primary_offline and self.op.ignore_offline_nodes:
6910 self.proc.LogWarning("Ignoring offline primary node")
6912 _CheckNodeOnline(self, self.instance.primary_node)
6914 def Exec(self, feedback_fn):
6915 """Shutdown the instance.
6918 instance = self.instance
6919 node_current = instance.primary_node
6920 timeout = self.op.timeout
6922 if not self.op.no_remember:
6923 self.cfg.MarkInstanceDown(instance.name)
6925 if self.primary_offline:
6926 assert self.op.ignore_offline_nodes
6927 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6929 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6930 msg = result.fail_msg
6932 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6934 _ShutdownInstanceDisks(self, instance)
6937 class LUInstanceReinstall(LogicalUnit):
6938 """Reinstall an instance.
6941 HPATH = "instance-reinstall"
6942 HTYPE = constants.HTYPE_INSTANCE
6945 def ExpandNames(self):
6946 self._ExpandAndLockInstance()
6948 def BuildHooksEnv(self):
6949 """Build hooks env.
6951 This runs on master, primary and secondary nodes of the instance.
6953 """
6954 return _BuildInstanceHookEnvByObject(self, self.instance)
6956 def BuildHooksNodes(self):
6957 """Build hooks nodes.
6960 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6963 def CheckPrereq(self):
6964 """Check prerequisites.
6966 This checks that the instance is in the cluster and is not running.
6968 """
6969 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6970 assert instance is not None, \
6971 "Cannot retrieve locked instance %s" % self.op.instance_name
6972 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6973 " offline, cannot reinstall")
6974 for node in instance.secondary_nodes:
6975 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6976 " cannot reinstall")
6978 if instance.disk_template == constants.DT_DISKLESS:
6979 raise errors.OpPrereqError("Instance '%s' has no disks" %
6980 self.op.instance_name,
6981 errors.ECODE_INVAL)
6982 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6984 if self.op.os_type is not None:
6985 # OS verification
6986 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6987 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6988 instance_os = self.op.os_type
6989 else:
6990 instance_os = instance.os
6992 nodelist = list(instance.all_nodes)
6994 if self.op.osparams:
6995 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6996 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6997 self.os_inst = i_osdict # the new dict (without defaults)
6998 else:
6999 self.os_inst = {}
7001 self.instance = instance
7003 def Exec(self, feedback_fn):
7004 """Reinstall the instance.
7007 inst = self.instance
7009 if self.op.os_type is not None:
7010 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7011 inst.os = self.op.os_type
7012 # Write to configuration
7013 self.cfg.Update(inst, feedback_fn)
7015 _StartInstanceDisks(self, inst, None)
7016 try:
7017 feedback_fn("Running the instance OS create scripts...")
7018 # FIXME: pass debug option from opcode to backend
7019 result = self.rpc.call_instance_os_add(inst.primary_node,
7020 (inst, self.os_inst), True,
7021 self.op.debug_level)
7022 result.Raise("Could not install OS for instance %s on node %s" %
7023 (inst.name, inst.primary_node))
7025 _ShutdownInstanceDisks(self, inst)
7028 class LUInstanceRecreateDisks(LogicalUnit):
7029 """Recreate an instance's missing disks.
7032 HPATH = "instance-recreate-disks"
7033 HTYPE = constants.HTYPE_INSTANCE
7036 _MODIFYABLE = frozenset([
7037 constants.IDISK_SIZE,
7038 constants.IDISK_MODE,
7039 ])
7041 # New or changed disk parameters may have different semantics
7042 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7043 constants.IDISK_ADOPT,
7045 # TODO: Implement support for changing VG while recreating
7046 constants.IDISK_VG,
7047 constants.IDISK_METAVG,
7048 ]))
7050 def CheckArguments(self):
7051 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7052 # Normalize and convert deprecated list of disk indices
7053 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
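# Example: the deprecated form disks=[2, 0] is normalized to
# [(0, {}), (2, {})], i.e. sorted (index, parameter-overrides) pairs.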
7055 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7056 if duplicates:
7057 raise errors.OpPrereqError("Some disks have been specified more than"
7058 " once: %s" % utils.CommaJoin(duplicates),
7059 errors.ECODE_INVAL)
7061 for (idx, params) in self.op.disks:
7062 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7063 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7064 if unsupported:
7065 raise errors.OpPrereqError("Parameters for disk %s try to change"
7066 " unmodifyable parameter(s): %s" %
7067 (idx, utils.CommaJoin(unsupported)),
7068 errors.ECODE_INVAL)
7070 def ExpandNames(self):
7071 self._ExpandAndLockInstance()
7072 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7073 if self.op.nodes:
7074 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7075 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7076 else:
7077 self.needed_locks[locking.LEVEL_NODE] = []
7078 self.needed_locks[locking.LEVEL_NODE_RES] = []
7080 def DeclareLocks(self, level):
7081 if level == locking.LEVEL_NODE:
7082 # if we replace the nodes, we only need to lock the old primary,
7083 # otherwise we need to lock all nodes for disk re-creation
7084 primary_only = bool(self.op.nodes)
7085 self._LockInstancesNodes(primary_only=primary_only)
7086 elif level == locking.LEVEL_NODE_RES:
7087 # Copy node locks
7088 self.needed_locks[locking.LEVEL_NODE_RES] = \
7089 self.needed_locks[locking.LEVEL_NODE][:]
7091 def BuildHooksEnv(self):
7092 """Build hooks env.
7094 This runs on master, primary and secondary nodes of the instance.
7096 """
7097 return _BuildInstanceHookEnvByObject(self, self.instance)
7099 def BuildHooksNodes(self):
7100 """Build hooks nodes.
7103 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7106 def CheckPrereq(self):
7107 """Check prerequisites.
7109 This checks that the instance is in the cluster and is not running.
7112 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7113 assert instance is not None, \
7114 "Cannot retrieve locked instance %s" % self.op.instance_name
7115 if self.op.nodes:
7116 if len(self.op.nodes) != len(instance.all_nodes):
7117 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7118 " %d replacement nodes were specified" %
7119 (instance.name, len(instance.all_nodes),
7120 len(self.op.nodes)),
7121 errors.ECODE_INVAL)
7122 assert instance.disk_template != constants.DT_DRBD8 or \
7123 len(self.op.nodes) == 2
7124 assert instance.disk_template != constants.DT_PLAIN or \
7125 len(self.op.nodes) == 1
7126 primary_node = self.op.nodes[0]
7127 else:
7128 primary_node = instance.primary_node
7129 _CheckNodeOnline(self, primary_node)
7131 if instance.disk_template == constants.DT_DISKLESS:
7132 raise errors.OpPrereqError("Instance '%s' has no disks" %
7133 self.op.instance_name, errors.ECODE_INVAL)
7135 # if we replace nodes *and* the old primary is offline, we don't
7136 # check
7137 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7138 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7139 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7140 if not (self.op.nodes and old_pnode.offline):
7141 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7142 msg="cannot recreate disks")
7144 if self.op.disks:
7145 self.disks = dict(self.op.disks)
7146 else:
7147 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7149 maxidx = max(self.disks.keys())
7150 if maxidx >= len(instance.disks):
7151 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7154 if (self.op.nodes and
7155 sorted(self.disks.keys()) != range(len(instance.disks))):
7156 raise errors.OpPrereqError("Can't recreate disks partially and"
7157 " change the nodes at the same time",
7160 self.instance = instance
7162 def Exec(self, feedback_fn):
7163 """Recreate the disks.
7166 instance = self.instance
7168 assert (self.owned_locks(locking.LEVEL_NODE) ==
7169 self.owned_locks(locking.LEVEL_NODE_RES))
7171 to_skip = []
7172 mods = [] # keeps track of needed changes
7174 for idx, disk in enumerate(instance.disks):
7175 try:
7176 changes = self.disks[idx]
7177 except KeyError:
7178 # Disk should not be recreated
7179 to_skip.append(idx)
7180 continue
7182 # update secondaries for disks, if needed
7183 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7184 # need to update the nodes and minors
7185 assert len(self.op.nodes) == 2
7186 assert len(disk.logical_id) == 6 # otherwise disk internals
7187 # have changed
7188 (_, _, old_port, _, _, old_secret) = disk.logical_id
7189 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7190 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7191 new_minors[0], new_minors[1], old_secret)
7192 assert len(disk.logical_id) == len(new_id)
7193 else:
7194 new_id = None
7196 mods.append((idx, new_id, changes))
7198 # now that we have passed all asserts above, we can apply the mods
7199 # in a single run (to avoid partial changes)
7200 for idx, new_id, changes in mods:
7201 disk = instance.disks[idx]
7202 if new_id is not None:
7203 assert disk.dev_type == constants.LD_DRBD8
7204 disk.logical_id = new_id
7205 if changes:
7206 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7207 mode=changes.get(constants.IDISK_MODE, None))
7209 # change primary node, if needed
7210 if self.op.nodes:
7211 instance.primary_node = self.op.nodes[0]
7212 self.LogWarning("Changing the instance's nodes, you will have to"
7213 " remove any disks left on the older nodes manually")
7215 if self.op.nodes:
7216 self.cfg.Update(instance, feedback_fn)
7218 _CreateDisks(self, instance, to_skip=to_skip)
7221 class LUInstanceRename(LogicalUnit):
7222 """Rename an instance.
7225 HPATH = "instance-rename"
7226 HTYPE = constants.HTYPE_INSTANCE
7228 def CheckArguments(self):
7229 """Check arguments.
7231 """
7232 if self.op.ip_check and not self.op.name_check:
7233 # TODO: make the ip check more flexible and not depend on the name check
7234 raise errors.OpPrereqError("IP address check requires a name check",
7237 def BuildHooksEnv(self):
7238 """Build hooks env.
7240 This runs on master, primary and secondary nodes of the instance.
7242 """
7243 env = _BuildInstanceHookEnvByObject(self, self.instance)
7244 env["INSTANCE_NEW_NAME"] = self.op.new_name
7245 return env
7247 def BuildHooksNodes(self):
7248 """Build hooks nodes.
7251 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7254 def CheckPrereq(self):
7255 """Check prerequisites.
7257 This checks that the instance is in the cluster and is not running.
7260 self.op.instance_name = _ExpandInstanceName(self.cfg,
7261 self.op.instance_name)
7262 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7263 assert instance is not None
7264 _CheckNodeOnline(self, instance.primary_node)
7265 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7266 msg="cannot rename")
7267 self.instance = instance
7269 new_name = self.op.new_name
7270 if self.op.name_check:
7271 hostname = netutils.GetHostname(name=new_name)
7272 if hostname.name != new_name:
7273 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7275 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7276 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7277 " same as given hostname '%s'") %
7278 (hostname.name, self.op.new_name),
7279 errors.ECODE_INVAL)
7280 new_name = self.op.new_name = hostname.name
7281 if (self.op.ip_check and
7282 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7283 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7284 (hostname.ip, new_name),
7285 errors.ECODE_NOTUNIQUE)
7287 instance_list = self.cfg.GetInstanceList()
7288 if new_name in instance_list and new_name != instance.name:
7289 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7290 new_name, errors.ECODE_EXISTS)
7292 def Exec(self, feedback_fn):
7293 """Rename the instance.
7296 inst = self.instance
7297 old_name = inst.name
7299 rename_file_storage = False
7300 if (inst.disk_template in constants.DTS_FILEBASED and
7301 self.op.new_name != inst.name):
7302 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7303 rename_file_storage = True
7305 self.cfg.RenameInstance(inst.name, self.op.new_name)
7306 # Change the instance lock. This is definitely safe while we hold the BGL.
7307 # Otherwise the new lock would have to be added in acquired mode.
7309 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7310 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7312 # re-read the instance from the configuration after rename
7313 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7315 if rename_file_storage:
7316 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7317 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7318 old_file_storage_dir,
7319 new_file_storage_dir)
7320 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7321 " (but the instance has been renamed in Ganeti)" %
7322 (inst.primary_node, old_file_storage_dir,
7323 new_file_storage_dir))
7325 _StartInstanceDisks(self, inst, None)
7326 try:
7327 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7328 old_name, self.op.debug_level)
7329 msg = result.fail_msg
7330 if msg:
7331 msg = ("Could not run OS rename script for instance %s on node %s"
7332 " (but the instance has been renamed in Ganeti): %s" %
7333 (inst.name, inst.primary_node, msg))
7334 self.proc.LogWarning(msg)
7335 finally:
7336 _ShutdownInstanceDisks(self, inst)
7338 return inst.name
7341 class LUInstanceRemove(LogicalUnit):
7342 """Remove an instance.
7345 HPATH = "instance-remove"
7346 HTYPE = constants.HTYPE_INSTANCE
7349 def ExpandNames(self):
7350 self._ExpandAndLockInstance()
7351 self.needed_locks[locking.LEVEL_NODE] = []
7352 self.needed_locks[locking.LEVEL_NODE_RES] = []
7353 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7355 def DeclareLocks(self, level):
7356 if level == locking.LEVEL_NODE:
7357 self._LockInstancesNodes()
7358 elif level == locking.LEVEL_NODE_RES:
7359 # Copy node locks
7360 self.needed_locks[locking.LEVEL_NODE_RES] = \
7361 self.needed_locks[locking.LEVEL_NODE][:]
7363 def BuildHooksEnv(self):
7364 """Build hooks env.
7366 This runs on master, primary and secondary nodes of the instance.
7368 """
7369 env = _BuildInstanceHookEnvByObject(self, self.instance)
7370 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7371 return env
7373 def BuildHooksNodes(self):
7374 """Build hooks nodes.
7377 nl = [self.cfg.GetMasterNode()]
7378 nl_post = list(self.instance.all_nodes) + nl
7379 return (nl, nl_post)
7381 def CheckPrereq(self):
7382 """Check prerequisites.
7384 This checks that the instance is in the cluster.
7386 """
7387 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7388 assert self.instance is not None, \
7389 "Cannot retrieve locked instance %s" % self.op.instance_name
7391 def Exec(self, feedback_fn):
7392 """Remove the instance.
7395 instance = self.instance
7396 logging.info("Shutting down instance %s on node %s",
7397 instance.name, instance.primary_node)
7399 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7400 self.op.shutdown_timeout)
7401 msg = result.fail_msg
7402 if msg:
7403 if self.op.ignore_failures:
7404 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7405 else:
7406 raise errors.OpExecError("Could not shutdown instance %s on"
7407 " node %s: %s" %
7408 (instance.name, instance.primary_node, msg))
7410 assert (self.owned_locks(locking.LEVEL_NODE) ==
7411 self.owned_locks(locking.LEVEL_NODE_RES))
7412 assert not (set(instance.all_nodes) -
7413 self.owned_locks(locking.LEVEL_NODE)), \
7414 "Not owning correct locks"
7416 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7419 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7420 """Utility function to remove an instance.
7423 logging.info("Removing block devices for instance %s", instance.name)
7425 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7426 if not ignore_failures:
7427 raise errors.OpExecError("Can't remove instance's disks")
7428 feedback_fn("Warning: can't remove instance's disks")
7430 logging.info("Removing instance %s out of cluster config", instance.name)
7432 lu.cfg.RemoveInstance(instance.name)
7434 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7435 "Instance lock removal conflict"
7437 # Remove lock for the instance
7438 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7441 class LUInstanceQuery(NoHooksLU):
7442 """Logical unit for querying instances.
7445 # pylint: disable=W0142
7448 def CheckArguments(self):
7449 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7450 self.op.output_fields, self.op.use_locking)
7452 def ExpandNames(self):
7453 self.iq.ExpandNames(self)
7455 def DeclareLocks(self, level):
7456 self.iq.DeclareLocks(self, level)
7458 def Exec(self, feedback_fn):
7459 return self.iq.OldStyleQuery(self)
7462 class LUInstanceFailover(LogicalUnit):
7463 """Failover an instance.
7466 HPATH = "instance-failover"
7467 HTYPE = constants.HTYPE_INSTANCE
7470 def CheckArguments(self):
7471 """Check the arguments.
7474 self.iallocator = getattr(self.op, "iallocator", None)
7475 self.target_node = getattr(self.op, "target_node", None)
7477 def ExpandNames(self):
7478 self._ExpandAndLockInstance()
7480 if self.op.target_node is not None:
7481 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7483 self.needed_locks[locking.LEVEL_NODE] = []
7484 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7486 self.needed_locks[locking.LEVEL_NODE_RES] = []
7487 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7489 ignore_consistency = self.op.ignore_consistency
7490 shutdown_timeout = self.op.shutdown_timeout
7491 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7492 cleanup=False,
7493 failover=True,
7494 ignore_consistency=ignore_consistency,
7495 shutdown_timeout=shutdown_timeout,
7496 ignore_ipolicy=self.op.ignore_ipolicy)
7497 self.tasklets = [self._migrater]
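# The actual failover logic lives in the TLMigrateInstance tasklet, which is
# shared with LUInstanceMigrate; this LU only sets up locking and hooks.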
7499 def DeclareLocks(self, level):
7500 if level == locking.LEVEL_NODE:
7501 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7502 if instance.disk_template in constants.DTS_EXT_MIRROR:
7503 if self.op.target_node is None:
7504 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7505 else:
7506 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7507 self.op.target_node]
7508 del self.recalculate_locks[locking.LEVEL_NODE]
7509 else:
7510 self._LockInstancesNodes()
7511 elif level == locking.LEVEL_NODE_RES:
7512 # Copy node locks
7513 self.needed_locks[locking.LEVEL_NODE_RES] = \
7514 self.needed_locks[locking.LEVEL_NODE][:]
7516 def BuildHooksEnv(self):
7517 """Build hooks env.
7519 This runs on master, primary and secondary nodes of the instance.
7521 """
7522 instance = self._migrater.instance
7523 source_node = instance.primary_node
7524 target_node = self.op.target_node
7526 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7527 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7528 "OLD_PRIMARY": source_node,
7529 "NEW_PRIMARY": target_node,
7532 if instance.disk_template in constants.DTS_INT_MIRROR:
7533 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7534 env["NEW_SECONDARY"] = source_node
7536 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7538 env.update(_BuildInstanceHookEnvByObject(self, instance))
7542 def BuildHooksNodes(self):
7543 """Build hooks nodes.
7546 instance = self._migrater.instance
7547 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7548 return (nl, nl + [instance.primary_node])
7551 class LUInstanceMigrate(LogicalUnit):
7552 """Migrate an instance.
7554 This is migration without shutting down, compared to the failover,
7555 which is done with shutdown.
7558 HPATH = "instance-migrate"
7559 HTYPE = constants.HTYPE_INSTANCE
7562 def ExpandNames(self):
7563 self._ExpandAndLockInstance()
7565 if self.op.target_node is not None:
7566 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7568 self.needed_locks[locking.LEVEL_NODE] = []
7569 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7571 self.needed_locks[locking.LEVEL_NODE_RES] = []
7572 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7575 TLMigrateInstance(self, self.op.instance_name,
7576 cleanup=self.op.cleanup,
7578 fallback=self.op.allow_failover,
7579 allow_runtime_changes=self.op.allow_runtime_changes,
7580 ignore_ipolicy=self.op.ignore_ipolicy)
7581 self.tasklets = [self._migrater]
7583 def DeclareLocks(self, level):
7584 if level == locking.LEVEL_NODE:
7585 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7586 if instance.disk_template in constants.DTS_EXT_MIRROR:
7587 if self.op.target_node is None:
7588 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7590 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7591 self.op.target_node]
7592 del self.recalculate_locks[locking.LEVEL_NODE]
7594 self._LockInstancesNodes()
7595 elif level == locking.LEVEL_NODE_RES:
7597 self.needed_locks[locking.LEVEL_NODE_RES] = \
7598 self.needed_locks[locking.LEVEL_NODE][:]
7600 def BuildHooksEnv(self):
7603 This runs on master, primary and secondary nodes of the instance.
7606 instance = self._migrater.instance
7607 source_node = instance.primary_node
7608 target_node = self.op.target_node
7609 env = _BuildInstanceHookEnvByObject(self, instance)
7611 "MIGRATE_LIVE": self._migrater.live,
7612 "MIGRATE_CLEANUP": self.op.cleanup,
7613 "OLD_PRIMARY": source_node,
7614 "NEW_PRIMARY": target_node,
7615 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7618 if instance.disk_template in constants.DTS_INT_MIRROR:
7619 env["OLD_SECONDARY"] = target_node
7620 env["NEW_SECONDARY"] = source_node
7622 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7626 def BuildHooksNodes(self):
7627 """Build hooks nodes.
7630 instance = self._migrater.instance
7631 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7632 return (nl, nl + [instance.primary_node])
7635 class LUInstanceMove(LogicalUnit):
7636 """Move an instance by data-copying.
7639 HPATH = "instance-move"
7640 HTYPE = constants.HTYPE_INSTANCE
7643 def ExpandNames(self):
7644 self._ExpandAndLockInstance()
7645 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7646 self.op.target_node = target_node
7647 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7648 self.needed_locks[locking.LEVEL_NODE_RES] = []
7649 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7651 def DeclareLocks(self, level):
7652 if level == locking.LEVEL_NODE:
7653 self._LockInstancesNodes(primary_only=True)
7654 elif level == locking.LEVEL_NODE_RES:
7656 self.needed_locks[locking.LEVEL_NODE_RES] = \
7657 self.needed_locks[locking.LEVEL_NODE][:]
7659 def BuildHooksEnv(self):
7662 This runs on master, primary and secondary nodes of the instance.
7666 "TARGET_NODE": self.op.target_node,
7667 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7669 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7672 def BuildHooksNodes(self):
7673 """Build hooks nodes.
7677 self.cfg.GetMasterNode(),
7678 self.instance.primary_node,
7679 self.op.target_node,
7683 def CheckPrereq(self):
7684 """Check prerequisites.
7686 This checks that the instance is in the cluster.
7689 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7690 assert self.instance is not None, \
7691 "Cannot retrieve locked instance %s" % self.op.instance_name
7693 node = self.cfg.GetNodeInfo(self.op.target_node)
7694 assert node is not None, \
7695 "Cannot retrieve locked node %s" % self.op.target_node
7697 self.target_node = target_node = node.name
7699 if target_node == instance.primary_node:
7700 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7701 (instance.name, target_node),
7704 bep = self.cfg.GetClusterInfo().FillBE(instance)
7706 for idx, dsk in enumerate(instance.disks):
7707 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7708 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7709 " cannot copy" % idx, errors.ECODE_STATE)
7711 _CheckNodeOnline(self, target_node)
7712 _CheckNodeNotDrained(self, target_node)
7713 _CheckNodeVmCapable(self, target_node)
7714 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7715 self.cfg.GetNodeGroup(node.group))
7716 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7717 ignore=self.op.ignore_ipolicy)
7719 if instance.admin_state == constants.ADMINST_UP:
7720 # check memory requirements on the target node
7721 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7722 instance.name, bep[constants.BE_MAXMEM],
7723 instance.hypervisor)
7725 self.LogInfo("Not checking memory on the secondary node as"
7726 " instance will not be started")
7728 # check bridge existence
7729 _CheckInstanceBridgesExist(self, instance, node=target_node)
7731 def Exec(self, feedback_fn):
7732 """Move an instance.
7734 The move is done by shutting it down on its present node, copying
7735 the data over (slow) and starting it on the new node.
7738 instance = self.instance
7740 source_node = instance.primary_node
7741 target_node = self.target_node
7743 self.LogInfo("Shutting down instance %s on source node %s",
7744 instance.name, source_node)
7746 assert (self.owned_locks(locking.LEVEL_NODE) ==
7747 self.owned_locks(locking.LEVEL_NODE_RES))
7749 result = self.rpc.call_instance_shutdown(source_node, instance,
7750 self.op.shutdown_timeout)
7751 msg = result.fail_msg
7753 if self.op.ignore_consistency:
7754 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7755 " Proceeding anyway. Please make sure node"
7756 " %s is down. Error details: %s",
7757 instance.name, source_node, source_node, msg)
7759 raise errors.OpExecError("Could not shutdown instance %s on"
7761 (instance.name, source_node, msg))
7763 # create the target disks
7765 _CreateDisks(self, instance, target_node=target_node)
7766 except errors.OpExecError:
7767 self.LogWarning("Device creation failed, reverting...")
7769 _RemoveDisks(self, instance, target_node=target_node)
7771 self.cfg.ReleaseDRBDMinors(instance.name)
7774 cluster_name = self.cfg.GetClusterInfo().cluster_name
7777 # activate, get path, copy the data over
7778 for idx, disk in enumerate(instance.disks):
7779 self.LogInfo("Copying data for disk %d", idx)
7780 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7781 instance.name, True, idx)
7783 self.LogWarning("Can't assemble newly created disk %d: %s",
7784 idx, result.fail_msg)
7785 errs.append(result.fail_msg)
7787 dev_path = result.payload
7788 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7789 target_node, dev_path,
7792 self.LogWarning("Can't copy data over for disk %d: %s",
7793 idx, result.fail_msg)
7794 errs.append(result.fail_msg)
7798 self.LogWarning("Some disks failed to copy, aborting")
7800 _RemoveDisks(self, instance, target_node=target_node)
7802 self.cfg.ReleaseDRBDMinors(instance.name)
7803 raise errors.OpExecError("Errors during disk copy: %s" %
7806 instance.primary_node = target_node
7807 self.cfg.Update(instance, feedback_fn)
7809 self.LogInfo("Removing the disks on the original node")
7810 _RemoveDisks(self, instance, target_node=source_node)
7812 # Only start the instance if it's marked as up
7813 if instance.admin_state == constants.ADMINST_UP:
7814 self.LogInfo("Starting instance %s on node %s",
7815 instance.name, target_node)
7817 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7818 ignore_secondaries=True)
7820 _ShutdownInstanceDisks(self, instance)
7821 raise errors.OpExecError("Can't activate the instance's disks")
7823 result = self.rpc.call_instance_start(target_node,
7824 (instance, None, None), False)
7825 msg = result.fail_msg
7827 _ShutdownInstanceDisks(self, instance)
7828 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7829 (instance.name, target_node, msg))
7832 class LUNodeMigrate(LogicalUnit):
7833 """Migrate all instances from a node.
7836 HPATH = "node-migrate"
7837 HTYPE = constants.HTYPE_NODE
7840 def CheckArguments(self):
7843 def ExpandNames(self):
7844 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7846 self.share_locks = _ShareAll()
7847 self.needed_locks = {
7848 locking.LEVEL_NODE: [self.op.node_name],
7851 def BuildHooksEnv(self):
7854 This runs on the master, the primary and all the secondaries.
7858 "NODE_NAME": self.op.node_name,
7859 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7862 def BuildHooksNodes(self):
7863 """Build hooks nodes.
7866 nl = [self.cfg.GetMasterNode()]
7869 def CheckPrereq(self):
7872 def Exec(self, feedback_fn):
7873 # Prepare jobs for migration instances
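# Each inner list below is submitted as a separate single-opcode job, so
# the per-instance migrations can be scheduled independently; the job IDs
# are returned to the caller through ResultWithJobs.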
7874 allow_runtime_changes = self.op.allow_runtime_changes
7876 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7879 iallocator=self.op.iallocator,
7880 target_node=self.op.target_node,
7881 allow_runtime_changes=allow_runtime_changes,
7882 ignore_ipolicy=self.op.ignore_ipolicy)]
7883 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7886 # TODO: Run iallocator in this opcode and pass correct placement options to
7887 # OpInstanceMigrate. Since other jobs can modify the cluster between
7888 # running the iallocator and the actual migration, a good consistency model
7889 # will have to be found.
7891 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7892 frozenset([self.op.node_name]))
7894 return ResultWithJobs(jobs)
7897 class TLMigrateInstance(Tasklet):
7898 """Tasklet class for instance migration.
7901 @ivar live: whether the migration will be done live or non-live;
7902 this variable is initialized only after CheckPrereq has run
7903 @type cleanup: boolean
7904 @ivar cleanup: Whether we are cleaning up after a failed migration
7905 @type iallocator: string
7906 @ivar iallocator: The iallocator used to determine target_node
7907 @type target_node: string
7908 @ivar target_node: If given, the target_node to reallocate the instance to
7909 @type failover: boolean
7910 @ivar failover: Whether operation results in failover or migration
7911 @type fallback: boolean
7912 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
7914 @type ignore_consistency: boolean
7915 @ivar ignore_consistency: Whether we should ignore consistency between the source and target nodes
7917 @type shutdown_timeout: int
7918 @ivar shutdown_timeout: timeout of the instance shutdown in case of failover
7919 @type ignore_ipolicy: bool
7920 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7925 _MIGRATION_POLL_INTERVAL = 1 # seconds
7926 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7928 def __init__(self, lu, instance_name, cleanup=False,
7929 failover=False, fallback=False,
7930 ignore_consistency=False,
7931 allow_runtime_changes=True,
7932 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7933 ignore_ipolicy=False):
7934 """Initializes this class.
7937 Tasklet.__init__(self, lu)
7940 self.instance_name = instance_name
7941 self.cleanup = cleanup
7942 self.live = False # will be overridden later
7943 self.failover = failover
7944 self.fallback = fallback
7945 self.ignore_consistency = ignore_consistency
7946 self.shutdown_timeout = shutdown_timeout
7947 self.ignore_ipolicy = ignore_ipolicy
7948 self.allow_runtime_changes = allow_runtime_changes
7950 def CheckPrereq(self):
7951 """Check prerequisites.
7953 This checks that the instance is in the cluster.
7956 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7957 instance = self.cfg.GetInstanceInfo(instance_name)
7958 assert instance is not None
7959 self.instance = instance
7960 cluster = self.cfg.GetClusterInfo()
7962 if (not self.cleanup and
7963 not instance.admin_state == constants.ADMINST_UP and
7964 not self.failover and self.fallback):
7965 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7966 " switching to failover")
7967 self.failover = True
7969 if instance.disk_template not in constants.DTS_MIRRORED:
7974 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7975 " %s" % (instance.disk_template, text),
7978 if instance.disk_template in constants.DTS_EXT_MIRROR:
7979 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7981 if self.lu.op.iallocator:
7982 self._RunAllocator()
7984 # We set self.target_node as it is required by
7986 self.target_node = self.lu.op.target_node
7988 # Check that the target node is correct in terms of instance policy
7989 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7990 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7991 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7992 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7993 ignore=self.ignore_ipolicy)
7995 # self.target_node is already populated, either directly or by the
7997 target_node = self.target_node
7998 if self.target_node == instance.primary_node:
7999 raise errors.OpPrereqError("Cannot migrate instance %s"
8000 " to its primary (%s)" %
8001 (instance.name, instance.primary_node))
8003 if len(self.lu.tasklets) == 1:
8004 # It is safe to release locks only when we're the only tasklet
8006 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8007 keep=[instance.primary_node, self.target_node])
8010 secondary_nodes = instance.secondary_nodes
8011 if not secondary_nodes:
8012 raise errors.ConfigurationError("No secondary node but using"
8013 " %s disk template" %
8014 instance.disk_template)
8015 target_node = secondary_nodes[0]
8016 if self.lu.op.iallocator or (self.lu.op.target_node and
8017 self.lu.op.target_node != target_node):
8019 text = "failed over"
8022 raise errors.OpPrereqError("Instances with disk template %s cannot"
8023 " be %s to arbitrary nodes"
8024 " (neither an iallocator nor a target"
8025 " node can be passed)" %
8026 (instance.disk_template, text),
8028 nodeinfo = self.cfg.GetNodeInfo(target_node)
8029 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8030 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8031 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8032 ignore=self.ignore_ipolicy)
8034 i_be = cluster.FillBE(instance)
8036 # check memory requirements on the target node
8037 if (not self.cleanup and
8038 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8039 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8040 "migrating instance %s" %
8042 i_be[constants.BE_MINMEM],
8043 instance.hypervisor)
8045 self.lu.LogInfo("Not checking memory on the secondary node as"
8046 " instance will not be started")
8048 # check if failover must be forced instead of migration
8049 if (not self.cleanup and not self.failover and
8050 i_be[constants.BE_ALWAYS_FAILOVER]):
8052 self.lu.LogInfo("Instance configured to always failover; fallback"
8054 self.failover = True
8056 raise errors.OpPrereqError("This instance has been configured to"
8057 " always failover, please allow failover",
8060 # check bridge existence
8061 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8063 if not self.cleanup:
8064 _CheckNodeNotDrained(self.lu, target_node)
8065 if not self.failover:
8066 result = self.rpc.call_instance_migratable(instance.primary_node,
8068 if result.fail_msg and self.fallback:
8069 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8071 self.failover = True
8073 result.Raise("Can't migrate, please use failover",
8074 prereq=True, ecode=errors.ECODE_STATE)
8076 assert not (self.failover and self.cleanup)
8078 if not self.failover:
8079 if self.lu.op.live is not None and self.lu.op.mode is not None:
8080 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8081 " parameters are accepted",
8083 if self.lu.op.live is not None:
8085 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8087 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8088 # reset the 'live' parameter to None so that repeated
8089 # invocations of CheckPrereq do not raise an exception
8090 self.lu.op.live = None
8091 elif self.lu.op.mode is None:
8092 # read the default value from the hypervisor
8093 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8094 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8096 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8098 # Failover is never live
8101 if not (self.failover or self.cleanup):
8102 remote_info = self.rpc.call_instance_info(instance.primary_node,
8104 instance.hypervisor)
8105 remote_info.Raise("Error checking instance on node %s" %
8106 instance.primary_node)
8107 instance_running = bool(remote_info.payload)
8108 if instance_running:
8109 self.current_mem = int(remote_info.payload["memory"])
8111 def _RunAllocator(self):
8112 """Run the allocator based on input opcode.
8115 # FIXME: add a self.ignore_ipolicy option
8116 ial = IAllocator(self.cfg, self.rpc,
8117 mode=constants.IALLOCATOR_MODE_RELOC,
8118 name=self.instance_name,
8119 relocate_from=[self.instance.primary_node],
8122 ial.Run(self.lu.op.iallocator)
8125 raise errors.OpPrereqError("Can't compute nodes using"
8126 " iallocator '%s': %s" %
8127 (self.lu.op.iallocator, ial.info),
8129 if len(ial.result) != ial.required_nodes:
8130 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8131 " of nodes (%s), required %s" %
8132 (self.lu.op.iallocator, len(ial.result),
8133 ial.required_nodes), errors.ECODE_FAULT)
8134 self.target_node = ial.result[0]
8135 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8136 self.instance_name, self.lu.op.iallocator,
8137 utils.CommaJoin(ial.result))
8139 def _WaitUntilSync(self):
8140 """Poll with custom rpc for disk sync.
8142 This uses our own step-based rpc call.
8145 self.feedback_fn("* wait until resync is done")
8149 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8151 (self.instance.disks,
8154 for node, nres in result.items():
8155 nres.Raise("Cannot resync disks on node %s" % node)
8156 node_done, node_percent = nres.payload
8157 all_done = all_done and node_done
8158 if node_percent is not None:
8159 min_percent = min(min_percent, node_percent)
8161 if min_percent < 100:
8162 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8165 def _EnsureSecondary(self, node):
8166 """Demote a node to secondary.
8169 self.feedback_fn("* switching node %s to secondary mode" % node)
8171 for dev in self.instance.disks:
8172 self.cfg.SetDiskID(dev, node)
8174 result = self.rpc.call_blockdev_close(node, self.instance.name,
8175 self.instance.disks)
8176 result.Raise("Cannot change disk to secondary on node %s" % node)
8178 def _GoStandalone(self):
8179 """Disconnect from the network.
8182 self.feedback_fn("* changing into standalone mode")
8183 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8184 self.instance.disks)
8185 for node, nres in result.items():
8186 nres.Raise("Cannot disconnect disks on node %s" % node)
8188 def _GoReconnect(self, multimaster):
8189 """Reconnect to the network.
8195 msg = "single-master"
8196 self.feedback_fn("* changing disks into %s mode" % msg)
8197 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8198 (self.instance.disks, self.instance),
8199 self.instance.name, multimaster)
8200 for node, nres in result.items():
8201 nres.Raise("Cannot change disks config on node %s" % node)
8203 def _ExecCleanup(self):
8204 """Try to cleanup after a failed migration.
8206 The cleanup is done by:
8207 - check that the instance is running only on one node
8208 (and update the config if needed)
8209 - change disks on its secondary node to secondary
8210 - wait until disks are fully synchronized
8211 - disconnect from the network
8212 - change disks into single-master mode
8213 - wait again until disks are fully synchronized
8216 instance = self.instance
8217 target_node = self.target_node
8218 source_node = self.source_node
8220 # check running on only one node
8221 self.feedback_fn("* checking where the instance actually runs"
8222 " (if this hangs, the hypervisor might be in"
8224 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8225 for node, result in ins_l.items():
8226 result.Raise("Can't contact node %s" % node)
8228 runningon_source = instance.name in ins_l[source_node].payload
8229 runningon_target = instance.name in ins_l[target_node].payload
8231 if runningon_source and runningon_target:
8232 raise errors.OpExecError("Instance seems to be running on two nodes,"
8233 " or the hypervisor is confused; you will have"
8234 " to ensure manually that it runs only on one"
8235 " and restart this operation")
8237 if not (runningon_source or runningon_target):
8238 raise errors.OpExecError("Instance does not seem to be running at all;"
8239 " in this case it's safer to repair by"
8240 " running 'gnt-instance stop' to ensure disk"
8241 " shutdown, and then restarting it")
8243 if runningon_target:
8244 # the migration has actually succeeded, we need to update the config
8245 self.feedback_fn("* instance running on secondary node (%s),"
8246 " updating config" % target_node)
8247 instance.primary_node = target_node
8248 self.cfg.Update(instance, self.feedback_fn)
8249 demoted_node = source_node
8251 self.feedback_fn("* instance confirmed to be running on its"
8252 " primary node (%s)" % source_node)
8253 demoted_node = target_node
8255 if instance.disk_template in constants.DTS_INT_MIRROR:
8256 self._EnsureSecondary(demoted_node)
8258 self._WaitUntilSync()
8259 except errors.OpExecError:
8260 # we ignore errors here, since if the device is standalone, it
8261 # won't be able to sync
8263 self._GoStandalone()
8264 self._GoReconnect(False)
8265 self._WaitUntilSync()
8267 self.feedback_fn("* done")
8269 def _RevertDiskStatus(self):
8270 """Try to revert the disk status after a failed migration.
8273 target_node = self.target_node
8274 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8278 self._EnsureSecondary(target_node)
8279 self._GoStandalone()
8280 self._GoReconnect(False)
8281 self._WaitUntilSync()
8282 except errors.OpExecError, err:
8283 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8284 " please try to recover the instance manually;"
8285 " error '%s'" % str(err))
8287 def _AbortMigration(self):
8288 """Call the hypervisor code to abort a started migration.
8291 instance = self.instance
8292 target_node = self.target_node
8293 source_node = self.source_node
8294 migration_info = self.migration_info
8296 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8300 abort_msg = abort_result.fail_msg
8302 logging.error("Aborting migration failed on target node %s: %s",
8303 target_node, abort_msg)
8304 # Don't raise an exception here, as we still have to try to revert the
8305 # disk status, even if this step failed.
8307 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8308 instance, False, self.live)
8309 abort_msg = abort_result.fail_msg
8311 logging.error("Aborting migration failed on source node %s: %s",
8312 source_node, abort_msg)
8314 def _ExecMigration(self):
8315 """Migrate an instance.
8317 The migrate is done by:
8318 - change the disks into dual-master mode
8319 - wait until disks are fully synchronized again
8320 - migrate the instance
8321 - change disks on the new secondary node (the old primary) to secondary
8322 - wait until disks are fully synchronized
8323 - change disks into single-master mode
8326 instance = self.instance
8327 target_node = self.target_node
8328 source_node = self.source_node
8330 # Check for hypervisor version mismatch and warn the user.
8331 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8332 None, [self.instance.hypervisor])
8333 for ninfo in nodeinfo.values():
8334 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8336 (_, _, (src_info, )) = nodeinfo[source_node].payload
8337 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8339 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8340 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8341 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8342 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8343 if src_version != dst_version:
8344 self.feedback_fn("* warning: hypervisor version mismatch between"
8345 " source (%s) and target (%s) node" %
8346 (src_version, dst_version))
8348 self.feedback_fn("* checking disk consistency between source and target")
8349 for (idx, dev) in enumerate(instance.disks):
8350 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8351 raise errors.OpExecError("Disk %s is degraded or not fully"
8352 " synchronized on target node,"
8353 " aborting migration" % idx)
8355 if self.current_mem > self.tgt_free_mem:
8356 if not self.allow_runtime_changes:
8357 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8358 " free memory to fit instance %s on target"
8359 " node %s (have %dMB, need %dMB)" %
8360 (instance.name, target_node,
8361 self.tgt_free_mem, self.current_mem))
8362 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8363 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8366 rpcres.Raise("Cannot modify instance runtime memory")
8368 # First get the migration information from the remote node
8369 result = self.rpc.call_migration_info(source_node, instance)
8370 msg = result.fail_msg
8372 log_err = ("Failed fetching source migration information from %s: %s" %
8374 logging.error(log_err)
8375 raise errors.OpExecError(log_err)
8377 self.migration_info = migration_info = result.payload
8379 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8380 # Then switch the disks to master/master mode
8381 self._EnsureSecondary(target_node)
8382 self._GoStandalone()
8383 self._GoReconnect(True)
8384 self._WaitUntilSync()
8386 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8387 result = self.rpc.call_accept_instance(target_node,
8390 self.nodes_ip[target_node])
8392 msg = result.fail_msg
8394 logging.error("Instance pre-migration failed, trying to revert"
8395 " disk status: %s", msg)
8396 self.feedback_fn("Pre-migration failed, aborting")
8397 self._AbortMigration()
8398 self._RevertDiskStatus()
8399 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8400 (instance.name, msg))
8402 self.feedback_fn("* migrating instance to %s" % target_node)
8403 result = self.rpc.call_instance_migrate(source_node, instance,
8404 self.nodes_ip[target_node],
8406 msg = result.fail_msg
8408 logging.error("Instance migration failed, trying to revert"
8409 " disk status: %s", msg)
8410 self.feedback_fn("Migration failed, aborting")
8411 self._AbortMigration()
8412 self._RevertDiskStatus()
8413 raise errors.OpExecError("Could not migrate instance %s: %s" %
8414 (instance.name, msg))
8416 self.feedback_fn("* starting memory transfer")
8417 last_feedback = time.time()
8419 result = self.rpc.call_instance_get_migration_status(source_node,
8421 msg = result.fail_msg
8422 ms = result.payload # MigrationStatus instance
8423 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8424 logging.error("Instance migration failed, trying to revert"
8425 " disk status: %s", msg)
8426 self.feedback_fn("Migration failed, aborting")
8427 self._AbortMigration()
8428 self._RevertDiskStatus()
8429 raise errors.OpExecError("Could not migrate instance %s: %s" %
8430 (instance.name, msg))
8432 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8433 self.feedback_fn("* memory transfer complete")
8436 if (utils.TimeoutExpired(last_feedback,
8437 self._MIGRATION_FEEDBACK_INTERVAL) and
8438 ms.transferred_ram is not None):
8439 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8440 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8441 last_feedback = time.time()
8443 time.sleep(self._MIGRATION_POLL_INTERVAL)
8445 result = self.rpc.call_instance_finalize_migration_src(source_node,
8449 msg = result.fail_msg
8451 logging.error("Instance migration succeeded, but finalization failed"
8452 " on the source node: %s", msg)
8453 raise errors.OpExecError("Could not finalize instance migration: %s" %
8456 instance.primary_node = target_node
8458 # distribute new instance config to the other nodes
8459 self.cfg.Update(instance, self.feedback_fn)
8461 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8465 msg = result.fail_msg
8467 logging.error("Instance migration succeeded, but finalization failed"
8468 " on the target node: %s", msg)
8469 raise errors.OpExecError("Could not finalize instance migration: %s" %
8472 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8473 self._EnsureSecondary(source_node)
8474 self._WaitUntilSync()
8475 self._GoStandalone()
8476 self._GoReconnect(False)
8477 self._WaitUntilSync()
8479 # If the instance's disk template is `rbd' and there was a successful
8480 # migration, unmap the device from the source node.
8481 if self.instance.disk_template == constants.DT_RBD:
8482 disks = _ExpandCheckDisks(instance, instance.disks)
8483 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8485 result = self.rpc.call_blockdev_shutdown(source_node, disk)
8486 msg = result.fail_msg
8488 logging.error("Migration was successful, but couldn't unmap the"
8489 " block device %s on source node %s: %s",
8490 disk.iv_name, source_node, msg)
8491 logging.error("You need to unmap the device %s manually on %s",
8492 disk.iv_name, source_node)
8494 self.feedback_fn("* done")
8496 def _ExecFailover(self):
8497 """Failover an instance.
8499 The failover is done by shutting it down on its present node and
8500 starting it on the secondary.
8503 instance = self.instance
8504 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8506 source_node = instance.primary_node
8507 target_node = self.target_node
8509 if instance.admin_state == constants.ADMINST_UP:
8510 self.feedback_fn("* checking disk consistency between source and target")
8511 for (idx, dev) in enumerate(instance.disks):
8512 # for drbd, these are drbd over lvm
8513 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8515 if primary_node.offline:
8516 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8518 (primary_node.name, idx, target_node))
8519 elif not self.ignore_consistency:
8520 raise errors.OpExecError("Disk %s is degraded on target node,"
8521 " aborting failover" % idx)
8523 self.feedback_fn("* not checking disk consistency as instance is not"
8526 self.feedback_fn("* shutting down instance on source node")
8527 logging.info("Shutting down instance %s on node %s",
8528 instance.name, source_node)
8530 result = self.rpc.call_instance_shutdown(source_node, instance,
8531 self.shutdown_timeout)
8532 msg = result.fail_msg
8534 if self.ignore_consistency or primary_node.offline:
8535 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8536 " proceeding anyway; please make sure node"
8537 " %s is down; error details: %s",
8538 instance.name, source_node, source_node, msg)
8540 raise errors.OpExecError("Could not shutdown instance %s on"
8542 (instance.name, source_node, msg))
8544 self.feedback_fn("* deactivating the instance's disks on source node")
8545 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8546 raise errors.OpExecError("Can't shut down the instance's disks")
8548 instance.primary_node = target_node
8549 # distribute new instance config to the other nodes
8550 self.cfg.Update(instance, self.feedback_fn)
8552 # Only start the instance if it's marked as up
8553 if instance.admin_state == constants.ADMINST_UP:
8554 self.feedback_fn("* activating the instance's disks on target node %s" %
8556 logging.info("Starting instance %s on node %s",
8557 instance.name, target_node)
8559 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8560 ignore_secondaries=True)
8562 _ShutdownInstanceDisks(self.lu, instance)
8563 raise errors.OpExecError("Can't activate the instance's disks")
8565 self.feedback_fn("* starting the instance on the target node %s" %
8567 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8569 msg = result.fail_msg
8571 _ShutdownInstanceDisks(self.lu, instance)
8572 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8573 (instance.name, target_node, msg))
8575 def Exec(self, feedback_fn):
8576 """Perform the migration.
8579 self.feedback_fn = feedback_fn
8580 self.source_node = self.instance.primary_node
8582 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8583 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8584 self.target_node = self.instance.secondary_nodes[0]
8585 # Otherwise self.target_node has been populated either
8586 # directly, or through an iallocator.
8588 self.all_nodes = [self.source_node, self.target_node]
8589 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8590 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8593 feedback_fn("Failover instance %s" % self.instance.name)
8594 self._ExecFailover()
8596 feedback_fn("Migrating instance %s" % self.instance.name)
8599 return self._ExecCleanup()
8601 return self._ExecMigration()
8604 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8606 """Wrapper around L{_CreateBlockDevInner}.
8608 This method annotates the root device first.
8611 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8612 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8616 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8618 """Create a tree of block devices on a given node.
8620 If this device type has to be created on secondaries, create it and all its children.
8623 If not, just recurse to children keeping the same 'force' value.
8625 @attention: The device has to be annotated already.
8627 @param lu: the lu on whose behalf we execute
8628 @param node: the node on which to create the device
8629 @type instance: L{objects.Instance}
8630 @param instance: the instance which owns the device
8631 @type device: L{objects.Disk}
8632 @param device: the device to create
8633 @type force_create: boolean
8634 @param force_create: whether to force creation of this device; this
8635 will be changed to True whenever we find a device for which
8636 CreateOnSecondary() is true
8637 @param info: the extra 'metadata' we should attach to the device
8638 (this will be represented as a LVM tag)
8639 @type force_open: boolean
8640 @param force_open: this parameter will be passed to the
8641 L{backend.BlockdevCreate} function where it specifies
8642 whether we run on primary or not, and it affects both
8643 the child assembly and the device's own Open() execution
8646 if device.CreateOnSecondary():
8650 for child in device.children:
8651 _CreateBlockDevInner(lu, node, instance, child, force_create,
8654 if not force_create:
8657 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8660 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8661 """Create a single block device on a given node.
8663 This will not recurse over children of the device, so they must be
8666 @param lu: the lu on whose behalf we execute
8667 @param node: the node on which to create the device
8668 @type instance: L{objects.Instance}
8669 @param instance: the instance which owns the device
8670 @type device: L{objects.Disk}
8671 @param device: the device to create
8672 @param info: the extra 'metadata' we should attach to the device
8673 (this will be represented as a LVM tag)
8674 @type force_open: boolean
8675 @param force_open: this parameter will be passed to the
8676 L{backend.BlockdevCreate} function where it specifies
8677 whether we run on primary or not, and it affects both
8678 the child assembly and the device's own Open() execution
8681 lu.cfg.SetDiskID(device, node)
8682 result = lu.rpc.call_blockdev_create(node, device, device.size,
8683 instance.name, force_open, info)
8684 result.Raise("Can't create block device %s on"
8685 " node %s for instance %s" % (device, node, instance.name))
8686 if device.physical_id is None:
8687 device.physical_id = result.payload
8690 def _GenerateUniqueNames(lu, exts):
8691 """Generate a suitable LV name.
8693 This will generate a logical volume name for the given instance.
8698 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8699 results.append("%s%s" % (new_id, val))
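# Illustrative example (assumed behaviour of GenerateUniqueID, not part of
# the original module): _GenerateUniqueNames(lu, [".disk0", ".disk1"])
# returns a list such as ["<unique-id-a>.disk0", "<unique-id-b>.disk1"],
# one freshly generated ID per requested extension.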
8703 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8704 iv_name, p_minor, s_minor):
8705 """Generate a drbd8 device complete with its children.
8708 assert len(vgnames) == len(names) == 2
8709 port = lu.cfg.AllocatePort()
8710 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8712 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8713 logical_id=(vgnames[0], names[0]),
8715 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8716 logical_id=(vgnames[1], names[1]),
8718 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8719 logical_id=(primary, secondary, port,
8722 children=[dev_data, dev_meta],
8723 iv_name=iv_name, params={})
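# Sketch of the device tree built above (illustrative, not part of the
# original module):
#   LD_DRBD8 disk (size=size, nodes primary/secondary, port, minors, secret)
#   +- child LD_LV data volume (vgnames[0], names[0], size=size)
#   +- child LD_LV meta volume (vgnames[1], names[1], size=DRBD_META_SIZE)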
8727 _DISK_TEMPLATE_NAME_PREFIX = {
8728 constants.DT_PLAIN: "",
8729 constants.DT_RBD: ".rbd",
8733 _DISK_TEMPLATE_DEVICE_TYPE = {
8734 constants.DT_PLAIN: constants.LD_LV,
8735 constants.DT_FILE: constants.LD_FILE,
8736 constants.DT_SHARED_FILE: constants.LD_FILE,
8737 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8738 constants.DT_RBD: constants.LD_RBD,
8742 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8743 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8744 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8745 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8746 """Generate the entire disk layout for a given template type.
8749 # TODO: compute space requirements
8751 vgname = lu.cfg.GetVGName()
8752 disk_count = len(disk_info)
8755 if template_name == constants.DT_DISKLESS:
8757 elif template_name == constants.DT_DRBD8:
8758 if len(secondary_nodes) != 1:
8759 raise errors.ProgrammerError("Wrong template configuration")
8760 remote_node = secondary_nodes[0]
8761 minors = lu.cfg.AllocateDRBDMinor(
8762 [primary_node, remote_node] * len(disk_info), instance_name)
8764 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8766 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8769 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8770 for i in range(disk_count)]):
8771 names.append(lv_prefix + "_data")
8772 names.append(lv_prefix + "_meta")
8773 for idx, disk in enumerate(disk_info):
8774 disk_index = idx + base_index
8775 data_vg = disk.get(constants.IDISK_VG, vgname)
8776 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8777 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8778 disk[constants.IDISK_SIZE],
8780 names[idx * 2:idx * 2 + 2],
8781 "disk/%d" % disk_index,
8782 minors[idx * 2], minors[idx * 2 + 1])
8783 disk_dev.mode = disk[constants.IDISK_MODE]
8784 disks.append(disk_dev)
8787 raise errors.ProgrammerError("Wrong template configuration")
8789 if template_name == constants.DT_FILE:
8791 elif template_name == constants.DT_SHARED_FILE:
8792 _req_shr_file_storage()
8794 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8795 if name_prefix is None:
8798 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8799 (name_prefix, base_index + i)
8800 for i in range(disk_count)])
8802 if template_name == constants.DT_PLAIN:
8803 def logical_id_fn(idx, _, disk):
8804 vg = disk.get(constants.IDISK_VG, vgname)
8805 return (vg, names[idx])
8806 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8808 lambda _, disk_index, disk: (file_driver,
8809 "%s/disk%d" % (file_storage_dir,
8811 elif template_name == constants.DT_BLOCK:
8813 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8814 disk[constants.IDISK_ADOPT])
8815 elif template_name == constants.DT_RBD:
8816 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8818 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8820 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8822 for idx, disk in enumerate(disk_info):
8823 disk_index = idx + base_index
8824 size = disk[constants.IDISK_SIZE]
8825 feedback_fn("* disk %s, size %s" %
8826 (disk_index, utils.FormatUnit(size, "h")))
8827 disks.append(objects.Disk(dev_type=dev_type, size=size,
8828 logical_id=logical_id_fn(idx, disk_index, disk),
8829 iv_name="disk/%d" % disk_index,
8830 mode=disk[constants.IDISK_MODE],
8836 def _GetInstanceInfoText(instance):
8837 """Compute that text that should be added to the disk's metadata.
8840 return "originstname+%s" % instance.name
8843 def _CalcEta(time_taken, written, total_size):
8844 """Calculates the ETA based on size written and total size.
8846 @param time_taken: The time taken so far
8847 @param written: amount written so far
8848 @param total_size: The total size of data to be written
8849 @return: The remaining time in seconds
8852 avg_time = time_taken / float(written)
8853 return (total_size - written) * avg_time
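# Worked example: _CalcEta(30.0, 256, 1024) gives avg_time = 30.0 / 256
# ~= 0.117 seconds per unit, so the remaining 1024 - 256 = 768 units take
# about 768 * 0.117 ~= 90 seconds.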
8856 def _WipeDisks(lu, instance):
8857 """Wipes instance disks.
8859 @type lu: L{LogicalUnit}
8860 @param lu: the logical unit on whose behalf we execute
8861 @type instance: L{objects.Instance}
8862 @param instance: the instance whose disks we should wipe
8863 @return: the success of the wipe
8866 node = instance.primary_node
8868 for device in instance.disks:
8869 lu.cfg.SetDiskID(device, node)
8871 logging.info("Pause sync of instance %s disks", instance.name)
8872 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8873 (instance.disks, instance),
8876 for idx, success in enumerate(result.payload):
8878 logging.warn("pause-sync of instance %s for disks %d failed",
8882 for idx, device in enumerate(instance.disks):
8883 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk, but
8884 # at most MAX_WIPE_CHUNK
8885 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8886 constants.MIN_WIPE_CHUNK_PERCENT)
8887 # we _must_ make this an int, otherwise rounding errors will
8889 wipe_chunk_size = int(wipe_chunk_size)
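# Illustrative numbers (the constants' actual values are an assumption):
# with MIN_WIPE_CHUNK_PERCENT = 10 and MAX_WIPE_CHUNK = 1024 (MiB), a
# 2048 MiB disk uses int(min(1024, 2048 / 100.0 * 10)) = 204 MiB chunks,
# while a 100 GiB disk is capped at 1024 MiB chunks.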
8891 lu.LogInfo("* Wiping disk %d", idx)
8892 logging.info("Wiping disk %d for instance %s, node %s using"
8893 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8898 start_time = time.time()
8900 while offset < size:
8901 wipe_size = min(wipe_chunk_size, size - offset)
8902 logging.debug("Wiping disk %d, offset %s, chunk %s",
8903 idx, offset, wipe_size)
8904 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
8906 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8907 (idx, offset, wipe_size))
8910 if now - last_output >= 60:
8911 eta = _CalcEta(now - start_time, offset, size)
8912 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8913 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8916 logging.info("Resume sync of instance %s disks", instance.name)
8918 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8919 (instance.disks, instance),
8922 for idx, success in enumerate(result.payload):
8924 lu.LogWarning("Resume sync of disk %d failed, please have a"
8925 " look at the status and troubleshoot the issue", idx)
8926 logging.warn("resume-sync of instance %s for disks %d failed",
8930 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8931 """Create all disks for an instance.
8933 This abstracts away some work from AddInstance.
8935 @type lu: L{LogicalUnit}
8936 @param lu: the logical unit on whose behalf we execute
8937 @type instance: L{objects.Instance}
8938 @param instance: the instance whose disks we should create
8940 @param to_skip: list of indices to skip
8941 @type target_node: string
8942 @param target_node: if passed, overrides the target node for creation
8944 @return: the success of the creation
8947 info = _GetInstanceInfoText(instance)
8948 if target_node is None:
8949 pnode = instance.primary_node
8950 all_nodes = instance.all_nodes
8955 if instance.disk_template in constants.DTS_FILEBASED:
8956 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8957 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8959 result.Raise("Failed to create directory '%s' on"
8960 " node %s" % (file_storage_dir, pnode))
8962 # Note: this needs to be kept in sync with adding of disks in
8963 # LUInstanceSetParams
8964 for idx, device in enumerate(instance.disks):
8965 if to_skip and idx in to_skip:
8967 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
8969 for node in all_nodes:
8970 f_create = node == pnode
8971 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8974 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
8975 """Remove all disks for an instance.
8977 This abstracts away some work from `AddInstance()` and
8978 `RemoveInstance()`. Note that in case some of the devices couldn't
8979 be removed, the removal will continue with the other ones (compare
8980 with `_CreateDisks()`).
8982 @type lu: L{LogicalUnit}
8983 @param lu: the logical unit on whose behalf we execute
8984 @type instance: L{objects.Instance}
8985 @param instance: the instance whose disks we should remove
8986 @type target_node: string
8987 @param target_node: used to override the node on which to remove the disks
8989 @return: the success of the removal
8992 logging.info("Removing block devices for instance %s", instance.name)
8995 ports_to_release = set()
8996 for (idx, device) in enumerate(instance.disks):
8998 edata = [(target_node, device)]
9000 edata = device.ComputeNodeTree(instance.primary_node)
9001 for node, disk in edata:
9002 lu.cfg.SetDiskID(disk, node)
9003 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
9005 lu.LogWarning("Could not remove disk %s on node %s,"
9006 " continuing anyway: %s", idx, node, msg)
9009 # if this is a DRBD disk, return its port to the pool
9010 if device.dev_type in constants.LDS_DRBD:
9011 ports_to_release.add(device.logical_id[2])
9013 if all_result or ignore_failures:
9014 for port in ports_to_release:
9015 lu.cfg.AddTcpUdpPort(port)
9017 if instance.disk_template == constants.DT_FILE:
9018 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9022 tgt = instance.primary_node
9023 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9025 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9026 file_storage_dir, instance.primary_node, result.fail_msg)
9032 def _ComputeDiskSizePerVG(disk_template, disks):
9033 """Compute disk size requirements in the volume group
9036 def _compute(disks, payload):
9037 """Universal algorithm.
9042 vgs[disk[constants.IDISK_VG]] = \
9043 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9047 # Required free disk space as a function of the disk template and disk sizes
9049 constants.DT_DISKLESS: {},
9050 constants.DT_PLAIN: _compute(disks, 0),
9051 # 128 MB are added for drbd metadata for each disk
9052 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9053 constants.DT_FILE: {},
9054 constants.DT_SHARED_FILE: {},
9057 if disk_template not in req_size_dict:
9058 raise errors.ProgrammerError("Disk template '%s' size requirement"
9059 " is unknown" % disk_template)
9061 return req_size_dict[disk_template]
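# Illustrative example (input shape assumed from the IDISK_* usage above):
# for two DRBD8 disks [{"vg": "xenvg", "size": 1024}, {"vg": "xenvg",
# "size": 2048}] the result is {"xenvg": 1024 + 2048 + 2 * DRBD_META_SIZE},
# i.e. the data sizes plus one metadata allowance per disk, accumulated per
# volume group.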
9064 def _ComputeDiskSize(disk_template, disks):
9065 """Compute disk size requirements in the volume group
9068 # Required free disk space as a function of the disk template and disk sizes
9070 constants.DT_DISKLESS: None,
9071 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9072 # 128 MB are added for drbd metadata for each disk
9074 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9075 constants.DT_FILE: None,
9076 constants.DT_SHARED_FILE: 0,
9077 constants.DT_BLOCK: 0,
9078 constants.DT_RBD: 0,
9081 if disk_template not in req_size_dict:
9082 raise errors.ProgrammerError("Disk template '%s' size requirement"
9083 " is unknown" % disk_template)
9085 return req_size_dict[disk_template]
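# Illustrative example: for the same two disks of 1024 and 2048 MiB,
# DT_PLAIN requires 3072 MiB and DT_DRBD8 requires 3072 + 2 * DRBD_META_SIZE,
# while templates that do not allocate from local storage report 0 or None.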
9088 def _FilterVmNodes(lu, nodenames):
9089 """Filters out non-vm_capable nodes from a list.
9091 @type lu: L{LogicalUnit}
9092 @param lu: the logical unit for which we check
9093 @type nodenames: list
9094 @param nodenames: the list of nodes on which we should check
9096 @return: the list of vm-capable nodes
9099 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9100 return [name for name in nodenames if name not in non_vm_nodes]
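# Illustrative example: if "node2" is marked non-vm_capable,
# _FilterVmNodes(lu, ["node1", "node2", "node3"]) returns ["node1", "node3"].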
9103 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9104 """Hypervisor parameter validation.
9106 This function abstracts the hypervisor parameter validation to be
9107 used in both instance create and instance modify.
9109 @type lu: L{LogicalUnit}
9110 @param lu: the logical unit for which we check
9111 @type nodenames: list
9112 @param nodenames: the list of nodes on which we should check
9113 @type hvname: string
9114 @param hvname: the name of the hypervisor we should use
9115 @type hvparams: dict
9116 @param hvparams: the parameters which we need to check
9117 @raise errors.OpPrereqError: if the parameters are not valid
9120 nodenames = _FilterVmNodes(lu, nodenames)
9122 cluster = lu.cfg.GetClusterInfo()
9123 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9125 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9126 for node in nodenames:
9130 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9133 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9134 """OS parameters validation.
9136 @type lu: L{LogicalUnit}
9137 @param lu: the logical unit for which we check
9138 @type required: boolean
9139 @param required: whether the validation should fail if the OS is not
9141 @type nodenames: list
9142 @param nodenames: the list of nodes on which we should check
9143 @type osname: string
9144 @param osname: the name of the OS we should check
9145 @type osparams: dict
9146 @param osparams: the parameters which we need to check
9147 @raise errors.OpPrereqError: if the parameters are not valid
9150 nodenames = _FilterVmNodes(lu, nodenames)
9151 result = lu.rpc.call_os_validate(nodenames, required, osname,
9152 [constants.OS_VALIDATE_PARAMETERS],
9154 for node, nres in result.items():
9155 # we don't check for offline cases since this should be run only
9156 # against the master node and/or an instance's nodes
9157 nres.Raise("OS Parameters validation failed on node %s" % node)
9158 if not nres.payload:
9159 lu.LogInfo("OS %s not found on node %s, validation skipped",
9163 class LUInstanceCreate(LogicalUnit):
9164 """Create an instance.
9167 HPATH = "instance-add"
9168 HTYPE = constants.HTYPE_INSTANCE
9171 def CheckArguments(self):
9175 # do not require name_check to ease forward/backward compatibility
9177 if self.op.no_install and self.op.start:
9178 self.LogInfo("No-installation mode selected, disabling startup")
9179 self.op.start = False
9180 # validate/normalize the instance name
9181 self.op.instance_name = \
9182 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9184 if self.op.ip_check and not self.op.name_check:
9185 # TODO: make the ip check more flexible and not depend on the name check
9186 raise errors.OpPrereqError("Cannot do IP address check without a name"
9187 " check", errors.ECODE_INVAL)
9189 # check nics' parameter names
9190 for nic in self.op.nics:
9191 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9193 # check disks: parameter names and consistent adopt/no-adopt strategy
9194 has_adopt = has_no_adopt = False
9195 for disk in self.op.disks:
9196 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9197 if constants.IDISK_ADOPT in disk:
9201 if has_adopt and has_no_adopt:
9202 raise errors.OpPrereqError("Either all disks are adopted or none is",
9205 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9206 raise errors.OpPrereqError("Disk adoption is not supported for the"
9207 " '%s' disk template" %
9208 self.op.disk_template,
9210 if self.op.iallocator is not None:
9211 raise errors.OpPrereqError("Disk adoption not allowed with an"
9212 " iallocator script", errors.ECODE_INVAL)
9213 if self.op.mode == constants.INSTANCE_IMPORT:
9214 raise errors.OpPrereqError("Disk adoption not allowed for"
9215 " instance import", errors.ECODE_INVAL)
9217 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9218 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9219 " but no 'adopt' parameter given" %
9220 self.op.disk_template,
9223 self.adopt_disks = has_adopt
9225 # instance name verification
9226 if self.op.name_check:
9227 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9228 self.op.instance_name = self.hostname1.name
9229 # used in CheckPrereq for ip ping check
9230 self.check_ip = self.hostname1.ip
9232 self.check_ip = None
9234 # file storage checks
9235 if (self.op.file_driver and
9236 not self.op.file_driver in constants.FILE_DRIVER):
9237 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9238 self.op.file_driver, errors.ECODE_INVAL)
9240 if self.op.disk_template == constants.DT_FILE:
9241 opcodes.RequireFileStorage()
9242 elif self.op.disk_template == constants.DT_SHARED_FILE:
9243 opcodes.RequireSharedFileStorage()
9245 ### Node/iallocator related checks
9246 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9248 if self.op.pnode is not None:
9249 if self.op.disk_template in constants.DTS_INT_MIRROR:
9250 if self.op.snode is None:
9251 raise errors.OpPrereqError("The networked disk templates need"
9252 " a mirror node", errors.ECODE_INVAL)
9254 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9256 self.op.snode = None
9258 self._cds = _GetClusterDomainSecret()
9260 if self.op.mode == constants.INSTANCE_IMPORT:
9261 # On import force_variant must be True, because if we forced it at
9262 # initial install, our only chance when importing it back is that it
9264 self.op.force_variant = True
9266 if self.op.no_install:
9267 self.LogInfo("No-installation mode has no effect during import")
9269 elif self.op.mode == constants.INSTANCE_CREATE:
9270 if self.op.os_type is None:
9271 raise errors.OpPrereqError("No guest OS specified",
9273 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9274 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9275 " installation" % self.op.os_type,
9277 if self.op.disk_template is None:
9278 raise errors.OpPrereqError("No disk template specified",
9281 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9282 # Check handshake to ensure both clusters have the same domain secret
9283 src_handshake = self.op.source_handshake
9284 if not src_handshake:
9285 raise errors.OpPrereqError("Missing source handshake",
9288 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9291 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9294 # Load and check source CA
9295 self.source_x509_ca_pem = self.op.source_x509_ca
9296 if not self.source_x509_ca_pem:
9297 raise errors.OpPrereqError("Missing source X509 CA",
9301 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9303 except OpenSSL.crypto.Error, err:
9304 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9305 (err, ), errors.ECODE_INVAL)
9307 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9308 if errcode is not None:
9309 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9312 self.source_x509_ca = cert
9314 src_instance_name = self.op.source_instance_name
9315 if not src_instance_name:
9316 raise errors.OpPrereqError("Missing source instance name",
9319 self.source_instance_name = \
9320 netutils.GetHostname(name=src_instance_name).name
9323 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9324 self.op.mode, errors.ECODE_INVAL)
9326 def ExpandNames(self):
9327 """ExpandNames for CreateInstance.
9329 Figure out the right locks for instance creation.
9331 """
9332 self.needed_locks = {}
9334 instance_name = self.op.instance_name
9335 # this is just a preventive check, but someone might still add this
9336 # instance in the meantime, and creation will fail at lock-add time
9337 if instance_name in self.cfg.GetInstanceList():
9338 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9339 instance_name, errors.ECODE_EXISTS)
9341 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9343 if self.op.iallocator:
9344 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9345 # specifying a group on instance creation and then selecting nodes from
9346 # that group
9347 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9348 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9349 else:
9350 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9351 nodelist = [self.op.pnode]
9352 if self.op.snode is not None:
9353 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9354 nodelist.append(self.op.snode)
9355 self.needed_locks[locking.LEVEL_NODE] = nodelist
9356 # Lock resources of instance's primary and secondary nodes (copy to
9357 # prevent accidential modification)
9358 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9360 # in case of import lock the source node too
9361 if self.op.mode == constants.INSTANCE_IMPORT:
9362 src_node = self.op.src_node
9363 src_path = self.op.src_path
9365 if src_path is None:
9366 self.op.src_path = src_path = self.op.instance_name
9368 if src_node is None:
9369 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9370 self.op.src_node = None
9371 if os.path.isabs(src_path):
9372 raise errors.OpPrereqError("Importing an instance from a path"
9373 " requires a source node option",
9376 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9377 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9378 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9379 if not os.path.isabs(src_path):
9380 self.op.src_path = src_path = \
9381 utils.PathJoin(constants.EXPORT_DIR, src_path)
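# Example (hypothetical paths): for an import with src_node "node2" and the
# relative src_path "inst1.example.com", the path is anchored below
# constants.EXPORT_DIR, yielding something like
# "/srv/ganeti/export/inst1.example.com"; the actual prefix depends on the
# build-time export directory.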
9383 def _RunAllocator(self):
9384 """Run the allocator based on input opcode.
9387 nics = [n.ToDict() for n in self.nics]
9388 ial = IAllocator(self.cfg, self.rpc,
9389 mode=constants.IALLOCATOR_MODE_ALLOC,
9390 name=self.op.instance_name,
9391 disk_template=self.op.disk_template,
9392 tags=self.op.tags,
9393 os=self.op.os_type,
9394 vcpus=self.be_full[constants.BE_VCPUS],
9395 memory=self.be_full[constants.BE_MAXMEM],
9396 spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9397 disks=self.disks,
9398 nics=nics,
9399 hypervisor=self.op.hypervisor,
9400 )
9402 ial.Run(self.op.iallocator)
9404 if not ial.success:
9405 raise errors.OpPrereqError("Can't compute nodes using"
9406 " iallocator '%s': %s" %
9407 (self.op.iallocator, ial.info),
9408 errors.ECODE_NORES)
9409 if len(ial.result) != ial.required_nodes:
9410 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9411 " of nodes (%s), required %s" %
9412 (self.op.iallocator, len(ial.result),
9413 ial.required_nodes), errors.ECODE_FAULT)
9414 self.op.pnode = ial.result[0]
9415 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9416 self.op.instance_name, self.op.iallocator,
9417 utils.CommaJoin(ial.result))
9418 if ial.required_nodes == 2:
9419 self.op.snode = ial.result[1]
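# Illustration (hypothetical reply): for a DRBD8 instance the allocator
# must return exactly two names, e.g. ial.result == ["node3", "node7"],
# which the code above maps to pnode/snode; single-node templates only
# consume ial.result[0].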
9421 def BuildHooksEnv(self):
9422 """Build hooks env.
9424 This runs on master, primary and secondary nodes of the instance.
9426 """
9427 env = {
9428 "ADD_MODE": self.op.mode,
9429 }
9430 if self.op.mode == constants.INSTANCE_IMPORT:
9431 env["SRC_NODE"] = self.op.src_node
9432 env["SRC_PATH"] = self.op.src_path
9433 env["SRC_IMAGES"] = self.src_images
9435 env.update(_BuildInstanceHookEnv(
9436 name=self.op.instance_name,
9437 primary_node=self.op.pnode,
9438 secondary_nodes=self.secondaries,
9439 status=self.op.start,
9440 os_type=self.op.os_type,
9441 minmem=self.be_full[constants.BE_MINMEM],
9442 maxmem=self.be_full[constants.BE_MAXMEM],
9443 vcpus=self.be_full[constants.BE_VCPUS],
9444 nics=_NICListToTuple(self, self.nics),
9445 disk_template=self.op.disk_template,
9446 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9447 for d in self.disks],
9448 bep=self.be_full,
9449 hvp=self.hv_full,
9450 hypervisor_name=self.op.hypervisor,
9451 tags=self.op.tags,
9452 ))
9454 return env
9456 def BuildHooksNodes(self):
9457 """Build hooks nodes.
9460 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9463 def _ReadExportInfo(self):
9464 """Reads the export information from disk.
9466 It will override the opcode source node and path with the actual
9467 information, if these two were not specified before.
9469 @return: the export information
9471 """
9472 assert self.op.mode == constants.INSTANCE_IMPORT
9474 src_node = self.op.src_node
9475 src_path = self.op.src_path
9477 if src_node is None:
9478 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9479 exp_list = self.rpc.call_export_list(locked_nodes)
9480 found = False
9481 for node in exp_list:
9482 if exp_list[node].fail_msg:
9483 continue
9484 if src_path in exp_list[node].payload:
9485 found = True
9486 self.op.src_node = src_node = node
9487 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9488 src_path)
9489 break
9490 if not found:
9491 raise errors.OpPrereqError("No export found for relative path %s" %
9492 src_path, errors.ECODE_INVAL)
9494 _CheckNodeOnline(self, src_node)
9495 result = self.rpc.call_export_info(src_node, src_path)
9496 result.Raise("No export or invalid export found in dir %s" % src_path)
9498 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9499 if not export_info.has_section(constants.INISECT_EXP):
9500 raise errors.ProgrammerError("Corrupted export config",
9501 errors.ECODE_ENVIRON)
9503 ei_version = export_info.get(constants.INISECT_EXP, "version")
9504 if (int(ei_version) != constants.EXPORT_VERSION):
9505 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9506 (ei_version, constants.EXPORT_VERSION),
9507 errors.ECODE_ENVIRON)
9509 return export_info
9510 def _ReadExportParams(self, einfo):
9511 """Use export parameters as defaults.
9513 In case the opcode doesn't specify (as in override) some instance
9514 parameters, then try to use them from the export information, if
9515 that declares them.
9517 """
9518 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9520 if self.op.disk_template is None:
9521 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9522 self.op.disk_template = einfo.get(constants.INISECT_INS,
9523 "disk_template")
9524 if self.op.disk_template not in constants.DISK_TEMPLATES:
9525 raise errors.OpPrereqError("Disk template specified in configuration"
9526 " file is not one of the allowed values:"
9527 " %s" % " ".join(constants.DISK_TEMPLATES))
9528 else:
9529 raise errors.OpPrereqError("No disk template specified and the export"
9530 " is missing the disk_template information",
9531 errors.ECODE_INVAL)
9533 if not self.op.disks:
9534 disks = []
9535 # TODO: import the disk iv_name too
9536 for idx in range(constants.MAX_DISKS):
9537 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9538 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9539 disks.append({constants.IDISK_SIZE: disk_sz})
9540 self.op.disks = disks
9541 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9542 raise errors.OpPrereqError("No disk info specified and the export"
9543 " is missing the disk information",
9546 if not self.op.nics:
9547 nics = []
9548 for idx in range(constants.MAX_NICS):
9549 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9550 ndict = {}
9551 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9552 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9553 ndict[name] = v
9554 nics.append(ndict)
9555 else:
9556 break
9557 self.op.nics = nics
9559 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9560 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9562 if (self.op.hypervisor is None and
9563 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9564 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9566 if einfo.has_section(constants.INISECT_HYP):
9567 # use the export parameters but do not override the ones
9568 # specified by the user
9569 for name, value in einfo.items(constants.INISECT_HYP):
9570 if name not in self.op.hvparams:
9571 self.op.hvparams[name] = value
9573 if einfo.has_section(constants.INISECT_BEP):
9574 # use the parameters, without overriding
9575 for name, value in einfo.items(constants.INISECT_BEP):
9576 if name not in self.op.beparams:
9577 self.op.beparams[name] = value
9578 # Compatibility for the old "memory" be param
9579 if name == constants.BE_MEMORY:
9580 if constants.BE_MAXMEM not in self.op.beparams:
9581 self.op.beparams[constants.BE_MAXMEM] = value
9582 if constants.BE_MINMEM not in self.op.beparams:
9583 self.op.beparams[constants.BE_MINMEM] = value
9584 else:
9585 # try to read the parameters old style, from the main section
9586 for name in constants.BES_PARAMETERS:
9587 if (name not in self.op.beparams and
9588 einfo.has_option(constants.INISECT_INS, name)):
9589 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
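# Compatibility example (hypothetical export contents): an old export
# carrying only "memory = 128" ends up, via the BE_MEMORY branch above,
# with both BE_MAXMEM and BE_MINMEM set to 128 unless the opcode already
# provided those values.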
9591 if einfo.has_section(constants.INISECT_OSP):
9592 # use the parameters, without overriding
9593 for name, value in einfo.items(constants.INISECT_OSP):
9594 if name not in self.op.osparams:
9595 self.op.osparams[name] = value
9597 def _RevertToDefaults(self, cluster):
9598 """Revert the instance parameters to the default values.
9602 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9603 for name in self.op.hvparams.keys():
9604 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9605 del self.op.hvparams[name]
9607 be_defs = cluster.SimpleFillBE({})
9608 for name in self.op.beparams.keys():
9609 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9610 del self.op.beparams[name]
9612 nic_defs = cluster.SimpleFillNIC({})
9613 for nic in self.op.nics:
9614 for name in constants.NICS_PARAMETERS:
9615 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9616 del nic[name]
9618 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9619 for name in self.op.osparams.keys():
9620 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9621 del self.op.osparams[name]
9623 def _CalculateFileStorageDir(self):
9624 """Calculate final instance file storage dir.
9627 # file storage dir calculation/check
9628 self.instance_file_storage_dir = None
9629 if self.op.disk_template in constants.DTS_FILEBASED:
9630 # build the full file storage dir path
9631 joinargs = []
9633 if self.op.disk_template == constants.DT_SHARED_FILE:
9634 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9635 else:
9636 get_fsd_fn = self.cfg.GetFileStorageDir
9638 cfg_storagedir = get_fsd_fn()
9639 if not cfg_storagedir:
9640 raise errors.OpPrereqError("Cluster file storage dir not defined")
9641 joinargs.append(cfg_storagedir)
9643 if self.op.file_storage_dir is not None:
9644 joinargs.append(self.op.file_storage_dir)
9646 joinargs.append(self.op.instance_name)
9648 # pylint: disable=W0142
9649 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
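# Worked example (hypothetical values): with a cluster file storage dir of
# "/srv/ganeti/file-storage", file_storage_dir "web" and instance name
# "inst1.example.com", the resulting path is
# "/srv/ganeti/file-storage/web/inst1.example.com".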
9651 def CheckPrereq(self): # pylint: disable=R0914
9652 """Check prerequisites.
9655 self._CalculateFileStorageDir()
9657 if self.op.mode == constants.INSTANCE_IMPORT:
9658 export_info = self._ReadExportInfo()
9659 self._ReadExportParams(export_info)
9661 if (not self.cfg.GetVGName() and
9662 self.op.disk_template not in constants.DTS_NOT_LVM):
9663 raise errors.OpPrereqError("Cluster does not support lvm-based"
9664 " instances", errors.ECODE_STATE)
9666 if (self.op.hypervisor is None or
9667 self.op.hypervisor == constants.VALUE_AUTO):
9668 self.op.hypervisor = self.cfg.GetHypervisorType()
9670 cluster = self.cfg.GetClusterInfo()
9671 enabled_hvs = cluster.enabled_hypervisors
9672 if self.op.hypervisor not in enabled_hvs:
9673 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9674 " cluster (%s)" % (self.op.hypervisor,
9675 ",".join(enabled_hvs)),
9678 # Check tag validity
9679 for tag in self.op.tags:
9680 objects.TaggableObject.ValidateTag(tag)
9682 # check hypervisor parameter syntax (locally)
9683 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9684 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9685 self.op.hvparams)
9686 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9687 hv_type.CheckParameterSyntax(filled_hvp)
9688 self.hv_full = filled_hvp
9689 # check that we don't specify global parameters on an instance
9690 _CheckGlobalHvParams(self.op.hvparams)
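# Sketch of the layering assumed here: SimpleFillHV merges, in increasing
# priority, the cluster-wide defaults for the hypervisor, any cluster
# per-OS overrides for self.op.os_type and finally the opcode's own
# hvparams; the merged result (filled_hvp) is what was syntax-checked
# above and remembered as self.hv_full.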
9692 # fill and remember the beparams dict
9693 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9694 for param, value in self.op.beparams.iteritems():
9695 if value == constants.VALUE_AUTO:
9696 self.op.beparams[param] = default_beparams[param]
9697 objects.UpgradeBeParams(self.op.beparams)
9698 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9699 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9701 # build os parameters
9702 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9704 # now that hvp/bep are in final format, let's reset to defaults,
9705 # if told to do so
9706 if self.op.identify_defaults:
9707 self._RevertToDefaults(cluster)
9709 # NIC buildup
9710 self.nics = []
9711 for idx, nic in enumerate(self.op.nics):
9712 nic_mode_req = nic.get(constants.INIC_MODE, None)
9713 nic_mode = nic_mode_req
9714 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9715 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9717 # in routed mode, for the first nic, the default ip is 'auto'
9718 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9719 default_ip_mode = constants.VALUE_AUTO
9720 else:
9721 default_ip_mode = constants.VALUE_NONE
9723 # ip validity checks
9724 ip = nic.get(constants.INIC_IP, default_ip_mode)
9725 if ip is None or ip.lower() == constants.VALUE_NONE:
9726 nic_ip = None
9727 elif ip.lower() == constants.VALUE_AUTO:
9728 if not self.op.name_check:
9729 raise errors.OpPrereqError("IP address set to auto but name checks"
9730 " have been skipped",
9731 errors.ECODE_INVAL)
9732 nic_ip = self.hostname1.ip
9733 else:
9734 if not netutils.IPAddress.IsValid(ip):
9735 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9736 errors.ECODE_INVAL)
9738 nic_ip = ip
9739 # TODO: check the ip address for uniqueness
9740 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9741 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9744 # MAC address verification
9745 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9746 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9747 mac = utils.NormalizeAndValidateMac(mac)
9749 try:
9750 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9751 except errors.ReservationError:
9752 raise errors.OpPrereqError("MAC address %s already in use"
9753 " in cluster" % mac,
9754 errors.ECODE_NOTUNIQUE)
9756 # Build nic parameters
9757 link = nic.get(constants.INIC_LINK, None)
9758 if link == constants.VALUE_AUTO:
9759 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9760 nicparams = {}
9761 if nic_mode_req:
9762 nicparams[constants.NIC_MODE] = nic_mode
9763 if link:
9764 nicparams[constants.NIC_LINK] = link
9766 check_params = cluster.SimpleFillNIC(nicparams)
9767 objects.NIC.CheckParameterSyntax(check_params)
9768 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9770 # disk checks/pre-build
9771 default_vg = self.cfg.GetVGName()
9772 self.disks = []
9773 for disk in self.op.disks:
9774 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9775 if mode not in constants.DISK_ACCESS_SET:
9776 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9777 mode, errors.ECODE_INVAL)
9778 size = disk.get(constants.IDISK_SIZE, None)
9779 if size is None:
9780 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9781 try:
9782 size = int(size)
9783 except (TypeError, ValueError):
9784 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9785 errors.ECODE_INVAL)
9787 data_vg = disk.get(constants.IDISK_VG, default_vg)
9788 new_disk = {
9789 constants.IDISK_SIZE: size,
9790 constants.IDISK_MODE: mode,
9791 constants.IDISK_VG: data_vg,
9792 }
9793 if constants.IDISK_METAVG in disk:
9794 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9795 if constants.IDISK_ADOPT in disk:
9796 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9797 self.disks.append(new_disk)
9799 if self.op.mode == constants.INSTANCE_IMPORT:
9800 disk_images = []
9801 for idx in range(len(self.disks)):
9802 option = "disk%d_dump" % idx
9803 if export_info.has_option(constants.INISECT_INS, option):
9804 # FIXME: are the old os-es, disk sizes, etc. useful?
9805 export_name = export_info.get(constants.INISECT_INS, option)
9806 image = utils.PathJoin(self.op.src_path, export_name)
9807 disk_images.append(image)
9808 else:
9809 disk_images.append(False)
9811 self.src_images = disk_images
9813 old_name = export_info.get(constants.INISECT_INS, "name")
9814 if self.op.instance_name == old_name:
9815 for idx, nic in enumerate(self.nics):
9816 if nic.mac == constants.VALUE_AUTO:
9817 nic_mac_ini = "nic%d_mac" % idx
9818 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9820 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9822 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9823 if self.op.ip_check:
9824 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9825 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9826 (self.check_ip, self.op.instance_name),
9827 errors.ECODE_NOTUNIQUE)
9829 #### mac address generation
9830 # By generating here the mac address both the allocator and the hooks get
9831 # the real final mac address rather than the 'auto' or 'generate' value.
9832 # There is a race condition between the generation and the instance object
9833 # creation, which means that we know the mac is valid now, but we're not
9834 # sure it will be when we actually add the instance. If things go bad
9835 # adding the instance will abort because of a duplicate mac, and the
9836 # creation job will fail.
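# Illustration of the race (hypothetical address): GenerateMAC may hand
# out "aa:00:00:12:34:56" now, yet a concurrent job can still claim the
# same address before AddInstance runs; the duplicate is then detected at
# config-update time and this creation job aborts cleanly.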
9837 for nic in self.nics:
9838 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9839 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9841 #### allocator run
9843 if self.op.iallocator is not None:
9844 self._RunAllocator()
9846 # Release all unneeded node locks
9847 _ReleaseLocks(self, locking.LEVEL_NODE,
9848 keep=filter(None, [self.op.pnode, self.op.snode,
9849 self.op.src_node]))
9850 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9851 keep=filter(None, [self.op.pnode, self.op.snode,
9852 self.op.src_node]))
9854 #### node related checks
9856 # check primary node
9857 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9858 assert self.pnode is not None, \
9859 "Cannot retrieve locked node %s" % self.op.pnode
9861 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9862 pnode.name, errors.ECODE_STATE)
9864 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9865 pnode.name, errors.ECODE_STATE)
9866 if not pnode.vm_capable:
9867 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9868 " '%s'" % pnode.name, errors.ECODE_STATE)
9870 self.secondaries = []
9872 # mirror node verification
9873 if self.op.disk_template in constants.DTS_INT_MIRROR:
9874 if self.op.snode == pnode.name:
9875 raise errors.OpPrereqError("The secondary node cannot be the"
9876 " primary node", errors.ECODE_INVAL)
9877 _CheckNodeOnline(self, self.op.snode)
9878 _CheckNodeNotDrained(self, self.op.snode)
9879 _CheckNodeVmCapable(self, self.op.snode)
9880 self.secondaries.append(self.op.snode)
9882 snode = self.cfg.GetNodeInfo(self.op.snode)
9883 if pnode.group != snode.group:
9884 self.LogWarning("The primary and secondary nodes are in two"
9885 " different node groups; the disk parameters"
9886 " from the first disk's node group will be"
9889 nodenames = [pnode.name] + self.secondaries
9891 # Verify instance specs
9892 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
9893 ispec = {
9894 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9895 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9896 constants.ISPEC_DISK_COUNT: len(self.disks),
9897 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE] for disk in self.disks],
9898 constants.ISPEC_NIC_COUNT: len(self.nics),
9899 constants.ISPEC_SPINDLE_USE: spindle_use,
9900 }
9902 group_info = self.cfg.GetNodeGroup(pnode.group)
9903 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9904 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9905 if not self.op.ignore_ipolicy and res:
9906 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9907 " policy: %s") % (pnode.group,
9908 utils.CommaJoin(res)),
9909 errors.ECODE_INVAL)
9911 if not self.adopt_disks:
9912 if self.op.disk_template == constants.DT_RBD:
9913 # _CheckRADOSFreeSpace() is just a placeholder.
9914 # Any function that checks prerequisites can be placed here.
9915 # Check if there is enough space on the RADOS cluster.
9916 _CheckRADOSFreeSpace()
9918 # Check lv size requirements, if not adopting
9919 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9920 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9922 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9923 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9924 disk[constants.IDISK_ADOPT])
9925 for disk in self.disks])
9926 if len(all_lvs) != len(self.disks):
9927 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9929 for lv_name in all_lvs:
9931 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9932 # to ReserveLV uses the same syntax
9933 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9934 except errors.ReservationError:
9935 raise errors.OpPrereqError("LV named %s used by another instance" %
9936 lv_name, errors.ECODE_NOTUNIQUE)
9938 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9939 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9941 node_lvs = self.rpc.call_lv_list([pnode.name],
9942 vg_names.payload.keys())[pnode.name]
9943 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9944 node_lvs = node_lvs.payload
9946 delta = all_lvs.difference(node_lvs.keys())
9947 if delta:
9948 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9949 utils.CommaJoin(delta),
9950 errors.ECODE_INVAL)
9951 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9952 if online_lvs:
9953 raise errors.OpPrereqError("Online logical volumes found, cannot"
9954 " adopt: %s" % utils.CommaJoin(online_lvs),
9955 errors.ECODE_STATE)
9956 # update the size of disk based on what is found
9957 for dsk in self.disks:
9958 dsk[constants.IDISK_SIZE] = \
9959 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9960 dsk[constants.IDISK_ADOPT])][0]))
9962 elif self.op.disk_template == constants.DT_BLOCK:
9963 # Normalize and de-duplicate device paths
9964 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9965 for disk in self.disks])
9966 if len(all_disks) != len(self.disks):
9967 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9969 baddisks = [d for d in all_disks
9970 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9972 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9973 " cannot be adopted" %
9974 (", ".join(baddisks),
9975 constants.ADOPTABLE_BLOCKDEV_ROOT),
9978 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9979 list(all_disks))[pnode.name]
9980 node_disks.Raise("Cannot get block device information from node %s" %
9981 pnode.name)
9982 node_disks = node_disks.payload
9983 delta = all_disks.difference(node_disks.keys())
9984 if delta:
9985 raise errors.OpPrereqError("Missing block device(s): %s" %
9986 utils.CommaJoin(delta),
9987 errors.ECODE_INVAL)
9988 for dsk in self.disks:
9989 dsk[constants.IDISK_SIZE] = \
9990 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
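# Note on the payload shapes assumed by the adoption code above: for
# call_lv_list each "vg/lv" key maps to a tuple whose first element is the
# size in mebibytes and whose third is an online flag (hence the [0] and
# [2] indexing), while call_bdev_sizes maps device paths directly to
# sizes.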
9992 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9994 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9995 # check OS parameters (remotely)
9996 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9998 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10000 # memory check on primary node
10001 #TODO(dynmem): use MINMEM for checking
10002 if self.op.start:
10003 _CheckNodeFreeMemory(self, self.pnode.name,
10004 "creating instance %s" % self.op.instance_name,
10005 self.be_full[constants.BE_MAXMEM],
10006 self.op.hypervisor)
10008 self.dry_run_result = list(nodenames)
10010 def Exec(self, feedback_fn):
10011 """Create and add the instance to the cluster.
10014 instance = self.op.instance_name
10015 pnode_name = self.pnode.name
10017 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10018 self.owned_locks(locking.LEVEL_NODE)), \
10019 "Node locks differ from node resource locks"
10021 ht_kind = self.op.hypervisor
10022 if ht_kind in constants.HTS_REQ_PORT:
10023 network_port = self.cfg.AllocatePort()
10024 else:
10025 network_port = None
10027 # This is ugly but we got a chicken-egg problem here
10028 # We can only take the group disk parameters, as the instance
10029 # has no disks yet (we are generating them right here).
10030 node = self.cfg.GetNodeInfo(pnode_name)
10031 nodegroup = self.cfg.GetNodeGroup(node.group)
10032 disks = _GenerateDiskTemplate(self,
10033 self.op.disk_template,
10034 instance, pnode_name,
10035 self.secondaries,
10036 self.disks,
10037 self.instance_file_storage_dir,
10038 self.op.file_driver,
10039 0,
10040 feedback_fn,
10041 self.cfg.GetGroupDiskParams(nodegroup))
10043 iobj = objects.Instance(name=instance, os=self.op.os_type,
10044 primary_node=pnode_name,
10045 nics=self.nics, disks=disks,
10046 disk_template=self.op.disk_template,
10047 admin_state=constants.ADMINST_DOWN,
10048 network_port=network_port,
10049 beparams=self.op.beparams,
10050 hvparams=self.op.hvparams,
10051 hypervisor=self.op.hypervisor,
10052 osparams=self.op.osparams,
10053 )
10055 if self.op.tags:
10056 for tag in self.op.tags:
10057 iobj.AddTag(tag)
10059 if self.adopt_disks:
10060 if self.op.disk_template == constants.DT_PLAIN:
10061 # rename LVs to the newly-generated names; we need to construct
10062 # 'fake' LV disks with the old data, plus the new unique_id
10063 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10064 rename_to = []
10065 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10066 rename_to.append(t_dsk.logical_id)
10067 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10068 self.cfg.SetDiskID(t_dsk, pnode_name)
10069 result = self.rpc.call_blockdev_rename(pnode_name,
10070 zip(tmp_disks, rename_to))
10071 result.Raise("Failed to rename adoped LVs")
10073 feedback_fn("* creating instance disks...")
10075 _CreateDisks(self, iobj)
10076 except errors.OpExecError:
10077 self.LogWarning("Device creation failed, reverting...")
10079 _RemoveDisks(self, iobj)
10081 self.cfg.ReleaseDRBDMinors(instance)
10084 feedback_fn("adding instance %s to cluster config" % instance)
10086 self.cfg.AddInstance(iobj, self.proc.GetECId())
10088 # Declare that we don't want to remove the instance lock anymore, as we've
10089 # added the instance to the config
10090 del self.remove_locks[locking.LEVEL_INSTANCE]
10092 if self.op.mode == constants.INSTANCE_IMPORT:
10093 # Release unused nodes
10094 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10095 else:
10096 # Release all nodes
10097 _ReleaseLocks(self, locking.LEVEL_NODE)
10099 disk_abort = False
10100 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10101 feedback_fn("* wiping instance disks...")
10102 try:
10103 _WipeDisks(self, iobj)
10104 except errors.OpExecError, err:
10105 logging.exception("Wiping disks failed")
10106 self.LogWarning("Wiping instance disks failed (%s)", err)
10107 disk_abort = True
10109 if disk_abort:
10110 # Something is already wrong with the disks, don't do anything else
10111 pass
10112 elif self.op.wait_for_sync:
10113 disk_abort = not _WaitForSync(self, iobj)
10114 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10115 # make sure the disks are not degraded (still sync-ing is ok)
10116 feedback_fn("* checking mirrors status")
10117 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10118 else:
10119 disk_abort = False
10121 if disk_abort:
10122 _RemoveDisks(self, iobj)
10123 self.cfg.RemoveInstance(iobj.name)
10124 # Make sure the instance lock gets removed
10125 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10126 raise errors.OpExecError("There are some degraded disks for"
10127 " this instance")
10129 # Release all node resource locks
10130 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10132 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10133 if self.op.mode == constants.INSTANCE_CREATE:
10134 if not self.op.no_install:
10135 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10136 not self.op.wait_for_sync)
10137 if pause_sync:
10138 feedback_fn("* pausing disk sync to install instance OS")
10139 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10140 (iobj.disks,
10141 iobj), True)
10142 for idx, success in enumerate(result.payload):
10143 if not success:
10144 logging.warn("pause-sync of instance %s for disk %d failed",
10145 instance, idx)
10147 feedback_fn("* running the instance OS create scripts...")
10148 # FIXME: pass debug option from opcode to backend
10149 os_add_result = \
10150 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10151 self.op.debug_level)
10152 if pause_sync:
10153 feedback_fn("* resuming disk sync")
10154 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10155 (iobj.disks,
10156 iobj), False)
10157 for idx, success in enumerate(result.payload):
10158 if not success:
10159 logging.warn("resume-sync of instance %s for disk %d failed",
10160 instance, idx)
10162 os_add_result.Raise("Could not add os for instance %s"
10163 " on node %s" % (instance, pnode_name))
10165 elif self.op.mode == constants.INSTANCE_IMPORT:
10166 feedback_fn("* running the instance OS import scripts...")
10170 for idx, image in enumerate(self.src_images):
10174 # FIXME: pass debug option from opcode to backend
10175 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10176 constants.IEIO_FILE, (image, ),
10177 constants.IEIO_SCRIPT,
10178 (iobj.disks[idx], idx),
10180 transfers.append(dt)
10183 masterd.instance.TransferInstanceData(self, feedback_fn,
10184 self.op.src_node, pnode_name,
10185 self.pnode.secondary_ip,
10187 if not compat.all(import_result):
10188 self.LogWarning("Some disks for instance %s on node %s were not"
10189 " imported successfully" % (instance, pnode_name))
10191 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10192 feedback_fn("* preparing remote import...")
10193 # The source cluster will stop the instance before attempting to make a
10194 # connection. In some cases stopping an instance can take a long time,
10195 # hence the shutdown timeout is added to the connection timeout.
10196 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10197 self.op.source_shutdown_timeout)
10198 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10200 assert iobj.primary_node == self.pnode.name
10201 disk_results = \
10202 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10203 self.source_x509_ca,
10204 self._cds, timeouts)
10205 if not compat.all(disk_results):
10206 # TODO: Should the instance still be started, even if some disks
10207 # failed to import (valid for local imports, too)?
10208 self.LogWarning("Some disks for instance %s on node %s were not"
10209 " imported successfully" % (instance, pnode_name))
10211 # Run rename script on newly imported instance
10212 assert iobj.name == instance
10213 feedback_fn("Running rename script for %s" % instance)
10214 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10215 self.source_instance_name,
10216 self.op.debug_level)
10217 if result.fail_msg:
10218 self.LogWarning("Failed to run rename script for %s on node"
10219 " %s: %s" % (instance, pnode_name, result.fail_msg))
10221 else:
10222 # also checked in the prereq part
10223 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10224 % self.op.mode)
10226 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10228 if self.op.start:
10229 iobj.admin_state = constants.ADMINST_UP
10230 self.cfg.Update(iobj, feedback_fn)
10231 logging.info("Starting instance %s on node %s", instance, pnode_name)
10232 feedback_fn("* starting instance...")
10233 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10234 False)
10235 result.Raise("Could not start instance")
10237 return list(iobj.all_nodes)
10240 def _CheckRADOSFreeSpace():
10241 """Compute disk size requirements inside the RADOS cluster.
10244 # For the RADOS cluster we assume there is always enough space.
10248 class LUInstanceConsole(NoHooksLU):
10249 """Connect to an instance's console.
10251 This is somewhat special in that it returns the command line that
10252 you need to run on the master node in order to connect to the
10253 console.
10255 """
10256 REQ_BGL = False
10258 def ExpandNames(self):
10259 self.share_locks = _ShareAll()
10260 self._ExpandAndLockInstance()
10262 def CheckPrereq(self):
10263 """Check prerequisites.
10265 This checks that the instance is in the cluster.
10267 """
10268 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10269 assert self.instance is not None, \
10270 "Cannot retrieve locked instance %s" % self.op.instance_name
10271 _CheckNodeOnline(self, self.instance.primary_node)
10273 def Exec(self, feedback_fn):
10274 """Connect to the console of an instance
10277 instance = self.instance
10278 node = instance.primary_node
10280 node_insts = self.rpc.call_instance_list([node],
10281 [instance.hypervisor])[node]
10282 node_insts.Raise("Can't get node information from %s" % node)
10284 if instance.name not in node_insts.payload:
10285 if instance.admin_state == constants.ADMINST_UP:
10286 state = constants.INSTST_ERRORDOWN
10287 elif instance.admin_state == constants.ADMINST_DOWN:
10288 state = constants.INSTST_ADMINDOWN
10290 state = constants.INSTST_ADMINOFFLINE
10291 raise errors.OpExecError("Instance %s is not running (state %s)" %
10292 (instance.name, state))
10294 logging.debug("Connecting to console of %s on %s", instance.name, node)
10296 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10299 def _GetInstanceConsole(cluster, instance):
10300 """Returns console information for an instance.
10302 @type cluster: L{objects.Cluster}
10303 @type instance: L{objects.Instance}
10304 @rtype: dict
10306 """
10307 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10308 # beparams and hvparams are passed separately, to avoid editing the
10309 # instance and then saving the defaults in the instance itself.
10310 hvparams = cluster.FillHV(instance)
10311 beparams = cluster.FillBE(instance)
10312 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10314 assert console.instance == instance.name
10315 assert console.Validate()
10317 return console.ToDict()
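# Usage note: the dict returned here is the serialized console description
# built above; clients rebuild the console object from it to obtain the
# command line mentioned in the LUInstanceConsole docstring. Validation
# already happened via console.Validate().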
10320 class LUInstanceReplaceDisks(LogicalUnit):
10321 """Replace the disks of an instance.
10324 HPATH = "mirrors-replace"
10325 HTYPE = constants.HTYPE_INSTANCE
10328 def CheckArguments(self):
10329 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10330 self.op.iallocator)
10332 def ExpandNames(self):
10333 self._ExpandAndLockInstance()
10335 assert locking.LEVEL_NODE not in self.needed_locks
10336 assert locking.LEVEL_NODE_RES not in self.needed_locks
10337 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10339 assert self.op.iallocator is None or self.op.remote_node is None, \
10340 "Conflicting options"
10342 if self.op.remote_node is not None:
10343 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10345 # Warning: do not remove the locking of the new secondary here
10346 # unless DRBD8.AddChildren is changed to work in parallel;
10347 # currently it doesn't since parallel invocations of
10348 # FindUnusedMinor will conflict
10349 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10350 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10351 else:
10352 self.needed_locks[locking.LEVEL_NODE] = []
10353 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10355 if self.op.iallocator is not None:
10356 # iallocator will select a new node in the same group
10357 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10359 self.needed_locks[locking.LEVEL_NODE_RES] = []
10361 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10362 self.op.iallocator, self.op.remote_node,
10363 self.op.disks, False, self.op.early_release,
10364 self.op.ignore_ipolicy)
10366 self.tasklets = [self.replacer]
10368 def DeclareLocks(self, level):
10369 if level == locking.LEVEL_NODEGROUP:
10370 assert self.op.remote_node is None
10371 assert self.op.iallocator is not None
10372 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10374 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10375 # Lock all groups used by instance optimistically; this requires going
10376 # via the node before it's locked, requiring verification later on
10377 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10378 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10380 elif level == locking.LEVEL_NODE:
10381 if self.op.iallocator is not None:
10382 assert self.op.remote_node is None
10383 assert not self.needed_locks[locking.LEVEL_NODE]
10385 # Lock member nodes of all locked groups
10386 self.needed_locks[locking.LEVEL_NODE] = [node_name
10387 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10388 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10389 else:
10390 self._LockInstancesNodes()
10391 elif level == locking.LEVEL_NODE_RES:
10392 # Reuse node locks
10393 self.needed_locks[locking.LEVEL_NODE_RES] = \
10394 self.needed_locks[locking.LEVEL_NODE]
10396 def BuildHooksEnv(self):
10397 """Build hooks env.
10399 This runs on the master, the primary and all the secondaries.
10402 instance = self.replacer.instance
10404 "MODE": self.op.mode,
10405 "NEW_SECONDARY": self.op.remote_node,
10406 "OLD_SECONDARY": instance.secondary_nodes[0],
10408 env.update(_BuildInstanceHookEnvByObject(self, instance))
10411 def BuildHooksNodes(self):
10412 """Build hooks nodes.
10415 instance = self.replacer.instance
10417 self.cfg.GetMasterNode(),
10418 instance.primary_node,
10420 if self.op.remote_node is not None:
10421 nl.append(self.op.remote_node)
10424 def CheckPrereq(self):
10425 """Check prerequisites.
10428 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10429 self.op.iallocator is None)
10431 # Verify if node group locks are still correct
10432 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10433 if owned_groups:
10434 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10436 return LogicalUnit.CheckPrereq(self)
10439 class TLReplaceDisks(Tasklet):
10440 """Replaces disks for an instance.
10442 Note: Locking is not within the scope of this class.
10444 """
10445 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10446 disks, delay_iallocator, early_release, ignore_ipolicy):
10447 """Initializes this class.
10450 Tasklet.__init__(self, lu)
10453 self.instance_name = instance_name
10455 self.iallocator_name = iallocator_name
10456 self.remote_node = remote_node
10458 self.delay_iallocator = delay_iallocator
10459 self.early_release = early_release
10460 self.ignore_ipolicy = ignore_ipolicy
10463 self.instance = None
10464 self.new_node = None
10465 self.target_node = None
10466 self.other_node = None
10467 self.remote_node_info = None
10468 self.node_secondary_ip = None
10470 @staticmethod
10471 def CheckArguments(mode, remote_node, iallocator):
10472 """Helper function for users of this class.
10474 """
10475 # check for valid parameter combination
10476 if mode == constants.REPLACE_DISK_CHG:
10477 if remote_node is None and iallocator is None:
10478 raise errors.OpPrereqError("When changing the secondary either an"
10479 " iallocator script must be used or the"
10480 " new node given", errors.ECODE_INVAL)
10482 if remote_node is not None and iallocator is not None:
10483 raise errors.OpPrereqError("Give either the iallocator or the new"
10484 " secondary, not both", errors.ECODE_INVAL)
10486 elif remote_node is not None or iallocator is not None:
10487 # Not replacing the secondary
10488 raise errors.OpPrereqError("The iallocator and new node options can"
10489 " only be used when changing the"
10490 " secondary node", errors.ECODE_INVAL)
10492 @staticmethod
10493 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10494 """Compute a new secondary node using an IAllocator.
10496 """
10497 ial = IAllocator(lu.cfg, lu.rpc,
10498 mode=constants.IALLOCATOR_MODE_RELOC,
10499 name=instance_name,
10500 relocate_from=list(relocate_from))
10502 ial.Run(iallocator_name)
10504 if not ial.success:
10505 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10506 " %s" % (iallocator_name, ial.info),
10507 errors.ECODE_NORES)
10509 if len(ial.result) != ial.required_nodes:
10510 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10511 " of nodes (%s), required %s" %
10513 len(ial.result), ial.required_nodes),
10514 errors.ECODE_FAULT)
10516 remote_node_name = ial.result[0]
10518 lu.LogInfo("Selected new secondary for instance '%s': %s",
10519 instance_name, remote_node_name)
10521 return remote_node_name
10523 def _FindFaultyDisks(self, node_name):
10524 """Wrapper for L{_FindFaultyInstanceDisks}.
10527 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10530 def _CheckDisksActivated(self, instance):
10531 """Checks if the instance disks are activated.
10533 @param instance: The instance to check disks
10534 @return: True if they are activated, False otherwise
10536 """
10537 nodes = instance.all_nodes
10539 for idx, dev in enumerate(instance.disks):
10540 for node in nodes:
10541 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10542 self.cfg.SetDiskID(dev, node)
10544 result = _BlockdevFind(self, node, dev, instance)
10546 if result.offline:
10547 continue
10548 elif result.fail_msg or not result.payload:
10549 return False
10551 return True
10553 def CheckPrereq(self):
10554 """Check prerequisites.
10556 This checks that the instance is in the cluster.
10558 """
10559 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10560 assert instance is not None, \
10561 "Cannot retrieve locked instance %s" % self.instance_name
10563 if instance.disk_template != constants.DT_DRBD8:
10564 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10565 " instances", errors.ECODE_INVAL)
10567 if len(instance.secondary_nodes) != 1:
10568 raise errors.OpPrereqError("The instance has a strange layout,"
10569 " expected one secondary but found %d" %
10570 len(instance.secondary_nodes),
10571 errors.ECODE_FAULT)
10573 if not self.delay_iallocator:
10574 self._CheckPrereq2()
10576 def _CheckPrereq2(self):
10577 """Check prerequisites, second part.
10579 This function should always be part of CheckPrereq. It was separated and is
10580 now called from Exec because during node evacuation iallocator was only
10581 called with an unmodified cluster model, not taking planned changes into
10582 account.
10584 """
10585 instance = self.instance
10586 secondary_node = instance.secondary_nodes[0]
10588 if self.iallocator_name is None:
10589 remote_node = self.remote_node
10590 else:
10591 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10592 instance.name, instance.secondary_nodes)
10594 if remote_node is None:
10595 self.remote_node_info = None
10596 else:
10597 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10598 "Remote node '%s' is not locked" % remote_node
10600 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10601 assert self.remote_node_info is not None, \
10602 "Cannot retrieve locked node %s" % remote_node
10604 if remote_node == self.instance.primary_node:
10605 raise errors.OpPrereqError("The specified node is the primary node of"
10606 " the instance", errors.ECODE_INVAL)
10608 if remote_node == secondary_node:
10609 raise errors.OpPrereqError("The specified node is already the"
10610 " secondary node of the instance",
10611 errors.ECODE_INVAL)
10613 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10614 constants.REPLACE_DISK_CHG):
10615 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10616 errors.ECODE_INVAL)
10618 if self.mode == constants.REPLACE_DISK_AUTO:
10619 if not self._CheckDisksActivated(instance):
10620 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10621 " first" % self.instance_name,
10622 errors.ECODE_STATE)
10623 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10624 faulty_secondary = self._FindFaultyDisks(secondary_node)
10626 if faulty_primary and faulty_secondary:
10627 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10628 " one node and can not be repaired"
10629 " automatically" % self.instance_name,
10630 errors.ECODE_STATE)
10632 if faulty_primary:
10633 self.disks = faulty_primary
10634 self.target_node = instance.primary_node
10635 self.other_node = secondary_node
10636 check_nodes = [self.target_node, self.other_node]
10637 elif faulty_secondary:
10638 self.disks = faulty_secondary
10639 self.target_node = secondary_node
10640 self.other_node = instance.primary_node
10641 check_nodes = [self.target_node, self.other_node]
10642 else:
10643 self.disks = []
10644 check_nodes = []
10646 else:
10647 # Non-automatic modes
10648 if self.mode == constants.REPLACE_DISK_PRI:
10649 self.target_node = instance.primary_node
10650 self.other_node = secondary_node
10651 check_nodes = [self.target_node, self.other_node]
10653 elif self.mode == constants.REPLACE_DISK_SEC:
10654 self.target_node = secondary_node
10655 self.other_node = instance.primary_node
10656 check_nodes = [self.target_node, self.other_node]
10658 elif self.mode == constants.REPLACE_DISK_CHG:
10659 self.new_node = remote_node
10660 self.other_node = instance.primary_node
10661 self.target_node = secondary_node
10662 check_nodes = [self.new_node, self.other_node]
10664 _CheckNodeNotDrained(self.lu, remote_node)
10665 _CheckNodeVmCapable(self.lu, remote_node)
10667 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10668 assert old_node_info is not None
10669 if old_node_info.offline and not self.early_release:
10670 # doesn't make sense to delay the release
10671 self.early_release = True
10672 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10673 " early-release mode", secondary_node)
10676 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10679 # If not specified all disks should be replaced
10680 if not self.disks:
10681 self.disks = range(len(self.instance.disks))
10683 # TODO: This is ugly, but right now we can't distinguish between internal
10684 # submitted opcode and external one. We should fix that.
10685 if self.remote_node_info:
10686 # We change the node, lets verify it still meets instance policy
10687 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10688 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10689 new_group_info)
10690 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10691 ignore=self.ignore_ipolicy)
10693 for node in check_nodes:
10694 _CheckNodeOnline(self.lu, node)
10696 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10697 self.other_node,
10698 self.target_node]
10699 if node_name is not None)
10701 # Release unneeded node and node resource locks
10702 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10703 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10705 # Release any owned node group
10706 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10707 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10709 # Check whether disks are valid
10710 for disk_idx in self.disks:
10711 instance.FindDisk(disk_idx)
10713 # Get secondary node IP addresses
10714 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10715 in self.cfg.GetMultiNodeInfo(touched_nodes))
10717 def Exec(self, feedback_fn):
10718 """Execute disk replacement.
10720 This dispatches the disk replacement to the appropriate handler.
10722 """
10723 if self.delay_iallocator:
10724 self._CheckPrereq2()
10726 if __debug__:
10727 # Verify owned locks before starting operation
10728 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10729 assert set(owned_nodes) == set(self.node_secondary_ip), \
10730 ("Incorrect node locks, owning %s, expected %s" %
10731 (owned_nodes, self.node_secondary_ip.keys()))
10732 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10733 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10735 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10736 assert list(owned_instances) == [self.instance_name], \
10737 "Instance '%s' not locked" % self.instance_name
10739 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10740 "Should not own any node group lock at this point"
10743 feedback_fn("No disks need replacement")
10746 feedback_fn("Replacing disk(s) %s for %s" %
10747 (utils.CommaJoin(self.disks), self.instance.name))
10749 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10751 # Activate the instance disks if we're replacing them on a down instance
10752 if activate_disks:
10753 _StartInstanceDisks(self.lu, self.instance, True)
10755 try:
10756 # Should we replace the secondary node?
10757 if self.new_node is not None:
10758 fn = self._ExecDrbd8Secondary
10759 else:
10760 fn = self._ExecDrbd8DiskOnly
10762 result = fn(feedback_fn)
10763 finally:
10764 # Deactivate the instance disks if we're replacing them on a
10765 # down instance
10766 if activate_disks:
10767 _SafeShutdownInstanceDisks(self.lu, self.instance)
10769 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10771 if __debug__:
10772 # Verify owned locks
10773 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10774 nodes = frozenset(self.node_secondary_ip)
10775 assert ((self.early_release and not owned_nodes) or
10776 (not self.early_release and not (set(owned_nodes) - nodes))), \
10777 ("Not owning the correct locks, early_release=%s, owned=%r,"
10778 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10782 def _CheckVolumeGroup(self, nodes):
10783 self.lu.LogInfo("Checking volume groups")
10785 vgname = self.cfg.GetVGName()
10787 # Make sure volume group exists on all involved nodes
10788 results = self.rpc.call_vg_list(nodes)
10789 if not results:
10790 raise errors.OpExecError("Can't list volume groups on the nodes")
10792 for node in nodes:
10793 res = results[node]
10794 res.Raise("Error checking node %s" % node)
10795 if vgname not in res.payload:
10796 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10799 def _CheckDisksExistence(self, nodes):
10800 # Check disk existence
10801 for idx, dev in enumerate(self.instance.disks):
10802 if idx not in self.disks:
10803 continue
10805 for node in nodes:
10806 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10807 self.cfg.SetDiskID(dev, node)
10809 result = _BlockdevFind(self, node, dev, self.instance)
10811 msg = result.fail_msg
10812 if msg or not result.payload:
10813 if not msg:
10814 msg = "disk not found"
10815 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10816 (idx, node, msg))
10818 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10819 for idx, dev in enumerate(self.instance.disks):
10820 if idx not in self.disks:
10821 continue
10823 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10824 (idx, node_name))
10826 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10827 on_primary, ldisk=ldisk):
10828 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10829 " replace disks for instance %s" %
10830 (node_name, self.instance.name))
10832 def _CreateNewStorage(self, node_name):
10833 """Create new storage on the primary or secondary node.
10835 This is only used for same-node replaces, not for changing the
10836 secondary node, hence we don't want to modify the existing disk.
10838 """
10840 iv_names = {}
10841 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10842 for idx, dev in enumerate(disks):
10843 if idx not in self.disks:
10844 continue
10846 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10848 self.cfg.SetDiskID(dev, node_name)
10850 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10851 names = _GenerateUniqueNames(self.lu, lv_names)
10853 (data_disk, meta_disk) = dev.children
10854 vg_data = data_disk.logical_id[0]
10855 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10856 logical_id=(vg_data, names[0]),
10857 params=data_disk.params)
10858 vg_meta = meta_disk.logical_id[0]
10859 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10860 logical_id=(vg_meta, names[1]),
10861 params=meta_disk.params)
10863 new_lvs = [lv_data, lv_meta]
10864 old_lvs = [child.Copy() for child in dev.children]
10865 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10867 # we pass force_create=True to force the LVM creation
10868 for new_lv in new_lvs:
10869 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10870 _GetInstanceInfoText(self.instance), False)
10872 return iv_names
10874 def _CheckDevices(self, node_name, iv_names):
10875 for name, (dev, _, _) in iv_names.iteritems():
10876 self.cfg.SetDiskID(dev, node_name)
10878 result = _BlockdevFind(self, node_name, dev, self.instance)
10880 msg = result.fail_msg
10881 if msg or not result.payload:
10882 if not msg:
10883 msg = "disk not found"
10884 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10885 (name, msg))
10887 if result.payload.is_degraded:
10888 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10890 def _RemoveOldStorage(self, node_name, iv_names):
10891 for name, (_, old_lvs, _) in iv_names.iteritems():
10892 self.lu.LogInfo("Remove logical volumes for %s" % name)
10894 for lv in old_lvs:
10895 self.cfg.SetDiskID(lv, node_name)
10897 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10898 if msg:
10899 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10900 hint="remove unused LVs manually")
10902 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10903 """Replace a disk on the primary or secondary for DRBD 8.
10905 The algorithm for replace is quite complicated:
10907 1. for each disk to be replaced:
10909 1. create new LVs on the target node with unique names
10910 1. detach old LVs from the drbd device
10911 1. rename old LVs to name_replaced.<time_t>
10912 1. rename new LVs to old LVs
10913 1. attach the new LVs (with the old names now) to the drbd device
10915 1. wait for sync across all devices
10917 1. for each modified disk:
10919 1. remove old LVs (which have the name name_replaces.<time_t>)
10921 Failures are not very well handled.
10923 """
10924 steps_total = 6
10926 # Step: check device activation
10927 self.lu.LogStep(1, steps_total, "Check device existence")
10928 self._CheckDisksExistence([self.other_node, self.target_node])
10929 self._CheckVolumeGroup([self.target_node, self.other_node])
10931 # Step: check other node consistency
10932 self.lu.LogStep(2, steps_total, "Check peer consistency")
10933 self._CheckDisksConsistency(self.other_node,
10934 self.other_node == self.instance.primary_node,
10935 False)
10937 # Step: create new storage
10938 self.lu.LogStep(3, steps_total, "Allocate new storage")
10939 iv_names = self._CreateNewStorage(self.target_node)
10941 # Step: for each lv, detach+rename*2+attach
10942 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10943 for dev, old_lvs, new_lvs in iv_names.itervalues():
10944 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10946 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10947 old_lvs)
10948 result.Raise("Can't detach drbd from local storage on node"
10949 " %s for device %s" % (self.target_node, dev.iv_name))
10951 #cfg.Update(instance)
10953 # ok, we created the new LVs, so now we know we have the needed
10954 # storage; as such, we proceed on the target node to rename
10955 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10956 # using the assumption that logical_id == physical_id (which in
10957 # turn is the unique_id on that node)
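# Worked example of the rename dance (hypothetical names): with
# temp_suffix 1400000000, the old data LV ("xenvg", "aaaa.disk0_data") is
# first renamed to ("xenvg", "aaaa.disk0_data_replaced-1400000000"), after
# which the freshly created LV ("xenvg", "bbbb.disk0_data") is renamed to
# ("xenvg", "aaaa.disk0_data"), so the DRBD device can re-attach its
# backing storage under the names it expects.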
10959 # FIXME(iustin): use a better name for the replaced LVs
10960 temp_suffix = int(time.time())
10961 ren_fn = lambda d, suff: (d.physical_id[0],
10962 d.physical_id[1] + "_replaced-%s" % suff)
10964 # Build the rename list based on what LVs exist on the node
10965 rename_old_to_new = []
10966 for to_ren in old_lvs:
10967 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10968 if not result.fail_msg and result.payload:
10969 # device exists
10970 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10972 self.lu.LogInfo("Renaming the old LVs on the target node")
10973 result = self.rpc.call_blockdev_rename(self.target_node,
10974 rename_old_to_new)
10975 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10977 # Now we rename the new LVs to the old LVs
10978 self.lu.LogInfo("Renaming the new LVs on the target node")
10979 rename_new_to_old = [(new, old.physical_id)
10980 for old, new in zip(old_lvs, new_lvs)]
10981 result = self.rpc.call_blockdev_rename(self.target_node,
10982 rename_new_to_old)
10983 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10985 # Intermediate steps of in memory modifications
10986 for old, new in zip(old_lvs, new_lvs):
10987 new.logical_id = old.logical_id
10988 self.cfg.SetDiskID(new, self.target_node)
10990 # We need to modify old_lvs so that removal later removes the
10991 # right LVs, not the newly added ones; note that old_lvs is a
10992 # copy here
10993 for disk in old_lvs:
10994 disk.logical_id = ren_fn(disk, temp_suffix)
10995 self.cfg.SetDiskID(disk, self.target_node)
10997 # Now that the new lvs have the old name, we can add them to the device
10998 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10999 result = self.rpc.call_blockdev_addchildren(self.target_node,
11000 (dev, self.instance), new_lvs)
11001 msg = result.fail_msg
11002 if msg:
11003 for new_lv in new_lvs:
11004 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11005 new_lv).fail_msg
11006 if msg2:
11007 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11008 hint=("cleanup manually the unused logical"
11009 " volumes"))
11010 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11012 cstep = itertools.count(5)
11014 if self.early_release:
11015 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11016 self._RemoveOldStorage(self.target_node, iv_names)
11017 # TODO: Check if releasing locks early still makes sense
11018 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11019 else:
11020 # Release all resource locks except those used by the instance
11021 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11022 keep=self.node_secondary_ip.keys())
11024 # Release all node locks while waiting for sync
11025 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11027 # TODO: Can the instance lock be downgraded here? Take the optional disk
11028 # shutdown in the caller into consideration.
11031 # This can fail as the old devices are degraded and _WaitForSync
11032 # does a combined result over all disks, so we don't check its return value
11033 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11034 _WaitForSync(self.lu, self.instance)
11036 # Check all devices manually
11037 self._CheckDevices(self.instance.primary_node, iv_names)
11039 # Step: remove old storage
11040 if not self.early_release:
11041 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11042 self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    pnode = self.instance.primary_node

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
    for idx, dev in enumerate(disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
                             True, _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
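    # The zip() below pairs each instance disk with its newly allocated
    # minor on the new node; the primary node keeps the minor already
    # recorded in the disk's logical_id.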
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)
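
      # As unpacked above, a DRBD logical_id is the 6-tuple
      # (node_a, node_b, port, minor_a, minor_b, secret); new_alone_id
      # omits the port so the device first comes up without networking.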

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size,
                              params={})
      (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
                                             self.cfg)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
                              anno_new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
                                               self.instance.disks)[pnode]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # Release all node locks (the configuration has been updated)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           (self.instance.disks, self.instance),
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))

    cstep = itertools.count(5)

    if self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
      # TODO: Check if releasing locks early still makes sense
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
    else:
      # Release all resource locks except those used by the instance
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
                    keep=self.node_secondary_ip.keys())

    # TODO: Can the instance lock be downgraded here? Take the optional disk
    # shutdown in the caller into consideration.

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if inst.admin_state != constants.ADMINST_UP:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  REQ_BGL = False

  _MODE2IALLOCATOR = {
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
    }
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
          constants.IALLOCATOR_NEVAC_MODES)

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.NODE_EVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    # Determine nodes (via group) optimistically, needs verification once locks
    # have been acquired
    self.lock_nodes = self._DetermineNodes()

  def _DetermineNodes(self):
    """Gets the list of nodes to operate on.

    """
    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    return set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
      # instead of a list of instances
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 " instances",
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups for all potential target nodes optimistically, needs
      # verification once nodes have been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    need_nodes = self._DetermineNodes()

    if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " '%s', used to be '%s'; retry the"
                                 " operation" %
                                 (self.op.node_name,
                                  utils.CommaJoin(need_nodes),
                                  utils.CommaJoin(owned_nodes)),
                                 errors.ECODE_STATE)

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s';"
                               " retry the operation" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'; retry the operation" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary node" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
                       evac_mode=self._MODE2IALLOCATOR[self.op.mode],
                       instances=list(self.instance_names))

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.NODE_EVAC_SEC
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)


def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op


def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group


def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result
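
  # The iallocator result is a triple: "moved" holds (name, group, nodes)
  # tuples for evacuated instances, "failed" holds (name, reason) tuples,
  # and "jobs" is a list of job definitions, each itself a list of
  # serialized opcodes.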
  if failed:
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
                                 for (name, reason) in failed)
    lu.LogWarning("Unable to evacuate instances %s", failreason)
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]


class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      "ABSOLUTE": self.op.absolute,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if self.op.absolute:
      self.target = self.op.amount
      self.delta = self.target - self.disk.size
      if self.delta < 0:
        raise errors.OpPrereqError("Requested size (%s) is smaller than "
                                   "current disk size (%s)" %
                                   (utils.FormatUnit(self.target, "h"),
                                    utils.FormatUnit(self.disk.size, "h")),
                                   errors.ECODE_STATE)
    else:
      self.delta = self.op.amount
      self.target = self.disk.size + self.delta
      if self.delta < 0:
        raise errors.OpPrereqError("Requested increment (%s) is negative" %
                                   utils.FormatUnit(self.delta, "h"),
                                   errors.ECODE_INVAL)

    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE,
                                      constants.DT_RBD):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.delta))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.delta, "h"),
                 utils.FormatUnit(self.target, "h")))

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           True)
      result.Raise("Grow request failed to node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           False)
      result.Raise("Grow request failed to node %s" % node)

    # TODO: Rewrite code to work properly
    # DRBD goes into sync mode for a short amount of time after executing the
    # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
    # calling "resize" in sync mode fails. Sleeping for a short amount of
    # time is a work-around.
    time.sleep(5)

    disk.RecordGrow(self.delta)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")

    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)


class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODEGROUP] = []
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking:
      if level == locking.LEVEL_NODEGROUP:
        owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        self.needed_locks[locking.LEVEL_NODEGROUP] = \
          frozenset(group_uuid
                    for instance_name in owned_instances
                    for group_uuid in
                      self.cfg.GetInstanceNodeGroups(instance_name))

      elif level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = owned_instances

    instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))

    if self.op.use_locking:
      _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
                                None)
    else:
      assert not (owned_instances or owned_groups or owned_nodes)

    self.wanted_instances = instances.values()

  def _ComputeBlockdevStatus(self, node, instance, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance.name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)

    return self._ComputeDiskStatusInner(instance, snode, anno_dev)

  def _ComputeDiskStatusInner(self, instance, snode, dev):
    """Compute block device status.

    @attention: The device has to be annotated already.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatusInner,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
    nodes = dict(self.cfg.GetMultiNodeInfo(node_names))

    groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
                                                 for node in nodes.values()))

    group2name_fn = lambda uuid: groups[uuid].name

    for instance in self.wanted_instances:
      pnode = nodes[instance.primary_node]

      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
          else:
            remote_state = instance.admin_state

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      snodes_group_uuids = [nodes[snode_name].group
                            for snode_name in instance.secondary_nodes]

      result[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "pnode_group_uuid": pnode.group,
        "pnode_group_name": group2name_fn(pnode.group),
        "snodes": instance.secondary_nodes,
        "snodes_group_uuids": snodes_group_uuids,
        "snodes_group_names": map(group2name_fn, snodes_group_uuids),
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result


def PrepareContainerMods(mods, private_fn):
  """Prepares a list of container modifications by adding a private data field.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable for constructing a private data field for a
    modification
  @rtype: list

  """
  if private_fn is None:
    fn = lambda: None
  else:
    fn = private_fn

  return [(op, idx, params, fn()) for (op, idx, params) in mods]
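
# For illustration: with private_fn=None each 3-tuple simply gains None,
# e.g. [(constants.DDM_ADD, -1, params)] becomes
# [(constants.DDM_ADD, -1, params, None)].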


#: Type description for changes as returned by L{ApplyContainerMods}'s
#: callbacks
_TApplyContModsCbChanges = \
  ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
    ht.TNonEmptyString,
    ht.TAny,
    ])))


def ApplyContainerMods(kind, container, chgdesc, mods,
                       create_fn, modify_fn, remove_fn):
  """Applies descriptions in C{mods} to C{container}.

  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to modify
  @type chgdesc: None or list
  @param chgdesc: List of applied changes
  @type mods: list
  @param mods: Modifications as returned by L{PrepareContainerMods}
  @type create_fn: callable
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
    receives absolute item index, parameters and private data object as added
    by L{PrepareContainerMods}, returns tuple containing new item and changes
    applied to it
  @type modify_fn: callable
  @param modify_fn: Callback for modifying an existing item
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
    and private data object as added by L{PrepareContainerMods}, returns
    changes too
  @type remove_fn: callable
  @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{PrepareContainerMods}

  """
  for (op, idx, params, private) in mods:
    if idx == -1:
      # Append
      absidx = len(container) - 1
    elif idx < 0:
      raise IndexError("Not accepting negative indices other than -1")
    elif idx > len(container):
      raise IndexError("Got %s index %s, but there are only %s" %
                       (kind, idx, len(container)))
    else:
      absidx = idx

    changes = None

    if op == constants.DDM_ADD:
      # Calculate where item will be added
      if idx == -1:
        addidx = len(container)
      else:
        addidx = idx

      if create_fn is None:
        item = params
      else:
        (item, changes) = create_fn(addidx, params, private)

      if idx == -1:
        container.append(item)
      else:
        assert idx <= len(container)
        # list.insert does so before the specified index
        container.insert(idx, item)
    else:
      # Retrieve existing item
      try:
        item = container[absidx]
      except IndexError:
        raise IndexError("Invalid %s index %s" % (kind, idx))

      if op == constants.DDM_REMOVE:
        assert not params

        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]
      elif op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

    assert _TApplyContModsCbChanges(changes)

    if not (chgdesc is None or changes is None):
      chgdesc.extend(changes)
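
# A minimal usage sketch (hypothetical values): with create_fn=None,
#   mods = PrepareContainerMods([(constants.DDM_ADD, -1, {"size": 1024})], None)
#   ApplyContainerMods("disk", container, None, mods, None, None, None)
# appends the raw parameter dict to "container".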


def _UpdateIvNames(base_index, disks):
  """Updates the C{iv_name} attribute of disks.

  @type disks: list of L{objects.Disk}
  @param disks: The disks to be updated

  """
  for (idx, disk) in enumerate(disks):
    disk.iv_name = "disk/%s" % (base_index + idx, )
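
# E.g. with base_index=0 the names become "disk/0", "disk/1", ..., keeping
# iv_name values consistent after disks have been added or removed.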


class _InstNicModPrivate:
  """Data structure for network interface modifications.

  Used by L{LUInstanceSetParams}.

  """
  def __init__(self):
    self.params = None
    self.filled = None


class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  @staticmethod
  def _UpgradeDiskNicMods(kind, mods, verify_fn):
    assert ht.TList(mods)
    assert not mods or len(mods[0]) in (2, 3)

    if mods and len(mods[0]) == 2:
      result = []

      addremove = 0
      for op, params in mods:
        if op in (constants.DDM_ADD, constants.DDM_REMOVE):
          result.append((op, -1, params))
          addremove += 1

          if addremove > 1:
            raise errors.OpPrereqError("Only one %s add or remove operation is"
                                       " supported at a time" % kind,
                                       errors.ECODE_INVAL)
        else:
          result.append((constants.DDM_MODIFY, op, params))
    else:
      result = mods

    assert verify_fn(result)
    return result
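
  # Upgrade example: the legacy 2-tuple form [(constants.DDM_ADD, params)]
  # becomes [(constants.DDM_ADD, -1, params)], while a legacy (index, params)
  # pair is rewritten as a modification of that index.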

  @staticmethod
  def _CheckMods(kind, mods, key_types, item_fn):
    """Ensures requested disk/NIC modifications are valid.

    """
    for (op, _, params) in mods:
      assert ht.TDict(params)

      utils.ForceDictType(params, key_types)

      if op == constants.DDM_REMOVE:
        if params:
          raise errors.OpPrereqError("No settings should be passed when"
                                     " removing a %s" % kind,
                                     errors.ECODE_INVAL)
      elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
        item_fn(op, params)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

  @staticmethod
  def _VerifyDiskModification(op, params):
    """Verifies a disk modification.

    """
    if op == constants.DDM_ADD:
      mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                   errors.ECODE_INVAL)

      size = params.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Required disk parameter '%s' missing" %
                                   constants.IDISK_SIZE, errors.ECODE_INVAL)

      try:
        size = int(size)
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
                                   errors.ECODE_INVAL)

      params[constants.IDISK_SIZE] = size

    elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
      raise errors.OpPrereqError("Disk size change not possible, use"
                                 " grow-disk", errors.ECODE_INVAL)

  @staticmethod
  def _VerifyNicModification(op, params):
    """Verifies a network interface modification.

    """
    if op in (constants.DDM_ADD, constants.DDM_MODIFY):
      ip = params.get(constants.INIC_IP, None)
      if ip is None:
        pass
      elif ip.lower() == constants.VALUE_NONE:
        params[constants.INIC_IP] = None
      elif not netutils.IPAddress.IsValid(ip):
        raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                   errors.ECODE_INVAL)

      bridge = params.get("bridge", None)
      link = params.get(constants.INIC_LINK, None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif bridge and bridge.lower() == constants.VALUE_NONE:
        params["bridge"] = None
      elif link and link.lower() == constants.VALUE_NONE:
        params[constants.INIC_LINK] = None

      if op == constants.DDM_ADD:
        macaddr = params.get(constants.INIC_MAC, None)
        if macaddr is None:
          params[constants.INIC_MAC] = constants.VALUE_AUTO

      if constants.INIC_MAC in params:
        macaddr = params[constants.INIC_MAC]
        if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          macaddr = utils.NormalizeAndValidateMac(macaddr)

        if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing NIC",
                                     errors.ECODE_INVAL)

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name or
            self.op.offline is not None or self.op.runtime_mem):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    self.op.disks = \
      self._UpgradeDiskNicMods("disk", self.op.disks,
                               opcodes.OpInstanceSetParams.TestDiskModifications)
    self.op.nics = \
      self._UpgradeDiskNicMods("NIC", self.op.nics,
                               opcodes.OpInstanceSetParams.TestNicModifications)

    # Check disk modifications
    self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
                    self._VerifyDiskModification)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # Check NIC modifications
    self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
                    self._VerifyNicModification)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # Can't even acquire node locks in shared mode as upcoming changes in
    # Ganeti 2.6 will start to modify the node object on disk conversion
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    # TODO: Acquire group lock in shared mode (disk parameters)
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MINMEM in self.be_new:
      args["minmem"] = self.be_new[constants.BE_MINMEM]
    if constants.BE_MAXMEM in self.be_new:
      args["maxmem"] = self.be_new[constants.BE_MAXMEM]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.

    if self._new_nics is not None:
      nics = []

      for nic in self._new_nics:
        nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        nics.append((nic.ip, nic.mac, mode, link))

      args["nics"] = nics

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    if self.op.runtime_mem:
      env["RUNTIME_MEMORY"] = self.op.runtime_mem

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def _PrepareNicModification(self, params, private, old_ip, old_params,
                              cluster, pnode):
    update_params_dict = dict([(key, params[key])
                               for key in constants.NICS_PARAMETERS
                               if key in params])

    if "bridge" in params:
      update_params_dict[constants.NIC_LINK] = params["bridge"]

    new_params = _GetUpdatedParams(old_params, update_params_dict)
    utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)

    new_filled_params = cluster.SimpleFillNIC(new_params)
    objects.NIC.CheckParameterSyntax(new_filled_params)

    new_mode = new_filled_params[constants.NIC_MODE]
    if new_mode == constants.NIC_MODE_BRIDGED:
      bridge = new_filled_params[constants.NIC_LINK]
      msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
      if msg:
        msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
        if self.op.force:
          self.warn.append(msg)
        else:
          raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)

    elif new_mode == constants.NIC_MODE_ROUTED:
      ip = params.get(constants.INIC_IP, old_ip)
      if ip is None:
        raise errors.OpPrereqError("Cannot set the NIC IP address to None"
                                   " on a routed NIC", errors.ECODE_INVAL)

    if constants.INIC_MAC in params:
      mac = params[constants.INIC_MAC]
      if mac is None:
        raise errors.OpPrereqError("Cannot unset the NIC MAC address",
                                   errors.ECODE_INVAL)
      elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        # otherwise generate the MAC address
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(self.proc.GetECId())
      else:
        # or validate/reserve the current one
        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address '%s' already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

    private.params = new_params
    private.filled = new_filled_params

    return (None, None)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)
    pnode_info = self.cfg.GetNodeInfo(pnode)
    self.diskparams = self.cfg.GetInstanceDiskParams(instance)

    # Prepare disk/NIC modifications
    self.diskmod = PrepareContainerMods(self.op.disks, None)
    self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    assert not (self.op.disk_template and self.op.disks), \
      "Can't modify disk template and apply disk changes at the same time"

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
                          msg="cannot change disk template")
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        disks = [{constants.IDISK_SIZE: d.size,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

        snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
        snode_group = self.cfg.GetNodeGroup(snode_info.group)
        ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
        _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
                                ignore=self.op.ignore_ipolicy)
        if pnode_info.group != snode_info.group:
          self.LogWarning("The primary and secondary nodes are in two"
                          " different node groups; the disk parameters"
                          " from the first disk's node group will be"
                          " used")

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_proposed = self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
                                              instance.hvparams)
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      objects.UpgradeBeParams(i_bedict)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_proposed = self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
    be_old = cluster.FillBE(instance)

    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # changed
    if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
      cpu_list = \
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
      # Verify mask is consistent with number of vCPUs. Can skip this
      # test if only 1 entry in the CPU mask, which means same mask
      # is applied to all vCPUs.
      if (len(cpu_list) > 1 and
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   " CPU mask [%s]" %
                                   (self.be_proposed[constants.BE_VCPUS],
                                    self.hv_proposed[constants.HV_CPU_MASK]),
                                   errors.ECODE_INVAL)

      # Only perform this test if a new CPU mask is given
      if constants.HV_CPU_MASK in self.hv_new:
        # Calculate the largest CPU number requested
        max_requested_cpu = max(map(max, cpu_list))
        # Check that all of the instance's nodes have enough physical CPUs to
        # satisfy the requested CPU mask
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
                                max_requested_cpu + 1, instance.hypervisor)
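        # Illustration (hypothetical mask): if the mask parses to
        # [[1], [2], [0, 1]], len(cpu_list) is 3 and must equal the vCPU
        # count, and max(map(max, cpu_list)) == 2 means every node needs
        # at least three physical CPUs.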

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    #TODO(dynmem): do the appropriate check involving MINMEM
    if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
        be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         [instance.hypervisor])
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      else:
        (_, _, (pnhvinfo, )) = pninfo.payload
        if not isinstance(pnhvinfo.get("memory_free", None), int):
          self.warn.append("Node data from primary node %s doesn't contain"
                           " free memory information" % pnode)
        elif instance_info.fail_msg:
          self.warn.append("Can't get instance runtime information: %s" %
                           instance_info.fail_msg)
        else:
          if instance_info.payload:
            current_mem = int(instance_info.payload["memory"])
          else:
            # Assume instance not running
            # (there is a slight race condition here, but it's not very
            # probable, and we have no other way to check)
            # TODO: Describe race condition
            current_mem = 0

          #TODO(dynmem): do the appropriate check involving MINMEM
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
                      pnhvinfo["memory_free"])
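          # I.e. the instance would need (new maxmem - current memory) more
          # than the primary node's reported memory_free; a positive
          # miss_mem is therefore a deficit.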
          if miss_mem > 0:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from starting, due to %d MB of memory"
                                       " missing on its primary node" %
                                       miss_mem,
                                       errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          (_, _, (nhvinfo, )) = nres.payload
          if not isinstance(nhvinfo.get("memory_free", None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          #TODO(dynmem): do the appropriate check involving MINMEM
          elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)

    if self.op.runtime_mem:
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node)
      if not remote_info.payload: # not running already
        raise errors.OpPrereqError("Instance %s is not running" % instance.name,
                                   errors.ECODE_STATE)

      current_memory = remote_info.payload["memory"]
      if (not self.op.force and
          (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
           self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
        raise errors.OpPrereqError("Instance %s must have memory between %d"
                                   " and %d MB of memory unless --force is"
                                   " given" %
                                   (instance.name,
                                    self.be_proposed[constants.BE_MINMEM],
                                    self.be_proposed[constants.BE_MAXMEM]),
                                   errors.ECODE_INVAL)

      if self.op.runtime_mem > current_memory:
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "ballooning memory for instance %s" %
                             instance.name,
                             self.op.runtime_mem - current_memory,
                             instance.hypervisor)

    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)

    def _PrepareNicCreate(_, params, private):
      return self._PrepareNicModification(params, private, None, {},
                                          cluster, pnode)

    def _PrepareNicMod(_, nic, params, private):
      return self._PrepareNicModification(params, private, nic.ip,
                                          nic.nicparams, cluster, pnode)

    # Verify NIC changes (operating on copy)
    nics = instance.nics[:]
    ApplyContainerMods("NIC", nics, None, self.nicmod,
                       _PrepareNicCreate, _PrepareNicMod, None)
    if len(nics) > constants.MAX_NICS:
      raise errors.OpPrereqError("Instance has too many network interfaces"
                                 " (%d), cannot add more" % constants.MAX_NICS,
                                 errors.ECODE_STATE)

    # Verify disk changes (operating on a copy)
    disks = instance.disks[:]
    ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
    if len(disks) > constants.MAX_DISKS:
      raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
                                 " more" % constants.MAX_DISKS,
                                 errors.ECODE_STATE)

    if self.op.offline is not None:
      if self.op.offline:
        msg = "can't change to offline"
      else:
        msg = "can't change to online"
      _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)

    # Pre-compute NIC changes (necessary to use result in hooks)
    self._nic_chgdesc = []
    if self.nicmod:
      # Operate on copies as this is still in prereq
      nics = [nic.Copy() for nic in instance.nics]
      ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
                         self._CreateNewNic, self._ApplyNicMods, None)
      self._new_nics = nics
    else:
      self._new_nics = None

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    assert instance.disk_template == constants.DT_PLAIN

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn,
                                      self.diskparams)
    anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
                                        self.diskparams)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in anno_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
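    # f_create is True only on the primary node; the data and meta
    # volumes on the secondary were already created earlier in this method.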
    for disk in anno_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # Release node locks while waiting for sync
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

    # Node resource locks will be released by caller

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance

    assert len(instance.secondary_nodes) == 1
    assert instance.disk_template == constants.DT_DRBD8

    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]
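    # For a DRBD8 disk, children[0] is the data LV and children[1] the
    # metadata LV; keeping only the data volume yields a plain LVM disk.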

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # this is a DRBD disk, return its port to the pool
    # NOTE: this must be done right before the call to cfg.Update!
    for disk in old_disks:
      tcp_port = disk.logical_id[2]
      self.cfg.AddTcpUdpPort(tcp_port)

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    # Release locks in case removing disks takes a while
    _ReleaseLocks(self, locking.LEVEL_NODE)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)
12709 def _CreateNewDisk(self, idx, params, _):
12710 """Creates a new disk.
12713 instance = self.instance
12715 # add a new disk
12716 if instance.disk_template in constants.DTS_FILEBASED:
12717 (file_driver, file_path) = instance.disks[0].logical_id
12718 file_path = os.path.dirname(file_path)
12719 else:
12720 file_driver = file_path = None
12722 disk = \
12723 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12724 instance.primary_node, instance.secondary_nodes,
12725 [params], file_path, file_driver, idx,
12726 self.Log, self.diskparams)[0]
12728 info = _GetInstanceInfoText(instance)
12730 logging.info("Creating volume %s for instance %s",
12731 disk.iv_name, instance.name)
12732 # Note: this needs to be kept in sync with _CreateDisks
12734 for node in instance.all_nodes:
12735 f_create = (node == instance.primary_node)
12736 try:
12737 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12738 except errors.OpExecError, err:
12739 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12740 disk.iv_name, disk, node, err)
12742 return (disk, [
12743 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12744 ])
12746 @staticmethod
12747 def _ModifyDisk(idx, disk, params, _):
12748 """Modifies a disk.
12750 """
12751 disk.mode = params[constants.IDISK_MODE]
12753 return [
12754 ("disk.mode/%d" % idx, disk.mode),
12755 ]
12757 def _RemoveDisk(self, idx, root, _):
12758 """Removes a disk.
12760 """
12761 for node, disk in root.ComputeNodeTree(self.instance.primary_node):
12762 self.cfg.SetDiskID(disk, node)
12763 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12764 if msg:
12765 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12766 " continuing anyway", idx, node, msg)
12768 # if this is a DRBD disk, return its port to the pool
12769 if root.dev_type in constants.LDS_DRBD:
12770 self.cfg.AddTcpUdpPort(root.logical_id[2])
12772 @staticmethod
12773 def _CreateNewNic(idx, params, private):
12774 """Creates data structure for a new network interface.
12776 """
12777 mac = params[constants.INIC_MAC]
12778 ip = params.get(constants.INIC_IP, None)
12779 nicparams = private.params
12781 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12782 ("nic.%d" % idx,
12783 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12784 (mac, ip, private.filled[constants.NIC_MODE],
12785 private.filled[constants.NIC_LINK])),
12786 ])
12788 @staticmethod
12789 def _ApplyNicMods(idx, nic, params, private):
12790 """Modifies a network interface.
12792 """
12794 changes = []
12795 for key in [constants.INIC_MAC, constants.INIC_IP]:
12796 if key in params:
12797 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12798 setattr(nic, key, params[key])
12800 if private.params:
12801 nic.nicparams = private.params
12803 for (key, val) in params.items():
12804 changes.append(("nic.%s/%d" % (key, idx), val))
12806 return changes
12808 def Exec(self, feedback_fn):
12809 """Modifies an instance.
12811 All parameters take effect only at the next restart of the instance.
12813 """
12814 # Process here the warnings from CheckPrereq, as we don't have a
12815 # feedback_fn there.
12816 # TODO: Replace with self.LogWarning
12817 for warn in self.warn:
12818 feedback_fn("WARNING: %s" % warn)
12820 assert ((self.op.disk_template is None) ^
12821 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12822 "Not owning any node resource locks"
12824 result = []
12825 instance = self.instance
12827 # runtime memory
12828 if self.op.runtime_mem:
12829 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12830 instance,
12831 self.op.runtime_mem)
12832 rpcres.Raise("Cannot modify instance runtime memory")
12833 result.append(("runtime_memory", self.op.runtime_mem))
12835 # Apply disk changes
12836 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12837 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12838 _UpdateIvNames(0, instance.disks)
12840 if self.op.disk_template:
12841 if __debug__:
12842 check_nodes = set(instance.all_nodes)
12843 if self.op.remote_node:
12844 check_nodes.add(self.op.remote_node)
12845 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12846 owned = self.owned_locks(level)
12847 assert not (check_nodes - owned), \
12848 ("Not owning the correct locks, owning %r, expected at least %r" %
12849 (owned, check_nodes))
12851 r_shut = _ShutdownInstanceDisks(self, instance)
12852 if not r_shut:
12853 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12854 " proceed with disk template conversion")
12855 mode = (instance.disk_template, self.op.disk_template)
12856 try:
12857 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12858 finally:
12859 self.cfg.ReleaseDRBDMinors(instance.name)
12861 result.append(("disk_template", self.op.disk_template))
12863 assert instance.disk_template == self.op.disk_template, \
12864 ("Expected disk template '%s', found '%s'" %
12865 (self.op.disk_template, instance.disk_template))
12867 # Release node and resource locks if there are any (they might already have
12868 # been released during disk conversion)
12869 _ReleaseLocks(self, locking.LEVEL_NODE)
12870 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12872 # Apply NIC changes
12873 if self._new_nics is not None:
12874 instance.nics = self._new_nics
12875 result.extend(self._nic_chgdesc)
12877 # hvparams changes
12878 if self.op.hvparams:
12879 instance.hvparams = self.hv_inst
12880 for key, val in self.op.hvparams.iteritems():
12881 result.append(("hv/%s" % key, val))
12883 # beparams changes
12884 if self.op.beparams:
12885 instance.beparams = self.be_inst
12886 for key, val in self.op.beparams.iteritems():
12887 result.append(("be/%s" % key, val))
12889 # OS change
12890 if self.op.os_name:
12891 instance.os = self.op.os_name
12893 # osparams changes
12894 if self.op.osparams:
12895 instance.osparams = self.os_inst
12896 for key, val in self.op.osparams.iteritems():
12897 result.append(("os/%s" % key, val))
12899 if self.op.offline is None:
12900 # Ignore
12901 pass
12902 elif self.op.offline:
12903 # Mark instance as offline
12904 self.cfg.MarkInstanceOffline(instance.name)
12905 result.append(("admin_state", constants.ADMINST_OFFLINE))
12906 else:
12907 # Mark instance as online, but stopped
12908 self.cfg.MarkInstanceDown(instance.name)
12909 result.append(("admin_state", constants.ADMINST_DOWN))
12911 self.cfg.Update(instance, feedback_fn)
12913 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12914 self.owned_locks(locking.LEVEL_NODE)), \
12915 "All node locks should have been released by now"
12917 return result
12919 _DISK_CONVERSIONS = {
12920 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12921 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12922 }
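# A hedged usage sketch of the dispatch table above: Exec() looks up the
# (current_template, requested_template) pair and calls the matching
# converter, e.g.
#
#   mode = (constants.DT_PLAIN, constants.DT_DRBD8)
#   self._DISK_CONVERSIONS[mode](self, feedback_fn)  # -> _ConvertPlainToDrbd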
12925 class LUInstanceChangeGroup(LogicalUnit):
12926 HPATH = "instance-change-group"
12927 HTYPE = constants.HTYPE_INSTANCE
12928 REQ_BGL = False
12930 def ExpandNames(self):
12931 self.share_locks = _ShareAll()
12932 self.needed_locks = {
12933 locking.LEVEL_NODEGROUP: [],
12934 locking.LEVEL_NODE: [],
12935 }
12937 self._ExpandAndLockInstance()
12939 if self.op.target_groups:
12940 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12941 self.op.target_groups)
12942 else:
12943 self.req_target_uuids = None
12945 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12947 def DeclareLocks(self, level):
12948 if level == locking.LEVEL_NODEGROUP:
12949 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12951 if self.req_target_uuids:
12952 lock_groups = set(self.req_target_uuids)
12954 # Lock all groups used by instance optimistically; this requires going
12955 # via the node before it's locked, requiring verification later on
12956 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12957 lock_groups.update(instance_groups)
12958 else:
12959 # No target groups, need to lock all of them
12960 lock_groups = locking.ALL_SET
12962 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12964 elif level == locking.LEVEL_NODE:
12965 if self.req_target_uuids:
12966 # Lock all nodes used by instances
12967 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12968 self._LockInstancesNodes()
12970 # Lock all nodes in all potential target groups
12971 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12972 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12973 member_nodes = [node_name
12974 for group in lock_groups
12975 for node_name in self.cfg.GetNodeGroup(group).members]
12976 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12977 else:
12978 # Lock all nodes as all groups are potential targets
12979 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12981 def CheckPrereq(self):
12982 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12983 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12984 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12986 assert (self.req_target_uuids is None or
12987 owned_groups.issuperset(self.req_target_uuids))
12988 assert owned_instances == set([self.op.instance_name])
12990 # Get instance information
12991 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12993 # Check if node groups for locked instance are still correct
12994 assert owned_nodes.issuperset(self.instance.all_nodes), \
12995 ("Instance %s's nodes changed while we kept the lock" %
12996 self.op.instance_name)
12998 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12999 owned_groups)
13001 if self.req_target_uuids:
13002 # User requested specific target groups
13003 self.target_uuids = frozenset(self.req_target_uuids)
13004 else:
13005 # All groups except those used by the instance are potential targets
13006 self.target_uuids = owned_groups - inst_groups
13008 conflicting_groups = self.target_uuids & inst_groups
13009 if conflicting_groups:
13010 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13011 " used by the instance '%s'" %
13012 (utils.CommaJoin(conflicting_groups),
13013 self.op.instance_name),
13014 errors.ECODE_INVAL)
13016 if not self.target_uuids:
13017 raise errors.OpPrereqError("There are no possible target groups",
13018 errors.ECODE_INVAL)
13020 def BuildHooksEnv(self):
13021 """Build hooks env.
13023 """
13024 assert self.target_uuids
13026 env = {
13027 "TARGET_GROUPS": " ".join(self.target_uuids),
13028 }
13030 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13032 return env
13034 def BuildHooksNodes(self):
13035 """Build hooks nodes.
13037 """
13038 mn = self.cfg.GetMasterNode()
13039 return ([mn], [mn])
13041 def Exec(self, feedback_fn):
13042 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13044 assert instances == [self.op.instance_name], "Instance not locked"
13046 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13047 instances=instances, target_groups=list(self.target_uuids))
13049 ial.Run(self.op.iallocator)
13051 if not ial.success:
13052 raise errors.OpPrereqError("Can't compute solution for changing group of"
13053 " instance '%s' using iallocator '%s': %s" %
13054 (self.op.instance_name, self.op.iallocator,
13055 ial.info),
13056 errors.ECODE_NORES)
13058 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13060 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13061 " instance '%s'", len(jobs), self.op.instance_name)
13063 return ResultWithJobs(jobs)
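# Illustrative sketch (hypothetical values): this LU is driven by an
# OpInstanceChangeGroup opcode, e.g.
#
#   op = opcodes.OpInstanceChangeGroup(instance_name="inst1.example.com",
#                                      target_groups=["group2"])
#
# The ResultWithJobs wrapper makes the processor submit the jobs computed by
# the iallocator and report their job IDs back to the caller.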
13066 class LUBackupQuery(NoHooksLU):
13067 """Query the exports list
13069 """
13070 REQ_BGL = False
13072 def CheckArguments(self):
13073 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13074 ["node", "export"], self.op.use_locking)
13076 def ExpandNames(self):
13077 self.expq.ExpandNames(self)
13079 def DeclareLocks(self, level):
13080 self.expq.DeclareLocks(self, level)
13082 def Exec(self, feedback_fn):
13083 result = {}
13085 for (node, expname) in self.expq.OldStyleQuery(self):
13086 if expname is None:
13087 result[node] = False
13088 else:
13089 result.setdefault(node, []).append(expname)
13091 return result
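# Hedged example of the old-style result shape:
#   {"node1.example.com": ["inst1.example.com"], "node2.example.com": False}
# where False marks a node whose export list could not be retrieved.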
13094 class _ExportQuery(_QueryBase):
13095 FIELDS = query.EXPORT_FIELDS
13097 #: The node name is not a unique key for this query
13098 SORT_FIELD = "node"
13100 def ExpandNames(self, lu):
13101 lu.needed_locks = {}
13103 # The following variables interact with _QueryBase._GetNames
13104 if self.names:
13105 self.wanted = _GetWantedNodes(lu, self.names)
13106 else:
13107 self.wanted = locking.ALL_SET
13109 self.do_locking = self.use_locking
13111 if self.do_locking:
13112 lu.share_locks = _ShareAll()
13113 lu.needed_locks = {
13114 locking.LEVEL_NODE: self.wanted,
13115 }
13117 def DeclareLocks(self, lu, level):
13118 pass
13120 def _GetQueryData(self, lu):
13121 """Computes the list of nodes and their attributes.
13123 """
13124 # Locking is not used
13126 assert not (compat.any(lu.glm.is_owned(level)
13127 for level in locking.LEVELS
13128 if level != locking.LEVEL_CLUSTER) or
13129 self.do_locking or self.use_locking)
13131 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13133 result = []
13135 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13136 if nres.fail_msg:
13137 result.append((node, None))
13138 else:
13139 result.extend((node, expname) for expname in nres.payload)
13141 return result
13144 class LUBackupPrepare(NoHooksLU):
13145 """Prepares an instance for an export and returns useful information.
13147 """
13148 REQ_BGL = False
13150 def ExpandNames(self):
13151 self._ExpandAndLockInstance()
13153 def CheckPrereq(self):
13154 """Check prerequisites.
13156 """
13157 instance_name = self.op.instance_name
13159 self.instance = self.cfg.GetInstanceInfo(instance_name)
13160 assert self.instance is not None, \
13161 "Cannot retrieve locked instance %s" % self.op.instance_name
13162 _CheckNodeOnline(self, self.instance.primary_node)
13164 self._cds = _GetClusterDomainSecret()
13166 def Exec(self, feedback_fn):
13167 """Prepares an instance for an export.
13169 """
13170 instance = self.instance
13172 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13173 salt = utils.GenerateSecret(8)
13175 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13176 result = self.rpc.call_x509_cert_create(instance.primary_node,
13177 constants.RIE_CERT_VALIDITY)
13178 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13180 (name, cert_pem) = result.payload
13182 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13183 cert_pem)
13185 return {
13186 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13187 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13188 salt),
13189 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13190 }
13192 return None
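# Hedged sketch of the structure returned for remote-mode exports:
#   {"handshake": <handshake tuple>,
#    "x509_key_name": (name, hmac_digest, salt),
#    "x509_ca": <signed certificate PEM>}
# Local-mode preparations fall through to the "return None" above.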
13195 class LUBackupExport(LogicalUnit):
13196 """Export an instance to an image in the cluster.
13198 """
13199 HPATH = "instance-export"
13200 HTYPE = constants.HTYPE_INSTANCE
13201 REQ_BGL = False
13203 def CheckArguments(self):
13204 """Check the arguments.
13206 """
13207 self.x509_key_name = self.op.x509_key_name
13208 self.dest_x509_ca_pem = self.op.destination_x509_ca
13210 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13211 if not self.x509_key_name:
13212 raise errors.OpPrereqError("Missing X509 key name for encryption",
13213 errors.ECODE_INVAL)
13215 if not self.dest_x509_ca_pem:
13216 raise errors.OpPrereqError("Missing destination X509 CA",
13217 errors.ECODE_INVAL)
13219 def ExpandNames(self):
13220 self._ExpandAndLockInstance()
13222 # Lock all nodes for local exports
13223 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13224 # FIXME: lock only instance primary and destination node
13226 # Sad but true, for now we have to lock all nodes, as we don't know where
13227 # the previous export might be, and in this LU we search for it and
13228 # remove it from its current node. In the future we could fix this by:
13229 # - making a tasklet to search (share-lock all), then create the
13230 # new one, then one to remove, after
13231 # - removing the removal operation altogether
13232 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13234 def DeclareLocks(self, level):
13235 """Last minute lock declaration."""
13236 # All nodes are locked anyway, so nothing to do here.
13238 def BuildHooksEnv(self):
13239 """Build hooks env.
13241 This will run on the master, primary node and target node.
13243 """
13244 env = {
13245 "EXPORT_MODE": self.op.mode,
13246 "EXPORT_NODE": self.op.target_node,
13247 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13248 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13249 # TODO: Generic function for boolean env variables
13250 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13251 }
13253 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13255 return env
13257 def BuildHooksNodes(self):
13258 """Build hooks nodes.
13260 """
13261 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13263 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13264 nl.append(self.op.target_node)
13266 return (nl, nl)
13268 def CheckPrereq(self):
13269 """Check prerequisites.
13271 This checks that the instance and node names are valid.
13273 """
13274 instance_name = self.op.instance_name
13276 self.instance = self.cfg.GetInstanceInfo(instance_name)
13277 assert self.instance is not None, \
13278 "Cannot retrieve locked instance %s" % self.op.instance_name
13279 _CheckNodeOnline(self, self.instance.primary_node)
13281 if (self.op.remove_instance and
13282 self.instance.admin_state == constants.ADMINST_UP and
13283 not self.op.shutdown):
13284 raise errors.OpPrereqError("Can not remove instance without shutting it"
13285 " down before", errors.ECODE_STATE)
13287 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13288 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13289 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13290 assert self.dst_node is not None
13292 _CheckNodeOnline(self, self.dst_node.name)
13293 _CheckNodeNotDrained(self, self.dst_node.name)
13295 self._cds = None
13296 self.dest_disk_info = None
13297 self.dest_x509_ca = None
13299 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13300 self.dst_node = None
13302 if len(self.op.target_node) != len(self.instance.disks):
13303 raise errors.OpPrereqError(("Received destination information for %s"
13304 " disks, but instance %s has %s disks") %
13305 (len(self.op.target_node), instance_name,
13306 len(self.instance.disks)),
13307 errors.ECODE_INVAL)
13309 cds = _GetClusterDomainSecret()
13311 # Check X509 key name
13312 try:
13313 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13314 except (TypeError, ValueError), err:
13315 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13317 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13318 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13319 errors.ECODE_INVAL)
13321 # Load and verify CA
13322 try:
13323 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13324 except OpenSSL.crypto.Error, err:
13325 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13326 (err, ), errors.ECODE_INVAL)
13328 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13329 if errcode is not None:
13330 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13331 (msg, ), errors.ECODE_INVAL)
13333 self.dest_x509_ca = cert
13335 # Verify target information
13336 disk_info = []
13337 for idx, disk_data in enumerate(self.op.target_node):
13338 try:
13339 (host, port, magic) = \
13340 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13341 except errors.GenericError, err:
13342 raise errors.OpPrereqError("Target info for disk %s: %s" %
13343 (idx, err), errors.ECODE_INVAL)
13345 disk_info.append((host, port, magic))
13347 assert len(disk_info) == len(self.op.target_node)
13348 self.dest_disk_info = disk_info
13350 else:
13351 raise errors.ProgrammerError("Unhandled export mode %r" %
13352 self.op.mode)
13354 # instance disk type verification
13355 # TODO: Implement export support for file-based disks
13356 for disk in self.instance.disks:
13357 if disk.dev_type == constants.LD_FILE:
13358 raise errors.OpPrereqError("Export not supported for instances with"
13359 " file-based disks", errors.ECODE_INVAL)
13361 def _CleanupExports(self, feedback_fn):
13362 """Removes exports of current instance from all other nodes.
13364 If an instance in a cluster with nodes A..D was exported to node C, its
13365 exports will be removed from the nodes A, B and D.
13367 """
13368 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13370 nodelist = self.cfg.GetNodeList()
13371 nodelist.remove(self.dst_node.name)
13373 # on one-node clusters nodelist will be empty after the removal
13374 # if we proceed the backup would be removed because OpBackupQuery
13375 # substitutes an empty list with the full cluster node list.
13376 iname = self.instance.name
13377 if nodelist:
13378 feedback_fn("Removing old exports for instance %s" % iname)
13379 exportlist = self.rpc.call_export_list(nodelist)
13380 for node in exportlist:
13381 if exportlist[node].fail_msg:
13382 continue
13383 if iname in exportlist[node].payload:
13384 msg = self.rpc.call_export_remove(node, iname).fail_msg
13385 if msg:
13386 self.LogWarning("Could not remove older export for instance %s"
13387 " on node %s: %s", iname, node, msg)
13389 def Exec(self, feedback_fn):
13390 """Export an instance to an image in the cluster.
13392 """
13393 assert self.op.mode in constants.EXPORT_MODES
13395 instance = self.instance
13396 src_node = instance.primary_node
13398 if self.op.shutdown:
13399 # shutdown the instance, but not the disks
13400 feedback_fn("Shutting down instance %s" % instance.name)
13401 result = self.rpc.call_instance_shutdown(src_node, instance,
13402 self.op.shutdown_timeout)
13403 # TODO: Maybe ignore failures if ignore_remove_failures is set
13404 result.Raise("Could not shutdown instance %s on"
13405 " node %s" % (instance.name, src_node))
13407 # set the disks ID correctly since call_instance_start needs the
13408 # correct drbd minor to create the symlinks
13409 for disk in instance.disks:
13410 self.cfg.SetDiskID(disk, src_node)
13412 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13414 if activate_disks:
13415 # Activate the instance disks if we're exporting a stopped instance
13416 feedback_fn("Activating disks for %s" % instance.name)
13417 _StartInstanceDisks(self, instance, None)
13419 try:
13420 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13421 instance)
13423 helper.CreateSnapshots()
13424 try:
13425 if (self.op.shutdown and
13426 instance.admin_state == constants.ADMINST_UP and
13427 not self.op.remove_instance):
13428 assert not activate_disks
13429 feedback_fn("Starting instance %s" % instance.name)
13430 result = self.rpc.call_instance_start(src_node,
13431 (instance, None, None), False)
13432 msg = result.fail_msg
13433 if msg:
13434 feedback_fn("Failed to start instance: %s" % msg)
13435 _ShutdownInstanceDisks(self, instance)
13436 raise errors.OpExecError("Could not start instance: %s" % msg)
13438 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13439 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13440 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13441 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13442 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13444 (key_name, _, _) = self.x509_key_name
13446 dest_ca_pem = \
13447 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13448 self.dest_x509_ca)
13450 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13451 key_name, dest_ca_pem,
13452 timeouts)
13453 finally:
13454 helper.Cleanup()
13456 # Check for backwards compatibility
13457 assert len(dresults) == len(instance.disks)
13458 assert compat.all(isinstance(i, bool) for i in dresults), \
13459 "Not all results are boolean: %r" % dresults
13461 finally:
13462 if activate_disks:
13463 feedback_fn("Deactivating disks for %s" % instance.name)
13464 _ShutdownInstanceDisks(self, instance)
13466 if not (compat.all(dresults) and fin_resu):
13467 failures = []
13468 if not fin_resu:
13469 failures.append("export finalization")
13470 if not compat.all(dresults):
13471 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13472 if not dsk)
13473 failures.append("disk export: disk(s) %s" % fdsk)
13475 raise errors.OpExecError("Export failed, errors in %s" %
13476 utils.CommaJoin(failures))
13478 # At this point, the export was successful, we can cleanup/finish
13480 # Remove instance if requested
13481 if self.op.remove_instance:
13482 feedback_fn("Removing instance %s" % instance.name)
13483 _RemoveInstance(self, feedback_fn, instance,
13484 self.op.ignore_remove_failures)
13486 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13487 self._CleanupExports(feedback_fn)
13489 return fin_resu, dresults
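# Hedged example of the Exec() return value: (True, [True, True]) for a
# successful export of a two-disk instance; dresults carries one boolean per
# instance disk, as checked by the assertions above.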
13492 class LUBackupRemove(NoHooksLU):
13493 """Remove exports related to the named instance.
13495 """
13496 REQ_BGL = False
13498 def ExpandNames(self):
13499 self.needed_locks = {}
13500 # We need all nodes to be locked in order for RemoveExport to work, but we
13501 # don't need to lock the instance itself, as nothing will happen to it (and
13502 # we can remove exports also for a removed instance)
13503 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13505 def Exec(self, feedback_fn):
13506 """Remove any export.
13508 """
13509 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13510 # If the instance was not found we'll try with the name that was passed in.
13511 # This will only work if it was an FQDN, though.
13512 fqdn_warn = False
13513 if not instance_name:
13514 fqdn_warn = True
13515 instance_name = self.op.instance_name
13517 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13518 exportlist = self.rpc.call_export_list(locked_nodes)
13519 found = False
13520 for node in exportlist:
13521 msg = exportlist[node].fail_msg
13522 if msg:
13523 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13524 continue
13525 if instance_name in exportlist[node].payload:
13526 found = True
13527 result = self.rpc.call_export_remove(node, instance_name)
13528 msg = result.fail_msg
13529 if msg:
13530 logging.error("Could not remove export for instance %s"
13531 " on node %s: %s", instance_name, node, msg)
13533 if fqdn_warn and not found:
13534 feedback_fn("Export not found. If trying to remove an export belonging"
13535 " to a deleted instance please use its Fully Qualified"
13536 " Domain Name.")
13539 class LUGroupAdd(LogicalUnit):
13540 """Logical unit for creating node groups.
13542 """
13543 HPATH = "group-add"
13544 HTYPE = constants.HTYPE_GROUP
13545 REQ_BGL = False
13547 def ExpandNames(self):
13548 # We need the new group's UUID here so that we can create and acquire the
13549 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13550 # that it should not check whether the UUID exists in the configuration.
13551 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13552 self.needed_locks = {}
13553 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13555 def CheckPrereq(self):
13556 """Check prerequisites.
13558 This checks that the given group name is not an existing node group
13559 already.
13561 """
13562 try:
13563 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13564 except errors.OpPrereqError:
13565 pass
13566 else:
13567 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13568 " node group (UUID: %s)" %
13569 (self.op.group_name, existing_uuid),
13570 errors.ECODE_EXISTS)
13572 if self.op.ndparams:
13573 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13575 if self.op.hv_state:
13576 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13577 else:
13578 self.new_hv_state = None
13580 if self.op.disk_state:
13581 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13582 else:
13583 self.new_disk_state = None
13585 if self.op.diskparams:
13586 for templ in constants.DISK_TEMPLATES:
13587 if templ in self.op.diskparams:
13588 utils.ForceDictType(self.op.diskparams[templ],
13589 constants.DISK_DT_TYPES)
13590 self.new_diskparams = self.op.diskparams
13591 else:
13592 self.new_diskparams = {}
13594 if self.op.ipolicy:
13595 cluster = self.cfg.GetClusterInfo()
13596 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13597 try:
13598 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13599 except errors.ConfigurationError, err:
13600 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13601 errors.ECODE_INVAL)
13603 def BuildHooksEnv(self):
13604 """Build hooks env.
13606 """
13607 return {
13608 "GROUP_NAME": self.op.group_name,
13609 }
13611 def BuildHooksNodes(self):
13612 """Build hooks nodes.
13614 """
13615 mn = self.cfg.GetMasterNode()
13616 return ([mn], [mn])
13618 def Exec(self, feedback_fn):
13619 """Add the node group to the cluster.
13621 """
13622 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13623 uuid=self.group_uuid,
13624 alloc_policy=self.op.alloc_policy,
13625 ndparams=self.op.ndparams,
13626 diskparams=self.new_diskparams,
13627 ipolicy=self.op.ipolicy,
13628 hv_state_static=self.new_hv_state,
13629 disk_state_static=self.new_disk_state)
13631 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13632 del self.remove_locks[locking.LEVEL_NODEGROUP]
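# Hedged usage sketch (hypothetical values): a minimal group creation goes
# through an OpGroupAdd opcode, e.g.
#
#   op = opcodes.OpGroupAdd(group_name="group2",
#                           alloc_policy=constants.ALLOC_POLICY_PREFERRED)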
13635 class LUGroupAssignNodes(NoHooksLU):
13636 """Logical unit for assigning nodes to groups.
13638 """
13639 REQ_BGL = False
13641 def ExpandNames(self):
13642 # These raise errors.OpPrereqError on their own:
13643 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13644 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13646 # We want to lock all the affected nodes and groups. We have readily
13647 # available the list of nodes, and the *destination* group. To gather the
13648 # list of "source" groups, we need to fetch node information later on.
13649 self.needed_locks = {
13650 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13651 locking.LEVEL_NODE: self.op.nodes,
13652 }
13654 def DeclareLocks(self, level):
13655 if level == locking.LEVEL_NODEGROUP:
13656 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13658 # Try to get all affected nodes' groups without having the group or node
13659 # lock yet. Needs verification later in the code flow.
13660 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13662 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13664 def CheckPrereq(self):
13665 """Check prerequisites.
13667 """
13668 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13669 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13670 frozenset(self.op.nodes))
13672 expected_locks = (set([self.group_uuid]) |
13673 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13674 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13675 if actual_locks != expected_locks:
13676 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13677 " current groups are '%s', used to be '%s'" %
13678 (utils.CommaJoin(expected_locks),
13679 utils.CommaJoin(actual_locks)))
13681 self.node_data = self.cfg.GetAllNodesInfo()
13682 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13683 instance_data = self.cfg.GetAllInstancesInfo()
13685 if self.group is None:
13686 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13687 (self.op.group_name, self.group_uuid))
13689 (new_splits, previous_splits) = \
13690 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13691 for node in self.op.nodes],
13692 self.node_data, instance_data)
13694 if new_splits:
13695 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13697 if not self.op.force:
13698 raise errors.OpExecError("The following instances get split by this"
13699 " change and --force was not given: %s" %
13700 fmt_new_splits)
13701 else:
13702 self.LogWarning("This operation will split the following instances: %s",
13703 fmt_new_splits)
13705 if previous_splits:
13706 self.LogWarning("In addition, these already-split instances continue"
13707 " to be split across groups: %s",
13708 utils.CommaJoin(utils.NiceSort(previous_splits)))
13710 def Exec(self, feedback_fn):
13711 """Assign nodes to a new group.
13713 """
13714 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13716 self.cfg.AssignGroupNodes(mods)
13718 @staticmethod
13719 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13720 """Check for split instances after a node assignment.
13722 This method considers a series of node assignments as an atomic operation,
13723 and returns information about split instances after applying the set of
13724 changes.
13726 In particular, it returns information about newly split instances, and
13727 instances that were already split, and remain so after the change.
13729 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13730 considered.
13732 @type changes: list of (node_name, new_group_uuid) pairs.
13733 @param changes: list of node assignments to consider.
13734 @param node_data: a dict with data for all nodes
13735 @param instance_data: a dict with all instances to consider
13736 @rtype: a two-tuple
13737 @return: a list of instances that were previously okay and result split as a
13738 consequence of this change, and a list of instances that were previously
13739 split and this change does not fix.
13741 """
13742 changed_nodes = dict((node, group) for node, group in changes
13743 if node_data[node].group != group)
13745 all_split_instances = set()
13746 previously_split_instances = set()
13748 def InstanceNodes(instance):
13749 return [instance.primary_node] + list(instance.secondary_nodes)
13751 for inst in instance_data.values():
13752 if inst.disk_template not in constants.DTS_INT_MIRROR:
13753 continue
13755 instance_nodes = InstanceNodes(inst)
13757 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13758 previously_split_instances.add(inst.name)
13760 if len(set(changed_nodes.get(node, node_data[node].group)
13761 for node in instance_nodes)) > 1:
13762 all_split_instances.add(inst.name)
13764 return (list(all_split_instances - previously_split_instances),
13765 list(previously_split_instances & all_split_instances))
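# Worked example (hypothetical names): with node n1 in group g1, node n2 in
# group g2 and a DRBD instance on (n1, n2), the instance counts as previously
# split. CheckAssignmentForSplitInstances([("n2", "g1")], ...) then returns
# ([], []) because the move heals the split, whereas moving one node of a
# currently single-group instance into another group would list that instance
# in the first element (newly split).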
13768 class _GroupQuery(_QueryBase):
13769 FIELDS = query.GROUP_FIELDS
13771 def ExpandNames(self, lu):
13772 lu.needed_locks = {}
13774 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13775 self._cluster = lu.cfg.GetClusterInfo()
13776 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13778 if not self.names:
13779 self.wanted = [name_to_uuid[name]
13780 for name in utils.NiceSort(name_to_uuid.keys())]
13781 else:
13782 # Accept names to be either names or UUIDs.
13783 missing = []
13784 self.wanted = []
13785 all_uuid = frozenset(self._all_groups.keys())
13787 for name in self.names:
13788 if name in all_uuid:
13789 self.wanted.append(name)
13790 elif name in name_to_uuid:
13791 self.wanted.append(name_to_uuid[name])
13792 else:
13793 missing.append(name)
13795 if missing:
13796 raise errors.OpPrereqError("Some groups do not exist: %s" %
13797 utils.CommaJoin(missing),
13798 errors.ECODE_NOENT)
13800 def DeclareLocks(self, lu, level):
13801 pass
13803 def _GetQueryData(self, lu):
13804 """Computes the list of node groups and their attributes.
13806 """
13807 do_nodes = query.GQ_NODE in self.requested_data
13808 do_instances = query.GQ_INST in self.requested_data
13810 group_to_nodes = None
13811 group_to_instances = None
13813 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13814 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13815 # latter GetAllInstancesInfo() is not enough, for we have to go through
13816 # instance->node. Hence, we will need to process nodes even if we only need
13817 # instance information.
13818 if do_nodes or do_instances:
13819 all_nodes = lu.cfg.GetAllNodesInfo()
13820 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13822 node_to_group = {}
13823 for node in all_nodes.values():
13824 if node.group in group_to_nodes:
13825 group_to_nodes[node.group].append(node.name)
13826 node_to_group[node.name] = node.group
13828 if do_instances:
13829 all_instances = lu.cfg.GetAllInstancesInfo()
13830 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13832 for instance in all_instances.values():
13833 node = instance.primary_node
13834 if node in node_to_group:
13835 group_to_instances[node_to_group[node]].append(instance.name)
13837 if not do_nodes:
13838 # Do not pass on node information if it was not requested.
13839 group_to_nodes = None
13841 return query.GroupQueryData(self._cluster,
13842 [self._all_groups[uuid]
13843 for uuid in self.wanted],
13844 group_to_nodes, group_to_instances,
13845 query.GQ_DISKPARAMS in self.requested_data)
13848 class LUGroupQuery(NoHooksLU):
13849 """Logical unit for querying node groups.
13851 """
13852 REQ_BGL = False
13854 def CheckArguments(self):
13855 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13856 self.op.output_fields, False)
13858 def ExpandNames(self):
13859 self.gq.ExpandNames(self)
13861 def DeclareLocks(self, level):
13862 self.gq.DeclareLocks(self, level)
13864 def Exec(self, feedback_fn):
13865 return self.gq.OldStyleQuery(self)
13868 class LUGroupSetParams(LogicalUnit):
13869 """Modifies the parameters of a node group.
13871 """
13872 HPATH = "group-modify"
13873 HTYPE = constants.HTYPE_GROUP
13874 REQ_BGL = False
13876 def CheckArguments(self):
13877 all_changes = [
13878 self.op.ndparams,
13879 self.op.diskparams,
13880 self.op.alloc_policy,
13881 self.op.hv_state,
13882 self.op.disk_state,
13883 self.op.ipolicy,
13884 ]
13886 if all_changes.count(None) == len(all_changes):
13887 raise errors.OpPrereqError("Please pass at least one modification",
13888 errors.ECODE_INVAL)
13890 def ExpandNames(self):
13891 # This raises errors.OpPrereqError on its own:
13892 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13894 self.needed_locks = {
13895 locking.LEVEL_INSTANCE: [],
13896 locking.LEVEL_NODEGROUP: [self.group_uuid],
13897 }
13899 self.share_locks[locking.LEVEL_INSTANCE] = 1
13901 def DeclareLocks(self, level):
13902 if level == locking.LEVEL_INSTANCE:
13903 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13905 # Lock instances optimistically, needs verification once group lock has
13906 # been acquired
13907 self.needed_locks[locking.LEVEL_INSTANCE] = \
13908 self.cfg.GetNodeGroupInstances(self.group_uuid)
13910 @staticmethod
13911 def _UpdateAndVerifyDiskParams(old, new):
13912 """Updates and verifies disk parameters.
13914 """
13915 new_params = _GetUpdatedParams(old, new)
13916 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
13917 return new_params
13919 def CheckPrereq(self):
13920 """Check prerequisites.
13922 """
13923 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13925 # Check if locked instances are still correct
13926 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13928 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13929 cluster = self.cfg.GetClusterInfo()
13931 if self.group is None:
13932 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13933 (self.op.group_name, self.group_uuid))
13935 if self.op.ndparams:
13936 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13937 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13938 self.new_ndparams = new_ndparams
13940 if self.op.diskparams:
13941 diskparams = self.group.diskparams
13942 uavdp = self._UpdateAndVerifyDiskParams
13943 # For each disktemplate subdict update and verify the values
13944 new_diskparams = dict((dt,
13945 uavdp(diskparams.get(dt, {}),
13946 self.op.diskparams[dt]))
13947 for dt in constants.DISK_TEMPLATES
13948 if dt in self.op.diskparams)
13949 # As we've all subdicts of diskparams ready, lets merge the actual
13950 # dict with all updated subdicts
13951 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
13953 if self.op.hv_state:
13954 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13955 self.group.hv_state_static)
13957 if self.op.disk_state:
13958 self.new_disk_state = \
13959 _MergeAndVerifyDiskState(self.op.disk_state,
13960 self.group.disk_state_static)
13962 if self.op.ipolicy:
13963 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13964 self.op.ipolicy,
13965 group_policy=True)
13967 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
13968 inst_filter = lambda inst: inst.name in owned_instances
13969 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
13970 violations = \
13971 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
13972 self.group),
13973 new_ipolicy, instances)
13975 if violations:
13976 self.LogWarning("After the ipolicy change the following instances"
13977 " violate them: %s",
13978 utils.CommaJoin(violations))
13980 def BuildHooksEnv(self):
13981 """Build hooks env.
13983 """
13984 return {
13985 "GROUP_NAME": self.op.group_name,
13986 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13987 }
13989 def BuildHooksNodes(self):
13990 """Build hooks nodes.
13992 """
13993 mn = self.cfg.GetMasterNode()
13994 return ([mn], [mn])
13996 def Exec(self, feedback_fn):
13997 """Modifies the node group.
13999 """
14000 result = []
14002 if self.op.ndparams:
14003 self.group.ndparams = self.new_ndparams
14004 result.append(("ndparams", str(self.group.ndparams)))
14006 if self.op.diskparams:
14007 self.group.diskparams = self.new_diskparams
14008 result.append(("diskparams", str(self.group.diskparams)))
14010 if self.op.alloc_policy:
14011 self.group.alloc_policy = self.op.alloc_policy
14013 if self.op.hv_state:
14014 self.group.hv_state_static = self.new_hv_state
14016 if self.op.disk_state:
14017 self.group.disk_state_static = self.new_disk_state
14019 if self.op.ipolicy:
14020 self.group.ipolicy = self.new_ipolicy
14022 self.cfg.Update(self.group, feedback_fn)
14024 return result
14026 class LUGroupRemove(LogicalUnit):
14027 HPATH = "group-remove"
14028 HTYPE = constants.HTYPE_GROUP
14029 REQ_BGL = False
14031 def ExpandNames(self):
14032 # This will raise errors.OpPrereqError on its own:
14033 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14034 self.needed_locks = {
14035 locking.LEVEL_NODEGROUP: [self.group_uuid],
14036 }
14038 def CheckPrereq(self):
14039 """Check prerequisites.
14041 This checks that the given group name exists as a node group, that it is
14042 empty (i.e., contains no nodes), and that it is not the last group of the
14043 cluster.
14045 """
14046 # Verify that the group is empty.
14047 group_nodes = [node.name
14048 for node in self.cfg.GetAllNodesInfo().values()
14049 if node.group == self.group_uuid]
14051 if group_nodes:
14052 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14053 " nodes: %s" %
14054 (self.op.group_name,
14055 utils.CommaJoin(utils.NiceSort(group_nodes))),
14056 errors.ECODE_STATE)
14058 # Verify the cluster would not be left group-less.
14059 if len(self.cfg.GetNodeGroupList()) == 1:
14060 raise errors.OpPrereqError("Group '%s' is the only group,"
14061 " cannot be removed" %
14062 self.op.group_name,
14063 errors.ECODE_STATE)
14065 def BuildHooksEnv(self):
14066 """Build hooks env.
14068 """
14069 return {
14070 "GROUP_NAME": self.op.group_name,
14071 }
14073 def BuildHooksNodes(self):
14074 """Build hooks nodes.
14076 """
14077 mn = self.cfg.GetMasterNode()
14078 return ([mn], [mn])
14080 def Exec(self, feedback_fn):
14081 """Remove the node group.
14083 """
14084 try:
14085 self.cfg.RemoveNodeGroup(self.group_uuid)
14086 except errors.ConfigurationError:
14087 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14088 (self.op.group_name, self.group_uuid))
14090 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14093 class LUGroupRename(LogicalUnit):
14094 HPATH = "group-rename"
14095 HTYPE = constants.HTYPE_GROUP
14096 REQ_BGL = False
14098 def ExpandNames(self):
14099 # This raises errors.OpPrereqError on its own:
14100 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14102 self.needed_locks = {
14103 locking.LEVEL_NODEGROUP: [self.group_uuid],
14104 }
14106 def CheckPrereq(self):
14107 """Check prerequisites.
14109 Ensures requested new name is not yet used.
14111 """
14112 try:
14113 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14114 except errors.OpPrereqError:
14115 pass
14116 else:
14117 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14118 " node group (UUID: %s)" %
14119 (self.op.new_name, new_name_uuid),
14120 errors.ECODE_EXISTS)
14122 def BuildHooksEnv(self):
14123 """Build hooks env.
14125 """
14126 return {
14127 "OLD_NAME": self.op.group_name,
14128 "NEW_NAME": self.op.new_name,
14129 }
14131 def BuildHooksNodes(self):
14132 """Build hooks nodes.
14134 """
14135 mn = self.cfg.GetMasterNode()
14137 all_nodes = self.cfg.GetAllNodesInfo()
14138 all_nodes.pop(mn, None)
14140 run_nodes = [mn]
14141 run_nodes.extend(node.name for node in all_nodes.values()
14142 if node.group == self.group_uuid)
14144 return (run_nodes, run_nodes)
14146 def Exec(self, feedback_fn):
14147 """Rename the node group.
14149 """
14150 group = self.cfg.GetNodeGroup(self.group_uuid)
14152 if group is None:
14153 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14154 (self.op.group_name, self.group_uuid))
14156 group.name = self.op.new_name
14157 self.cfg.Update(group, feedback_fn)
14159 return self.op.new_name
14162 class LUGroupEvacuate(LogicalUnit):
14163 HPATH = "group-evacuate"
14164 HTYPE = constants.HTYPE_GROUP
14165 REQ_BGL = False
14167 def ExpandNames(self):
14168 # This raises errors.OpPrereqError on its own:
14169 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14171 if self.op.target_groups:
14172 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14173 self.op.target_groups)
14174 else:
14175 self.req_target_uuids = []
14177 if self.group_uuid in self.req_target_uuids:
14178 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14179 " as a target group (targets are %s)" %
14180 (self.group_uuid,
14181 utils.CommaJoin(self.req_target_uuids)),
14182 errors.ECODE_INVAL)
14184 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14186 self.share_locks = _ShareAll()
14187 self.needed_locks = {
14188 locking.LEVEL_INSTANCE: [],
14189 locking.LEVEL_NODEGROUP: [],
14190 locking.LEVEL_NODE: [],
14191 }
14193 def DeclareLocks(self, level):
14194 if level == locking.LEVEL_INSTANCE:
14195 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14197 # Lock instances optimistically, needs verification once node and group
14198 # locks have been acquired
14199 self.needed_locks[locking.LEVEL_INSTANCE] = \
14200 self.cfg.GetNodeGroupInstances(self.group_uuid)
14202 elif level == locking.LEVEL_NODEGROUP:
14203 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14205 if self.req_target_uuids:
14206 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14208 # Lock all groups used by instances optimistically; this requires going
14209 # via the node before it's locked, requiring verification later on
14210 lock_groups.update(group_uuid
14211 for instance_name in
14212 self.owned_locks(locking.LEVEL_INSTANCE)
14213 for group_uuid in
14214 self.cfg.GetInstanceNodeGroups(instance_name))
14215 else:
14216 # No target groups, need to lock all of them
14217 lock_groups = locking.ALL_SET
14219 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14221 elif level == locking.LEVEL_NODE:
14222 # This will only lock the nodes in the group to be evacuated which
14223 # contain actual instances
14224 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14225 self._LockInstancesNodes()
14227 # Lock all nodes in group to be evacuated and target groups
14228 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14229 assert self.group_uuid in owned_groups
14230 member_nodes = [node_name
14231 for group in owned_groups
14232 for node_name in self.cfg.GetNodeGroup(group).members]
14233 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14235 def CheckPrereq(self):
14236 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14237 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14238 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14240 assert owned_groups.issuperset(self.req_target_uuids)
14241 assert self.group_uuid in owned_groups
14243 # Check if locked instances are still correct
14244 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14246 # Get instance information
14247 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14249 # Check if node groups for locked instances are still correct
14250 _CheckInstancesNodeGroups(self.cfg, self.instances,
14251 owned_groups, owned_nodes, self.group_uuid)
14253 if self.req_target_uuids:
14254 # User requested specific target groups
14255 self.target_uuids = self.req_target_uuids
14256 else:
14257 # All groups except the one to be evacuated are potential targets
14258 self.target_uuids = [group_uuid for group_uuid in owned_groups
14259 if group_uuid != self.group_uuid]
14261 if not self.target_uuids:
14262 raise errors.OpPrereqError("There are no possible target groups",
14263 errors.ECODE_INVAL)
14265 def BuildHooksEnv(self):
14266 """Build hooks env.
14268 """
14269 return {
14270 "GROUP_NAME": self.op.group_name,
14271 "TARGET_GROUPS": " ".join(self.target_uuids),
14272 }
14274 def BuildHooksNodes(self):
14275 """Build hooks nodes.
14277 """
14278 mn = self.cfg.GetMasterNode()
14280 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14282 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14284 return (run_nodes, run_nodes)
14286 def Exec(self, feedback_fn):
14287 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14289 assert self.group_uuid not in self.target_uuids
14291 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14292 instances=instances, target_groups=self.target_uuids)
14294 ial.Run(self.op.iallocator)
14296 if not ial.success:
14297 raise errors.OpPrereqError("Can't compute group evacuation using"
14298 " iallocator '%s': %s" %
14299 (self.op.iallocator, ial.info),
14300 errors.ECODE_NORES)
14302 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14304 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14305 len(jobs), self.op.group_name)
14307 return ResultWithJobs(jobs)
14310 class TagsLU(NoHooksLU): # pylint: disable=W0223
14311 """Generic tags LU.
14313 This is an abstract class which is the parent of all the other tags LUs.
14315 """
14316 def ExpandNames(self):
14317 self.group_uuid = None
14318 self.needed_locks = {}
14320 if self.op.kind == constants.TAG_NODE:
14321 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14322 lock_level = locking.LEVEL_NODE
14323 lock_name = self.op.name
14324 elif self.op.kind == constants.TAG_INSTANCE:
14325 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14326 lock_level = locking.LEVEL_INSTANCE
14327 lock_name = self.op.name
14328 elif self.op.kind == constants.TAG_NODEGROUP:
14329 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14330 lock_level = locking.LEVEL_NODEGROUP
14331 lock_name = self.group_uuid
14332 else:
14333 lock_level = None
14334 lock_name = None
14336 if lock_level and getattr(self.op, "use_locking", True):
14337 self.needed_locks[lock_level] = lock_name
14339 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14340 # not possible to acquire the BGL based on opcode parameters)
14342 def CheckPrereq(self):
14343 """Check prerequisites.
14345 """
14346 if self.op.kind == constants.TAG_CLUSTER:
14347 self.target = self.cfg.GetClusterInfo()
14348 elif self.op.kind == constants.TAG_NODE:
14349 self.target = self.cfg.GetNodeInfo(self.op.name)
14350 elif self.op.kind == constants.TAG_INSTANCE:
14351 self.target = self.cfg.GetInstanceInfo(self.op.name)
14352 elif self.op.kind == constants.TAG_NODEGROUP:
14353 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14354 else:
14355 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14356 str(self.op.kind), errors.ECODE_INVAL)
14359 class LUTagsGet(TagsLU):
14360 """Returns the tags of a given object.
14362 """
14363 REQ_BGL = False
14365 def ExpandNames(self):
14366 TagsLU.ExpandNames(self)
14368 # Share locks as this is only a read operation
14369 self.share_locks = _ShareAll()
14371 def Exec(self, feedback_fn):
14372 """Returns the tag list.
14374 """
14375 return list(self.target.GetTags())
14378 class LUTagsSearch(NoHooksLU):
14379 """Searches the tags for a given pattern.
14381 """
14382 REQ_BGL = False
14384 def ExpandNames(self):
14385 self.needed_locks = {}
14387 def CheckPrereq(self):
14388 """Check prerequisites.
14390 This checks the pattern passed for validity by compiling it.
14392 """
14393 try:
14394 self.re = re.compile(self.op.pattern)
14395 except re.error, err:
14396 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14397 (self.op.pattern, err), errors.ECODE_INVAL)
14399 def Exec(self, feedback_fn):
14400 """Returns the tag list.
14402 """
14403 cfg = self.cfg
14404 tgts = [("/cluster", cfg.GetClusterInfo())]
14405 ilist = cfg.GetAllInstancesInfo().values()
14406 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14407 nlist = cfg.GetAllNodesInfo().values()
14408 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14409 tgts.extend(("/nodegroup/%s" % n.name, n)
14410 for n in cfg.GetAllNodeGroupsInfo().values())
14411 results = []
14412 for path, target in tgts:
14413 for tag in target.GetTags():
14414 if self.re.search(tag):
14415 results.append((path, tag))
14416 return results
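# Hedged example: with pattern "^db", Exec() could return
#   [("/cluster", "dbcluster"), ("/instances/db1.example.com", "dbtier")]
# i.e. a list of (path, tag) pairs, one per matching tag.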
14419 class LUTagsSet(TagsLU):
14420 """Sets a tag on a given object.
14422 """
14423 REQ_BGL = False
14425 def CheckPrereq(self):
14426 """Check prerequisites.
14428 This checks the type and length of the tag name and value.
14430 """
14431 TagsLU.CheckPrereq(self)
14432 for tag in self.op.tags:
14433 objects.TaggableObject.ValidateTag(tag)
14435 def Exec(self, feedback_fn):
14436 """Sets the tag.
14438 """
14439 try:
14440 for tag in self.op.tags:
14441 self.target.AddTag(tag)
14442 except errors.TagError, err:
14443 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14444 self.cfg.Update(self.target, feedback_fn)
14447 class LUTagsDel(TagsLU):
14448 """Delete a list of tags from a given object.
14450 """
14451 REQ_BGL = False
14453 def CheckPrereq(self):
14454 """Check prerequisites.
14456 This checks that we have the given tag.
14458 """
14459 TagsLU.CheckPrereq(self)
14460 for tag in self.op.tags:
14461 objects.TaggableObject.ValidateTag(tag)
14462 del_tags = frozenset(self.op.tags)
14463 cur_tags = self.target.GetTags()
14465 diff_tags = del_tags - cur_tags
14466 if diff_tags:
14467 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14468 raise errors.OpPrereqError("Tag(s) %s not found" %
14469 (utils.CommaJoin(diff_names), ),
14470 errors.ECODE_NOENT)
14472 def Exec(self, feedback_fn):
14473 """Remove the tag from the object.
14475 """
14476 for tag in self.op.tags:
14477 self.target.RemoveTag(tag)
14478 self.cfg.Update(self.target, feedback_fn)
14481 class LUTestDelay(NoHooksLU):
14482 """Sleep for a specified amount of time.
14484 This LU sleeps on the master and/or nodes for a specified amount of
14485 time.
14487 """
14488 REQ_BGL = False
14490 def ExpandNames(self):
14491 """Expand names and set required locks.
14493 This expands the node list, if any.
14495 """
14496 self.needed_locks = {}
14497 if self.op.on_nodes:
14498 # _GetWantedNodes can be used here, but is not always appropriate to use
14499 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14500 # more information.
14501 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14502 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14504 def _TestDelay(self):
14505 """Do the actual sleep.
14507 """
14508 if self.op.on_master:
14509 if not utils.TestDelay(self.op.duration):
14510 raise errors.OpExecError("Error during master delay test")
14511 if self.op.on_nodes:
14512 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14513 for node, node_result in result.items():
14514 node_result.Raise("Failure during rpc call to node %s" % node)
14516 def Exec(self, feedback_fn):
14517 """Execute the test delay opcode, with the wanted repetitions.
14519 """
14520 if self.op.repeat == 0:
14521 self._TestDelay()
14522 else:
14523 top_value = self.op.repeat - 1
14524 for i in range(self.op.repeat):
14525 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14526 self._TestDelay()
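# Hedged usage sketch (hypothetical values): this LU is driven by an
# OpTestDelay opcode, e.g.
#
#   op = opcodes.OpTestDelay(duration=3.0, on_master=True,
#                            on_nodes=[], repeat=0)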
14529 class LUTestJqueue(NoHooksLU):
14530 """Utility LU to test some aspects of the job queue.
14532 """
14533 REQ_BGL = False
14535 # Must be lower than default timeout for WaitForJobChange to see whether it
14536 # notices changed jobs
14537 _CLIENT_CONNECT_TIMEOUT = 20.0
14538 _CLIENT_CONFIRM_TIMEOUT = 60.0
14540 @classmethod
14541 def _NotifyUsingSocket(cls, cb, errcls):
14542 """Opens a Unix socket and waits for another program to connect.
14544 @type cb: callable
14545 @param cb: Callback to send socket name to client
14546 @type errcls: class
14547 @param errcls: Exception class to use for errors
14549 """
14550 # Using a temporary directory as there's no easy way to create temporary
14551 # sockets without writing a custom loop around tempfile.mktemp and
14552 # socket.bind
14553 tmpdir = tempfile.mkdtemp()
14554 try:
14555 tmpsock = utils.PathJoin(tmpdir, "sock")
14557 logging.debug("Creating temporary socket at %s", tmpsock)
14558 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14559 try:
14560 sock.bind(tmpsock)
14561 sock.listen(1)
14563 # Send details to client
14564 cb(tmpsock)
14566 # Wait for client to connect before continuing
14567 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14568 try:
14569 (conn, _) = sock.accept()
14570 except socket.error, err:
14571 raise errcls("Client didn't connect in time (%s)" % err)
14572 finally:
14573 sock.close()
14574 finally:
14575 # Remove as soon as client is connected
14576 shutil.rmtree(tmpdir)
14578 # Wait for client to close
14579 try:
14580 try:
14581 # pylint: disable=E1101
14582 # Instance of '_socketobject' has no ... member
14583 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14584 conn.recv(1)
14585 except socket.error, err:
14586 raise errcls("Client failed to confirm notification (%s)" % err)
14587 finally:
14588 conn.close()
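# Hedged client-side sketch of the rendezvous protocol implemented above: the
# test client receives the socket path via the callback, then
#
#   s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#   s.connect(sockname)  # unblocks sock.accept() above
#   s.close()            # unblocks conn.recv(1) above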
14590 def _SendNotification(self, test, arg, sockname):
14591 """Sends a notification to the client.
14593 @type test: string
14594 @param test: Test name
14595 @param arg: Test argument (depends on test)
14596 @type sockname: string
14597 @param sockname: Socket path
14599 """
14600 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14602 def _Notify(self, prereq, test, arg):
14603 """Notifies the client of a test.
14605 @type prereq: bool
14606 @param prereq: Whether this is a prereq-phase test
14607 @type test: string
14608 @param test: Test name
14609 @param arg: Test argument (depends on test)
14611 """
14612 if prereq:
14613 errcls = errors.OpPrereqError
14614 else:
14615 errcls = errors.OpExecError
14617 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14618 test, arg),
14619 errcls)
14621 def CheckArguments(self):
14622 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14623 self.expandnames_calls = 0
14625 def ExpandNames(self):
14626 checkargs_calls = getattr(self, "checkargs_calls", 0)
14627 if checkargs_calls < 1:
14628 raise errors.ProgrammerError("CheckArguments was not called")
14630 self.expandnames_calls += 1
14632 if self.op.notify_waitlock:
14633 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14635 self.LogInfo("Expanding names")
14637 # Get lock on master node (just to get a lock, not for a particular reason)
14638 self.needed_locks = {
14639 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14640 }
14642 def Exec(self, feedback_fn):
14643 if self.expandnames_calls < 1:
14644 raise errors.ProgrammerError("ExpandNames was not called")
14646 if self.op.notify_exec:
14647 self._Notify(False, constants.JQT_EXEC, None)
14649 self.LogInfo("Executing")
14651 if self.op.log_messages:
14652 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14653 for idx, msg in enumerate(self.op.log_messages):
14654 self.LogInfo("Sending log message %s", idx + 1)
14655 feedback_fn(constants.JQT_MSGPREFIX + msg)
14656 # Report how many test messages have been sent
14657 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14659 if self.op.fail:
14660 raise errors.OpExecError("Opcode failure was requested")
14662 return True
14665 class IAllocator(object):
14666 """IAllocator framework.
14668 An IAllocator instance has four sets of attributes:
14669 - cfg that is needed to query the cluster
14670 - input data (all members of the _KEYS class attribute are required)
14671 - four buffer attributes (in|out_data|text), that represent the
14672 input (to the external script) in text and data structure format,
14673 and the output from it, again in two formats
14674 - the result variables from the script (success, info, nodes) for
14678 # pylint: disable=R0902
14679 # lots of instance attributes
  def __init__(self, cfg, rpc_runner, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc_runner
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = self.spindle_use = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)

    self._BuildInputData(compat.partial(fn, self), keydata)

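  # Illustrative sketch (example values, not from the original code): callers
  # construct an instance with exactly the keyword arguments that _MODE_DATA
  # requires for the chosen mode, e.g. for a relocation request:
  #
  #   ial = IAllocator(self.cfg, self.rpc,
  #                    mode=constants.IALLOCATOR_MODE_RELOC,
  #                    name="inst1.example.com",
  #                    relocate_from=["node2.example.com"])
  #   ial.Run("hail")
  #
  # Missing or unexpected keyword arguments raise ProgrammerError here, at
  # construction time, before any RPC is made.
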
  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      "ipolicy": cluster_info.ipolicy,
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.primary_hypervisor

    node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
                                        [hypervisor_name])
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
      "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    cluster = cfg.GetClusterInfo()
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng

  @staticmethod
  def _ComputeBasicNodeData(cfg, node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict of name: (node dict, node config)

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      "ndparams": cfg.GetNdParams(ninfo),
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    #TODO(dynmem): compute the right data on MAX and MIN memory
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = _MakeLegacyNodeInfo(nresult.payload)

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MAXMEM]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_state == constants.ADMINST_UP:
              i_p_up_mem += beinfo[constants.BE_MAXMEM]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

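  # Worked example for the free-memory adjustment above (hypothetical
  # numbers): an instance with BE_MAXMEM = 1024 MiB that the hypervisor
  # currently reports at 768 MiB gives i_mem_diff = 1024 - 768 = 256, so
  # 256 MiB are subtracted from the node's reported free memory to reserve
  # room for the instance to grow back to its maximum; negative differences
  # are clamped away by max(0, i_mem_diff).
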
  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_state": iinfo.admin_state,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MAXMEM],
        "spindle_use": beinfo[constants.BE_SPINDLE_USE],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _AllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "spindle_use": self.spindle_use,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _IAllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for group change requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

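  # Illustrative sketch (hypothetical values, not from the original code):
  # for an allocation request the serialized in_text looks roughly like
  #
  #   {
  #     "version": 2,
  #     "cluster_name": "cluster.example.com",
  #     "nodegroups": {...},
  #     "nodes": {...},
  #     "instances": {...},
  #     "request": {
  #       "type": "allocate",
  #       "name": "inst1.example.com",
  #       "memory": 1024,
  #       "vcpus": 1,
  #       "disks": [{"size": 10240, "mode": "rw"}],
  #       "required_nodes": 2,
  #       ...
  #       },
  #     }
  #
  # The exact key set of "request" is driven by the _MODE_DATA entry for the
  # mode in use.
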
  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
    # pylint: disable=E1101
    # Class '...' has no 'OP_ID' member
    "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                         opcodes.OpInstanceMigrate.OP_ID,
                         opcodes.OpInstanceReplaceDisks.OP_ID])
    })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                  ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                  ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("spindle_use", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

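  # Illustrative sketch (example only, not part of the original code): since
  # Run accepts a custom call_fn, the round-trip can be exercised without a
  # live master node. A stub only needs the two members Run uses, Raise()
  # and payload:
  #
  #   class _FakeIAllocatorResult(object):  # hypothetical test helper
  #     def __init__(self, payload):
  #       self.payload = payload
  #     def Raise(self, *args, **kwargs):
  #       pass  # pretend the RPC succeeded
  #
  #   stub = lambda node, name, in_text: \
  #     _FakeIAllocatorResult('{"success": true, "info": "", "result": []}')
  #   ial.Run("dummy", call_fn=stub)
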
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list of strings
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)

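
# Illustrative usage sketch for IAllocator._NodesToGroups; the helper name
# and sample values below are hypothetical, not part of the original module:
def _ExampleNodesToGroups():
  """Example only: demonstrates the unknown-node and unknown-group cases.

  """
  node2group = {
    "node1.example.com": "uuid-a",
    "node2.example.com": "uuid-b",
    }
  groups = {
    "uuid-a": {"name": "default"},
    # "uuid-b" is deliberately absent: its UUID is used as the group name
    }
  # Returns ["default", "uuid-b"]; the unknown node is silently skipped
  return IAllocator._NodesToGroups(node2group, groups,
                                   ["node1.example.com", "node2.example.com",
                                    "unknown.example.com"])

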
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       spindle_use=self.op.spindle_use)
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from))
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text

    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)


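# Illustrative usage sketch; the helper name below is hypothetical and only
# demonstrates the lookup performed by _GetQueryImplementation:
def _ExampleQueryDispatch():
  """Example only: resolves a query resource to its implementation.

  """
  impl = _GetQueryImplementation(constants.QR_NODE)
  assert impl is _NodeQuery
  return impl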