4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
61 from ganeti import rpc
62 from ganeti import runtime
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
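# Illustrative sketch (not part of the original module): an LU's Exec can hand
# follow-up work back to the master daemon by returning a ResultWithJobs; the
# opcode used below is only an example choice, any list of lists of opcodes
# would do, and extra keyword arguments become additional result values.
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpTestDelay(duration=1.0)]]   # one job, one opcode
#     return ResultWithJobs(jobs, warning="illustration only")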
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
141 # Dicts used to declare locking needs to mcpu
142 self.needed_locks = None
143 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.remove_locks = {}
146 # Used to force good behavior when calling helper functions
147 self.recalculate_locks = {}
149 self.Log = processor.Log # pylint: disable=C0103
150 self.LogWarning = processor.LogWarning # pylint: disable=C0103
151 self.LogInfo = processor.LogInfo # pylint: disable=C0103
152 self.LogStep = processor.LogStep # pylint: disable=C0103
153 # support for dry-run
154 self.dry_run_result = None
155 # support for generic debug attribute
156 if (not hasattr(self.op, "debug_level") or
157 not isinstance(self.op.debug_level, int)):
158 self.op.debug_level = 0
163 # Validate opcode parameters and set defaults
164 self.op.Validate(True)
166 self.CheckArguments()
168 def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments.
171 This method is for doing a simple syntactic check and ensuring the
172 validity of opcode parameters, without any cluster-related
173 checks. While the same can be accomplished in ExpandNames and/or
174 CheckPrereq, doing these separately is better because:
176 - ExpandNames is left as a purely lock-related function
177 - CheckPrereq is run after we have acquired locks (and possible
180 The function is allowed to change the self.op attribute so that
181 later methods no longer need to worry about missing parameters.
186 def ExpandNames(self):
187 """Expand names for this LU.
189 This method is called before starting to execute the opcode, and it should
190 update all the parameters of the opcode to their canonical form (e.g. a
191 short node name must be fully expanded after this method has successfully
192 completed). This way locking, hooks, logging, etc. can work correctly.
194 LUs which implement this method must also populate the self.needed_locks
195 member, as a dict with lock levels as keys, and a list of needed lock names
198 - use an empty dict if you don't need any lock
199 - if you don't need any lock at a particular level omit that
200 level (note that in this case C{DeclareLocks} won't be called
201 at all for that level)
202 - if you need locks at a level, but you can't calculate it in
203 this function, initialise that level with an empty list and do
204 further processing in L{LogicalUnit.DeclareLocks} (see that
205 function's docstring)
206 - don't put anything for the BGL level
207 - if you want all locks at a level use L{locking.ALL_SET} as a value
209 If you need to share locks (rather than acquire them exclusively) at one
210 level you can modify self.share_locks, setting a true value (usually 1) for
211 that level. By default locks are not shared.
213 This function can also define a list of tasklets, which then will be
214 executed in order instead of the usual LU-level CheckPrereq and Exec
215 functions, if those are not defined by the LU.
219 # Acquire all nodes and one instance
220 self.needed_locks = {
221 locking.LEVEL_NODE: locking.ALL_SET,
222 locking.LEVEL_INSTANCE: ['instance1.example.com'],
224 # Acquire just two nodes
225 self.needed_locks = {
226 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
229 self.needed_locks = {} # No, you can't leave it to the default value None
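# A further, hypothetical example (not in the original docstring): acquire
# all node locks, but in shared mode, by also touching self.share_locks
self.needed_locks = {
locking.LEVEL_NODE: locking.ALL_SET,
}
self.share_locks[locking.LEVEL_NODE] = 1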
232 # The implementation of this method is mandatory only if the new LU is
233 # concurrent, so that old LUs don't need to be changed all at the same
236 self.needed_locks = {} # Exclusive LUs don't need locks.
238 raise NotImplementedError
240 def DeclareLocks(self, level):
241 """Declare LU locking needs for a level
243 While most LUs can just declare their locking needs at ExpandNames time,
244 sometimes there's the need to calculate some locks after having acquired
245 the ones before. This function is called just before acquiring locks at a
246 particular level, but after acquiring the ones at lower levels, and permits
247 such calculations. It can be used to modify self.needed_locks, and by
248 default it does nothing.
250 This function is only called if you have something already set in
251 self.needed_locks for the level.
253 @param level: Locking level which is going to be locked
254 @type level: member of L{ganeti.locking.LEVELS}
258 def CheckPrereq(self):
259 """Check prerequisites for this LU.
261 This method should check that the prerequisites for the execution
262 of this LU are fulfilled. It can do internode communication, but
263 it should be idempotent - no cluster or system changes are
266 The method should raise errors.OpPrereqError in case something is
267 not fulfilled. Its return value is ignored.
269 This method should also update all the parameters of the opcode to
270 their canonical form if it hasn't been done by ExpandNames before.
273 if self.tasklets is not None:
274 for (idx, tl) in enumerate(self.tasklets):
275 logging.debug("Checking prerequisites for tasklet %s/%s",
276 idx + 1, len(self.tasklets))
281 def Exec(self, feedback_fn):
284 This method should implement the actual work. It should raise
285 errors.OpExecError for failures that are somewhat dealt with in
289 if self.tasklets is not None:
290 for (idx, tl) in enumerate(self.tasklets):
291 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
294 raise NotImplementedError
296 def BuildHooksEnv(self):
297 """Build hooks environment for this LU.
300 @return: Dictionary containing the environment that will be used for
301 running the hooks for this LU. The keys of the dict must not be prefixed
302 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
303 will extend the environment with additional variables. If no environment
304 should be defined, an empty dictionary should be returned (not C{None}).
305 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
309 raise NotImplementedError
311 def BuildHooksNodes(self):
312 """Build list of nodes to run LU's hooks.
314 @rtype: tuple; (list, list)
315 @return: Tuple containing a list of node names on which the hook
316 should run before the execution and a list of node names on which the
317 hook should run after the execution. If no nodes are to be returned, an
318 empty list must be used (and not None).
319 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
323 raise NotImplementedError
325 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
326 """Notify the LU about the results of its hooks.
328 This method is called every time a hooks phase is executed, and notifies
329 the Logical Unit about the hooks' result. The LU can then use it to alter
330 its result based on the hooks. By default the method does nothing and the
331 previous result is passed back unchanged but any LU can define it if it
332 wants to use the local cluster hook-scripts somehow.
334 @param phase: one of L{constants.HOOKS_PHASE_POST} or
335 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
336 @param hook_results: the results of the multi-node hooks rpc call
337 @param feedback_fn: function used to send feedback back to the caller
338 @param lu_result: the previous Exec result this LU had, or None
340 @return: the new Exec result, based on the previous result
344 # API must be kept, thus we ignore the "unused argument" and "could
345 # be a function" pylint warnings
346 # pylint: disable=W0613,R0201
349 def _ExpandAndLockInstance(self):
350 """Helper function to expand and lock an instance.
352 Many LUs that work on an instance take its name in self.op.instance_name
353 and need to expand it and then declare the expanded name for locking. This
354 function does it, and then updates self.op.instance_name to the expanded
355 name. It also initializes needed_locks as a dict, if this hasn't been done
359 if self.needed_locks is None:
360 self.needed_locks = {}
362 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
363 "_ExpandAndLockInstance called with instance-level locks set"
364 self.op.instance_name = _ExpandInstanceName(self.cfg,
365 self.op.instance_name)
366 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
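# Usage sketch (the LU is hypothetical): instance LUs typically call this
# helper from ExpandNames and then pick up the node locks later, in
# DeclareLocks, through _LockInstancesNodes (defined right below).
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()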
368 def _LockInstancesNodes(self, primary_only=False,
369 level=locking.LEVEL_NODE):
370 """Helper function to declare instances' nodes for locking.
372 This function should be called after locking one or more instances to lock
373 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
374 with all primary or secondary nodes for instances already locked and
375 present in self.needed_locks[locking.LEVEL_INSTANCE].
377 It should be called from DeclareLocks, and for safety only works if
378 self.recalculate_locks[locking.LEVEL_NODE] is set.
380 In the future it may grow parameters to just lock some instance's nodes, or
381 to just lock primary or secondary nodes, if needed.
383 It should be called in DeclareLocks in a way similar to::
385 if level == locking.LEVEL_NODE:
386 self._LockInstancesNodes()
388 @type primary_only: boolean
389 @param primary_only: only lock primary nodes of locked instances
390 @param level: Which lock level to use for locking nodes
393 assert level in self.recalculate_locks, \
394 "_LockInstancesNodes helper function called with no nodes to recalculate"
396 # TODO: check if we've really been called with the instance locks held
398 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
399 # future we might want to have different behaviors depending on the value
400 # of self.recalculate_locks[locking.LEVEL_NODE]
402 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
403 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
404 wanted_nodes.append(instance.primary_node)
406 wanted_nodes.extend(instance.secondary_nodes)
408 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
409 self.needed_locks[level] = wanted_nodes
410 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
411 self.needed_locks[level].extend(wanted_nodes)
413 raise errors.ProgrammerError("Unknown recalculation mode")
415 del self.recalculate_locks[level]
418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
419 """Simple LU which runs no hooks.
421 This LU is intended as a parent for other LogicalUnits which will
422 run no hooks, in order to reduce duplicate code.
428 def BuildHooksEnv(self):
429 """Empty BuildHooksEnv for NoHooksLu.
431 This just raises an error.
434 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
436 def BuildHooksNodes(self):
437 """Empty BuildHooksNodes for NoHooksLU.
440 raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU,
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
463 """Check prerequisites for this tasklet.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or
486 raise NotImplementedError
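# Minimal tasklet sketch (the class and its use are hypothetical, shown only
# to illustrate the contract described above): the owning LU remains
# responsible for locking, the tasklet just implements the two phases.
#
#   class _ExampleTasklet(Tasklet):
#     def __init__(self, lu, instance_name):
#       Tasklet.__init__(self, lu)
#       self.instance_name = instance_name
#
#     def CheckPrereq(self):
#       # idempotent checks only, no cluster or system changes
#       self.instance = self.cfg.GetInstanceInfo(self.instance_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Working on %s" % self.instance.name)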
490 """Base for query utility classes.
493 #: Attribute holding field definitions
499 def __init__(self, qfilter, fields, use_locking):
500 """Initializes this class.
503 self.use_locking = use_locking
505 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
506 namefield=self.SORT_FIELD)
507 self.requested_data = self.query.RequestedData()
508 self.names = self.query.RequestedNames()
510 # Sort only if no names were requested
511 self.sort_by_name = not self.names
513 self.do_locking = None
516 def _GetNames(self, lu, all_names, lock_level):
517 """Helper function to determine names asked for in the query.
521 names = lu.owned_locks(lock_level)
525 if self.wanted == locking.ALL_SET:
526 assert not self.names
527 # caller didn't specify names, so ordering is not important
528 return utils.NiceSort(names)
530 # caller specified names and we must keep the same order
532 assert not self.do_locking or lu.glm.is_owned(lock_level)
534 missing = set(self.wanted).difference(names)
536 raise errors.OpExecError("Some items were removed before retrieving"
537 " their data: %s" % missing)
539 # Return expanded names
542 def ExpandNames(self, lu):
543 """Expand names for this query.
545 See L{LogicalUnit.ExpandNames}.
548 raise NotImplementedError()
550 def DeclareLocks(self, lu, level):
551 """Declare locks for this query.
553 See L{LogicalUnit.DeclareLocks}.
556 raise NotImplementedError()
558 def _GetQueryData(self, lu):
559 """Collects all data for this query.
561 @return: Query data object
564 raise NotImplementedError()
566 def NewStyleQuery(self, lu):
567 """Collect data and execute query.
570 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
571 sort_by_name=self.sort_by_name)
573 def OldStyleQuery(self, lu):
574 """Collect data and execute query.
577 return self.query.OldStyleQuery(self._GetQueryData(lu),
578 sort_by_name=self.sort_by_name)
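# Sketch of how a concrete subclass plugs into this machinery (class name and
# details are hypothetical and heavily simplified): it provides FIELDS plus
# the three abstract methods, and callers drive it via NewStyleQuery or
# OldStyleQuery.
#
#   class _ExampleQuery(_QueryBase):
#     FIELDS = query.NODE_FIELDS          # field definitions to query against
#
#     def ExpandNames(self, lu):
#       lu.needed_locks = {}
#       self.wanted = self.names or locking.ALL_SET
#       self.do_locking = self.use_locking
#
#     def DeclareLocks(self, lu, level):
#       pass
#
#     def _GetQueryData(self, lu):
#       names = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
#       # ... gather whatever data FIELDS needs for "names" and return it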
582 """Returns a dict declaring all lock levels shared.
585 return dict.fromkeys(locking.LEVELS, 1)
588 def _MakeLegacyNodeInfo(data):
589 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
591 Converts the data into a single dictionary. This is fine for most use cases,
592 but some require information from more than one volume group or hypervisor.
595 (bootid, (vg_info, ), (hv_info, )) = data
597 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
602 def _AnnotateDiskParams(instance, devs, cfg):
603 """Little helper wrapper to the rpc annotation method.
605 @param instance: The instance object
606 @type devs: List of L{objects.Disk}
607 @param devs: The root devices (not any of its children!)
608 @param cfg: The config object
609 @return: The annotated disk copies
610 @see L{rpc.AnnotateDiskParams}
613 return rpc.AnnotateDiskParams(instance.disk_template, devs,
614 cfg.GetInstanceDiskParams(instance))
617 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
619 """Checks if node groups for locked instances are still correct.
621 @type cfg: L{config.ConfigWriter}
622 @param cfg: Cluster configuration
623 @type instances: dict; string as key, L{objects.Instance} as value
624 @param instances: Dictionary, instance name as key, instance object as value
625 @type owned_groups: iterable of string
626 @param owned_groups: List of owned groups
627 @type owned_nodes: iterable of string
628 @param owned_nodes: List of owned nodes
629 @type cur_group_uuid: string or None
630 @param cur_group_uuid: Optional group UUID to check against instance's groups
633 for (name, inst) in instances.items():
634 assert owned_nodes.issuperset(inst.all_nodes), \
635 "Instance %s's nodes changed while we kept the lock" % name
637 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
639 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
640 "Instance %s has no node in group %s" % (name, cur_group_uuid)
643 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
644 """Checks if the owned node groups are still correct for an instance.
646 @type cfg: L{config.ConfigWriter}
647 @param cfg: The cluster configuration
648 @type instance_name: string
649 @param instance_name: Instance name
650 @type owned_groups: set or frozenset
651 @param owned_groups: List of currently owned node groups
654 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
656 if not owned_groups.issuperset(inst_groups):
657 raise errors.OpPrereqError("Instance %s's node groups changed since"
658 " locks were acquired, current groups"
659 " are '%s', owning groups '%s'; retry the"
662 utils.CommaJoin(inst_groups),
663 utils.CommaJoin(owned_groups)),
669 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
670 """Checks if the instances in a node group are still correct.
672 @type cfg: L{config.ConfigWriter}
673 @param cfg: The cluster configuration
674 @type group_uuid: string
675 @param group_uuid: Node group UUID
676 @type owned_instances: set or frozenset
677 @param owned_instances: List of currently owned instances
680 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
681 if owned_instances != wanted_instances:
682 raise errors.OpPrereqError("Instances in node group '%s' changed since"
683 " locks were acquired, wanted '%s', have '%s';"
684 " retry the operation" %
686 utils.CommaJoin(wanted_instances),
687 utils.CommaJoin(owned_instances)),
690 return wanted_instances
693 def _SupportsOob(cfg, node):
694 """Tells if node supports OOB.
696 @type cfg: L{config.ConfigWriter}
697 @param cfg: The cluster configuration
698 @type node: L{objects.Node}
699 @param node: The node
700 @return: The OOB script if supported or an empty string otherwise
703 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
706 def _GetWantedNodes(lu, nodes):
707 """Returns list of checked and expanded node names.
709 @type lu: L{LogicalUnit}
710 @param lu: the logical unit on whose behalf we execute
712 @param nodes: list of node names or None for all nodes
714 @return: the list of nodes, sorted
715 @raise errors.ProgrammerError: if the nodes parameter is of a wrong type
719 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
721 return utils.NiceSort(lu.cfg.GetNodeList())
724 def _GetWantedInstances(lu, instances):
725 """Returns list of checked and expanded instance names.
727 @type lu: L{LogicalUnit}
728 @param lu: the logical unit on whose behalf we execute
729 @type instances: list
730 @param instances: list of instance names or None for all instances
732 @return: the list of instances, sorted
733 @raise errors.OpPrereqError: if the instances parameter is of a wrong type
734 @raise errors.OpPrereqError: if any of the passed instances is not found
738 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
740 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
744 def _GetUpdatedParams(old_params, update_dict,
745 use_default=True, use_none=False):
746 """Return the new version of a parameter dictionary.
748 @type old_params: dict
749 @param old_params: old parameters
750 @type update_dict: dict
751 @param update_dict: dict containing new parameter values, or
752 constants.VALUE_DEFAULT to reset the parameter to its default
754 @type use_default: boolean
755 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
756 values as 'to be deleted' values
757 @type use_none: boolean
758 @param use_none: whether to recognise C{None} values as 'to be
761 @return: the new parameter dictionary
764 params_copy = copy.deepcopy(old_params)
765 for key, val in update_dict.iteritems():
766 if ((use_default and val == constants.VALUE_DEFAULT) or
767 (use_none and val is None)):
773 params_copy[key] = val
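# Behaviour sketch (parameter names and values invented for illustration):
# keys set to constants.VALUE_DEFAULT are dropped from the result, everything
# else overrides or extends the old values, and old_params itself is left
# untouched thanks to the deep copy above.
#
#   old = {"mem": 128, "vcpus": 1}
#   upd = {"mem": constants.VALUE_DEFAULT, "vcpus": 2, "boot": True}
#   _GetUpdatedParams(old, upd)    # ==> {"vcpus": 2, "boot": True}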
777 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
778 """Return the new version of an instance policy.
780 @param group_policy: whether this policy applies to a group and thus
781 we should support removal of policy entries
784 use_none = use_default = group_policy
785 ipolicy = copy.deepcopy(old_ipolicy)
786 for key, value in new_ipolicy.items():
787 if key not in constants.IPOLICY_ALL_KEYS:
788 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
790 if key in constants.IPOLICY_ISPECS:
791 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
792 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
794 use_default=use_default)
796 if not value or value == [constants.VALUE_DEFAULT]:
800 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
801 " on the cluster" % key,
804 if key in constants.IPOLICY_PARAMETERS:
805 # FIXME: we assume all such values are float
807 ipolicy[key] = float(value)
808 except (TypeError, ValueError), err:
809 raise errors.OpPrereqError("Invalid value for attribute"
810 " '%s': '%s', error: %s" %
811 (key, value, err), errors.ECODE_INVAL)
813 # FIXME: we assume all others are lists; this should be redone
815 ipolicy[key] = list(value)
817 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
818 except errors.ConfigurationError, err:
819 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
824 def _UpdateAndVerifySubDict(base, updates, type_check):
825 """Updates and verifies a dict with sub dicts of the same type.
827 @param base: The dict with the old data
828 @param updates: The dict with the new data
829 @param type_check: Dict suitable to ForceDictType to verify correct types
830 @return: A new dict with updated and verified values
834 new = _GetUpdatedParams(old, value)
835 utils.ForceDictType(new, type_check)
838 ret = copy.deepcopy(base)
839 ret.update(dict((key, fn(base.get(key, {}), value))
840 for key, value in updates.items()))
844 def _MergeAndVerifyHvState(op_input, obj_input):
845 """Combines the hv state from an opcode with that of the object
847 @param op_input: The input dict from the opcode
848 @param obj_input: The input dict from the objects
849 @return: The verified and updated dict
853 invalid_hvs = set(op_input) - constants.HYPER_TYPES
855 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
856 " %s" % utils.CommaJoin(invalid_hvs),
858 if obj_input is None:
860 type_check = constants.HVSTS_PARAMETER_TYPES
861 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
866 def _MergeAndVerifyDiskState(op_input, obj_input):
867 """Combines the disk state from an opcode with that of the object
869 @param op_input: The input dict from the opcode
870 @param obj_input: The input dict from the objects
871 @return: The verified and updated dict
874 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
876 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
877 utils.CommaJoin(invalid_dst),
879 type_check = constants.DSS_PARAMETER_TYPES
880 if obj_input is None:
882 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
884 for key, value in op_input.items())
889 def _ReleaseLocks(lu, level, names=None, keep=None):
890 """Releases locks owned by an LU.
892 @type lu: L{LogicalUnit}
893 @param level: Lock level
894 @type names: list or None
895 @param names: Names of locks to release
896 @type keep: list or None
897 @param keep: Names of locks to retain
900 assert not (keep is not None and names is not None), \
901 "Only one of the 'names' and the 'keep' parameters can be given"
903 if names is not None:
904 should_release = names.__contains__
906 should_release = lambda name: name not in keep
908 should_release = None
910 owned = lu.owned_locks(level)
912 # Not owning any lock at this level, do nothing
919 # Determine which locks to release
921 if should_release(name):
926 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
928 # Release just some locks
929 lu.glm.release(level, names=release)
931 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
934 lu.glm.release(level)
936 assert not lu.glm.is_owned(level), "No locks should be owned"
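# Usage sketch (the node names are placeholders): an LU that no longer needs
# most of its node locks can either keep an explicit subset or drop the whole
# level.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=["node1.example.com"])
#   _ReleaseLocks(self, locking.LEVEL_NODE)     # release all node locks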
939 def _MapInstanceDisksToNodes(instances):
940 """Creates a map from (node, volume) to instance name.
942 @type instances: list of L{objects.Instance}
943 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
946 return dict(((node, vol), inst.name)
947 for inst in instances
948 for (node, vols) in inst.MapLVsByNode().items()
952 def _RunPostHook(lu, node_name):
953 """Runs the post-hook for an opcode on a single node.
956 hm = lu.proc.BuildHooksManager(lu)
958 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
960 # pylint: disable=W0702
961 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
964 def _CheckOutputFields(static, dynamic, selected):
965 """Checks whether all selected fields are valid.
967 @type static: L{utils.FieldSet}
968 @param static: static fields set
969 @type dynamic: L{utils.FieldSet}
970 @param dynamic: dynamic fields set
977 delta = f.NonMatching(selected)
979 raise errors.OpPrereqError("Unknown output fields selected: %s"
980 % ",".join(delta), errors.ECODE_INVAL)
983 def _CheckGlobalHvParams(params):
984 """Validates that given hypervisor params are not global ones.
986 This will ensure that instances don't get customised versions of
990 used_globals = constants.HVC_GLOBALS.intersection(params)
992 msg = ("The following hypervisor parameters are global and cannot"
993 " be customized at instance level, please modify them at"
994 " cluster level: %s" % utils.CommaJoin(used_globals))
995 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
998 def _CheckNodeOnline(lu, node, msg=None):
999 """Ensure that a given node is online.
1001 @param lu: the LU on behalf of which we make the check
1002 @param node: the node to check
1003 @param msg: if passed, should be a message to replace the default one
1004 @raise errors.OpPrereqError: if the node is offline
1008 msg = "Can't use offline node"
1009 if lu.cfg.GetNodeInfo(node).offline:
1010 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1013 def _CheckNodeNotDrained(lu, node):
1014 """Ensure that a given node is not drained.
1016 @param lu: the LU on behalf of which we make the check
1017 @param node: the node to check
1018 @raise errors.OpPrereqError: if the node is drained
1021 if lu.cfg.GetNodeInfo(node).drained:
1022 raise errors.OpPrereqError("Can't use drained node %s" % node,
1026 def _CheckNodeVmCapable(lu, node):
1027 """Ensure that a given node is vm capable.
1029 @param lu: the LU on behalf of which we make the check
1030 @param node: the node to check
1031 @raise errors.OpPrereqError: if the node is not vm capable
1034 if not lu.cfg.GetNodeInfo(node).vm_capable:
1035 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1039 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1040 """Ensure that a node supports a given OS.
1042 @param lu: the LU on behalf of which we make the check
1043 @param node: the node to check
1044 @param os_name: the OS to query about
1045 @param force_variant: whether to ignore variant errors
1046 @raise errors.OpPrereqError: if the node is not supporting the OS
1049 result = lu.rpc.call_os_get(node, os_name)
1050 result.Raise("OS '%s' not in supported OS list for node %s" %
1052 prereq=True, ecode=errors.ECODE_INVAL)
1053 if not force_variant:
1054 _CheckOSVariant(result.payload, os_name)
1057 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1058 """Ensure that a node has the given secondary ip.
1060 @type lu: L{LogicalUnit}
1061 @param lu: the LU on behalf of which we make the check
1063 @param node: the node to check
1064 @type secondary_ip: string
1065 @param secondary_ip: the ip to check
1066 @type prereq: boolean
1067 @param prereq: whether to throw a prerequisite or an execute error
1068 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1069 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1072 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1073 result.Raise("Failure checking secondary ip on node %s" % node,
1074 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1075 if not result.payload:
1076 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1077 " please fix and re-run this command" % secondary_ip)
1079 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1081 raise errors.OpExecError(msg)
1084 def _GetClusterDomainSecret():
1085 """Reads the cluster domain secret.
1088 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1092 def _CheckInstanceState(lu, instance, req_states, msg=None):
1093 """Ensure that an instance is in one of the required states.
1095 @param lu: the LU on behalf of which we make the check
1096 @param instance: the instance to check
1097 @param msg: if passed, should be a message to replace the default one
1098 @raise errors.OpPrereqError: if the instance is not in the required state
1102 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1103 if instance.admin_state not in req_states:
1104 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1105 (instance.name, instance.admin_state, msg),
1108 if constants.ADMINST_UP not in req_states:
1109 pnode = instance.primary_node
1110 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1111 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1112 prereq=True, ecode=errors.ECODE_ENVIRON)
1114 if instance.name in ins_l.payload:
1115 raise errors.OpPrereqError("Instance %s is running, %s" %
1116 (instance.name, msg), errors.ECODE_STATE)
1119 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1120 """Computes if value is in the desired range.
1122 @param name: name of the parameter for which we perform the check
1123 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1125 @param ipolicy: dictionary containing min, max and std values
1126 @param value: actual value that we want to use
1127 @return: None if the value is within the desired range, an error message otherwise
1131 if value in [None, constants.VALUE_AUTO]:
1133 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1134 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1135 if value > max_v or min_v > value:
1137 fqn = "%s/%s" % (name, qualifier)
1140 return ("%s value %s is not in range [%s, %s]" %
1141 (fqn, value, min_v, max_v))
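# Behaviour sketch (the ipolicy values are invented): the helper returns None
# when the value fits the [min, max] interval and an error string otherwise;
# None and constants.VALUE_AUTO are always accepted.
#
#   ipolicy = {constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
#              constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 4096}}
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, ipolicy, 512)
#     # ==> None (in range)
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "1", ipolicy, 8192)
#     # ==> an error string mentioning the allowed range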
1145 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1146 nic_count, disk_sizes, spindle_use,
1147 _compute_fn=_ComputeMinMaxSpec):
1148 """Verifies ipolicy against provided specs.
1151 @param ipolicy: The ipolicy
1153 @param mem_size: The memory size
1154 @type cpu_count: int
1155 @param cpu_count: Used cpu cores
1156 @type disk_count: int
1157 @param disk_count: Number of disks used
1158 @type nic_count: int
1159 @param nic_count: Number of nics used
1160 @type disk_sizes: list of ints
1161 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1162 @type spindle_use: int
1163 @param spindle_use: The number of spindles this instance uses
1164 @param _compute_fn: The compute function (unittest only)
1165 @return: A list of violations, or an empty list if no violations are found
1168 assert disk_count == len(disk_sizes)
1171 (constants.ISPEC_MEM_SIZE, "", mem_size),
1172 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1173 (constants.ISPEC_DISK_COUNT, "", disk_count),
1174 (constants.ISPEC_NIC_COUNT, "", nic_count),
1175 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1176 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1177 for idx, d in enumerate(disk_sizes)]
1180 (_compute_fn(name, qualifier, ipolicy, value)
1181 for (name, qualifier, value) in test_settings))
1184 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1185 _compute_fn=_ComputeIPolicySpecViolation):
1186 """Compute if instance meets the specs of ipolicy.
1189 @param ipolicy: The ipolicy to verify against
1190 @type instance: L{objects.Instance}
1191 @param instance: The instance to verify
1192 @param _compute_fn: The function to verify ipolicy (unittest only)
1193 @see: L{_ComputeIPolicySpecViolation}
1196 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1197 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1198 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1199 disk_count = len(instance.disks)
1200 disk_sizes = [disk.size for disk in instance.disks]
1201 nic_count = len(instance.nics)
1203 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1204 disk_sizes, spindle_use)
1207 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1208 _compute_fn=_ComputeIPolicySpecViolation):
1209 """Compute if instance specs meet the specs of ipolicy.
1212 @param ipolicy: The ipolicy to verify against
1213 @type instance_spec: dict
1214 @param instance_spec: The instance spec to verify
1215 @param _compute_fn: The function to verify ipolicy (unittest only)
1216 @see: L{_ComputeIPolicySpecViolation}
1219 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1220 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1221 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1222 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1223 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1224 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1226 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1227 disk_sizes, spindle_use)
1230 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1232 _compute_fn=_ComputeIPolicyInstanceViolation):
1233 """Compute if instance meets the specs of the new target group.
1235 @param ipolicy: The ipolicy to verify
1236 @param instance: The instance object to verify
1237 @param current_group: The current group of the instance
1238 @param target_group: The new group of the instance
1239 @param _compute_fn: The function to verify ipolicy (unittest only)
1240 @see: L{_ComputeIPolicySpecViolation}
1243 if current_group == target_group:
1246 return _compute_fn(ipolicy, instance)
1249 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1250 _compute_fn=_ComputeIPolicyNodeViolation):
1251 """Checks that the target node is correct in terms of instance policy.
1253 @param ipolicy: The ipolicy to verify
1254 @param instance: The instance object to verify
1255 @param node: The new node to relocate the instance to
1256 @param ignore: Ignore violations of the ipolicy
1257 @param _compute_fn: The function to verify ipolicy (unittest only)
1258 @see: L{_ComputeIPolicySpecViolation}
1261 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1262 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1265 msg = ("Instance does not meet target node group's (%s) instance"
1266 " policy: %s") % (node.group, utils.CommaJoin(res))
1270 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1273 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1274 """Computes a set of any instances that would violate the new ipolicy.
1276 @param old_ipolicy: The current (still in-place) ipolicy
1277 @param new_ipolicy: The new (to become) ipolicy
1278 @param instances: List of instances to verify
1279 @return: A list of instances which violate the new ipolicy but
1283 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1284 _ComputeViolatingInstances(old_ipolicy, instances))
1287 def _ExpandItemName(fn, name, kind):
1288 """Expand an item name.
1290 @param fn: the function to use for expansion
1291 @param name: requested item name
1292 @param kind: text description ('Node' or 'Instance')
1293 @return: the resolved (full) name
1294 @raise errors.OpPrereqError: if the item is not found
1297 full_name = fn(name)
1298 if full_name is None:
1299 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1304 def _ExpandNodeName(cfg, name):
1305 """Wrapper over L{_ExpandItemName} for nodes."""
1306 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1309 def _ExpandInstanceName(cfg, name):
1310 """Wrapper over L{_ExpandItemName} for instance."""
1311 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1314 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1315 minmem, maxmem, vcpus, nics, disk_template, disks,
1316 bep, hvp, hypervisor_name, tags):
1317 """Builds instance related env variables for hooks
1319 This builds the hook environment from individual variables.
1322 @param name: the name of the instance
1323 @type primary_node: string
1324 @param primary_node: the name of the instance's primary node
1325 @type secondary_nodes: list
1326 @param secondary_nodes: list of secondary nodes as strings
1327 @type os_type: string
1328 @param os_type: the name of the instance's OS
1329 @type status: string
1330 @param status: the desired status of the instance
1331 @type minmem: string
1332 @param minmem: the minimum memory size of the instance
1333 @type maxmem: string
1334 @param maxmem: the maximum memory size of the instance
1336 @param vcpus: the count of VCPUs the instance has
1338 @param nics: list of tuples (ip, mac, mode, link) representing
1339 the NICs the instance has
1340 @type disk_template: string
1341 @param disk_template: the disk template of the instance
1343 @param disks: the list of (size, mode) pairs
1345 @param bep: the backend parameters for the instance
1347 @param hvp: the hypervisor parameters for the instance
1348 @type hypervisor_name: string
1349 @param hypervisor_name: the hypervisor for the instance
1351 @param tags: list of instance tags as strings
1353 @return: the hook environment for this instance
1358 "INSTANCE_NAME": name,
1359 "INSTANCE_PRIMARY": primary_node,
1360 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1361 "INSTANCE_OS_TYPE": os_type,
1362 "INSTANCE_STATUS": status,
1363 "INSTANCE_MINMEM": minmem,
1364 "INSTANCE_MAXMEM": maxmem,
1365 # TODO(2.7) remove deprecated "memory" value
1366 "INSTANCE_MEMORY": maxmem,
1367 "INSTANCE_VCPUS": vcpus,
1368 "INSTANCE_DISK_TEMPLATE": disk_template,
1369 "INSTANCE_HYPERVISOR": hypervisor_name,
1372 nic_count = len(nics)
1373 for idx, (ip, mac, mode, link) in enumerate(nics):
1376 env["INSTANCE_NIC%d_IP" % idx] = ip
1377 env["INSTANCE_NIC%d_MAC" % idx] = mac
1378 env["INSTANCE_NIC%d_MODE" % idx] = mode
1379 env["INSTANCE_NIC%d_LINK" % idx] = link
1380 if mode == constants.NIC_MODE_BRIDGED:
1381 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1385 env["INSTANCE_NIC_COUNT"] = nic_count
1388 disk_count = len(disks)
1389 for idx, (size, mode) in enumerate(disks):
1390 env["INSTANCE_DISK%d_SIZE" % idx] = size
1391 env["INSTANCE_DISK%d_MODE" % idx] = mode
1395 env["INSTANCE_DISK_COUNT"] = disk_count
1400 env["INSTANCE_TAGS"] = " ".join(tags)
1402 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1403 for key, value in source.items():
1404 env["INSTANCE_%s_%s" % (kind, key)] = value
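# Illustration of the resulting environment (values invented): for an
# instance with one bridged NIC and one disk the dictionary ends up with keys
# along the lines of
#   INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_OS_TYPE, INSTANCE_NIC_COUNT,
#   INSTANCE_NIC0_MAC, INSTANCE_NIC0_MODE, INSTANCE_NIC0_BRIDGE,
#   INSTANCE_DISK_COUNT, INSTANCE_DISK0_SIZE, INSTANCE_DISK0_MODE,
#   INSTANCE_BE_<param> and INSTANCE_HV_<param>
# and the hooks runner later prefixes every key with "GANETI_".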
1409 def _NICListToTuple(lu, nics):
1410 """Build a list of nic information tuples.
1412 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1413 value in LUInstanceQueryData.
1415 @type lu: L{LogicalUnit}
1416 @param lu: the logical unit on whose behalf we execute
1417 @type nics: list of L{objects.NIC}
1418 @param nics: list of nics to convert to hooks tuples
1422 cluster = lu.cfg.GetClusterInfo()
1426 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1427 mode = filled_params[constants.NIC_MODE]
1428 link = filled_params[constants.NIC_LINK]
1429 hooks_nics.append((ip, mac, mode, link))
1433 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1434 """Builds instance related env variables for hooks from an object.
1436 @type lu: L{LogicalUnit}
1437 @param lu: the logical unit on whose behalf we execute
1438 @type instance: L{objects.Instance}
1439 @param instance: the instance for which we should build the
1441 @type override: dict
1442 @param override: dictionary with key/values that will override
1445 @return: the hook environment dictionary
1448 cluster = lu.cfg.GetClusterInfo()
1449 bep = cluster.FillBE(instance)
1450 hvp = cluster.FillHV(instance)
1452 "name": instance.name,
1453 "primary_node": instance.primary_node,
1454 "secondary_nodes": instance.secondary_nodes,
1455 "os_type": instance.os,
1456 "status": instance.admin_state,
1457 "maxmem": bep[constants.BE_MAXMEM],
1458 "minmem": bep[constants.BE_MINMEM],
1459 "vcpus": bep[constants.BE_VCPUS],
1460 "nics": _NICListToTuple(lu, instance.nics),
1461 "disk_template": instance.disk_template,
1462 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1465 "hypervisor_name": instance.hypervisor,
1466 "tags": instance.tags,
1469 args.update(override)
1470 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1473 def _AdjustCandidatePool(lu, exceptions):
1474 """Adjust the candidate pool after node operations.
1477 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1479 lu.LogInfo("Promoted nodes to master candidate role: %s",
1480 utils.CommaJoin(node.name for node in mod_list))
1481 for name in mod_list:
1482 lu.context.ReaddNode(name)
1483 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1485 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1489 def _DecideSelfPromotion(lu, exceptions=None):
1490 """Decide whether I should promote myself as a master candidate.
1493 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1494 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1495 # the new node will increase mc_max with one, so:
1496 mc_should = min(mc_should + 1, cp_size)
1497 return mc_now < mc_should
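# Worked example (numbers invented): with candidate_pool_size = 10, mc_now = 3
# and mc_should = 5 as returned by GetMasterCandidateStats, the new node bumps
# mc_should to min(5 + 1, 10) = 6; since mc_now (3) < mc_should (6), the node
# promotes itself to master candidate.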
1500 def _CalculateGroupIPolicy(cluster, group):
1501 """Calculate instance policy for group.
1504 return cluster.SimpleFillIPolicy(group.ipolicy)
1507 def _ComputeViolatingInstances(ipolicy, instances):
1508 """Computes the set of instances that violate the given ipolicy.
1510 @param ipolicy: The ipolicy to verify
1511 @type instances: list of L{objects.Instance}
1512 @param instances: List of instances to verify
1513 @return: A frozenset of instance names violating the ipolicy
1516 return frozenset([inst.name for inst in instances
1517 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1520 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1521 """Check that the bridges needed by a list of nics exist.
1524 cluster = lu.cfg.GetClusterInfo()
1525 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1526 brlist = [params[constants.NIC_LINK] for params in paramslist
1527 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1529 result = lu.rpc.call_bridges_exist(target_node, brlist)
1530 result.Raise("Error checking bridges on destination node '%s'" %
1531 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1534 def _CheckInstanceBridgesExist(lu, instance, node=None):
1535 """Check that the bridges needed by an instance exist.
1539 node = instance.primary_node
1540 _CheckNicsBridgesExist(lu, instance.nics, node)
1543 def _CheckOSVariant(os_obj, name):
1544 """Check whether an OS name conforms to the os variants specification.
1546 @type os_obj: L{objects.OS}
1547 @param os_obj: OS object to check
1549 @param name: OS name passed by the user, to check for validity
1552 variant = objects.OS.GetVariant(name)
1553 if not os_obj.supported_variants:
1555 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1556 " passed)" % (os_obj.name, variant),
1560 raise errors.OpPrereqError("OS name must include a variant",
1563 if variant not in os_obj.supported_variants:
1564 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1567 def _GetNodeInstancesInner(cfg, fn):
1568 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1571 def _GetNodeInstances(cfg, node_name):
1572 """Returns a list of all primary and secondary instances on a node.
1576 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1579 def _GetNodePrimaryInstances(cfg, node_name):
1580 """Returns primary instances on a node.
1583 return _GetNodeInstancesInner(cfg,
1584 lambda inst: node_name == inst.primary_node)
1587 def _GetNodeSecondaryInstances(cfg, node_name):
1588 """Returns secondary instances on a node.
1591 return _GetNodeInstancesInner(cfg,
1592 lambda inst: node_name in inst.secondary_nodes)
1595 def _GetStorageTypeArgs(cfg, storage_type):
1596 """Returns the arguments for a storage type.
1599 # Special case for file storage
1600 if storage_type == constants.ST_FILE:
1601 # storage.FileStorage wants a list of storage directories
1602 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1607 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1610 for dev in instance.disks:
1611 cfg.SetDiskID(dev, node_name)
1613 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1614 result.Raise("Failed to get disk status from node %s" % node_name,
1615 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1617 for idx, bdev_status in enumerate(result.payload):
1618 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1624 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1625 """Check the sanity of iallocator and node arguments and use the
1626 cluster-wide iallocator if appropriate.
1628 Check that at most one of (iallocator, node) is specified. If none is
1629 specified, then the LU's opcode's iallocator slot is filled with the
1630 cluster-wide default iallocator.
1632 @type iallocator_slot: string
1633 @param iallocator_slot: the name of the opcode iallocator slot
1634 @type node_slot: string
1635 @param node_slot: the name of the opcode target node slot
1638 node = getattr(lu.op, node_slot, None)
1639 iallocator = getattr(lu.op, iallocator_slot, None)
1641 if node is not None and iallocator is not None:
1642 raise errors.OpPrereqError("Do not specify both an iallocator and a node",
1644 elif node is None and iallocator is None:
1645 default_iallocator = lu.cfg.GetDefaultIAllocator()
1646 if default_iallocator:
1647 setattr(lu.op, iallocator_slot, default_iallocator)
1649 raise errors.OpPrereqError("No iallocator or node given and no"
1650 " cluster-wide default iallocator found;"
1651 " please specify either an iallocator or a"
1652 " node, or set a cluster-wide default"
1656 def _GetDefaultIAllocator(cfg, iallocator):
1657 """Decides on which iallocator to use.
1659 @type cfg: L{config.ConfigWriter}
1660 @param cfg: Cluster configuration object
1661 @type iallocator: string or None
1662 @param iallocator: Iallocator specified in opcode
1664 @return: Iallocator name
1668 # Use default iallocator
1669 iallocator = cfg.GetDefaultIAllocator()
1672 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1673 " opcode nor as a cluster-wide default",
1679 class LUClusterPostInit(LogicalUnit):
1680 """Logical unit for running hooks after cluster initialization.
1683 HPATH = "cluster-init"
1684 HTYPE = constants.HTYPE_CLUSTER
1686 def BuildHooksEnv(self):
1691 "OP_TARGET": self.cfg.GetClusterName(),
1694 def BuildHooksNodes(self):
1695 """Build hooks nodes.
1698 return ([], [self.cfg.GetMasterNode()])
1700 def Exec(self, feedback_fn):
1707 class LUClusterDestroy(LogicalUnit):
1708 """Logical unit for destroying the cluster.
1711 HPATH = "cluster-destroy"
1712 HTYPE = constants.HTYPE_CLUSTER
1714 def BuildHooksEnv(self):
1719 "OP_TARGET": self.cfg.GetClusterName(),
1722 def BuildHooksNodes(self):
1723 """Build hooks nodes.
1728 def CheckPrereq(self):
1729 """Check prerequisites.
1731 This checks whether the cluster is empty.
1733 Any errors are signaled by raising errors.OpPrereqError.
1736 master = self.cfg.GetMasterNode()
1738 nodelist = self.cfg.GetNodeList()
1739 if len(nodelist) != 1 or nodelist[0] != master:
1740 raise errors.OpPrereqError("There are still %d node(s) in"
1741 " this cluster." % (len(nodelist) - 1),
1743 instancelist = self.cfg.GetInstanceList()
1745 raise errors.OpPrereqError("There are still %d instance(s) in"
1746 " this cluster." % len(instancelist),
1749 def Exec(self, feedback_fn):
1750 """Destroys the cluster.
1753 master_params = self.cfg.GetMasterNetworkParameters()
1755 # Run post hooks on master node before it's removed
1756 _RunPostHook(self, master_params.name)
1758 ems = self.cfg.GetUseExternalMipScript()
1759 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1762 self.LogWarning("Error disabling the master IP address: %s",
1765 return master_params.name
1768 def _VerifyCertificate(filename):
1769 """Verifies a certificate for L{LUClusterVerifyConfig}.
1771 @type filename: string
1772 @param filename: Path to PEM file
1776 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1777 utils.ReadFile(filename))
1778 except Exception, err: # pylint: disable=W0703
1779 return (LUClusterVerifyConfig.ETYPE_ERROR,
1780 "Failed to load X509 certificate %s: %s" % (filename, err))
1783 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1784 constants.SSL_CERT_EXPIRATION_ERROR)
1787 fnamemsg = "While verifying %s: %s" % (filename, msg)
1792 return (None, fnamemsg)
1793 elif errcode == utils.CERT_WARNING:
1794 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1795 elif errcode == utils.CERT_ERROR:
1796 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1798 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1801 def _GetAllHypervisorParameters(cluster, instances):
1802 """Compute the set of all hypervisor parameters.
1804 @type cluster: L{objects.Cluster}
1805 @param cluster: the cluster object
1806 @type instances: list of L{objects.Instance}
1807 @param instances: additional instances from which to obtain parameters
1808 @rtype: list of (origin, hypervisor, parameters)
1809 @return: a list with all parameters found, indicating the hypervisor they
1810 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1815 for hv_name in cluster.enabled_hypervisors:
1816 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1818 for os_name, os_hvp in cluster.os_hvp.items():
1819 for hv_name, hv_params in os_hvp.items():
1821 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1822 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1824 # TODO: collapse identical parameter values into a single one
1825 for instance in instances:
1826 if instance.hvparams:
1827 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1828 cluster.FillHV(instance)))
1833 class _VerifyErrors(object):
1834 """Mix-in for cluster/group verify LUs.
1836 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1837 self.op and self._feedback_fn to be available.)
1841 ETYPE_FIELD = "code"
1842 ETYPE_ERROR = "ERROR"
1843 ETYPE_WARNING = "WARNING"
1845 def _Error(self, ecode, item, msg, *args, **kwargs):
1846 """Format an error message.
1848 Based on the opcode's error_codes parameter, either format a
1849 parseable error code, or a simpler error string.
1851 This must be called only from Exec and functions called from Exec.
1854 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1855 itype, etxt, _ = ecode
1856 # first complete the msg
1859 # then format the whole message
1860 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1861 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1867 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1868 # and finally report it via the feedback_fn
1869 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1871 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1872 """Log an error message if the passed condition is True.
1876 or self.op.debug_simulate_errors) # pylint: disable=E1101
1878 # If the error code is in the list of ignored errors, demote the error to a
1880 (_, etxt, _) = ecode
1881 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1882 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1885 self._Error(ecode, *args, **kwargs)
1887 # do not mark the operation as failed for WARN cases only
1888 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1889 self.bad = self.bad or cond
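# Usage sketch from within a verify LU (condition, error code and message are
# illustrative only): the mix-in formats and reports the problem, and only
# ERROR-level entries flip self.bad.
#
#   self._ErrorIf(node_image.rpc_fail, constants.CV_ENODERPC, node.name,
#                 "error while contacting node")
#
# If the error's code is listed in self.op.ignore_errors it is demoted to a
# WARNING and does not mark the operation as failed.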
1892 class LUClusterVerify(NoHooksLU):
1893 """Submits all jobs necessary to verify the cluster.
1898 def ExpandNames(self):
1899 self.needed_locks = {}
1901 def Exec(self, feedback_fn):
1904 if self.op.group_name:
1905 groups = [self.op.group_name]
1906 depends_fn = lambda: None
1908 groups = self.cfg.GetNodeGroupList()
1910 # Verify global configuration
1912 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1915 # Always depend on global verification
1916 depends_fn = lambda: [(-len(jobs), [])]
1918 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1919 ignore_errors=self.op.ignore_errors,
1920 depends=depends_fn())]
1921 for group in groups)
1923 # Fix up all parameters
1924 for op in itertools.chain(*jobs): # pylint: disable=W0142
1925 op.debug_simulate_errors = self.op.debug_simulate_errors
1926 op.verbose = self.op.verbose
1927 op.error_codes = self.op.error_codes
1929 op.skip_checks = self.op.skip_checks
1930 except AttributeError:
1931 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1933 return ResultWithJobs(jobs)
1936 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1937 """Verifies the cluster config.
1942 def _VerifyHVP(self, hvp_data):
1943 """Verifies locally the syntax of the hypervisor parameters.
1946 for item, hv_name, hv_params in hvp_data:
1947 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1950 hv_class = hypervisor.GetHypervisor(hv_name)
1951 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1952 hv_class.CheckParameterSyntax(hv_params)
1953 except errors.GenericError, err:
1954 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1956 def ExpandNames(self):
1957 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1958 self.share_locks = _ShareAll()
1960 def CheckPrereq(self):
1961 """Check prerequisites.
1964 # Retrieve all information
1965 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1966 self.all_node_info = self.cfg.GetAllNodesInfo()
1967 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1969 def Exec(self, feedback_fn):
1970     """Verify integrity of cluster, performing various tests on nodes.
1974 self._feedback_fn = feedback_fn
1976 feedback_fn("* Verifying cluster config")
1978 for msg in self.cfg.VerifyConfig():
1979 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1981 feedback_fn("* Verifying cluster certificate files")
1983 for cert_filename in constants.ALL_CERT_FILES:
1984 (errcode, msg) = _VerifyCertificate(cert_filename)
1985 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1987 feedback_fn("* Verifying hypervisor parameters")
1989 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1990 self.all_inst_info.values()))
1992 feedback_fn("* Verifying all nodes belong to an existing group")
1994 # We do this verification here because, should this bogus circumstance
1995 # occur, it would never be caught by VerifyGroup, which only acts on
1996 # nodes/instances reachable from existing node groups.
1998 dangling_nodes = set(node.name for node in self.all_node_info.values()
1999 if node.group not in self.all_group_info)
2001 dangling_instances = {}
2002 no_node_instances = []
2004 for inst in self.all_inst_info.values():
2005 if inst.primary_node in dangling_nodes:
2006 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2007 elif inst.primary_node not in self.all_node_info:
2008 no_node_instances.append(inst.name)
2013 utils.CommaJoin(dangling_instances.get(node.name,
2015 for node in dangling_nodes]
2017 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2019 "the following nodes (and their instances) belong to a non"
2020 " existing group: %s", utils.CommaJoin(pretty_dangling))
2022 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2024 "the following instances have a non-existing primary-node:"
2025 " %s", utils.CommaJoin(no_node_instances))
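  # Editor's note: an illustrative sketch (assumption, not original code).
  # With a hypothetical node "orphan1" whose group was deleted from the
  # configuration and an instance "inst1" primary on it, the checks above
  # report roughly:
  #
  #   CV_ECLUSTERDANGLINGNODES: nodes (and their instances) in a non-existing
  #                             group, e.g. "orphan1 (inst1)"
  #   CV_ECLUSTERDANGLINGINST:  instances whose primary node is not in the
  #                             configuration at all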
2030 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2031 """Verifies the status of a node group.
2034 HPATH = "cluster-verify"
2035 HTYPE = constants.HTYPE_CLUSTER
2038 _HOOKS_INDENT_RE = re.compile("^", re.M)
2040 class NodeImage(object):
2041 """A class representing the logical and physical status of a node.
2044 @ivar name: the node name to which this object refers
2045 @ivar volumes: a structure as returned from
2046 L{ganeti.backend.GetVolumeList} (runtime)
2047 @ivar instances: a list of running instances (runtime)
2048 @ivar pinst: list of configured primary instances (config)
2049 @ivar sinst: list of configured secondary instances (config)
2050 @ivar sbp: dictionary of {primary-node: list of instances} for all
2051 instances for which this node is secondary (config)
2052 @ivar mfree: free memory, as reported by hypervisor (runtime)
2053 @ivar dfree: free disk, as reported by the node (runtime)
2054 @ivar offline: the offline status (config)
2055 @type rpc_fail: boolean
2056   @ivar rpc_fail: whether the RPC verify call failed (overall,
2057 not whether the individual keys were correct) (runtime)
2058 @type lvm_fail: boolean
2059 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2060 @type hyp_fail: boolean
2061 @ivar hyp_fail: whether the RPC call didn't return the instance list
2062 @type ghost: boolean
2063 @ivar ghost: whether this is a known node or not (config)
2064 @type os_fail: boolean
2065 @ivar os_fail: whether the RPC call didn't return valid OS data
2067 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2068 @type vm_capable: boolean
2069 @ivar vm_capable: whether the node can host instances
2072 def __init__(self, offline=False, name=None, vm_capable=True):
2081 self.offline = offline
2082 self.vm_capable = vm_capable
2083 self.rpc_fail = False
2084 self.lvm_fail = False
2085 self.hyp_fail = False
2087 self.os_fail = False
2090 def ExpandNames(self):
2091 # This raises errors.OpPrereqError on its own:
2092 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2094 # Get instances in node group; this is unsafe and needs verification later
2096 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2098 self.needed_locks = {
2099 locking.LEVEL_INSTANCE: inst_names,
2100 locking.LEVEL_NODEGROUP: [self.group_uuid],
2101 locking.LEVEL_NODE: [],
2104 self.share_locks = _ShareAll()
2106 def DeclareLocks(self, level):
2107 if level == locking.LEVEL_NODE:
2108 # Get members of node group; this is unsafe and needs verification later
2109 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2111 all_inst_info = self.cfg.GetAllInstancesInfo()
2113 # In Exec(), we warn about mirrored instances that have primary and
2114 # secondary living in separate node groups. To fully verify that
2115 # volumes for these instances are healthy, we will need to do an
2116       # extra call to their secondaries. We ensure here those nodes will
      # be locked.
2118 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2119 # Important: access only the instances whose lock is owned
2120 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2121 nodes.update(all_inst_info[inst].secondary_nodes)
2123 self.needed_locks[locking.LEVEL_NODE] = nodes
2125 def CheckPrereq(self):
2126 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2127 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2129 group_nodes = set(self.group_info.members)
2131 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2134 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2136 unlocked_instances = \
2137 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2140 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2141 utils.CommaJoin(unlocked_nodes),
2144 if unlocked_instances:
2145 raise errors.OpPrereqError("Missing lock for instances: %s" %
2146 utils.CommaJoin(unlocked_instances),
2149 self.all_node_info = self.cfg.GetAllNodesInfo()
2150 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2152 self.my_node_names = utils.NiceSort(group_nodes)
2153 self.my_inst_names = utils.NiceSort(group_instances)
2155 self.my_node_info = dict((name, self.all_node_info[name])
2156 for name in self.my_node_names)
2158 self.my_inst_info = dict((name, self.all_inst_info[name])
2159 for name in self.my_inst_names)
2161 # We detect here the nodes that will need the extra RPC calls for verifying
2162 # split LV volumes; they should be locked.
2163 extra_lv_nodes = set()
2165 for inst in self.my_inst_info.values():
2166 if inst.disk_template in constants.DTS_INT_MIRROR:
2167 for nname in inst.all_nodes:
2168 if self.all_node_info[nname].group != self.group_uuid:
2169 extra_lv_nodes.add(nname)
2171 unlocked_lv_nodes = \
2172 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2174 if unlocked_lv_nodes:
2175 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2176 utils.CommaJoin(unlocked_lv_nodes),
2178 self.extra_lv_nodes = list(extra_lv_nodes)
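  # Editor's note: an illustrative sketch (assumption, not original code) of
  # why extra_lv_nodes exists. For a hypothetical DRBD instance with
  #
  #   inst.disk_template in constants.DTS_INT_MIRROR
  #   inst.all_nodes == ["node-in-this-group", "node-in-other-group"]
  #
  # the secondary "node-in-other-group" is added to self.extra_lv_nodes so its
  # LVs can still be queried when the mirror is verified; it must already be
  # locked (see DeclareLocks), otherwise CheckPrereq raises OpPrereqError.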
2180 def _VerifyNode(self, ninfo, nresult):
2181 """Perform some basic validation on data returned from a node.
2183 - check the result data structure is well formed and has all the
2185 - check ganeti version
2187 @type ninfo: L{objects.Node}
2188 @param ninfo: the node to check
2189 @param nresult: the results from the node
2191 @return: whether overall this call was successful (and we can expect
2192         reasonable values in the response)
2196 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2198 # main result, nresult should be a non-empty dict
2199 test = not nresult or not isinstance(nresult, dict)
2200 _ErrorIf(test, constants.CV_ENODERPC, node,
2201 "unable to verify node: no data returned")
2205 # compares ganeti version
2206 local_version = constants.PROTOCOL_VERSION
2207 remote_version = nresult.get("version", None)
2208 test = not (remote_version and
2209 isinstance(remote_version, (list, tuple)) and
2210 len(remote_version) == 2)
2211 _ErrorIf(test, constants.CV_ENODERPC, node,
2212 "connection to node returned invalid data")
2216 test = local_version != remote_version[0]
2217 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2218 "incompatible protocol versions: master %s,"
2219 " node %s", local_version, remote_version[0])
2223 # node seems compatible, we can actually try to look into its results
2225 # full package version
2226 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2227 constants.CV_ENODEVERSION, node,
2228 "software version mismatch: master %s, node %s",
2229 constants.RELEASE_VERSION, remote_version[1],
2230 code=self.ETYPE_WARNING)
2232 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2233 if ninfo.vm_capable and isinstance(hyp_result, dict):
2234 for hv_name, hv_result in hyp_result.iteritems():
2235 test = hv_result is not None
2236 _ErrorIf(test, constants.CV_ENODEHV, node,
2237 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2239 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2240 if ninfo.vm_capable and isinstance(hvp_result, list):
2241 for item, hv_name, hv_result in hvp_result:
2242 _ErrorIf(True, constants.CV_ENODEHV, node,
2243 "hypervisor %s parameter verify failure (source %s): %s",
2244 hv_name, item, hv_result)
2246 test = nresult.get(constants.NV_NODESETUP,
2247 ["Missing NODESETUP results"])
2248 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2253 def _VerifyNodeTime(self, ninfo, nresult,
2254 nvinfo_starttime, nvinfo_endtime):
2255 """Check the node time.
2257 @type ninfo: L{objects.Node}
2258 @param ninfo: the node to check
2259 @param nresult: the remote results for the node
2260 @param nvinfo_starttime: the start time of the RPC call
2261 @param nvinfo_endtime: the end time of the RPC call
2265 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2267 ntime = nresult.get(constants.NV_TIME, None)
2269 ntime_merged = utils.MergeTime(ntime)
2270 except (ValueError, TypeError):
2271 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2274 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2275 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2276 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2277 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2281 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2282 "Node time diverges by at least %s from master node time",
2285 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2286 """Check the node LVM results.
2288 @type ninfo: L{objects.Node}
2289 @param ninfo: the node to check
2290 @param nresult: the remote results for the node
2291 @param vg_name: the configured VG name
2298 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2300 # checks vg existence and size > 20G
2301 vglist = nresult.get(constants.NV_VGLIST, None)
2303 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2305 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2306 constants.MIN_VG_SIZE)
2307 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2310 pvlist = nresult.get(constants.NV_PVLIST, None)
2311 test = pvlist is None
2312 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2314 # check that ':' is not present in PV names, since it's a
2315 # special character for lvcreate (denotes the range of PEs to
2317 for _, pvname, owner_vg in pvlist:
2318 test = ":" in pvname
2319 _ErrorIf(test, constants.CV_ENODELVM, node,
2320 "Invalid character ':' in PV '%s' of VG '%s'",
2323 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2324 """Check the node bridges.
2326 @type ninfo: L{objects.Node}
2327 @param ninfo: the node to check
2328 @param nresult: the remote results for the node
2329 @param bridges: the expected list of bridges
2336 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2338 missing = nresult.get(constants.NV_BRIDGES, None)
2339 test = not isinstance(missing, list)
2340 _ErrorIf(test, constants.CV_ENODENET, node,
2341 "did not return valid bridge information")
2343 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2344 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2346 def _VerifyNodeUserScripts(self, ninfo, nresult):
2347     """Check the results of user script presence and executability on the node
2349 @type ninfo: L{objects.Node}
2350 @param ninfo: the node to check
2351 @param nresult: the remote results for the node
2356     test = constants.NV_USERSCRIPTS not in nresult
2357 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2358 "did not return user scripts information")
2360 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2362 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2363 "user scripts not present or not executable: %s" %
2364 utils.CommaJoin(sorted(broken_scripts)))
2366 def _VerifyNodeNetwork(self, ninfo, nresult):
2367 """Check the node network connectivity results.
2369 @type ninfo: L{objects.Node}
2370 @param ninfo: the node to check
2371 @param nresult: the remote results for the node
2375 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2377 test = constants.NV_NODELIST not in nresult
2378 _ErrorIf(test, constants.CV_ENODESSH, node,
2379 "node hasn't returned node ssh connectivity data")
2381 if nresult[constants.NV_NODELIST]:
2382 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2383 _ErrorIf(True, constants.CV_ENODESSH, node,
2384 "ssh communication with node '%s': %s", a_node, a_msg)
2386 test = constants.NV_NODENETTEST not in nresult
2387 _ErrorIf(test, constants.CV_ENODENET, node,
2388 "node hasn't returned node tcp connectivity data")
2390 if nresult[constants.NV_NODENETTEST]:
2391 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2393 _ErrorIf(True, constants.CV_ENODENET, node,
2394 "tcp communication with node '%s': %s",
2395 anode, nresult[constants.NV_NODENETTEST][anode])
2397 test = constants.NV_MASTERIP not in nresult
2398 _ErrorIf(test, constants.CV_ENODENET, node,
2399 "node hasn't returned node master IP reachability data")
2401 if not nresult[constants.NV_MASTERIP]:
2402 if node == self.master_node:
2403 msg = "the master node cannot reach the master IP (not configured?)"
2405 msg = "cannot reach the master IP"
2406 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2408 def _VerifyInstance(self, instance, instanceconfig, node_image,
2410 """Verify an instance.
2412 This function checks to see if the required block devices are
2413 available on the instance's node.
2416 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2417 node_current = instanceconfig.primary_node
2419 node_vol_should = {}
2420 instanceconfig.MapLVsByNode(node_vol_should)
2422 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2423 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2424 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2426 for node in node_vol_should:
2427 n_img = node_image[node]
2428 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2429 # ignore missing volumes on offline or broken nodes
2431 for volume in node_vol_should[node]:
2432 test = volume not in n_img.volumes
2433 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2434 "volume %s missing on node %s", volume, node)
2436 if instanceconfig.admin_state == constants.ADMINST_UP:
2437 pri_img = node_image[node_current]
2438 test = instance not in pri_img.instances and not pri_img.offline
2439 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2440 "instance not running on its primary node %s",
2443 diskdata = [(nname, success, status, idx)
2444 for (nname, disks) in diskstatus.items()
2445 for idx, (success, status) in enumerate(disks)]
2447 for nname, success, bdev_status, idx in diskdata:
2448 # the 'ghost node' construction in Exec() ensures that we have a
2450 snode = node_image[nname]
2451 bad_snode = snode.ghost or snode.offline
2452 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2453 not success and not bad_snode,
2454 constants.CV_EINSTANCEFAULTYDISK, instance,
2455 "couldn't retrieve status for disk/%s on %s: %s",
2456 idx, nname, bdev_status)
2457 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2458 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2459 constants.CV_EINSTANCEFAULTYDISK, instance,
2460 "disk/%s on %s is faulty", idx, nname)
2462 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2463 """Verify if there are any unknown volumes in the cluster.
2465 The .os, .swap and backup volumes are ignored. All other volumes are
2466 reported as unknown.
2468 @type reserved: L{ganeti.utils.FieldSet}
2469 @param reserved: a FieldSet of reserved volume names
2472 for node, n_img in node_image.items():
2473 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2474 self.all_node_info[node].group != self.group_uuid):
2475 # skip non-healthy nodes
2477 for volume in n_img.volumes:
2478 test = ((node not in node_vol_should or
2479 volume not in node_vol_should[node]) and
2480 not reserved.Matches(volume))
2481 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2482 "volume %s is unknown", volume)
2484 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2485 """Verify N+1 Memory Resilience.
2487 Check that if one single node dies we can still start all the
2488 instances it was primary for.
2491 cluster_info = self.cfg.GetClusterInfo()
2492 for node, n_img in node_image.items():
2493 # This code checks that every node which is now listed as
2494 # secondary has enough memory to host all instances it is
2495       # supposed to, should a single other node in the cluster fail.
2496 # FIXME: not ready for failover to an arbitrary node
2497 # FIXME: does not support file-backed instances
2498 # WARNING: we currently take into account down instances as well
2499 # as up ones, considering that even if they're down someone
2500 # might want to start them even in the event of a node failure.
2501 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2502 # we're skipping nodes marked offline and nodes in other groups from
2503 # the N+1 warning, since most likely we don't have good memory
2504       # information from them; we already list instances living on such
2505 # nodes, and that's enough warning
2507 #TODO(dynmem): also consider ballooning out other instances
2508 for prinode, instances in n_img.sbp.items():
2510 for instance in instances:
2511 bep = cluster_info.FillBE(instance_cfg[instance])
2512 if bep[constants.BE_AUTO_BALANCE]:
2513 needed_mem += bep[constants.BE_MINMEM]
2514 test = n_img.mfree < needed_mem
2515 self._ErrorIf(test, constants.CV_ENODEN1, node,
2516                       "not enough memory to accommodate instance failovers"
2517 " should node %s fail (%dMiB needed, %dMiB available)",
2518 prinode, needed_mem, n_img.mfree)
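  # Editor's note: an illustrative sketch (assumption, not original code) of
  # the N+1 computation above for a hypothetical secondary node: the memory it
  # must provide if primary node "nodeA" fails is
  #
  #   needed_mem = sum(bep[constants.BE_MINMEM]
  #                    for bep in (cluster_info.FillBE(instance_cfg[name])
  #                                for name in n_img.sbp["nodeA"])
  #                    if bep[constants.BE_AUTO_BALANCE])
  #
  # and CV_ENODEN1 is reported when n_img.mfree < needed_mem.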
2521 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2522 (files_all, files_opt, files_mc, files_vm)):
2523 """Verifies file checksums collected from all nodes.
2525 @param errorif: Callback for reporting errors
2526 @param nodeinfo: List of L{objects.Node} objects
2527 @param master_node: Name of master node
2528 @param all_nvinfo: RPC results
2531 # Define functions determining which nodes to consider for a file
2534 (files_mc, lambda node: (node.master_candidate or
2535 node.name == master_node)),
2536 (files_vm, lambda node: node.vm_capable),
2539 # Build mapping from filename to list of nodes which should have the file
2541 for (files, fn) in files2nodefn:
2543 filenodes = nodeinfo
2545 filenodes = filter(fn, nodeinfo)
2546 nodefiles.update((filename,
2547 frozenset(map(operator.attrgetter("name"), filenodes)))
2548 for filename in files)
2550 assert set(nodefiles) == (files_all | files_mc | files_vm)
2552 fileinfo = dict((filename, {}) for filename in nodefiles)
2553 ignore_nodes = set()
2555 for node in nodeinfo:
2557 ignore_nodes.add(node.name)
2560 nresult = all_nvinfo[node.name]
2562 if nresult.fail_msg or not nresult.payload:
2565 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2567 test = not (node_files and isinstance(node_files, dict))
2568 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2569 "Node did not return file checksum data")
2571 ignore_nodes.add(node.name)
2574 # Build per-checksum mapping from filename to nodes having it
2575 for (filename, checksum) in node_files.items():
2576 assert filename in nodefiles
2577 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2579 for (filename, checksums) in fileinfo.items():
2580 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2582 # Nodes having the file
2583 with_file = frozenset(node_name
2584 for nodes in fileinfo[filename].values()
2585 for node_name in nodes) - ignore_nodes
2587 expected_nodes = nodefiles[filename] - ignore_nodes
2589 # Nodes missing file
2590 missing_file = expected_nodes - with_file
2592 if filename in files_opt:
2594 errorif(missing_file and missing_file != expected_nodes,
2595 constants.CV_ECLUSTERFILECHECK, None,
2596 "File %s is optional, but it must exist on all or no"
2597 " nodes (not found on %s)",
2598 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2600 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2601 "File %s is missing from node(s) %s", filename,
2602 utils.CommaJoin(utils.NiceSort(missing_file)))
2604 # Warn if a node has a file it shouldn't
2605 unexpected = with_file - expected_nodes
2607 constants.CV_ECLUSTERFILECHECK, None,
2608 "File %s should not exist on node(s) %s",
2609 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2611 # See if there are multiple versions of the file
2612 test = len(checksums) > 1
2614 variants = ["variant %s on %s" %
2615 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2616 for (idx, (checksum, nodes)) in
2617 enumerate(sorted(checksums.items()))]
2621 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2622 "File %s found with %s different checksums (%s)",
2623 filename, len(checksums), "; ".join(variants))
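  # Editor's note: an illustrative sketch (assumption, not original code) of
  # the per-file structure built above, using hypothetical names:
  #
  #   fileinfo["some-config-file"] = {
  #     "0a1b...": set(["node1", "node2"]),   # checksum -> nodes reporting it
  #     "9f8e...": set(["node3"]),
  #   }
  #
  # More than one checksum for the same file triggers the
  # CV_ECLUSTERFILECHECK "different checksums" error, listing one variant per
  # checksum.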
2625 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2627     """Verifies the node DRBD status.
2629 @type ninfo: L{objects.Node}
2630 @param ninfo: the node to check
2631 @param nresult: the remote results for the node
2632 @param instanceinfo: the dict of instances
2633 @param drbd_helper: the configured DRBD usermode helper
2634 @param drbd_map: the DRBD map as returned by
2635 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2639 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2642 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2643       test = (helper_result is None)
2644 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2645 "no drbd usermode helper returned")
2647 status, payload = helper_result
2649 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2650 "drbd usermode helper check unsuccessful: %s", payload)
2651 test = status and (payload != drbd_helper)
2652 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2653 "wrong drbd usermode helper: %s", payload)
2655 # compute the DRBD minors
2657 for minor, instance in drbd_map[node].items():
2658 test = instance not in instanceinfo
2659 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2660 "ghost instance '%s' in temporary DRBD map", instance)
2661 # ghost instance should not be running, but otherwise we
2662 # don't give double warnings (both ghost instance and
2663 # unallocated minor in use)
2665 node_drbd[minor] = (instance, False)
2667 instance = instanceinfo[instance]
2668 node_drbd[minor] = (instance.name,
2669 instance.admin_state == constants.ADMINST_UP)
2671 # and now check them
2672 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2673 test = not isinstance(used_minors, (tuple, list))
2674 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2675 "cannot parse drbd status file: %s", str(used_minors))
2677 # we cannot check drbd status
2680 for minor, (iname, must_exist) in node_drbd.items():
2681 test = minor not in used_minors and must_exist
2682 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2683 "drbd minor %d of instance %s is not active", minor, iname)
2684 for minor in used_minors:
2685 test = minor not in node_drbd
2686 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2687 "unallocated drbd minor %d is in use", minor)
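  # Editor's note: an illustrative sketch (assumption, not original code) of
  # the node_drbd map built above, using hypothetical instance names:
  #
  #   node_drbd = {0: ("inst1", True),   # instance administratively up
  #                1: ("inst2", False),  # instance down, minor may be unused
  #                2: ("ghost1", False)} # unknown instance from the DRBD map
  #
  # It is then compared against the minors the node reports as actually in use
  # (constants.NV_DRBDLIST).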
2689 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2690 """Builds the node OS structures.
2692 @type ninfo: L{objects.Node}
2693 @param ninfo: the node to check
2694 @param nresult: the remote results for the node
2695 @param nimg: the node image object
2699 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2701 remote_os = nresult.get(constants.NV_OSLIST, None)
2702 test = (not isinstance(remote_os, list) or
2703 not compat.all(isinstance(v, list) and len(v) == 7
2704 for v in remote_os))
2706 _ErrorIf(test, constants.CV_ENODEOS, node,
2707 "node hasn't returned valid OS data")
2716 for (name, os_path, status, diagnose,
2717 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2719 if name not in os_dict:
2722 # parameters is a list of lists instead of list of tuples due to
2723 # JSON lacking a real tuple type, fix it:
2724 parameters = [tuple(v) for v in parameters]
2725 os_dict[name].append((os_path, status, diagnose,
2726 set(variants), set(parameters), set(api_ver)))
2728 nimg.oslist = os_dict
2730 def _VerifyNodeOS(self, ninfo, nimg, base):
2731 """Verifies the node OS list.
2733 @type ninfo: L{objects.Node}
2734 @param ninfo: the node to check
2735 @param nimg: the node image object
2736 @param base: the 'template' node we match against (e.g. from the master)
2740 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2742 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2744 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2745 for os_name, os_data in nimg.oslist.items():
2746 assert os_data, "Empty OS status for OS %s?!" % os_name
2747 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2748 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2749 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2750 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2751 "OS '%s' has multiple entries (first one shadows the rest): %s",
2752 os_name, utils.CommaJoin([v[0] for v in os_data]))
2753 # comparisons with the 'base' image
2754 test = os_name not in base.oslist
2755 _ErrorIf(test, constants.CV_ENODEOS, node,
2756 "Extra OS %s not present on reference node (%s)",
2760 assert base.oslist[os_name], "Base node has empty OS status?"
2761 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2763 # base OS is invalid, skipping
2765 for kind, a, b in [("API version", f_api, b_api),
2766 ("variants list", f_var, b_var),
2767 ("parameters", beautify_params(f_param),
2768 beautify_params(b_param))]:
2769 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2770 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2771 kind, os_name, base.name,
2772 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2774 # check any missing OSes
2775 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2776 _ErrorIf(missing, constants.CV_ENODEOS, node,
2777 "OSes present on reference node %s but missing on this node: %s",
2778 base.name, utils.CommaJoin(missing))
2780 def _VerifyOob(self, ninfo, nresult):
2781 """Verifies out of band functionality of a node.
2783 @type ninfo: L{objects.Node}
2784 @param ninfo: the node to check
2785 @param nresult: the remote results for the node
2789 # We just have to verify the paths on master and/or master candidates
2790 # as the oob helper is invoked on the master
2791 if ((ninfo.master_candidate or ninfo.master_capable) and
2792 constants.NV_OOB_PATHS in nresult):
2793 for path_result in nresult[constants.NV_OOB_PATHS]:
2794 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2796 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2797 """Verifies and updates the node volume data.
2799 This function will update a L{NodeImage}'s internal structures
2800 with data from the remote call.
2802 @type ninfo: L{objects.Node}
2803 @param ninfo: the node to check
2804 @param nresult: the remote results for the node
2805 @param nimg: the node image object
2806 @param vg_name: the configured VG name
2810 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2812 nimg.lvm_fail = True
2813 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2816 elif isinstance(lvdata, basestring):
2817 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2818 utils.SafeEncode(lvdata))
2819 elif not isinstance(lvdata, dict):
2820 _ErrorIf(True, constants.CV_ENODELVM, node,
2821 "rpc call to node failed (lvlist)")
2823 nimg.volumes = lvdata
2824 nimg.lvm_fail = False
2826 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2827 """Verifies and updates the node instance list.
2829 If the listing was successful, then updates this node's instance
2830 list. Otherwise, it marks the RPC call as failed for the instance
2833 @type ninfo: L{objects.Node}
2834 @param ninfo: the node to check
2835 @param nresult: the remote results for the node
2836 @param nimg: the node image object
2839 idata = nresult.get(constants.NV_INSTANCELIST, None)
2840 test = not isinstance(idata, list)
2841 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2842 "rpc call to node failed (instancelist): %s",
2843 utils.SafeEncode(str(idata)))
2845 nimg.hyp_fail = True
2847 nimg.instances = idata
2849 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2850 """Verifies and computes a node information map
2852 @type ninfo: L{objects.Node}
2853 @param ninfo: the node to check
2854 @param nresult: the remote results for the node
2855 @param nimg: the node image object
2856 @param vg_name: the configured VG name
2860 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2862 # try to read free memory (from the hypervisor)
2863 hv_info = nresult.get(constants.NV_HVINFO, None)
2864 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2865 _ErrorIf(test, constants.CV_ENODEHV, node,
2866 "rpc call to node failed (hvinfo)")
2869 nimg.mfree = int(hv_info["memory_free"])
2870 except (ValueError, TypeError):
2871 _ErrorIf(True, constants.CV_ENODERPC, node,
2872 "node returned invalid nodeinfo, check hypervisor")
2874 # FIXME: devise a free space model for file based instances as well
2875 if vg_name is not None:
2876 test = (constants.NV_VGLIST not in nresult or
2877 vg_name not in nresult[constants.NV_VGLIST])
2878 _ErrorIf(test, constants.CV_ENODELVM, node,
2879 "node didn't return data for the volume group '%s'"
2880 " - it is either missing or broken", vg_name)
2883 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2884 except (ValueError, TypeError):
2885 _ErrorIf(True, constants.CV_ENODERPC, node,
2886 "node returned invalid LVM info, check LVM status")
2888 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2889 """Gets per-disk status information for all instances.
2891 @type nodelist: list of strings
2892 @param nodelist: Node names
2893 @type node_image: dict of (name, L{objects.Node})
2894 @param node_image: Node objects
2895 @type instanceinfo: dict of (name, L{objects.Instance})
2896 @param instanceinfo: Instance objects
2897     @rtype: {instance: {node: [(success, payload)]}}
2898 @return: a dictionary of per-instance dictionaries with nodes as
2899 keys and disk information as values; the disk information is a
2900 list of tuples (success, payload)
2903 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2906 node_disks_devonly = {}
2907 diskless_instances = set()
2908 diskless = constants.DT_DISKLESS
2910 for nname in nodelist:
2911 node_instances = list(itertools.chain(node_image[nname].pinst,
2912 node_image[nname].sinst))
2913 diskless_instances.update(inst for inst in node_instances
2914 if instanceinfo[inst].disk_template == diskless)
2915 disks = [(inst, disk)
2916 for inst in node_instances
2917 for disk in instanceinfo[inst].disks]
2920 # No need to collect data
2923 node_disks[nname] = disks
2925 # Creating copies as SetDiskID below will modify the objects and that can
2926 # lead to incorrect data returned from nodes
2927 devonly = [dev.Copy() for (_, dev) in disks]
2930 self.cfg.SetDiskID(dev, nname)
2932 node_disks_devonly[nname] = devonly
2934 assert len(node_disks) == len(node_disks_devonly)
2936 # Collect data from all nodes with disks
2937 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2940 assert len(result) == len(node_disks)
2944 for (nname, nres) in result.items():
2945 disks = node_disks[nname]
2948 # No data from this node
2949 data = len(disks) * [(False, "node offline")]
2952 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2953 "while getting disk information: %s", msg)
2955 # No data from this node
2956 data = len(disks) * [(False, msg)]
2959 for idx, i in enumerate(nres.payload):
2960 if isinstance(i, (tuple, list)) and len(i) == 2:
2963 logging.warning("Invalid result from node %s, entry %d: %s",
2965 data.append((False, "Invalid result from the remote node"))
2967 for ((inst, _), status) in zip(disks, data):
2968 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2970 # Add empty entries for diskless instances.
2971 for inst in diskless_instances:
2972 assert inst not in instdisk
2975 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2976 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2977 compat.all(isinstance(s, (tuple, list)) and
2978 len(s) == 2 for s in statuses)
2979 for inst, nnames in instdisk.items()
2980 for nname, statuses in nnames.items())
2981 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2986 def _SshNodeSelector(group_uuid, all_nodes):
2987 """Create endless iterators for all potential SSH check hosts.
2990 nodes = [node for node in all_nodes
2991 if (node.group != group_uuid and
2993 keyfunc = operator.attrgetter("group")
2995 return map(itertools.cycle,
2996 [sorted(map(operator.attrgetter("name"), names))
2997 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3001 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3002 """Choose which nodes should talk to which other nodes.
3004 We will make nodes contact all nodes in their group, and one node from
3007 @warning: This algorithm has a known issue if one node group is much
3008 smaller than others (e.g. just one node). In such a case all other
3009 nodes will talk to the single node.
3012 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3013 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3015 return (online_nodes,
3016 dict((name, sorted([i.next() for i in sel]))
3017 for name in online_nodes))
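  # Editor's note: an illustrative sketch (assumption, not original code) of
  # the value returned above for a group with online nodes ["n1", "n2"] and
  # two other hypothetical groups:
  #
  #   (["n1", "n2"],
  #    {"n1": ["groupA-node1", "groupB-node1"],
  #     "n2": ["groupA-node2", "groupB-node2"]})
  #
  # i.e. every online node of this group is asked to SSH-check one node from
  # each other group, cycling through them so the checks are spread out.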
3019 def BuildHooksEnv(self):
3022     Cluster-Verify hooks are run only in the post phase; a hook failure is
3023     logged in the verify output and makes the verification fail.
3027 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3030 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3031 for node in self.my_node_info.values())
3035 def BuildHooksNodes(self):
3036 """Build hooks nodes.
3039 return ([], self.my_node_names)
3041 def Exec(self, feedback_fn):
3042     """Verify integrity of the node group, performing various tests on nodes.
3045 # This method has too many local variables. pylint: disable=R0914
3046 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3048 if not self.my_node_names:
3050 feedback_fn("* Empty node group, skipping verification")
3054 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3055 verbose = self.op.verbose
3056 self._feedback_fn = feedback_fn
3058 vg_name = self.cfg.GetVGName()
3059 drbd_helper = self.cfg.GetDRBDHelper()
3060 cluster = self.cfg.GetClusterInfo()
3061 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3062 hypervisors = cluster.enabled_hypervisors
3063 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3065 i_non_redundant = [] # Non redundant instances
3066 i_non_a_balanced = [] # Non auto-balanced instances
3067 i_offline = 0 # Count of offline instances
3068 n_offline = 0 # Count of offline nodes
3069 n_drained = 0 # Count of nodes being drained
3070 node_vol_should = {}
3072 # FIXME: verify OS list
3075 filemap = _ComputeAncillaryFiles(cluster, False)
3077 # do local checksums
3078 master_node = self.master_node = self.cfg.GetMasterNode()
3079 master_ip = self.cfg.GetMasterIP()
3081 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3084 if self.cfg.GetUseExternalMipScript():
3085 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3087 node_verify_param = {
3088 constants.NV_FILELIST:
3089 utils.UniqueSequence(filename
3090 for files in filemap
3091 for filename in files),
3092 constants.NV_NODELIST:
3093 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3094 self.all_node_info.values()),
3095 constants.NV_HYPERVISOR: hypervisors,
3096 constants.NV_HVPARAMS:
3097 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3098 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3099 for node in node_data_list
3100 if not node.offline],
3101 constants.NV_INSTANCELIST: hypervisors,
3102 constants.NV_VERSION: None,
3103 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3104 constants.NV_NODESETUP: None,
3105 constants.NV_TIME: None,
3106 constants.NV_MASTERIP: (master_node, master_ip),
3107 constants.NV_OSLIST: None,
3108 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3109 constants.NV_USERSCRIPTS: user_scripts,
3112 if vg_name is not None:
3113 node_verify_param[constants.NV_VGLIST] = None
3114 node_verify_param[constants.NV_LVLIST] = vg_name
3115 node_verify_param[constants.NV_PVLIST] = [vg_name]
3116 node_verify_param[constants.NV_DRBDLIST] = None
3119 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3122 # FIXME: this needs to be changed per node-group, not cluster-wide
3124 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3125 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3126 bridges.add(default_nicpp[constants.NIC_LINK])
3127 for instance in self.my_inst_info.values():
3128 for nic in instance.nics:
3129 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3130 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3131 bridges.add(full_nic[constants.NIC_LINK])
3134 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3136 # Build our expected cluster state
3137 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3139 vm_capable=node.vm_capable))
3140 for node in node_data_list)
3144 for node in self.all_node_info.values():
3145 path = _SupportsOob(self.cfg, node)
3146 if path and path not in oob_paths:
3147 oob_paths.append(path)
3150 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3152 for instance in self.my_inst_names:
3153 inst_config = self.my_inst_info[instance]
3155 for nname in inst_config.all_nodes:
3156 if nname not in node_image:
3157 gnode = self.NodeImage(name=nname)
3158 gnode.ghost = (nname not in self.all_node_info)
3159 node_image[nname] = gnode
3161 inst_config.MapLVsByNode(node_vol_should)
3163 pnode = inst_config.primary_node
3164 node_image[pnode].pinst.append(instance)
3166 for snode in inst_config.secondary_nodes:
3167 nimg = node_image[snode]
3168 nimg.sinst.append(instance)
3169 if pnode not in nimg.sbp:
3170 nimg.sbp[pnode] = []
3171 nimg.sbp[pnode].append(instance)
3173 # At this point, we have the in-memory data structures complete,
3174 # except for the runtime information, which we'll gather next
3176 # Due to the way our RPC system works, exact response times cannot be
3177 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3178 # time before and after executing the request, we can at least have a time
3180 nvinfo_starttime = time.time()
3181 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3183 self.cfg.GetClusterName())
3184 nvinfo_endtime = time.time()
3186 if self.extra_lv_nodes and vg_name is not None:
3188 self.rpc.call_node_verify(self.extra_lv_nodes,
3189 {constants.NV_LVLIST: vg_name},
3190 self.cfg.GetClusterName())
3192 extra_lv_nvinfo = {}
3194 all_drbd_map = self.cfg.ComputeDRBDMap()
3196 feedback_fn("* Gathering disk information (%s nodes)" %
3197 len(self.my_node_names))
3198 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3201 feedback_fn("* Verifying configuration file consistency")
3203 # If not all nodes are being checked, we need to make sure the master node
3204 # and a non-checked vm_capable node are in the list.
3205 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3207 vf_nvinfo = all_nvinfo.copy()
3208 vf_node_info = list(self.my_node_info.values())
3209 additional_nodes = []
3210 if master_node not in self.my_node_info:
3211 additional_nodes.append(master_node)
3212 vf_node_info.append(self.all_node_info[master_node])
3213 # Add the first vm_capable node we find which is not included
3214 for node in absent_nodes:
3215 nodeinfo = self.all_node_info[node]
3216 if nodeinfo.vm_capable and not nodeinfo.offline:
3217 additional_nodes.append(node)
3218 vf_node_info.append(self.all_node_info[node])
3220 key = constants.NV_FILELIST
3221 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3222 {key: node_verify_param[key]},
3223 self.cfg.GetClusterName()))
3225 vf_nvinfo = all_nvinfo
3226 vf_node_info = self.my_node_info.values()
3228 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3230 feedback_fn("* Verifying node status")
3234 for node_i in node_data_list:
3236 nimg = node_image[node]
3240 feedback_fn("* Skipping offline node %s" % (node,))
3244 if node == master_node:
3246 elif node_i.master_candidate:
3247 ntype = "master candidate"
3248 elif node_i.drained:
3254 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3256 msg = all_nvinfo[node].fail_msg
3257 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3260 nimg.rpc_fail = True
3263 nresult = all_nvinfo[node].payload
3265 nimg.call_ok = self._VerifyNode(node_i, nresult)
3266 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3267 self._VerifyNodeNetwork(node_i, nresult)
3268 self._VerifyNodeUserScripts(node_i, nresult)
3269 self._VerifyOob(node_i, nresult)
3272 self._VerifyNodeLVM(node_i, nresult, vg_name)
3273 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3276 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3277 self._UpdateNodeInstances(node_i, nresult, nimg)
3278 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3279 self._UpdateNodeOS(node_i, nresult, nimg)
3281 if not nimg.os_fail:
3282 if refos_img is None:
3284 self._VerifyNodeOS(node_i, nimg, refos_img)
3285 self._VerifyNodeBridges(node_i, nresult, bridges)
3287       # Check whether all running instances are primary for the node. (This
3288 # can no longer be done from _VerifyInstance below, since some of the
3289 # wrong instances could be from other node groups.)
3290 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3292 for inst in non_primary_inst:
3293 # FIXME: investigate best way to handle offline insts
3294 if inst.admin_state == constants.ADMINST_OFFLINE:
3296 feedback_fn("* Skipping offline instance %s" % inst.name)
3299 test = inst in self.all_inst_info
3300 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3301 "instance should not run on node %s", node_i.name)
3302 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3303 "node is running unknown instance %s", inst)
3305 for node, result in extra_lv_nvinfo.items():
3306 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3307 node_image[node], vg_name)
3309 feedback_fn("* Verifying instance status")
3310 for instance in self.my_inst_names:
3312 feedback_fn("* Verifying instance %s" % instance)
3313 inst_config = self.my_inst_info[instance]
3314 self._VerifyInstance(instance, inst_config, node_image,
3316 inst_nodes_offline = []
3318 pnode = inst_config.primary_node
3319 pnode_img = node_image[pnode]
3320 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3321 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3322 " primary node failed", instance)
3324 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3326 constants.CV_EINSTANCEBADNODE, instance,
3327 "instance is marked as running and lives on offline node %s",
3328 inst_config.primary_node)
3330 # If the instance is non-redundant we cannot survive losing its primary
3331 # node, so we are not N+1 compliant. On the other hand we have no disk
3332       # templates with more than one secondary so that situation is not well
      # supported either.
3334 # FIXME: does not support file-backed instances
3335 if not inst_config.secondary_nodes:
3336 i_non_redundant.append(instance)
3338 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3339 constants.CV_EINSTANCELAYOUT,
3340 instance, "instance has multiple secondary nodes: %s",
3341 utils.CommaJoin(inst_config.secondary_nodes),
3342 code=self.ETYPE_WARNING)
3344 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3345 pnode = inst_config.primary_node
3346 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3347 instance_groups = {}
3349 for node in instance_nodes:
3350 instance_groups.setdefault(self.all_node_info[node].group,
3354 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3355 # Sort so that we always list the primary node first.
3356 for group, nodes in sorted(instance_groups.items(),
3357 key=lambda (_, nodes): pnode in nodes,
3360 self._ErrorIf(len(instance_groups) > 1,
3361 constants.CV_EINSTANCESPLITGROUPS,
3362 instance, "instance has primary and secondary nodes in"
3363 " different groups: %s", utils.CommaJoin(pretty_list),
3364 code=self.ETYPE_WARNING)
3366 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3367 i_non_a_balanced.append(instance)
3369 for snode in inst_config.secondary_nodes:
3370 s_img = node_image[snode]
3371 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3372 snode, "instance %s, connection to secondary node failed",
3376 inst_nodes_offline.append(snode)
3378 # warn that the instance lives on offline nodes
3379 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3380 "instance has offline secondary node(s) %s",
3381 utils.CommaJoin(inst_nodes_offline))
3382 # ... or ghost/non-vm_capable nodes
3383 for node in inst_config.all_nodes:
3384 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3385 instance, "instance lives on ghost node %s", node)
3386 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3387 instance, "instance lives on non-vm_capable node %s", node)
3389 feedback_fn("* Verifying orphan volumes")
3390 reserved = utils.FieldSet(*cluster.reserved_lvs)
3392 # We will get spurious "unknown volume" warnings if any node of this group
3393 # is secondary for an instance whose primary is in another group. To avoid
3394 # them, we find these instances and add their volumes to node_vol_should.
3395 for inst in self.all_inst_info.values():
3396 for secondary in inst.secondary_nodes:
3397 if (secondary in self.my_node_info
3398 and inst.name not in self.my_inst_info):
3399 inst.MapLVsByNode(node_vol_should)
3402 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3404 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3405 feedback_fn("* Verifying N+1 Memory redundancy")
3406 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3408 feedback_fn("* Other Notes")
3410 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3411 % len(i_non_redundant))
3413 if i_non_a_balanced:
3414 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3415 % len(i_non_a_balanced))
3418 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3421 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3424 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3428 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3429 """Analyze the post-hooks' result
3431 This method analyses the hook result, handles it, and sends some
3432 nicely-formatted feedback back to the user.
3434 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3435 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3436 @param hooks_results: the results of the multi-node hooks rpc call
3437     @param feedback_fn: function used to send feedback back to the caller
3438 @param lu_result: previous Exec result
3439 @return: the new Exec result, based on the previous result
3443 # We only really run POST phase hooks, only for non-empty groups,
3444 # and are only interested in their results
3445 if not self.my_node_names:
3448 elif phase == constants.HOOKS_PHASE_POST:
3449 # Used to change hooks' output to proper indentation
3450 feedback_fn("* Hooks Results")
3451 assert hooks_results, "invalid result from hooks"
3453 for node_name in hooks_results:
3454 res = hooks_results[node_name]
3456 test = msg and not res.offline
3457 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3458 "Communication failure in hooks execution: %s", msg)
3459 if res.offline or msg:
3460 # No need to investigate payload if node is offline or gave
3463 for script, hkr, output in res.payload:
3464 test = hkr == constants.HKR_FAIL
3465 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3466 "Script %s failed, output:", script)
3468 output = self._HOOKS_INDENT_RE.sub(" ", output)
3469 feedback_fn("%s" % output)
3475 class LUClusterVerifyDisks(NoHooksLU):
3476   """Verifies the cluster disk status.
3481 def ExpandNames(self):
3482 self.share_locks = _ShareAll()
3483 self.needed_locks = {
3484 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3487 def Exec(self, feedback_fn):
3488 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3490 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3491 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3492 for group in group_names])
3495 class LUGroupVerifyDisks(NoHooksLU):
3496 """Verifies the status of all disks in a node group.
3501 def ExpandNames(self):
3502 # Raises errors.OpPrereqError on its own if group can't be found
3503 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3505 self.share_locks = _ShareAll()
3506 self.needed_locks = {
3507 locking.LEVEL_INSTANCE: [],
3508 locking.LEVEL_NODEGROUP: [],
3509 locking.LEVEL_NODE: [],
3512 def DeclareLocks(self, level):
3513 if level == locking.LEVEL_INSTANCE:
3514 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3516 # Lock instances optimistically, needs verification once node and group
3517 # locks have been acquired
3518 self.needed_locks[locking.LEVEL_INSTANCE] = \
3519 self.cfg.GetNodeGroupInstances(self.group_uuid)
3521 elif level == locking.LEVEL_NODEGROUP:
3522 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3524 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3525 set([self.group_uuid] +
3526 # Lock all groups used by instances optimistically; this requires
3527 # going via the node before it's locked, requiring verification
3530 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3531 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3533 elif level == locking.LEVEL_NODE:
3534 # This will only lock the nodes in the group to be verified which contain
3536 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3537 self._LockInstancesNodes()
3539 # Lock all nodes in group to be verified
3540 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3541 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3542 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3544 def CheckPrereq(self):
3545 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3546 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3547 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3549 assert self.group_uuid in owned_groups
3551 # Check if locked instances are still correct
3552 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3554 # Get instance information
3555 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3557 # Check if node groups for locked instances are still correct
3558 _CheckInstancesNodeGroups(self.cfg, self.instances,
3559 owned_groups, owned_nodes, self.group_uuid)
3561 def Exec(self, feedback_fn):
3562 """Verify integrity of cluster disks.
3564 @rtype: tuple of three items
3565 @return: a tuple of (dict of node-to-node_error, list of instances
3566 which need activate-disks, dict of instance: (node, volume) for
3571 res_instances = set()
3574 nv_dict = _MapInstanceDisksToNodes([inst
3575 for inst in self.instances.values()
3576 if inst.admin_state == constants.ADMINST_UP])
3579 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3580 set(self.cfg.GetVmCapableNodeList()))
3582 node_lvs = self.rpc.call_lv_list(nodes, [])
3584 for (node, node_res) in node_lvs.items():
3585 if node_res.offline:
3588 msg = node_res.fail_msg
3590 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3591 res_nodes[node] = msg
3594 for lv_name, (_, _, lv_online) in node_res.payload.items():
3595 inst = nv_dict.pop((node, lv_name), None)
3596 if not (lv_online or inst is None):
3597 res_instances.add(inst)
3599 # any leftover items in nv_dict are missing LVs, let's arrange the data
3601 for key, inst in nv_dict.iteritems():
3602 res_missing.setdefault(inst, []).append(list(key))
3604 return (res_nodes, list(res_instances), res_missing)
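  # Editor's note: an illustrative sketch (assumption, not original code) of
  # the tuple returned above, using hypothetical names:
  #
  #   ({"node3": "Error while listing LVs"},        # node name -> error message
  #    ["inst2"],                                   # instances needing activate-disks
  #    {"inst5": [["node1", "xenvg/disk0"]]})       # instance -> missing (node, LV)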
3607 class LUClusterRepairDiskSizes(NoHooksLU):
3608   """Verifies the cluster disk sizes.
3613 def ExpandNames(self):
3614 if self.op.instances:
3615 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3616 self.needed_locks = {
3617 locking.LEVEL_NODE_RES: [],
3618 locking.LEVEL_INSTANCE: self.wanted_names,
3620 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3622 self.wanted_names = None
3623 self.needed_locks = {
3624 locking.LEVEL_NODE_RES: locking.ALL_SET,
3625 locking.LEVEL_INSTANCE: locking.ALL_SET,
3627 self.share_locks = {
3628 locking.LEVEL_NODE_RES: 1,
3629 locking.LEVEL_INSTANCE: 0,
3632 def DeclareLocks(self, level):
3633 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3634 self._LockInstancesNodes(primary_only=True, level=level)
3636 def CheckPrereq(self):
3637 """Check prerequisites.
3639 This only checks the optional instance list against the existing names.
3642 if self.wanted_names is None:
3643 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3645 self.wanted_instances = \
3646 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3648 def _EnsureChildSizes(self, disk):
3649 """Ensure children of the disk have the needed disk size.
3651 This is valid mainly for DRBD8 and fixes an issue where the
3652 children have smaller disk size.
3654 @param disk: an L{ganeti.objects.Disk} object
3657 if disk.dev_type == constants.LD_DRBD8:
3658 assert disk.children, "Empty children for DRBD8?"
3659 fchild = disk.children[0]
3660 mismatch = fchild.size < disk.size
3662 self.LogInfo("Child disk has size %d, parent %d, fixing",
3663 fchild.size, disk.size)
3664 fchild.size = disk.size
3666 # and we recurse on this child only, not on the metadev
3667 return self._EnsureChildSizes(fchild) or mismatch
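  # Editor's note: an illustrative sketch (assumption, not original code): for
  # a hypothetical DRBD8 disk whose data child was recorded 128 MiB too small,
  #
  #   disk.size              == 10240
  #   disk.children[0].size  == 10112
  #   self._EnsureChildSizes(disk)   # grows the child to 10240, returns True
  #
  # the caller then writes the updated configuration back (see Exec below).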
3671 def Exec(self, feedback_fn):
3672 """Verify the size of cluster disks.
3675 # TODO: check child disks too
3676 # TODO: check differences in size between primary/secondary nodes
3678 for instance in self.wanted_instances:
3679 pnode = instance.primary_node
3680 if pnode not in per_node_disks:
3681 per_node_disks[pnode] = []
3682 for idx, disk in enumerate(instance.disks):
3683 per_node_disks[pnode].append((instance, idx, disk))
3685 assert not (frozenset(per_node_disks.keys()) -
3686 self.owned_locks(locking.LEVEL_NODE_RES)), \
3687 "Not owning correct locks"
3688 assert not self.owned_locks(locking.LEVEL_NODE)
3691 for node, dskl in per_node_disks.items():
3692 newl = [v[2].Copy() for v in dskl]
3694 self.cfg.SetDiskID(dsk, node)
3695 result = self.rpc.call_blockdev_getsize(node, newl)
3697 self.LogWarning("Failure in blockdev_getsize call to node"
3698 " %s, ignoring", node)
3700 if len(result.payload) != len(dskl):
3701         logging.warning("Invalid result from node %s: len(dskl)=%d,"
3702 " result.payload=%s", node, len(dskl), result.payload)
3703 self.LogWarning("Invalid result from node %s, ignoring node results",
3704 node)
3705 continue
3706 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3707 if size is None:
3708 self.LogWarning("Disk %d of instance %s did not return size"
3709 " information, ignoring", idx, instance.name)
3710 continue
3711 if not isinstance(size, (int, long)):
3712 self.LogWarning("Disk %d of instance %s did not return valid"
3713 " size information, ignoring", idx, instance.name)
3716 if size != disk.size:
3717 self.LogInfo("Disk %d of instance %s has mismatched size,"
3718 " correcting: recorded %d, actual %d", idx,
3719 instance.name, disk.size, size)
3720 disk.size = size
3721 self.cfg.Update(instance, feedback_fn)
3722 changed.append((instance.name, idx, size))
3723 if self._EnsureChildSizes(disk):
3724 self.cfg.Update(instance, feedback_fn)
3725 changed.append((instance.name, idx, disk.size))
3727 return changed
3729 class LUClusterRename(LogicalUnit):
3730 """Rename the cluster.
3733 HPATH = "cluster-rename"
3734 HTYPE = constants.HTYPE_CLUSTER
3736 def BuildHooksEnv(self):
3737 """Build hooks env.
3739 """
3740 return {
3741 "OP_TARGET": self.cfg.GetClusterName(),
3742 "NEW_NAME": self.op.name,
3743 }
3745 def BuildHooksNodes(self):
3746 """Build hooks nodes.
3749 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3751 def CheckPrereq(self):
3752 """Verify that the passed name is a valid one.
3755 hostname = netutils.GetHostname(name=self.op.name,
3756 family=self.cfg.GetPrimaryIPFamily())
3758 new_name = hostname.name
3759 self.ip = new_ip = hostname.ip
3760 old_name = self.cfg.GetClusterName()
3761 old_ip = self.cfg.GetMasterIP()
3762 if new_name == old_name and new_ip == old_ip:
3763 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3764 " cluster has changed",
3766 if new_ip != old_ip:
3767 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3768 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3769 " reachable on the network" %
3770 new_ip, errors.ECODE_NOTUNIQUE)
3772 self.op.name = new_name
3774 def Exec(self, feedback_fn):
3775 """Rename the cluster.
3778 clustername = self.op.name
3779 new_ip = self.ip
3781 # shutdown the master IP
3782 master_params = self.cfg.GetMasterNetworkParameters()
3783 ems = self.cfg.GetUseExternalMipScript()
3784 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3785 master_params, ems)
3786 result.Raise("Could not disable the master role")
3788 try:
3789 cluster = self.cfg.GetClusterInfo()
3790 cluster.cluster_name = clustername
3791 cluster.master_ip = new_ip
3792 self.cfg.Update(cluster, feedback_fn)
3794 # update the known hosts file
3795 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3796 node_list = self.cfg.GetOnlineNodeList()
3797 try:
3798 node_list.remove(master_params.name)
3799 except ValueError:
3800 pass
3801 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3802 finally:
3803 master_params.ip = new_ip
3804 result = self.rpc.call_node_activate_master_ip(master_params.name,
3805 master_params, ems)
3806 msg = result.fail_msg
3807 if msg:
3808 self.LogWarning("Could not re-enable the master role on"
3809 " the master, please restart manually: %s", msg)
3811 return clustername
3814 def _ValidateNetmask(cfg, netmask):
3815 """Checks if a netmask is valid.
3817 @type cfg: L{config.ConfigWriter}
3818 @param cfg: The cluster configuration
3820 @param netmask: the netmask to be verified
3821 @raise errors.OpPrereqError: if the validation fails
3824 ip_family = cfg.GetPrimaryIPFamily()
3825 try:
3826 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3827 except errors.ProgrammerError:
3828 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3829 ip_family, errors.ECODE_INVAL)
3830 if not ipcls.ValidateNetmask(netmask):
3831 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3832 (netmask), errors.ECODE_INVAL)
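# Illustrative sketch (editor's addition): the master netmask is handled as
# a CIDR prefix length and validated against the cluster's primary IP
# family. Hypothetical example on an IPv4 cluster:
#
#   _ValidateNetmask(self.cfg, 24)    # accepted
#   _ValidateNetmask(self.cfg, 255)   # raises errors.OpPrereqError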
3835 class LUClusterSetParams(LogicalUnit):
3836 """Change the parameters of the cluster.
3839 HPATH = "cluster-modify"
3840 HTYPE = constants.HTYPE_CLUSTER
3843 def CheckArguments(self):
3847 if self.op.uid_pool:
3848 uidpool.CheckUidPool(self.op.uid_pool)
3850 if self.op.add_uids:
3851 uidpool.CheckUidPool(self.op.add_uids)
3853 if self.op.remove_uids:
3854 uidpool.CheckUidPool(self.op.remove_uids)
3856 if self.op.master_netmask is not None:
3857 _ValidateNetmask(self.cfg, self.op.master_netmask)
3859 if self.op.diskparams:
3860 for dt_params in self.op.diskparams.values():
3861 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3863 def ExpandNames(self):
3864 # FIXME: in the future maybe other cluster params won't require checking on
3865 # all nodes to be modified.
3866 self.needed_locks = {
3867 locking.LEVEL_NODE: locking.ALL_SET,
3868 locking.LEVEL_INSTANCE: locking.ALL_SET,
3869 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3871 self.share_locks = {
3872 locking.LEVEL_NODE: 1,
3873 locking.LEVEL_INSTANCE: 1,
3874 locking.LEVEL_NODEGROUP: 1,
3877 def BuildHooksEnv(self):
3878 """Build hooks env.
3880 """
3881 return {
3882 "OP_TARGET": self.cfg.GetClusterName(),
3883 "NEW_VG_NAME": self.op.vg_name,
3884 }
3886 def BuildHooksNodes(self):
3887 """Build hooks nodes.
3890 mn = self.cfg.GetMasterNode()
3891 return ([mn], [mn])
3893 def CheckPrereq(self):
3894 """Check prerequisites.
3896 This checks whether the given params don't conflict and
3897 if the given volume group is valid.
3900 if self.op.vg_name is not None and not self.op.vg_name:
3901 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3902 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3903 " instances exist", errors.ECODE_INVAL)
3905 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3906 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3907 raise errors.OpPrereqError("Cannot disable drbd helper while"
3908 " drbd-based instances exist",
3911 node_list = self.owned_locks(locking.LEVEL_NODE)
3913 # if vg_name not None, checks given volume group on all nodes
3914 if self.op.vg_name:
3915 vglist = self.rpc.call_vg_list(node_list)
3916 for node in node_list:
3917 msg = vglist[node].fail_msg
3918 if msg:
3919 # ignoring down node
3920 self.LogWarning("Error while gathering data on node %s"
3921 " (ignoring node): %s", node, msg)
3922 continue
3923 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3924 self.op.vg_name,
3925 constants.MIN_VG_SIZE)
3926 if vgstatus:
3927 raise errors.OpPrereqError("Error on node '%s': %s" %
3928 (node, vgstatus), errors.ECODE_ENVIRON)
3930 if self.op.drbd_helper:
3931 # checks given drbd helper on all nodes
3932 helpers = self.rpc.call_drbd_helper(node_list)
3933 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3934 if ninfo.offline:
3935 self.LogInfo("Not checking drbd helper on offline node %s", node)
3936 continue
3937 msg = helpers[node].fail_msg
3938 if msg:
3939 raise errors.OpPrereqError("Error checking drbd helper on node"
3940 " '%s': %s" % (node, msg),
3941 errors.ECODE_ENVIRON)
3942 node_helper = helpers[node].payload
3943 if node_helper != self.op.drbd_helper:
3944 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3945 (node, node_helper), errors.ECODE_ENVIRON)
3947 self.cluster = cluster = self.cfg.GetClusterInfo()
3948 # validate params changes
3949 if self.op.beparams:
3950 objects.UpgradeBeParams(self.op.beparams)
3951 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3952 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3954 if self.op.ndparams:
3955 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3956 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3958 # TODO: we need a more general way to handle resetting
3959 # cluster-level parameters to default values
3960 if self.new_ndparams["oob_program"] == "":
3961 self.new_ndparams["oob_program"] = \
3962 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3964 if self.op.hv_state:
3965 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3966 self.cluster.hv_state_static)
3967 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3968 for hv, values in new_hv_state.items())
3970 if self.op.disk_state:
3971 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3972 self.cluster.disk_state_static)
3973 self.new_disk_state = \
3974 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3975 for name, values in svalues.items()))
3976 for storage, svalues in new_disk_state.items())
3978 if self.op.ipolicy:
3979 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3980 group_policy=False)
3982 all_instances = self.cfg.GetAllInstancesInfo().values()
3983 violations = set()
3984 for group in self.cfg.GetAllNodeGroupsInfo().values():
3985 instances = frozenset([inst for inst in all_instances
3986 if compat.any(node in group.members
3987 for node in inst.all_nodes)])
3988 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3989 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3990 group),
3991 new_ipolicy, instances)
3993 violations.update(new)
3995 if violations:
3996 self.LogWarning("After the ipolicy change the following instances"
3997 " violate them: %s",
3998 utils.CommaJoin(utils.NiceSort(violations)))
4000 if self.op.nicparams:
4001 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4002 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4003 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4005 nic_errors = []
4006 # check all instances for consistency
4007 for instance in self.cfg.GetAllInstancesInfo().values():
4008 for nic_idx, nic in enumerate(instance.nics):
4009 params_copy = copy.deepcopy(nic.nicparams)
4010 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4012 # check parameter syntax
4013 try:
4014 objects.NIC.CheckParameterSyntax(params_filled)
4015 except errors.ConfigurationError, err:
4016 nic_errors.append("Instance %s, nic/%d: %s" %
4017 (instance.name, nic_idx, err))
4019 # if we're moving instances to routed, check that they have an ip
4020 target_mode = params_filled[constants.NIC_MODE]
4021 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4022 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4023 " address" % (instance.name, nic_idx))
4024 if nic_errors:
4025 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4026 "\n".join(nic_errors))
4028 # hypervisor list/parameters
4029 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4030 if self.op.hvparams:
4031 for hv_name, hv_dict in self.op.hvparams.items():
4032 if hv_name not in self.new_hvparams:
4033 self.new_hvparams[hv_name] = hv_dict
4034 else:
4035 self.new_hvparams[hv_name].update(hv_dict)
4037 # disk template parameters
4038 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4039 if self.op.diskparams:
4040 for dt_name, dt_params in self.op.diskparams.items():
4041 if dt_name not in self.new_diskparams:
4042 self.new_diskparams[dt_name] = dt_params
4043 else:
4044 self.new_diskparams[dt_name].update(dt_params)
4046 # os hypervisor parameters
4047 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4048 if self.op.os_hvp:
4049 for os_name, hvs in self.op.os_hvp.items():
4050 if os_name not in self.new_os_hvp:
4051 self.new_os_hvp[os_name] = hvs
4052 else:
4053 for hv_name, hv_dict in hvs.items():
4054 if hv_name not in self.new_os_hvp[os_name]:
4055 self.new_os_hvp[os_name][hv_name] = hv_dict
4056 else:
4057 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4060 self.new_osp = objects.FillDict(cluster.osparams, {})
4061 if self.op.osparams:
4062 for os_name, osp in self.op.osparams.items():
4063 if os_name not in self.new_osp:
4064 self.new_osp[os_name] = {}
4066 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4067 use_none=True)
4069 if not self.new_osp[os_name]:
4070 # we removed all parameters
4071 del self.new_osp[os_name]
4072 else:
4073 # check the parameter validity (remote check)
4074 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4075 os_name, self.new_osp[os_name])
4077 # changes to the hypervisor list
4078 if self.op.enabled_hypervisors is not None:
4079 self.hv_list = self.op.enabled_hypervisors
4080 for hv in self.hv_list:
4081 # if the hypervisor doesn't already exist in the cluster
4082 # hvparams, we initialize it to empty, and then (in both
4083 # cases) we make sure to fill the defaults, as we might not
4084 # have a complete defaults list if the hypervisor wasn't
4086 if hv not in new_hvp:
4087 new_hvp[hv] = {}
4088 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4089 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4090 else:
4091 self.hv_list = cluster.enabled_hypervisors
4093 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4094 # either the enabled list has changed, or the parameters have, validate
4095 for hv_name, hv_params in self.new_hvparams.items():
4096 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4097 (self.op.enabled_hypervisors and
4098 hv_name in self.op.enabled_hypervisors)):
4099 # either this is a new hypervisor, or its parameters have changed
4100 hv_class = hypervisor.GetHypervisor(hv_name)
4101 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4102 hv_class.CheckParameterSyntax(hv_params)
4103 _CheckHVParams(self, node_list, hv_name, hv_params)
4105 if self.op.os_hvp:
4106 # no need to check any newly-enabled hypervisors, since the
4107 # defaults have already been checked in the above code-block
4108 for os_name, os_hvp in self.new_os_hvp.items():
4109 for hv_name, hv_params in os_hvp.items():
4110 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4111 # we need to fill in the new os_hvp on top of the actual hv_p
4112 cluster_defaults = self.new_hvparams.get(hv_name, {})
4113 new_osp = objects.FillDict(cluster_defaults, hv_params)
4114 hv_class = hypervisor.GetHypervisor(hv_name)
4115 hv_class.CheckParameterSyntax(new_osp)
4116 _CheckHVParams(self, node_list, hv_name, new_osp)
4118 if self.op.default_iallocator:
4119 alloc_script = utils.FindFile(self.op.default_iallocator,
4120 constants.IALLOCATOR_SEARCH_PATH,
4121 os.path.isfile)
4122 if alloc_script is None:
4123 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4124 " specified" % self.op.default_iallocator,
4125 errors.ECODE_INVAL)
4127 def Exec(self, feedback_fn):
4128 """Change the parameters of the cluster.
4131 if self.op.vg_name is not None:
4132 new_volume = self.op.vg_name
4133 if not new_volume:
4134 new_volume = None
4135 if new_volume != self.cfg.GetVGName():
4136 self.cfg.SetVGName(new_volume)
4137 else:
4138 feedback_fn("Cluster LVM configuration already in desired"
4139 " state, not changing")
4140 if self.op.drbd_helper is not None:
4141 new_helper = self.op.drbd_helper
4142 if not new_helper:
4143 new_helper = None
4144 if new_helper != self.cfg.GetDRBDHelper():
4145 self.cfg.SetDRBDHelper(new_helper)
4146 else:
4147 feedback_fn("Cluster DRBD helper already in desired state,"
4148 " not changing")
4149 if self.op.hvparams:
4150 self.cluster.hvparams = self.new_hvparams
4151 if self.op.os_hvp:
4152 self.cluster.os_hvp = self.new_os_hvp
4153 if self.op.enabled_hypervisors is not None:
4154 self.cluster.hvparams = self.new_hvparams
4155 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4156 if self.op.beparams:
4157 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4158 if self.op.nicparams:
4159 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4160 if self.op.ipolicy:
4161 self.cluster.ipolicy = self.new_ipolicy
4162 if self.op.osparams:
4163 self.cluster.osparams = self.new_osp
4164 if self.op.ndparams:
4165 self.cluster.ndparams = self.new_ndparams
4166 if self.op.diskparams:
4167 self.cluster.diskparams = self.new_diskparams
4168 if self.op.hv_state:
4169 self.cluster.hv_state_static = self.new_hv_state
4170 if self.op.disk_state:
4171 self.cluster.disk_state_static = self.new_disk_state
4173 if self.op.candidate_pool_size is not None:
4174 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4175 # we need to update the pool size here, otherwise the save will fail
4176 _AdjustCandidatePool(self, [])
4178 if self.op.maintain_node_health is not None:
4179 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4180 feedback_fn("Note: CONFD was disabled at build time, node health"
4181 " maintenance is not useful (still enabling it)")
4182 self.cluster.maintain_node_health = self.op.maintain_node_health
4184 if self.op.prealloc_wipe_disks is not None:
4185 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4187 if self.op.add_uids is not None:
4188 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4190 if self.op.remove_uids is not None:
4191 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4193 if self.op.uid_pool is not None:
4194 self.cluster.uid_pool = self.op.uid_pool
4196 if self.op.default_iallocator is not None:
4197 self.cluster.default_iallocator = self.op.default_iallocator
4199 if self.op.reserved_lvs is not None:
4200 self.cluster.reserved_lvs = self.op.reserved_lvs
4202 if self.op.use_external_mip_script is not None:
4203 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4205 def helper_os(aname, mods, desc):
4206 desc += " OS list"
4207 lst = getattr(self.cluster, aname)
4208 for key, val in mods:
4209 if key == constants.DDM_ADD:
4210 if val in lst:
4211 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4212 else:
4213 lst.append(val)
4214 elif key == constants.DDM_REMOVE:
4215 if val in lst:
4216 lst.remove(val)
4217 else:
4218 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4219 else:
4220 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4222 if self.op.hidden_os:
4223 helper_os("hidden_os", self.op.hidden_os, "hidden")
4225 if self.op.blacklisted_os:
4226 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4228 if self.op.master_netdev:
4229 master_params = self.cfg.GetMasterNetworkParameters()
4230 ems = self.cfg.GetUseExternalMipScript()
4231 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4232 self.cluster.master_netdev)
4233 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4234 master_params, ems)
4235 result.Raise("Could not disable the master ip")
4236 feedback_fn("Changing master_netdev from %s to %s" %
4237 (master_params.netdev, self.op.master_netdev))
4238 self.cluster.master_netdev = self.op.master_netdev
4240 if self.op.master_netmask:
4241 master_params = self.cfg.GetMasterNetworkParameters()
4242 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4243 result = self.rpc.call_node_change_master_netmask(master_params.name,
4244 master_params.netmask,
4245 self.op.master_netmask,
4246 master_params.ip,
4247 master_params.netdev)
4248 if result.fail_msg:
4249 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4250 feedback_fn(msg)
4252 self.cluster.master_netmask = self.op.master_netmask
4254 self.cfg.Update(self.cluster, feedback_fn)
4256 if self.op.master_netdev:
4257 master_params = self.cfg.GetMasterNetworkParameters()
4258 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4259 self.op.master_netdev)
4260 ems = self.cfg.GetUseExternalMipScript()
4261 result = self.rpc.call_node_activate_master_ip(master_params.name,
4262 master_params, ems)
4263 if result.fail_msg:
4264 self.LogWarning("Could not re-enable the master ip on"
4265 " the master, please restart manually: %s",
4266 result.fail_msg)
4269 def _UploadHelper(lu, nodes, fname):
4270 """Helper for uploading a file and showing warnings.
4273 if os.path.exists(fname):
4274 result = lu.rpc.call_upload_file(nodes, fname)
4275 for to_node, to_result in result.items():
4276 msg = to_result.fail_msg
4277 if msg:
4278 msg = ("Copy of file %s to node %s failed: %s" %
4279 (fname, to_node, msg))
4280 lu.proc.LogWarning(msg)
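# Illustrative sketch (editor's addition): _UploadHelper is deliberately
# best-effort; per-node failures are only logged as warnings instead of
# aborting the operation. Typical use, with a hypothetical node list:
#
#   node_list = lu.cfg.GetOnlineNodeList()
#   _UploadHelper(lu, node_list, constants.SSH_KNOWN_HOSTS_FILE)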
4283 def _ComputeAncillaryFiles(cluster, redist):
4284 """Compute files external to Ganeti which need to be consistent.
4286 @type redist: boolean
4287 @param redist: Whether to include files which need to be redistributed
4290 # Compute files for all nodes
4291 files_all = set([
4292 constants.SSH_KNOWN_HOSTS_FILE,
4293 constants.CONFD_HMAC_KEY,
4294 constants.CLUSTER_DOMAIN_SECRET_FILE,
4295 constants.SPICE_CERT_FILE,
4296 constants.SPICE_CACERT_FILE,
4297 constants.RAPI_USERS_FILE,
4298 ])
4300 if not redist:
4301 files_all.update(constants.ALL_CERT_FILES)
4302 files_all.update(ssconf.SimpleStore().GetFileList())
4303 else:
4304 # we need to ship at least the RAPI certificate
4305 files_all.add(constants.RAPI_CERT_FILE)
4307 if cluster.modify_etc_hosts:
4308 files_all.add(constants.ETC_HOSTS)
4310 # Files which are optional, these must:
4311 # - be present in one other category as well
4312 # - either exist or not exist on all nodes of that category (mc, vm all)
4313 files_opt = set([
4314 constants.RAPI_USERS_FILE,
4315 ])
4317 # Files which should only be on master candidates
4318 files_mc = set()
4320 if not redist:
4321 files_mc.add(constants.CLUSTER_CONF_FILE)
4323 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4324 # replication
4325 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4327 # Files which should only be on VM-capable nodes
4328 files_vm = set(filename
4329 for hv_name in cluster.enabled_hypervisors
4330 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4332 files_opt |= set(filename
4333 for hv_name in cluster.enabled_hypervisors
4334 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4336 # Filenames in each category must be unique
4337 all_files_set = files_all | files_mc | files_vm
4338 assert (len(all_files_set) ==
4339 sum(map(len, [files_all, files_mc, files_vm]))), \
4340 "Found file listed in more than one file list"
4342 # Optional files must be present in one other category
4343 assert all_files_set.issuperset(files_opt), \
4344 "Optional file not in a different required list"
4346 return (files_all, files_opt, files_mc, files_vm)
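# Illustrative sketch (editor's addition): the four sets returned above are
# disjoint apart from the optional files, which must also appear in one of
# the other categories. A caller interested only in what every node must
# carry could do, roughly:
#
#   (files_all, files_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(lu.cfg.GetClusterInfo(), True)
#   required_everywhere = files_all - files_opt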
4349 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4350 """Distribute additional files which are part of the cluster configuration.
4352 ConfigWriter takes care of distributing the config and ssconf files, but
4353 there are more files which should be distributed to all nodes. This function
4354 makes sure those are copied.
4356 @param lu: calling logical unit
4357 @param additional_nodes: list of nodes not in the config to distribute to
4358 @type additional_vm: boolean
4359 @param additional_vm: whether the additional nodes are vm-capable or not
4362 # Gather target nodes
4363 cluster = lu.cfg.GetClusterInfo()
4364 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4366 online_nodes = lu.cfg.GetOnlineNodeList()
4367 vm_nodes = lu.cfg.GetVmCapableNodeList()
4369 if additional_nodes is not None:
4370 online_nodes.extend(additional_nodes)
4371 if additional_vm:
4372 vm_nodes.extend(additional_nodes)
4374 # Never distribute to master node
4375 for nodelist in [online_nodes, vm_nodes]:
4376 if master_info.name in nodelist:
4377 nodelist.remove(master_info.name)
4380 (files_all, _, files_mc, files_vm) = \
4381 _ComputeAncillaryFiles(cluster, True)
4383 # Never re-distribute configuration file from here
4384 assert not (constants.CLUSTER_CONF_FILE in files_all or
4385 constants.CLUSTER_CONF_FILE in files_vm)
4386 assert not files_mc, "Master candidates not handled in this function"
4388 filemap = [
4389 (online_nodes, files_all),
4390 (vm_nodes, files_vm),
4391 ]
4393 # Upload the files
4394 for (node_list, files) in filemap:
4395 for fname in files:
4396 _UploadHelper(lu, node_list, fname)
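# Illustrative sketch (editor's addition): when a node is added, the function
# above is called with the not-yet-configured node passed explicitly, since
# that node is not in the configuration at this point (see LUNodeAdd.Exec
# further down):
#
#   _RedistributeAncillaryFiles(self, additional_nodes=[node],
#                               additional_vm=self.op.vm_capable)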
4399 class LUClusterRedistConf(NoHooksLU):
4400 """Force the redistribution of cluster configuration.
4402 This is a very simple LU.
4407 def ExpandNames(self):
4408 self.needed_locks = {
4409 locking.LEVEL_NODE: locking.ALL_SET,
4411 self.share_locks[locking.LEVEL_NODE] = 1
4413 def Exec(self, feedback_fn):
4414 """Redistribute the configuration.
4417 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4418 _RedistributeAncillaryFiles(self)
4421 class LUClusterActivateMasterIp(NoHooksLU):
4422 """Activate the master IP on the master node.
4425 def Exec(self, feedback_fn):
4426 """Activate the master IP.
4429 master_params = self.cfg.GetMasterNetworkParameters()
4430 ems = self.cfg.GetUseExternalMipScript()
4431 result = self.rpc.call_node_activate_master_ip(master_params.name,
4433 result.Raise("Could not activate the master IP")
4436 class LUClusterDeactivateMasterIp(NoHooksLU):
4437 """Deactivate the master IP on the master node.
4440 def Exec(self, feedback_fn):
4441 """Deactivate the master IP.
4444 master_params = self.cfg.GetMasterNetworkParameters()
4445 ems = self.cfg.GetUseExternalMipScript()
4446 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4448 result.Raise("Could not deactivate the master IP")
4451 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4452 """Sleep and poll for an instance's disk to sync.
4455 if not instance.disks or disks is not None and not disks:
4456 return True
4458 disks = _ExpandCheckDisks(instance, disks)
4461 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4463 node = instance.primary_node
4465 for dev in disks:
4466 lu.cfg.SetDiskID(dev, node)
4468 # TODO: Convert to utils.Retry
4470 retries = 0
4471 degr_retries = 10 # in seconds, as we sleep 1 second each time
4472 while True:
4473 max_time = 0
4474 done = True
4475 cumul_degraded = False
4476 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4477 msg = rstats.fail_msg
4478 if msg:
4479 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4480 retries += 1
4481 if retries >= 10:
4482 raise errors.RemoteError("Can't contact node %s for mirror data,"
4483 " aborting." % node)
4484 time.sleep(6)
4485 continue
4486 rstats = rstats.payload
4488 for i, mstat in enumerate(rstats):
4489 if mstat is None:
4490 lu.LogWarning("Can't compute data for node %s/%s",
4491 node, disks[i].iv_name)
4492 continue
4494 cumul_degraded = (cumul_degraded or
4495 (mstat.is_degraded and mstat.sync_percent is None))
4496 if mstat.sync_percent is not None:
4497 done = False
4498 if mstat.estimated_time is not None:
4499 rem_time = ("%s remaining (estimated)" %
4500 utils.FormatSeconds(mstat.estimated_time))
4501 max_time = mstat.estimated_time
4502 else:
4503 rem_time = "no time estimate"
4504 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4505 (disks[i].iv_name, mstat.sync_percent, rem_time))
4507 # if we're done but degraded, let's do a few small retries, to
4508 # make sure we see a stable and not transient situation; therefore
4509 # we force restart of the loop
4510 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4511 logging.info("Degraded disks found, %d retries left", degr_retries)
4512 degr_retries -= 1
4513 time.sleep(1)
4514 continue
4516 if done or oneshot:
4517 break
4519 time.sleep(min(60, max_time))
4521 if done:
4522 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4523 return not cumul_degraded
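# Illustrative sketch (editor's addition): callers treat the boolean result
# as "all mirrors healthy"; a False return usually makes the calling LU
# abort or clean up. Hypothetical usage for a freshly created instance:
#
#   if not _WaitForSync(lu, instance):
#     raise errors.OpExecError("Disk sync failed or devices are degraded")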
4526 def _BlockdevFind(lu, node, dev, instance):
4527 """Wrapper around call_blockdev_find to annotate diskparams.
4529 @param lu: A reference to the lu object
4530 @param node: The node to call out
4531 @param dev: The device to find
4532 @param instance: The instance object the device belongs to
4533 @returns The result of the rpc call
4536 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4537 return lu.rpc.call_blockdev_find(node, disk)
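# Illustrative sketch (editor's addition): _BlockdevFind exists so that the
# disk object is annotated with the applicable disk parameters before the
# RPC; calling call_blockdev_find directly on an un-annotated disk would
# lose the per-group/cluster disk parameters. Hypothetical usage:
#
#   result = _BlockdevFind(self, node, dev, instance)
#   if not result.fail_msg and result.payload:
#     status = result.payload   # block device status information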
4540 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4541 """Wrapper around L{_CheckDiskConsistencyInner}.
4544 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4545 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4549 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4551 """Check that mirrors are not degraded.
4553 @attention: The device has to be annotated already.
4555 The ldisk parameter, if True, will change the test from the
4556 is_degraded attribute (which represents overall non-ok status for
4557 the device(s)) to the ldisk (representing the local storage status).
4560 lu.cfg.SetDiskID(dev, node)
4562 result = True
4564 if on_primary or dev.AssembleOnSecondary():
4565 rstats = lu.rpc.call_blockdev_find(node, dev)
4566 msg = rstats.fail_msg
4567 if msg:
4568 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4569 result = False
4570 elif not rstats.payload:
4571 lu.LogWarning("Can't find disk on node %s", node)
4572 result = False
4573 else:
4574 if ldisk:
4575 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4577 else:
4578 result = result and not rstats.payload.is_degraded
4580 if dev.children:
4581 for child in dev.children:
4582 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4583 on_primary)
4585 return result
4587 class LUOobCommand(NoHooksLU):
4588 """Logical unit for OOB handling.
4592 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4594 def ExpandNames(self):
4595 """Gather locks we need.
4598 if self.op.node_names:
4599 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4600 lock_names = self.op.node_names
4602 lock_names = locking.ALL_SET
4604 self.needed_locks = {
4605 locking.LEVEL_NODE: lock_names,
4608 def CheckPrereq(self):
4609 """Check prerequisites.
4612 - the node exists in the configuration
4615 Any errors are signaled by raising errors.OpPrereqError.
4619 self.master_node = self.cfg.GetMasterNode()
4621 assert self.op.power_delay >= 0.0
4623 if self.op.node_names:
4624 if (self.op.command in self._SKIP_MASTER and
4625 self.master_node in self.op.node_names):
4626 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4627 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4629 if master_oob_handler:
4630 additional_text = ("run '%s %s %s' if you want to operate on the"
4631 " master regardless") % (master_oob_handler,
4635 additional_text = "it does not support out-of-band operations"
4637 raise errors.OpPrereqError(("Operating on the master node %s is not"
4638 " allowed for %s; %s") %
4639 (self.master_node, self.op.command,
4640 additional_text), errors.ECODE_INVAL)
4641 else:
4642 self.op.node_names = self.cfg.GetNodeList()
4643 if self.op.command in self._SKIP_MASTER:
4644 self.op.node_names.remove(self.master_node)
4646 if self.op.command in self._SKIP_MASTER:
4647 assert self.master_node not in self.op.node_names
4649 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4650 if node is None:
4651 raise errors.OpPrereqError("Node %s not found" % node_name,
4652 errors.ECODE_NOENT)
4653 else:
4654 self.nodes.append(node)
4656 if (not self.op.ignore_status and
4657 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4658 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4659 " not marked offline") % node_name,
4662 def Exec(self, feedback_fn):
4663 """Execute OOB and return result if we expect any.
4666 master_node = self.master_node
4667 ret = []
4669 for idx, node in enumerate(utils.NiceSort(self.nodes,
4670 key=lambda node: node.name)):
4671 node_entry = [(constants.RS_NORMAL, node.name)]
4672 ret.append(node_entry)
4674 oob_program = _SupportsOob(self.cfg, node)
4676 if not oob_program:
4677 node_entry.append((constants.RS_UNAVAIL, None))
4678 continue
4680 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4681 self.op.command, oob_program, node.name)
4682 result = self.rpc.call_run_oob(master_node, oob_program,
4683 self.op.command, node.name,
4687 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4688 node.name, result.fail_msg)
4689 node_entry.append((constants.RS_NODATA, None))
4692 self._CheckPayload(result)
4693 except errors.OpExecError, err:
4694 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4696 node_entry.append((constants.RS_NODATA, None))
4698 if self.op.command == constants.OOB_HEALTH:
4699 # For health we should log important events
4700 for item, status in result.payload:
4701 if status in [constants.OOB_STATUS_WARNING,
4702 constants.OOB_STATUS_CRITICAL]:
4703 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4704 item, node.name, status)
4706 if self.op.command == constants.OOB_POWER_ON:
4707 node.powered = True
4708 elif self.op.command == constants.OOB_POWER_OFF:
4709 node.powered = False
4710 elif self.op.command == constants.OOB_POWER_STATUS:
4711 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4712 if powered != node.powered:
4713 logging.warning(("Recorded power state (%s) of node '%s' does not"
4714 " match actual power state (%s)"), node.powered,
4717 # For configuration changing commands we should update the node
4718 if self.op.command in (constants.OOB_POWER_ON,
4719 constants.OOB_POWER_OFF):
4720 self.cfg.Update(node, feedback_fn)
4722 node_entry.append((constants.RS_NORMAL, result.payload))
4724 if (self.op.command == constants.OOB_POWER_ON and
4725 idx < len(self.nodes) - 1):
4726 time.sleep(self.op.power_delay)
4728 return ret
4730 def _CheckPayload(self, result):
4731 """Checks if the payload is valid.
4733 @param result: RPC result
4734 @raises errors.OpExecError: If payload is not valid
4737 errs = []
4738 if self.op.command == constants.OOB_HEALTH:
4739 if not isinstance(result.payload, list):
4740 errs.append("command 'health' is expected to return a list but got %s" %
4741 type(result.payload))
4743 for item, status in result.payload:
4744 if status not in constants.OOB_STATUSES:
4745 errs.append("health item '%s' has invalid status '%s'" %
4748 if self.op.command == constants.OOB_POWER_STATUS:
4749 if not isinstance(result.payload, dict):
4750 errs.append("power-status is expected to return a dict but got %s" %
4751 type(result.payload))
4753 if self.op.command in [
4754 constants.OOB_POWER_ON,
4755 constants.OOB_POWER_OFF,
4756 constants.OOB_POWER_CYCLE,
4758 if result.payload is not None:
4759 errs.append("%s is expected to not return payload but got '%s'" %
4760 (self.op.command, result.payload))
4762 if errs:
4763 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4764 utils.CommaJoin(errs))
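# Illustrative sketch (editor's addition): the expected payload shape
# depends on the OOB command; the values below are made up:
#
#   "health"       -> [["disk0", "OK"], ["psu1", "CRITICAL"]]
#   "power-status" -> {constants.OOB_POWER_STATUS_POWERED: True}
#   "power-on"/"power-off"/"power-cycle" -> None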
4767 class _OsQuery(_QueryBase):
4768 FIELDS = query.OS_FIELDS
4770 def ExpandNames(self, lu):
4771 # Lock all nodes in shared mode
4772 # Temporary removal of locks, should be reverted later
4773 # TODO: reintroduce locks when they are lighter-weight
4774 lu.needed_locks = {}
4775 #self.share_locks[locking.LEVEL_NODE] = 1
4776 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4778 # The following variables interact with _QueryBase._GetNames
4780 self.wanted = self.names
4782 self.wanted = locking.ALL_SET
4784 self.do_locking = self.use_locking
4786 def DeclareLocks(self, lu, level):
4789 @staticmethod
4790 def _DiagnoseByOS(rlist):
4791 """Remaps a per-node return list into an a per-os per-node dictionary
4793 @param rlist: a map with node names as keys and OS objects as values
4796 @return: a dictionary with osnames as keys and as value another
4797 map, with nodes as keys and tuples of (path, status, diagnose,
4798 variants, parameters, api_versions) as values, eg::
4800 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4801 (/srv/..., False, "invalid api")],
4802 "node2": [(/srv/..., True, "", [], [])]}
4806 all_os = {}
4807 # we build here the list of nodes that didn't fail the RPC (at RPC
4808 # level), so that nodes with a non-responding node daemon don't
4809 # make all OSes invalid
4810 good_nodes = [node_name for node_name in rlist
4811 if not rlist[node_name].fail_msg]
4812 for node_name, nr in rlist.items():
4813 if nr.fail_msg or not nr.payload:
4814 continue
4815 for (name, path, status, diagnose, variants,
4816 params, api_versions) in nr.payload:
4817 if name not in all_os:
4818 # build a list of nodes for this os containing empty lists
4819 # for each node in node_list
4820 all_os[name] = {}
4821 for nname in good_nodes:
4822 all_os[name][nname] = []
4823 # convert params from [name, help] to (name, help)
4824 params = [tuple(v) for v in params]
4825 all_os[name][node_name].append((path, status, diagnose,
4826 variants, params, api_versions))
4828 return all_os
4829 def _GetQueryData(self, lu):
4830 """Computes the list of nodes and their attributes.
4833 # Locking is not used
4834 assert not (compat.any(lu.glm.is_owned(level)
4835 for level in locking.LEVELS
4836 if level != locking.LEVEL_CLUSTER) or
4837 self.do_locking or self.use_locking)
4839 valid_nodes = [node.name
4840 for node in lu.cfg.GetAllNodesInfo().values()
4841 if not node.offline and node.vm_capable]
4842 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4843 cluster = lu.cfg.GetClusterInfo()
4847 for (os_name, os_data) in pol.items():
4848 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4849 hidden=(os_name in cluster.hidden_os),
4850 blacklisted=(os_name in cluster.blacklisted_os))
4854 api_versions = set()
4856 for idx, osl in enumerate(os_data.values()):
4857 info.valid = bool(info.valid and osl and osl[0][1])
4861 (node_variants, node_params, node_api) = osl[0][3:6]
4864 variants.update(node_variants)
4865 parameters.update(node_params)
4866 api_versions.update(node_api)
4868 # Filter out inconsistent values
4869 variants.intersection_update(node_variants)
4870 parameters.intersection_update(node_params)
4871 api_versions.intersection_update(node_api)
4873 info.variants = list(variants)
4874 info.parameters = list(parameters)
4875 info.api_versions = list(api_versions)
4877 data[os_name] = info
4879 # Prepare data in requested order
4880 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4884 class LUOsDiagnose(NoHooksLU):
4885 """Logical unit for OS diagnose/query.
4890 @staticmethod
4891 def _BuildFilter(fields, names):
4892 """Builds a filter for querying OSes.
4895 name_filter = qlang.MakeSimpleFilter("name", names)
4897 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4898 # respective field is not requested
4899 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4900 for fname in ["hidden", "blacklisted"]
4901 if fname not in fields]
4902 if "valid" not in fields:
4903 status_filter.append([qlang.OP_TRUE, "valid"])
4906 status_filter.insert(0, qlang.OP_AND)
4908 status_filter = None
4910 if name_filter and status_filter:
4911 return [qlang.OP_AND, name_filter, status_filter]
4915 return status_filter
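# Illustrative sketch (editor's addition): with the default field set the
# generated filter hides hidden, blacklisted and invalid OSes. A query for
# the made-up name "debian-8" becomes roughly:
#
#   [OP_AND, [OP_OR, [OP_EQUAL, "name", "debian-8"]],
#            [OP_AND, [OP_NOT, [OP_TRUE, "hidden"]],
#                     [OP_NOT, [OP_TRUE, "blacklisted"]],
#                     [OP_TRUE, "valid"]]]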
4917 def CheckArguments(self):
4918 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4919 self.op.output_fields, False)
4921 def ExpandNames(self):
4922 self.oq.ExpandNames(self)
4924 def Exec(self, feedback_fn):
4925 return self.oq.OldStyleQuery(self)
4928 class LUNodeRemove(LogicalUnit):
4929 """Logical unit for removing a node.
4932 HPATH = "node-remove"
4933 HTYPE = constants.HTYPE_NODE
4935 def BuildHooksEnv(self):
4940 "OP_TARGET": self.op.node_name,
4941 "NODE_NAME": self.op.node_name,
4944 def BuildHooksNodes(self):
4945 """Build hooks nodes.
4947 This doesn't run on the target node in the pre phase as a failed
4948 node would then be impossible to remove.
4951 all_nodes = self.cfg.GetNodeList()
4953 all_nodes.remove(self.op.node_name)
4956 return (all_nodes, all_nodes)
4958 def CheckPrereq(self):
4959 """Check prerequisites.
4962 - the node exists in the configuration
4963 - it does not have primary or secondary instances
4964 - it's not the master
4966 Any errors are signaled by raising errors.OpPrereqError.
4969 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4970 node = self.cfg.GetNodeInfo(self.op.node_name)
4971 assert node is not None
4973 masternode = self.cfg.GetMasterNode()
4974 if node.name == masternode:
4975 raise errors.OpPrereqError("Node is the master node, failover to another"
4976 " node is required", errors.ECODE_INVAL)
4978 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4979 if node.name in instance.all_nodes:
4980 raise errors.OpPrereqError("Instance %s is still running on the node,"
4981 " please remove first" % instance_name,
4983 self.op.node_name = node.name
4986 def Exec(self, feedback_fn):
4987 """Removes the node from the cluster.
4991 logging.info("Stopping the node daemon and removing configs from node %s",
4994 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4996 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4999 # Promote nodes to master candidate as needed
5000 _AdjustCandidatePool(self, exceptions=[node.name])
5001 self.context.RemoveNode(node.name)
5003 # Run post hooks on the node before it's removed
5004 _RunPostHook(self, node.name)
5006 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5007 msg = result.fail_msg
5009 self.LogWarning("Errors encountered on the remote node while leaving"
5010 " the cluster: %s", msg)
5012 # Remove node from our /etc/hosts
5013 if self.cfg.GetClusterInfo().modify_etc_hosts:
5014 master_node = self.cfg.GetMasterNode()
5015 result = self.rpc.call_etc_hosts_modify(master_node,
5016 constants.ETC_HOSTS_REMOVE,
5018 result.Raise("Can't update hosts file with new host data")
5019 _RedistributeAncillaryFiles(self)
5022 class _NodeQuery(_QueryBase):
5023 FIELDS = query.NODE_FIELDS
5025 def ExpandNames(self, lu):
5026 lu.needed_locks = {}
5027 lu.share_locks = _ShareAll()
5030 self.wanted = _GetWantedNodes(lu, self.names)
5032 self.wanted = locking.ALL_SET
5034 self.do_locking = (self.use_locking and
5035 query.NQ_LIVE in self.requested_data)
5038 # If any non-static field is requested we need to lock the nodes
5039 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5041 def DeclareLocks(self, lu, level):
5044 def _GetQueryData(self, lu):
5045 """Computes the list of nodes and their attributes.
5048 all_info = lu.cfg.GetAllNodesInfo()
5050 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5052 # Gather data as requested
5053 if query.NQ_LIVE in self.requested_data:
5054 # filter out non-vm_capable nodes
5055 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5057 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5058 [lu.cfg.GetHypervisorType()])
5059 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5060 for (name, nresult) in node_data.items()
5061 if not nresult.fail_msg and nresult.payload)
5065 if query.NQ_INST in self.requested_data:
5066 node_to_primary = dict([(name, set()) for name in nodenames])
5067 node_to_secondary = dict([(name, set()) for name in nodenames])
5069 inst_data = lu.cfg.GetAllInstancesInfo()
5071 for inst in inst_data.values():
5072 if inst.primary_node in node_to_primary:
5073 node_to_primary[inst.primary_node].add(inst.name)
5074 for secnode in inst.secondary_nodes:
5075 if secnode in node_to_secondary:
5076 node_to_secondary[secnode].add(inst.name)
5078 node_to_primary = None
5079 node_to_secondary = None
5081 if query.NQ_OOB in self.requested_data:
5082 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5083 for name, node in all_info.iteritems())
5087 if query.NQ_GROUP in self.requested_data:
5088 groups = lu.cfg.GetAllNodeGroupsInfo()
5092 return query.NodeQueryData([all_info[name] for name in nodenames],
5093 live_data, lu.cfg.GetMasterNode(),
5094 node_to_primary, node_to_secondary, groups,
5095 oob_support, lu.cfg.GetClusterInfo())
5098 class LUNodeQuery(NoHooksLU):
5099 """Logical unit for querying nodes.
5102 # pylint: disable=W0142
5105 def CheckArguments(self):
5106 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5107 self.op.output_fields, self.op.use_locking)
5109 def ExpandNames(self):
5110 self.nq.ExpandNames(self)
5112 def DeclareLocks(self, level):
5113 self.nq.DeclareLocks(self, level)
5115 def Exec(self, feedback_fn):
5116 return self.nq.OldStyleQuery(self)
5119 class LUNodeQueryvols(NoHooksLU):
5120 """Logical unit for getting volumes on node(s).
5124 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5125 _FIELDS_STATIC = utils.FieldSet("node")
5127 def CheckArguments(self):
5128 _CheckOutputFields(static=self._FIELDS_STATIC,
5129 dynamic=self._FIELDS_DYNAMIC,
5130 selected=self.op.output_fields)
5132 def ExpandNames(self):
5133 self.share_locks = _ShareAll()
5134 self.needed_locks = {}
5136 if not self.op.nodes:
5137 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5139 self.needed_locks[locking.LEVEL_NODE] = \
5140 _GetWantedNodes(self, self.op.nodes)
5142 def Exec(self, feedback_fn):
5143 """Computes the list of nodes and their attributes.
5146 nodenames = self.owned_locks(locking.LEVEL_NODE)
5147 volumes = self.rpc.call_node_volumes(nodenames)
5149 ilist = self.cfg.GetAllInstancesInfo()
5150 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5152 output = []
5153 for node in nodenames:
5154 nresult = volumes[node]
5155 if nresult.offline:
5156 continue
5157 msg = nresult.fail_msg
5158 if msg:
5159 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5160 continue
5162 node_vols = sorted(nresult.payload,
5163 key=operator.itemgetter("dev"))
5165 for vol in node_vols:
5166 node_output = []
5167 for field in self.op.output_fields:
5168 if field == "node":
5169 val = node
5170 elif field == "phys":
5171 val = vol["dev"]
5172 elif field == "vg":
5173 val = vol["vg"]
5174 elif field == "name":
5175 val = vol["name"]
5176 elif field == "size":
5177 val = int(float(vol["size"]))
5178 elif field == "instance":
5179 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5180 else:
5181 raise errors.ParameterError(field)
5182 node_output.append(str(val))
5184 output.append(node_output)
5186 return output
5189 class LUNodeQueryStorage(NoHooksLU):
5190 """Logical unit for getting information on storage units on node(s).
5193 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5196 def CheckArguments(self):
5197 _CheckOutputFields(static=self._FIELDS_STATIC,
5198 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5199 selected=self.op.output_fields)
5201 def ExpandNames(self):
5202 self.share_locks = _ShareAll()
5203 self.needed_locks = {}
5206 self.needed_locks[locking.LEVEL_NODE] = \
5207 _GetWantedNodes(self, self.op.nodes)
5209 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5211 def Exec(self, feedback_fn):
5212 """Computes the list of nodes and their attributes.
5215 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5217 # Always get name to sort by
5218 if constants.SF_NAME in self.op.output_fields:
5219 fields = self.op.output_fields[:]
5221 fields = [constants.SF_NAME] + self.op.output_fields
5223 # Never ask for node or type as it's only known to the LU
5224 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5225 while extra in fields:
5226 fields.remove(extra)
5228 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5229 name_idx = field_idx[constants.SF_NAME]
5231 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5232 data = self.rpc.call_storage_list(self.nodes,
5233 self.op.storage_type, st_args,
5234 self.op.name, fields)
5238 for node in utils.NiceSort(self.nodes):
5239 nresult = data[node]
5243 msg = nresult.fail_msg
5245 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5248 rows = dict([(row[name_idx], row) for row in nresult.payload])
5250 for name in utils.NiceSort(rows.keys()):
5255 for field in self.op.output_fields:
5256 if field == constants.SF_NODE:
5258 elif field == constants.SF_TYPE:
5259 val = self.op.storage_type
5260 elif field in field_idx:
5261 val = row[field_idx[field]]
5263 raise errors.ParameterError(field)
5272 class _InstanceQuery(_QueryBase):
5273 FIELDS = query.INSTANCE_FIELDS
5275 def ExpandNames(self, lu):
5276 lu.needed_locks = {}
5277 lu.share_locks = _ShareAll()
5280 self.wanted = _GetWantedInstances(lu, self.names)
5282 self.wanted = locking.ALL_SET
5284 self.do_locking = (self.use_locking and
5285 query.IQ_LIVE in self.requested_data)
5287 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5288 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5289 lu.needed_locks[locking.LEVEL_NODE] = []
5290 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5292 self.do_grouplocks = (self.do_locking and
5293 query.IQ_NODES in self.requested_data)
5295 def DeclareLocks(self, lu, level):
5297 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5298 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5300 # Lock all groups used by instances optimistically; this requires going
5301 # via the node before it's locked, requiring verification later on
5302 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5304 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5305 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5306 elif level == locking.LEVEL_NODE:
5307 lu._LockInstancesNodes() # pylint: disable=W0212
5309 @staticmethod
5310 def _CheckGroupLocks(lu):
5311 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5312 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5314 # Check if node groups for locked instances are still correct
5315 for instance_name in owned_instances:
5316 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5318 def _GetQueryData(self, lu):
5319 """Computes the list of instances and their attributes.
5322 if self.do_grouplocks:
5323 self._CheckGroupLocks(lu)
5325 cluster = lu.cfg.GetClusterInfo()
5326 all_info = lu.cfg.GetAllInstancesInfo()
5328 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5330 instance_list = [all_info[name] for name in instance_names]
5331 nodes = frozenset(itertools.chain(*(inst.all_nodes
5332 for inst in instance_list)))
5333 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5336 wrongnode_inst = set()
5338 # Gather data as requested
5339 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5341 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5343 result = node_data[name]
5345 # offline nodes will be in both lists
5346 assert result.fail_msg
5347 offline_nodes.append(name)
5349 bad_nodes.append(name)
5350 elif result.payload:
5351 for inst in result.payload:
5352 if inst in all_info:
5353 if all_info[inst].primary_node == name:
5354 live_data.update(result.payload)
5356 wrongnode_inst.add(inst)
5358 # orphan instance; we don't list it here as we don't
5359 # handle this case yet in the output of instance listing
5360 logging.warning("Orphan instance '%s' found on node %s",
5362 # else no instance is alive
5366 if query.IQ_DISKUSAGE in self.requested_data:
5367 disk_usage = dict((inst.name,
5368 _ComputeDiskSize(inst.disk_template,
5369 [{constants.IDISK_SIZE: disk.size}
5370 for disk in inst.disks]))
5371 for inst in instance_list)
5375 if query.IQ_CONSOLE in self.requested_data:
5377 for inst in instance_list:
5378 if inst.name in live_data:
5379 # Instance is running
5380 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5382 consinfo[inst.name] = None
5383 assert set(consinfo.keys()) == set(instance_names)
5387 if query.IQ_NODES in self.requested_data:
5388 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5390 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5391 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5392 for uuid in set(map(operator.attrgetter("group"),
5398 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5399 disk_usage, offline_nodes, bad_nodes,
5400 live_data, wrongnode_inst, consinfo,
5404 class LUQuery(NoHooksLU):
5405 """Query for resources/items of a certain kind.
5408 # pylint: disable=W0142
5411 def CheckArguments(self):
5412 qcls = _GetQueryImplementation(self.op.what)
5414 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5416 def ExpandNames(self):
5417 self.impl.ExpandNames(self)
5419 def DeclareLocks(self, level):
5420 self.impl.DeclareLocks(self, level)
5422 def Exec(self, feedback_fn):
5423 return self.impl.NewStyleQuery(self)
5426 class LUQueryFields(NoHooksLU):
5427 """Query for resources/items of a certain kind.
5430 # pylint: disable=W0142
5433 def CheckArguments(self):
5434 self.qcls = _GetQueryImplementation(self.op.what)
5436 def ExpandNames(self):
5437 self.needed_locks = {}
5439 def Exec(self, feedback_fn):
5440 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5443 class LUNodeModifyStorage(NoHooksLU):
5444 """Logical unit for modifying a storage volume on a node.
5449 def CheckArguments(self):
5450 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5452 storage_type = self.op.storage_type
5454 try:
5455 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5456 except KeyError:
5457 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5458 " modified" % storage_type,
5461 diff = set(self.op.changes.keys()) - modifiable
5462 if diff:
5463 raise errors.OpPrereqError("The following fields can not be modified for"
5464 " storage units of type '%s': %r" %
5465 (storage_type, list(diff)),
5468 def ExpandNames(self):
5469 self.needed_locks = {
5470 locking.LEVEL_NODE: self.op.node_name,
5473 def Exec(self, feedback_fn):
5474 """Computes the list of nodes and their attributes.
5477 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5478 result = self.rpc.call_storage_modify(self.op.node_name,
5479 self.op.storage_type, st_args,
5480 self.op.name, self.op.changes)
5481 result.Raise("Failed to modify storage unit '%s' on %s" %
5482 (self.op.name, self.op.node_name))
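# Illustrative sketch (editor's addition): only fields listed in
# constants.MODIFIABLE_STORAGE_FIELDS for the given storage type may be
# changed; for LVM physical volumes that is typically the "allocatable"
# flag. Hypothetical opcode submitted by a client:
#
#   op = opcodes.OpNodeModifyStorage(node_name="node1.example.com",
#                                    storage_type=constants.ST_LVM_PV,
#                                    name="/dev/sdb1",
#                                    changes={constants.SF_ALLOCATABLE: False})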
5485 class LUNodeAdd(LogicalUnit):
5486 """Logical unit for adding node to the cluster.
5490 HTYPE = constants.HTYPE_NODE
5491 _NFLAGS = ["master_capable", "vm_capable"]
5493 def CheckArguments(self):
5494 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5495 # validate/normalize the node name
5496 self.hostname = netutils.GetHostname(name=self.op.node_name,
5497 family=self.primary_ip_family)
5498 self.op.node_name = self.hostname.name
5500 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5501 raise errors.OpPrereqError("Cannot readd the master node",
5504 if self.op.readd and self.op.group:
5505 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5506 " being readded", errors.ECODE_INVAL)
5508 def BuildHooksEnv(self):
5511 This will run on all nodes before, and on all nodes + the new node after.
5515 "OP_TARGET": self.op.node_name,
5516 "NODE_NAME": self.op.node_name,
5517 "NODE_PIP": self.op.primary_ip,
5518 "NODE_SIP": self.op.secondary_ip,
5519 "MASTER_CAPABLE": str(self.op.master_capable),
5520 "VM_CAPABLE": str(self.op.vm_capable),
5523 def BuildHooksNodes(self):
5524 """Build hooks nodes.
5527 # Exclude added node
5528 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5529 post_nodes = pre_nodes + [self.op.node_name, ]
5531 return (pre_nodes, post_nodes)
5533 def CheckPrereq(self):
5534 """Check prerequisites.
5537 - the new node is not already in the config
5539 - its parameters (single/dual homed) matches the cluster
5541 Any errors are signaled by raising errors.OpPrereqError.
5545 hostname = self.hostname
5546 node = hostname.name
5547 primary_ip = self.op.primary_ip = hostname.ip
5548 if self.op.secondary_ip is None:
5549 if self.primary_ip_family == netutils.IP6Address.family:
5550 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5551 " IPv4 address must be given as secondary",
5553 self.op.secondary_ip = primary_ip
5555 secondary_ip = self.op.secondary_ip
5556 if not netutils.IP4Address.IsValid(secondary_ip):
5557 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5558 " address" % secondary_ip, errors.ECODE_INVAL)
5560 node_list = cfg.GetNodeList()
5561 if not self.op.readd and node in node_list:
5562 raise errors.OpPrereqError("Node %s is already in the configuration" %
5563 node, errors.ECODE_EXISTS)
5564 elif self.op.readd and node not in node_list:
5565 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5568 self.changed_primary_ip = False
5570 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5571 if self.op.readd and node == existing_node_name:
5572 if existing_node.secondary_ip != secondary_ip:
5573 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5574 " address configuration as before",
5576 if existing_node.primary_ip != primary_ip:
5577 self.changed_primary_ip = True
5581 if (existing_node.primary_ip == primary_ip or
5582 existing_node.secondary_ip == primary_ip or
5583 existing_node.primary_ip == secondary_ip or
5584 existing_node.secondary_ip == secondary_ip):
5585 raise errors.OpPrereqError("New node ip address(es) conflict with"
5586 " existing node %s" % existing_node.name,
5587 errors.ECODE_NOTUNIQUE)
5589 # After this 'if' block, None is no longer a valid value for the
5590 # _capable op attributes
5592 old_node = self.cfg.GetNodeInfo(node)
5593 assert old_node is not None, "Can't retrieve locked node %s" % node
5594 for attr in self._NFLAGS:
5595 if getattr(self.op, attr) is None:
5596 setattr(self.op, attr, getattr(old_node, attr))
5598 for attr in self._NFLAGS:
5599 if getattr(self.op, attr) is None:
5600 setattr(self.op, attr, True)
5602 if self.op.readd and not self.op.vm_capable:
5603 pri, sec = cfg.GetNodeInstances(node)
5604 if pri or sec:
5605 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5606 " flag set to false, but it already holds"
5607 " instances" % node,
5610 # check that the type of the node (single versus dual homed) is the
5611 # same as for the master
5612 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5613 master_singlehomed = myself.secondary_ip == myself.primary_ip
5614 newbie_singlehomed = secondary_ip == primary_ip
5615 if master_singlehomed != newbie_singlehomed:
5616 if master_singlehomed:
5617 raise errors.OpPrereqError("The master has no secondary ip but the"
5618 " new node has one",
5621 raise errors.OpPrereqError("The master has a secondary ip but the"
5622 " new node doesn't have one",
5625 # checks reachability
5626 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5627 raise errors.OpPrereqError("Node not reachable by ping",
5628 errors.ECODE_ENVIRON)
5630 if not newbie_singlehomed:
5631 # check reachability from my secondary ip to newbie's secondary ip
5632 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5633 source=myself.secondary_ip):
5634 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5635 " based ping to node daemon port",
5636 errors.ECODE_ENVIRON)
5643 if self.op.master_capable:
5644 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5645 else:
5646 self.master_candidate = False
5649 self.new_node = old_node
5651 node_group = cfg.LookupNodeGroup(self.op.group)
5652 self.new_node = objects.Node(name=node,
5653 primary_ip=primary_ip,
5654 secondary_ip=secondary_ip,
5655 master_candidate=self.master_candidate,
5656 offline=False, drained=False,
5659 if self.op.ndparams:
5660 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5662 if self.op.hv_state:
5663 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5665 if self.op.disk_state:
5666 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5668 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5669 # it a property on the base class.
5670 result = rpc.DnsOnlyRunner().call_version([node])[node]
5671 result.Raise("Can't get version information from node %s" % node)
5672 if constants.PROTOCOL_VERSION == result.payload:
5673 logging.info("Communication to node %s fine, sw version %s match",
5674 node, result.payload)
5676 raise errors.OpPrereqError("Version mismatch master version %s,"
5677 " node version %s" %
5678 (constants.PROTOCOL_VERSION, result.payload),
5679 errors.ECODE_ENVIRON)
5681 def Exec(self, feedback_fn):
5682 """Adds the new node to the cluster.
5685 new_node = self.new_node
5686 node = new_node.name
5688 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5692 # We are adding a new node, so we assume it's powered
5692 new_node.powered = True
5694 # for re-adds, reset the offline/drained/master-candidate flags;
5695 # we need to reset here, otherwise offline would prevent RPC calls
5696 # later in the procedure; this also means that if the re-add
5697 # fails, we are left with a non-offlined, broken node
5699 new_node.drained = new_node.offline = False # pylint: disable=W0201
5700 self.LogInfo("Readding a node, the offline/drained flags were reset")
5701 # if we demote the node, we do cleanup later in the procedure
5702 new_node.master_candidate = self.master_candidate
5703 if self.changed_primary_ip:
5704 new_node.primary_ip = self.op.primary_ip
5706 # copy the master/vm_capable flags
5707 for attr in self._NFLAGS:
5708 setattr(new_node, attr, getattr(self.op, attr))
5710 # notify the user about any possible mc promotion
5711 if new_node.master_candidate:
5712 self.LogInfo("Node will be a master candidate")
5714 if self.op.ndparams:
5715 new_node.ndparams = self.op.ndparams
5717 new_node.ndparams = {}
5719 if self.op.hv_state:
5720 new_node.hv_state_static = self.new_hv_state
5722 if self.op.disk_state:
5723 new_node.disk_state_static = self.new_disk_state
5725 # Add node to our /etc/hosts, and add key to known_hosts
5726 if self.cfg.GetClusterInfo().modify_etc_hosts:
5727 master_node = self.cfg.GetMasterNode()
5728 result = self.rpc.call_etc_hosts_modify(master_node,
5729 constants.ETC_HOSTS_ADD,
5732 result.Raise("Can't update hosts file with new host data")
5734 if new_node.secondary_ip != new_node.primary_ip:
5735 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5738 node_verify_list = [self.cfg.GetMasterNode()]
5739 node_verify_param = {
5740 constants.NV_NODELIST: ([node], {}),
5741 # TODO: do a node-net-test as well?
5744 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5745 self.cfg.GetClusterName())
5746 for verifier in node_verify_list:
5747 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5748 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5750 for failed in nl_payload:
5751 feedback_fn("ssh/hostname verification failed"
5752 " (checking from %s): %s" %
5753 (verifier, nl_payload[failed]))
5754 raise errors.OpExecError("ssh/hostname verification failed")
5757 _RedistributeAncillaryFiles(self)
5758 self.context.ReaddNode(new_node)
5759 # make sure we redistribute the config
5760 self.cfg.Update(new_node, feedback_fn)
5761 # and make sure the new node will not have old files around
5762 if not new_node.master_candidate:
5763 result = self.rpc.call_node_demote_from_mc(new_node.name)
5764 msg = result.fail_msg
5766 self.LogWarning("Node failed to demote itself from master"
5767 " candidate status: %s" % msg)
5769 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5770 additional_vm=self.op.vm_capable)
5771 self.context.AddNode(new_node, self.proc.GetECId())
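
# Illustrative sketch (not used by this module): the reachability checks in
# LUNodeAdd above go through netutils.TcpPing against the node daemon port.
# The standalone helper below shows the general idea using only the standard
# library; its name, signature and default timeout are assumptions made for
# illustration, not part of the Ganeti API.
import socket


def _example_tcp_ping(target_ip, port, source_ip=None, timeout=5.0):
  """Simplified stand-in for a TcpPing-style reachability probe.

  Tries to open a TCP connection to target_ip:port, optionally binding to
  source_ip first, and reports plain success/failure.

  """
  sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  sock.settimeout(timeout)
  try:
    if source_ip:
      sock.bind((source_ip, 0))
    sock.connect((target_ip, port))
    return True
  except socket.error:
    return False
  finally:
    sock.close()
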
5774 class LUNodeSetParams(LogicalUnit):
5775 """Modifies the parameters of a node.
5777 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5778 to the node role (as _ROLE_*)
5779 @cvar _R2F: a dictionary from node role to tuples of flags
5780 @cvar _FLAGS: a list of attribute names corresponding to the flags
5783 HPATH = "node-modify"
5784 HTYPE = constants.HTYPE_NODE
5786 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5788 (True, False, False): _ROLE_CANDIDATE,
5789 (False, True, False): _ROLE_DRAINED,
5790 (False, False, True): _ROLE_OFFLINE,
5791 (False, False, False): _ROLE_REGULAR,
5793 _R2F = dict((v, k) for k, v in _F2R.items())
5794 _FLAGS = ["master_candidate", "drained", "offline"]
5796 def CheckArguments(self):
5797 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5798 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5799 self.op.master_capable, self.op.vm_capable,
5800 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5802 if all_mods.count(None) == len(all_mods):
5803 raise errors.OpPrereqError("Please pass at least one modification",
5805 if all_mods.count(True) > 1:
5806 raise errors.OpPrereqError("Can't set the node into more than one"
5807 " state at the same time",
5810 # Boolean value that tells us whether we might be demoting from MC
5811 self.might_demote = (self.op.master_candidate == False or
5812 self.op.offline == True or
5813 self.op.drained == True or
5814 self.op.master_capable == False)
5816 if self.op.secondary_ip:
5817 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5818 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5819 " address" % self.op.secondary_ip,
5822 self.lock_all = self.op.auto_promote and self.might_demote
5823 self.lock_instances = self.op.secondary_ip is not None
5825 def _InstanceFilter(self, instance):
5826 """Filter for getting affected instances.
5829 return (instance.disk_template in constants.DTS_INT_MIRROR and
5830 self.op.node_name in instance.all_nodes)
5832 def ExpandNames(self):
5834 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5836 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5838 # Since modifying a node can have severe effects on currently running
5839 # operations, the resource lock is acquired in at least shared mode
5840 self.needed_locks[locking.LEVEL_NODE_RES] = \
5841 self.needed_locks[locking.LEVEL_NODE]
5843 # Get node resource and instance locks in shared mode; they are not used
5844 # for anything but read-only access
5845 self.share_locks[locking.LEVEL_NODE_RES] = 1
5846 self.share_locks[locking.LEVEL_INSTANCE] = 1
5848 if self.lock_instances:
5849 self.needed_locks[locking.LEVEL_INSTANCE] = \
5850 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5852 def BuildHooksEnv(self):
5855 This runs on the master node.
5859 "OP_TARGET": self.op.node_name,
5860 "MASTER_CANDIDATE": str(self.op.master_candidate),
5861 "OFFLINE": str(self.op.offline),
5862 "DRAINED": str(self.op.drained),
5863 "MASTER_CAPABLE": str(self.op.master_capable),
5864 "VM_CAPABLE": str(self.op.vm_capable),
5867 def BuildHooksNodes(self):
5868 """Build hooks nodes.
5871 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5874 def CheckPrereq(self):
5875 """Check prerequisites.
5877 This only checks the instance list against the existing names.
5880 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5882 if self.lock_instances:
5883 affected_instances = \
5884 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5886 # Verify instance locks
5887 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5888 wanted_instances = frozenset(affected_instances.keys())
5889 if wanted_instances - owned_instances:
5890 raise errors.OpPrereqError("Instances affected by changing node %s's"
5891 " secondary IP address have changed since"
5892 " locks were acquired, wanted '%s', have"
5893 " '%s'; retry the operation" %
5895 utils.CommaJoin(wanted_instances),
5896 utils.CommaJoin(owned_instances)),
5899 affected_instances = None
5901 if (self.op.master_candidate is not None or
5902 self.op.drained is not None or
5903 self.op.offline is not None):
5904 # we can't change the master's node flags
5905 if self.op.node_name == self.cfg.GetMasterNode():
5906 raise errors.OpPrereqError("The master role can be changed"
5907 " only via master-failover",
5910 if self.op.master_candidate and not node.master_capable:
5911 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5912 " it a master candidate" % node.name,
5915 if self.op.vm_capable == False:
5916 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5918 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5919 " the vm_capable flag" % node.name,
5922 if node.master_candidate and self.might_demote and not self.lock_all:
5923 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5924 # check if, after removing the current node, we're missing master candidates
5926 (mc_remaining, mc_should, _) = \
5927 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5928 if mc_remaining < mc_should:
5929 raise errors.OpPrereqError("Not enough master candidates, please"
5930 " pass auto promote option to allow"
5931 " promotion", errors.ECODE_STATE)
5933 self.old_flags = old_flags = (node.master_candidate,
5934 node.drained, node.offline)
5935 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5936 self.old_role = old_role = self._F2R[old_flags]
5938 # Check for ineffective changes
5939 for attr in self._FLAGS:
5940 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5941 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5942 setattr(self.op, attr, None)
5944 # Past this point, any flag change to False means a transition
5945 # away from the respective state, as only real changes are kept
5947 # TODO: We might query the real power state if it supports OOB
5948 if _SupportsOob(self.cfg, node):
5949 if self.op.offline is False and not (node.powered or
5950 self.op.powered == True):
5951 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5952 " offline status can be reset") %
5954 elif self.op.powered is not None:
5955 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5956 " as it does not support out-of-band"
5957 " handling") % self.op.node_name)
5959 # If we're being de-offlined or un-drained, we'll promote ourselves to master candidate if needed
5960 if (self.op.drained == False or self.op.offline == False or
5961 (self.op.master_capable and not node.master_capable)):
5962 if _DecideSelfPromotion(self):
5963 self.op.master_candidate = True
5964 self.LogInfo("Auto-promoting node to master candidate")
5966 # If we're no longer master capable, we'll demote ourselves from MC
5967 if self.op.master_capable == False and node.master_candidate:
5968 self.LogInfo("Demoting from master candidate")
5969 self.op.master_candidate = False
5972 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5973 if self.op.master_candidate:
5974 new_role = self._ROLE_CANDIDATE
5975 elif self.op.drained:
5976 new_role = self._ROLE_DRAINED
5977 elif self.op.offline:
5978 new_role = self._ROLE_OFFLINE
5979 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5980 # False is still in new flags, which means we're un-setting (the offline state, for example)
5982 new_role = self._ROLE_REGULAR
5983 else: # no new flags, nothing, keep old role
5986 self.new_role = new_role
5988 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5989 # Trying to transition out of offline status
5990 result = self.rpc.call_version([node.name])[node.name]
5992 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5993 " to report its version: %s" %
5994 (node.name, result.fail_msg),
5997 self.LogWarning("Transitioning node from offline to online state"
5998 " without using re-add. Please make sure the node"
6001 if self.op.secondary_ip:
6002 # Ok even without locking, because this can't be changed by any LU
6003 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6004 master_singlehomed = master.secondary_ip == master.primary_ip
6005 if master_singlehomed and self.op.secondary_ip:
6006 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
6007 " homed cluster", errors.ECODE_INVAL)
6009 assert not (frozenset(affected_instances) -
6010 self.owned_locks(locking.LEVEL_INSTANCE))
6013 if affected_instances:
6014 raise errors.OpPrereqError("Cannot change secondary IP address:"
6015 " offline node has instances (%s)"
6016 " configured to use it" %
6017 utils.CommaJoin(affected_instances.keys()))
6019 # On online nodes, check that no instances are running, and that
6020 # the node has the new ip and we can reach it.
6021 for instance in affected_instances.values():
6022 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6023 msg="cannot change secondary ip")
6025 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6026 if master.name != node.name:
6027 # check reachability from master secondary ip to new secondary ip
6028 if not netutils.TcpPing(self.op.secondary_ip,
6029 constants.DEFAULT_NODED_PORT,
6030 source=master.secondary_ip):
6031 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6032 " based ping to node daemon port",
6033 errors.ECODE_ENVIRON)
6035 if self.op.ndparams:
6036 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6037 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6038 self.new_ndparams = new_ndparams
6040 if self.op.hv_state:
6041 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6042 self.node.hv_state_static)
6044 if self.op.disk_state:
6045 self.new_disk_state = \
6046 _MergeAndVerifyDiskState(self.op.disk_state,
6047 self.node.disk_state_static)
6049 def Exec(self, feedback_fn):
6054 old_role = self.old_role
6055 new_role = self.new_role
6059 if self.op.ndparams:
6060 node.ndparams = self.new_ndparams
6062 if self.op.powered is not None:
6063 node.powered = self.op.powered
6065 if self.op.hv_state:
6066 node.hv_state_static = self.new_hv_state
6068 if self.op.disk_state:
6069 node.disk_state_static = self.new_disk_state
6071 for attr in ["master_capable", "vm_capable"]:
6072 val = getattr(self.op, attr)
6074 setattr(node, attr, val)
6075 result.append((attr, str(val)))
6077 if new_role != old_role:
6078 # Tell the node to demote itself, if no longer MC and not offline
6079 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6080 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6082 self.LogWarning("Node failed to demote itself: %s", msg)
6084 new_flags = self._R2F[new_role]
6085 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6087 result.append((desc, str(nf)))
6088 (node.master_candidate, node.drained, node.offline) = new_flags
6090 # we locked all nodes, so we adjust the candidate pool before updating this node
6092 _AdjustCandidatePool(self, [node.name])
6094 if self.op.secondary_ip:
6095 node.secondary_ip = self.op.secondary_ip
6096 result.append(("secondary_ip", self.op.secondary_ip))
6098 # this will trigger configuration file update, if needed
6099 self.cfg.Update(node, feedback_fn)
6101 # this will trigger job queue propagation or cleanup if the mc flag changed
6103 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6104 self.context.ReaddNode(node)
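
# Illustrative sketch (not used by this module): LUNodeSetParams above encodes
# a node's role as a tuple of the (master_candidate, drained, offline) flags
# via the _F2R/_R2F class attributes. The self-contained snippet below mirrors
# that mapping to make the round trip explicit; the dictionaries and role
# names are restated here purely for illustration.

# Flag tuples are (master_candidate, drained, offline); at most one flag may
# be set at a time, and the all-False tuple is the regular role.
_EXAMPLE_ROLES = {
  (True, False, False): "candidate",
  (False, True, False): "drained",
  (False, False, True): "offline",
  (False, False, False): "regular",
}
_EXAMPLE_FLAGS = dict((v, k) for k, v in _EXAMPLE_ROLES.items())


def _example_role_roundtrip(master_candidate, drained, offline):
  """Maps a flag tuple to a role name and back, as _F2R/_R2F do."""
  role = _EXAMPLE_ROLES[(master_candidate, drained, offline)]
  return role, _EXAMPLE_FLAGS[role]
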
6109 class LUNodePowercycle(NoHooksLU):
6110 """Powercycles a node.
6115 def CheckArguments(self):
6116 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6117 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6118 raise errors.OpPrereqError("The node is the master and the force"
6119 " parameter was not set",
6122 def ExpandNames(self):
6123 """Locking for PowercycleNode.
6125 This is a last-resort option and shouldn't block on other
6126 jobs. Therefore, we grab no locks.
6129 self.needed_locks = {}
6131 def Exec(self, feedback_fn):
6135 result = self.rpc.call_node_powercycle(self.op.node_name,
6136 self.cfg.GetHypervisorType())
6137 result.Raise("Failed to schedule the reboot")
6138 return result.payload
6141 class LUClusterQuery(NoHooksLU):
6142 """Query cluster configuration.
6147 def ExpandNames(self):
6148 self.needed_locks = {}
6150 def Exec(self, feedback_fn):
6151 """Return cluster config.
6154 cluster = self.cfg.GetClusterInfo()
6157 # Filter just for enabled hypervisors
6158 for os_name, hv_dict in cluster.os_hvp.items():
6159 os_hvp[os_name] = {}
6160 for hv_name, hv_params in hv_dict.items():
6161 if hv_name in cluster.enabled_hypervisors:
6162 os_hvp[os_name][hv_name] = hv_params
6164 # Convert ip_family to ip_version
6165 primary_ip_version = constants.IP4_VERSION
6166 if cluster.primary_ip_family == netutils.IP6Address.family:
6167 primary_ip_version = constants.IP6_VERSION
6170 "software_version": constants.RELEASE_VERSION,
6171 "protocol_version": constants.PROTOCOL_VERSION,
6172 "config_version": constants.CONFIG_VERSION,
6173 "os_api_version": max(constants.OS_API_VERSIONS),
6174 "export_version": constants.EXPORT_VERSION,
6175 "architecture": runtime.GetArchInfo(),
6176 "name": cluster.cluster_name,
6177 "master": cluster.master_node,
6178 "default_hypervisor": cluster.primary_hypervisor,
6179 "enabled_hypervisors": cluster.enabled_hypervisors,
6180 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6181 for hypervisor_name in cluster.enabled_hypervisors]),
6183 "beparams": cluster.beparams,
6184 "osparams": cluster.osparams,
6185 "ipolicy": cluster.ipolicy,
6186 "nicparams": cluster.nicparams,
6187 "ndparams": cluster.ndparams,
6188 "diskparams": cluster.diskparams,
6189 "candidate_pool_size": cluster.candidate_pool_size,
6190 "master_netdev": cluster.master_netdev,
6191 "master_netmask": cluster.master_netmask,
6192 "use_external_mip_script": cluster.use_external_mip_script,
6193 "volume_group_name": cluster.volume_group_name,
6194 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6195 "file_storage_dir": cluster.file_storage_dir,
6196 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6197 "maintain_node_health": cluster.maintain_node_health,
6198 "ctime": cluster.ctime,
6199 "mtime": cluster.mtime,
6200 "uuid": cluster.uuid,
6201 "tags": list(cluster.GetTags()),
6202 "uid_pool": cluster.uid_pool,
6203 "default_iallocator": cluster.default_iallocator,
6204 "reserved_lvs": cluster.reserved_lvs,
6205 "primary_ip_version": primary_ip_version,
6206 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6207 "hidden_os": cluster.hidden_os,
6208 "blacklisted_os": cluster.blacklisted_os,
6214 class LUClusterConfigQuery(NoHooksLU):
6215 """Return configuration values.
6220 def CheckArguments(self):
6221 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6223 def ExpandNames(self):
6224 self.cq.ExpandNames(self)
6226 def DeclareLocks(self, level):
6227 self.cq.DeclareLocks(self, level)
6229 def Exec(self, feedback_fn):
6230 result = self.cq.OldStyleQuery(self)
6232 assert len(result) == 1
6237 class _ClusterQuery(_QueryBase):
6238 FIELDS = query.CLUSTER_FIELDS
6240 #: Do not sort (there is only one item)
6243 def ExpandNames(self, lu):
6244 lu.needed_locks = {}
6246 # The following variables interact with _QueryBase._GetNames
6247 self.wanted = locking.ALL_SET
6248 self.do_locking = self.use_locking
6251 raise errors.OpPrereqError("Can not use locking for cluster queries",
6254 def DeclareLocks(self, lu, level):
6257 def _GetQueryData(self, lu):
6258 """Computes the list of nodes and their attributes.
6261 # Locking is not used
6262 assert not (compat.any(lu.glm.is_owned(level)
6263 for level in locking.LEVELS
6264 if level != locking.LEVEL_CLUSTER) or
6265 self.do_locking or self.use_locking)
6267 if query.CQ_CONFIG in self.requested_data:
6268 cluster = lu.cfg.GetClusterInfo()
6270 cluster = NotImplemented
6272 if query.CQ_QUEUE_DRAINED in self.requested_data:
6273 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6275 drain_flag = NotImplemented
6277 if query.CQ_WATCHER_PAUSE in self.requested_data:
6278 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6280 watcher_pause = NotImplemented
6282 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6285 class LUInstanceActivateDisks(NoHooksLU):
6286 """Bring up an instance's disks.
6291 def ExpandNames(self):
6292 self._ExpandAndLockInstance()
6293 self.needed_locks[locking.LEVEL_NODE] = []
6294 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6296 def DeclareLocks(self, level):
6297 if level == locking.LEVEL_NODE:
6298 self._LockInstancesNodes()
6300 def CheckPrereq(self):
6301 """Check prerequisites.
6303 This checks that the instance is in the cluster.
6306 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6307 assert self.instance is not None, \
6308 "Cannot retrieve locked instance %s" % self.op.instance_name
6309 _CheckNodeOnline(self, self.instance.primary_node)
6311 def Exec(self, feedback_fn):
6312 """Activate the disks.
6315 disks_ok, disks_info = \
6316 _AssembleInstanceDisks(self, self.instance,
6317 ignore_size=self.op.ignore_size)
6319 raise errors.OpExecError("Cannot activate block devices")
6324 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6326 """Prepare the block devices for an instance.
6328 This sets up the block devices on all nodes.
6330 @type lu: L{LogicalUnit}
6331 @param lu: the logical unit on whose behalf we execute
6332 @type instance: L{objects.Instance}
6333 @param instance: the instance for whose disks we assemble
6334 @type disks: list of L{objects.Disk} or None
6335 @param disks: which disks to assemble (or all, if None)
6336 @type ignore_secondaries: boolean
6337 @param ignore_secondaries: if true, errors on secondary nodes
6338 won't result in an error return from the function
6339 @type ignore_size: boolean
6340 @param ignore_size: if true, the current known size of the disk
6341 will not be used during the disk activation, useful for cases
6342 when the size is wrong
6343 @return: False if the operation failed, otherwise a list of
6344 (host, instance_visible_name, node_visible_name)
6345 with the mapping from node devices to instance devices
6350 iname = instance.name
6351 disks = _ExpandCheckDisks(instance, disks)
6353 # With the two passes mechanism we try to reduce the window of
6354 # opportunity for the race condition of switching DRBD to primary
6355 # before handshaking occurred, but we do not eliminate it
6357 # The proper fix would be to wait (with some limits) until the
6358 # connection has been made and drbd transitions from WFConnection
6359 # into any other network-connected state (Connected, SyncTarget, SyncSource, etc.)
6362 # 1st pass, assemble on all nodes in secondary mode
6363 for idx, inst_disk in enumerate(disks):
6364 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6366 node_disk = node_disk.Copy()
6367 node_disk.UnsetSize()
6368 lu.cfg.SetDiskID(node_disk, node)
6369 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6371 msg = result.fail_msg
6373 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6374 " (is_primary=False, pass=1): %s",
6375 inst_disk.iv_name, node, msg)
6376 if not ignore_secondaries:
6379 # FIXME: race condition on drbd migration to primary
6381 # 2nd pass, do only the primary node
6382 for idx, inst_disk in enumerate(disks):
6385 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6386 if node != instance.primary_node:
6389 node_disk = node_disk.Copy()
6390 node_disk.UnsetSize()
6391 lu.cfg.SetDiskID(node_disk, node)
6392 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6394 msg = result.fail_msg
6396 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6397 " (is_primary=True, pass=2): %s",
6398 inst_disk.iv_name, node, msg)
6401 dev_path = result.payload
6403 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6405 # leave the disks configured for the primary node
6406 # this is a workaround that would be better fixed by
6407 # improving the logical/physical id handling
6409 lu.cfg.SetDiskID(disk, instance.primary_node)
6411 return disks_ok, device_info
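
# Illustrative sketch (not used by this module): the two-pass structure of
# _AssembleInstanceDisks above (all nodes in secondary mode first, then only
# the primary node in primary mode) is easy to miss inside the per-node loops.
# The helper below captures just that ordering with a caller-supplied assemble
# callback; the names and signature are assumptions made for illustration.
def _example_two_pass_assemble(disks, primary_node, nodes_of, assemble_fn):
  """Assembles disks on all nodes in secondary mode, then on the primary.

  @param disks: iterable of opaque disk objects
  @param primary_node: name of the primary node
  @param nodes_of: callable returning the node names a disk lives on
  @param assemble_fn: callable(node, disk, as_primary) returning success

  @return: True if every call succeeded

  """
  ok = True
  # 1st pass: every node in secondary (non-primary) mode
  for disk in disks:
    for node in nodes_of(disk):
      ok = assemble_fn(node, disk, False) and ok
  # 2nd pass: only the primary node, now in primary mode
  for disk in disks:
    ok = assemble_fn(primary_node, disk, True) and ok
  return ok
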
6414 def _StartInstanceDisks(lu, instance, force):
6415 """Start the disks of an instance.
6418 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6419 ignore_secondaries=force)
6421 _ShutdownInstanceDisks(lu, instance)
6422 if force is not None and not force:
6423 lu.proc.LogWarning("", hint="If the message above refers to a"
6425 " you can retry the operation using '--force'.")
6426 raise errors.OpExecError("Disk consistency error")
6429 class LUInstanceDeactivateDisks(NoHooksLU):
6430 """Shutdown an instance's disks.
6435 def ExpandNames(self):
6436 self._ExpandAndLockInstance()
6437 self.needed_locks[locking.LEVEL_NODE] = []
6438 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6440 def DeclareLocks(self, level):
6441 if level == locking.LEVEL_NODE:
6442 self._LockInstancesNodes()
6444 def CheckPrereq(self):
6445 """Check prerequisites.
6447 This checks that the instance is in the cluster.
6450 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6451 assert self.instance is not None, \
6452 "Cannot retrieve locked instance %s" % self.op.instance_name
6454 def Exec(self, feedback_fn):
6455 """Deactivate the disks
6458 instance = self.instance
6460 _ShutdownInstanceDisks(self, instance)
6462 _SafeShutdownInstanceDisks(self, instance)
6465 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6466 """Shutdown block devices of an instance.
6468 This function checks if an instance is running, before calling
6469 _ShutdownInstanceDisks.
6472 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6473 _ShutdownInstanceDisks(lu, instance, disks=disks)
6476 def _ExpandCheckDisks(instance, disks):
6477 """Return the instance disks selected by the disks list
6479 @type disks: list of L{objects.Disk} or None
6480 @param disks: selected disks
6481 @rtype: list of L{objects.Disk}
6482 @return: selected instance disks to act on
6486 return instance.disks
6488 if not set(disks).issubset(instance.disks):
6489 raise errors.ProgrammerError("Can only act on disks belonging to the"
6494 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6495 """Shutdown block devices of an instance.
6497 This does the shutdown on all nodes of the instance.
6499 If ignore_primary is false, errors on the primary node are reported as failures; otherwise they are ignored.
6504 disks = _ExpandCheckDisks(instance, disks)
6507 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6508 lu.cfg.SetDiskID(top_disk, node)
6509 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6510 msg = result.fail_msg
6512 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6513 disk.iv_name, node, msg)
6514 if ((node == instance.primary_node and not ignore_primary) or
6515 (node != instance.primary_node and not result.offline)):
6520 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6521 """Checks if a node has enough free memory.
6523 This function checks if a given node has the needed amount of free
6524 memory. In case the node has less memory or we cannot get the
6525 information from the node, this function raises an OpPrereqError
6528 @type lu: C{LogicalUnit}
6529 @param lu: a logical unit from which we get configuration data
6531 @param node: the node to check
6532 @type reason: C{str}
6533 @param reason: string to use in the error message
6534 @type requested: C{int}
6535 @param requested: the amount of memory in MiB to check for
6536 @type hypervisor_name: C{str}
6537 @param hypervisor_name: the hypervisor to ask for memory stats
6539 @return: node current free memory
6540 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6541 we cannot check the node
6544 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6545 nodeinfo[node].Raise("Can't get data from node %s" % node,
6546 prereq=True, ecode=errors.ECODE_ENVIRON)
6547 (_, _, (hv_info, )) = nodeinfo[node].payload
6549 free_mem = hv_info.get("memory_free", None)
6550 if not isinstance(free_mem, int):
6551 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6552 " was '%s'" % (node, free_mem),
6553 errors.ECODE_ENVIRON)
6554 if requested > free_mem:
6555 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6556 " needed %s MiB, available %s MiB" %
6557 (node, reason, requested, free_mem),
6562 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6563 """Checks if nodes have enough free disk space in the all VGs.
6565 This function checks if all given nodes have the needed amount of
6566 free disk. In case any node has less disk or we cannot get the
6567 information from the node, this function raises an OpPrereqError
6570 @type lu: C{LogicalUnit}
6571 @param lu: a logical unit from which we get configuration data
6572 @type nodenames: C{list}
6573 @param nodenames: the list of node names to check
6574 @type req_sizes: C{dict}
6575 @param req_sizes: the hash of vg and corresponding amount of disk in
6577 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6578 or we cannot check the node
6581 for vg, req_size in req_sizes.items():
6582 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
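
# Illustrative sketch (not used by this module): _CheckNodesFreeDiskPerVG
# above expects req_sizes as a mapping from volume-group name to the space
# needed in that group, in MiB. The helper below shows one plausible way such
# a dict could be built; the VG names and sizes in the usage comment are made
# up for illustration.
def _example_disk_requirements(disk_specs):
  """Builds a req_sizes-style dict (VG name -> total MiB) from disk specs.

  @param disk_specs: iterable of (vg_name, size_mib) pairs

  """
  req_sizes = {}
  for vg_name, size_mib in disk_specs:
    req_sizes[vg_name] = req_sizes.get(vg_name, 0) + size_mib
  return req_sizes

# e.g. _example_disk_requirements([("xenvg", 10240), ("xenvg", 2048)])
# returns {"xenvg": 12288}, which is the shape req_sizes is expected to have.
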
6585 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6586 """Checks if nodes have enough free disk space in the specified VG.
6588 This function checks if all given nodes have the needed amount of
6589 free disk. In case any node has less disk or we cannot get the
6590 information from the node, this function raises an OpPrereqError
6593 @type lu: C{LogicalUnit}
6594 @param lu: a logical unit from which we get configuration data
6595 @type nodenames: C{list}
6596 @param nodenames: the list of node names to check
6598 @param vg: the volume group to check
6599 @type requested: C{int}
6600 @param requested: the amount of disk in MiB to check for
6601 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6602 or we cannot check the node
6605 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6606 for node in nodenames:
6607 info = nodeinfo[node]
6608 info.Raise("Cannot get current information from node %s" % node,
6609 prereq=True, ecode=errors.ECODE_ENVIRON)
6610 (_, (vg_info, ), _) = info.payload
6611 vg_free = vg_info.get("vg_free", None)
6612 if not isinstance(vg_free, int):
6613 raise errors.OpPrereqError("Can't compute free disk space on node"
6614 " %s for vg %s, result was '%s'" %
6615 (node, vg, vg_free), errors.ECODE_ENVIRON)
6616 if requested > vg_free:
6617 raise errors.OpPrereqError("Not enough disk space on target node %s"
6618 " vg %s: required %d MiB, available %d MiB" %
6619 (node, vg, requested, vg_free),
6623 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6624 """Checks if nodes have enough physical CPUs
6626 This function checks if all given nodes have the needed number of
6627 physical CPUs. In case any node has fewer CPUs or we cannot get the
6628 information from the node, this function raises an OpPrereqError
6631 @type lu: C{LogicalUnit}
6632 @param lu: a logical unit from which we get configuration data
6633 @type nodenames: C{list}
6634 @param nodenames: the list of node names to check
6635 @type requested: C{int}
6636 @param requested: the minimum acceptable number of physical CPUs
6637 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6638 or we cannot check the node
6641 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6642 for node in nodenames:
6643 info = nodeinfo[node]
6644 info.Raise("Cannot get current information from node %s" % node,
6645 prereq=True, ecode=errors.ECODE_ENVIRON)
6646 (_, _, (hv_info, )) = info.payload
6647 num_cpus = hv_info.get("cpu_total", None)
6648 if not isinstance(num_cpus, int):
6649 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6650 " on node %s, result was '%s'" %
6651 (node, num_cpus), errors.ECODE_ENVIRON)
6652 if requested > num_cpus:
6653 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6654 "required" % (node, num_cpus, requested),
6658 class LUInstanceStartup(LogicalUnit):
6659 """Starts an instance.
6662 HPATH = "instance-start"
6663 HTYPE = constants.HTYPE_INSTANCE
6666 def CheckArguments(self):
6668 if self.op.beparams:
6669 # fill the beparams dict
6670 objects.UpgradeBeParams(self.op.beparams)
6671 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6673 def ExpandNames(self):
6674 self._ExpandAndLockInstance()
6675 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6677 def DeclareLocks(self, level):
6678 if level == locking.LEVEL_NODE_RES:
6679 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6681 def BuildHooksEnv(self):
6684 This runs on master, primary and secondary nodes of the instance.
6688 "FORCE": self.op.force,
6691 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6695 def BuildHooksNodes(self):
6696 """Build hooks nodes.
6699 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6702 def CheckPrereq(self):
6703 """Check prerequisites.
6705 This checks that the instance is in the cluster.
6708 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6709 assert self.instance is not None, \
6710 "Cannot retrieve locked instance %s" % self.op.instance_name
6713 if self.op.hvparams:
6714 # check hypervisor parameter syntax (locally)
6715 cluster = self.cfg.GetClusterInfo()
6716 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6717 filled_hvp = cluster.FillHV(instance)
6718 filled_hvp.update(self.op.hvparams)
6719 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6720 hv_type.CheckParameterSyntax(filled_hvp)
6721 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6723 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6725 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6727 if self.primary_offline and self.op.ignore_offline_nodes:
6728 self.proc.LogWarning("Ignoring offline primary node")
6730 if self.op.hvparams or self.op.beparams:
6731 self.proc.LogWarning("Overridden parameters are ignored")
6733 _CheckNodeOnline(self, instance.primary_node)
6735 bep = self.cfg.GetClusterInfo().FillBE(instance)
6736 bep.update(self.op.beparams)
6738 # check bridges existence
6739 _CheckInstanceBridgesExist(self, instance)
6741 remote_info = self.rpc.call_instance_info(instance.primary_node,
6743 instance.hypervisor)
6744 remote_info.Raise("Error checking node %s" % instance.primary_node,
6745 prereq=True, ecode=errors.ECODE_ENVIRON)
6746 if not remote_info.payload: # not running already
6747 _CheckNodeFreeMemory(self, instance.primary_node,
6748 "starting instance %s" % instance.name,
6749 bep[constants.BE_MINMEM], instance.hypervisor)
6751 def Exec(self, feedback_fn):
6752 """Start the instance.
6755 instance = self.instance
6756 force = self.op.force
6758 if not self.op.no_remember:
6759 self.cfg.MarkInstanceUp(instance.name)
6761 if self.primary_offline:
6762 assert self.op.ignore_offline_nodes
6763 self.proc.LogInfo("Primary node offline, marked instance as started")
6765 node_current = instance.primary_node
6767 _StartInstanceDisks(self, instance, force)
6770 self.rpc.call_instance_start(node_current,
6771 (instance, self.op.hvparams,
6773 self.op.startup_paused)
6774 msg = result.fail_msg
6776 _ShutdownInstanceDisks(self, instance)
6777 raise errors.OpExecError("Could not start instance: %s" % msg)
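
# Illustrative sketch (not used by this module): the startup path above
# follows an assemble-then-start pattern with an explicit rollback: if
# starting the instance fails, the freshly assembled disks are shut down
# again before the error is propagated. The helper below isolates that
# control flow; the callback parameters are assumptions made for
# illustration.
def _example_start_with_rollback(start_disks_fn, start_instance_fn,
                                 shutdown_disks_fn):
  """Starts disks, then the instance, rolling the disks back on failure."""
  start_disks_fn()
  error = start_instance_fn()  # returns an error message or None
  if error:
    shutdown_disks_fn()
    raise RuntimeError("Could not start instance: %s" % error)
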
6780 class LUInstanceReboot(LogicalUnit):
6781 """Reboot an instance.
6784 HPATH = "instance-reboot"
6785 HTYPE = constants.HTYPE_INSTANCE
6788 def ExpandNames(self):
6789 self._ExpandAndLockInstance()
6791 def BuildHooksEnv(self):
6794 This runs on master, primary and secondary nodes of the instance.
6798 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6799 "REBOOT_TYPE": self.op.reboot_type,
6800 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6803 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6807 def BuildHooksNodes(self):
6808 """Build hooks nodes.
6811 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6814 def CheckPrereq(self):
6815 """Check prerequisites.
6817 This checks that the instance is in the cluster.
6820 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6821 assert self.instance is not None, \
6822 "Cannot retrieve locked instance %s" % self.op.instance_name
6823 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6824 _CheckNodeOnline(self, instance.primary_node)
6826 # check bridges existence
6827 _CheckInstanceBridgesExist(self, instance)
6829 def Exec(self, feedback_fn):
6830 """Reboot the instance.
6833 instance = self.instance
6834 ignore_secondaries = self.op.ignore_secondaries
6835 reboot_type = self.op.reboot_type
6837 remote_info = self.rpc.call_instance_info(instance.primary_node,
6839 instance.hypervisor)
6840 remote_info.Raise("Error checking node %s" % instance.primary_node)
6841 instance_running = bool(remote_info.payload)
6843 node_current = instance.primary_node
6845 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6846 constants.INSTANCE_REBOOT_HARD]:
6847 for disk in instance.disks:
6848 self.cfg.SetDiskID(disk, node_current)
6849 result = self.rpc.call_instance_reboot(node_current, instance,
6851 self.op.shutdown_timeout)
6852 result.Raise("Could not reboot instance")
6854 if instance_running:
6855 result = self.rpc.call_instance_shutdown(node_current, instance,
6856 self.op.shutdown_timeout)
6857 result.Raise("Could not shutdown instance for full reboot")
6858 _ShutdownInstanceDisks(self, instance)
6860 self.LogInfo("Instance %s was already stopped, starting now",
6862 _StartInstanceDisks(self, instance, ignore_secondaries)
6863 result = self.rpc.call_instance_start(node_current,
6864 (instance, None, None), False)
6865 msg = result.fail_msg
6867 _ShutdownInstanceDisks(self, instance)
6868 raise errors.OpExecError("Could not start instance for"
6869 " full reboot: %s" % msg)
6871 self.cfg.MarkInstanceUp(instance.name)
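
# Illustrative sketch (not used by this module): LUInstanceReboot.Exec above
# distinguishes soft/hard reboots (delegated to the hypervisor while the
# instance keeps running) from a full reboot (shutdown, disk deactivation,
# then a fresh start). The helper below restates that dispatch in isolation;
# the string constants and callbacks are assumptions made for illustration.
def _example_reboot(reboot_type, instance_running, hypervisor_reboot_fn,
                    full_stop_fn, full_start_fn, log_fn):
  """Dispatches a reboot request along the lines of LUInstanceReboot."""
  soft_or_hard = reboot_type in ("soft", "hard")
  if instance_running and soft_or_hard:
    # the hypervisor reboots the instance in place
    hypervisor_reboot_fn()
  else:
    if instance_running:
      full_stop_fn()  # shut down the instance and deactivate its disks
    else:
      log_fn("Instance was already stopped, starting now")
    full_start_fn()  # activate disks and start the instance again
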
6874 class LUInstanceShutdown(LogicalUnit):
6875 """Shutdown an instance.
6878 HPATH = "instance-stop"
6879 HTYPE = constants.HTYPE_INSTANCE
6882 def ExpandNames(self):
6883 self._ExpandAndLockInstance()
6885 def BuildHooksEnv(self):
6888 This runs on master, primary and secondary nodes of the instance.
6891 env = _BuildInstanceHookEnvByObject(self, self.instance)
6892 env["TIMEOUT"] = self.op.timeout
6895 def BuildHooksNodes(self):
6896 """Build hooks nodes.
6899 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6902 def CheckPrereq(self):
6903 """Check prerequisites.
6905 This checks that the instance is in the cluster.
6908 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6909 assert self.instance is not None, \
6910 "Cannot retrieve locked instance %s" % self.op.instance_name
6912 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6914 self.primary_offline = \
6915 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6917 if self.primary_offline and self.op.ignore_offline_nodes:
6918 self.proc.LogWarning("Ignoring offline primary node")
6920 _CheckNodeOnline(self, self.instance.primary_node)
6922 def Exec(self, feedback_fn):
6923 """Shutdown the instance.
6926 instance = self.instance
6927 node_current = instance.primary_node
6928 timeout = self.op.timeout
6930 if not self.op.no_remember:
6931 self.cfg.MarkInstanceDown(instance.name)
6933 if self.primary_offline:
6934 assert self.op.ignore_offline_nodes
6935 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6937 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6938 msg = result.fail_msg
6940 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6942 _ShutdownInstanceDisks(self, instance)
6945 class LUInstanceReinstall(LogicalUnit):
6946 """Reinstall an instance.
6949 HPATH = "instance-reinstall"
6950 HTYPE = constants.HTYPE_INSTANCE
6953 def ExpandNames(self):
6954 self._ExpandAndLockInstance()
6956 def BuildHooksEnv(self):
6959 This runs on master, primary and secondary nodes of the instance.
6962 return _BuildInstanceHookEnvByObject(self, self.instance)
6964 def BuildHooksNodes(self):
6965 """Build hooks nodes.
6968 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6971 def CheckPrereq(self):
6972 """Check prerequisites.
6974 This checks that the instance is in the cluster and is not running.
6977 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6978 assert instance is not None, \
6979 "Cannot retrieve locked instance %s" % self.op.instance_name
6980 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6981 " offline, cannot reinstall")
6982 for node in instance.secondary_nodes:
6983 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6984 " cannot reinstall")
6986 if instance.disk_template == constants.DT_DISKLESS:
6987 raise errors.OpPrereqError("Instance '%s' has no disks" %
6988 self.op.instance_name,
6990 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6992 if self.op.os_type is not None:
6994 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6995 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6996 instance_os = self.op.os_type
6998 instance_os = instance.os
7000 nodelist = list(instance.all_nodes)
7002 if self.op.osparams:
7003 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7004 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7005 self.os_inst = i_osdict # the new dict (without defaults)
7009 self.instance = instance
7011 def Exec(self, feedback_fn):
7012 """Reinstall the instance.
7015 inst = self.instance
7017 if self.op.os_type is not None:
7018 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7019 inst.os = self.op.os_type
7020 # Write to configuration
7021 self.cfg.Update(inst, feedback_fn)
7023 _StartInstanceDisks(self, inst, None)
7025 feedback_fn("Running the instance OS create scripts...")
7026 # FIXME: pass debug option from opcode to backend
7027 result = self.rpc.call_instance_os_add(inst.primary_node,
7028 (inst, self.os_inst), True,
7029 self.op.debug_level)
7030 result.Raise("Could not install OS for instance %s on node %s" %
7031 (inst.name, inst.primary_node))
7033 _ShutdownInstanceDisks(self, inst)
7036 class LUInstanceRecreateDisks(LogicalUnit):
7037 """Recreate an instance's missing disks.
7040 HPATH = "instance-recreate-disks"
7041 HTYPE = constants.HTYPE_INSTANCE
7044 _MODIFYABLE = frozenset([
7045 constants.IDISK_SIZE,
7046 constants.IDISK_MODE,
7049 # New or changed disk parameters may have different semantics
7050 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7051 constants.IDISK_ADOPT,
7053 # TODO: Implement support changing VG while recreating
7055 constants.IDISK_METAVG,
7058 def CheckArguments(self):
7059 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7060 # Normalize and convert deprecated list of disk indices
7061 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7063 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7065 raise errors.OpPrereqError("Some disks have been specified more than"
7066 " once: %s" % utils.CommaJoin(duplicates),
7069 for (idx, params) in self.op.disks:
7070 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7071 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7073 raise errors.OpPrereqError("Parameters for disk %s try to change"
7074 " unmodifyable parameter(s): %s" %
7075 (idx, utils.CommaJoin(unsupported)),
7078 def ExpandNames(self):
7079 self._ExpandAndLockInstance()
7080 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7082 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7083 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7085 self.needed_locks[locking.LEVEL_NODE] = []
7086 self.needed_locks[locking.LEVEL_NODE_RES] = []
7088 def DeclareLocks(self, level):
7089 if level == locking.LEVEL_NODE:
7090 # if we replace the nodes, we only need to lock the old primary,
7091 # otherwise we need to lock all nodes for disk re-creation
7092 primary_only = bool(self.op.nodes)
7093 self._LockInstancesNodes(primary_only=primary_only)
7094 elif level == locking.LEVEL_NODE_RES:
7096 self.needed_locks[locking.LEVEL_NODE_RES] = \
7097 self.needed_locks[locking.LEVEL_NODE][:]
7099 def BuildHooksEnv(self):
7102 This runs on master, primary and secondary nodes of the instance.
7105 return _BuildInstanceHookEnvByObject(self, self.instance)
7107 def BuildHooksNodes(self):
7108 """Build hooks nodes.
7111 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7114 def CheckPrereq(self):
7115 """Check prerequisites.
7117 This checks that the instance is in the cluster and is not running.
7120 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7121 assert instance is not None, \
7122 "Cannot retrieve locked instance %s" % self.op.instance_name
7124 if len(self.op.nodes) != len(instance.all_nodes):
7125 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7126 " %d replacement nodes were specified" %
7127 (instance.name, len(instance.all_nodes),
7128 len(self.op.nodes)),
7130 assert instance.disk_template != constants.DT_DRBD8 or \
7131 len(self.op.nodes) == 2
7132 assert instance.disk_template != constants.DT_PLAIN or \
7133 len(self.op.nodes) == 1
7134 primary_node = self.op.nodes[0]
7136 primary_node = instance.primary_node
7137 _CheckNodeOnline(self, primary_node)
7139 if instance.disk_template == constants.DT_DISKLESS:
7140 raise errors.OpPrereqError("Instance '%s' has no disks" %
7141 self.op.instance_name, errors.ECODE_INVAL)
7143 # if we replace nodes *and* the old primary is offline, we don't check the instance state
7145 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7146 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7147 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7148 if not (self.op.nodes and old_pnode.offline):
7149 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7150 msg="cannot recreate disks")
7153 self.disks = dict(self.op.disks)
7155 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7157 maxidx = max(self.disks.keys())
7158 if maxidx >= len(instance.disks):
7159 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7162 if (self.op.nodes and
7163 sorted(self.disks.keys()) != range(len(instance.disks))):
7164 raise errors.OpPrereqError("Can't recreate disks partially and"
7165 " change the nodes at the same time",
7168 self.instance = instance
7170 def Exec(self, feedback_fn):
7171 """Recreate the disks.
7174 instance = self.instance
7176 assert (self.owned_locks(locking.LEVEL_NODE) ==
7177 self.owned_locks(locking.LEVEL_NODE_RES))
7180 mods = [] # keeps track of needed changes
7182 for idx, disk in enumerate(instance.disks):
7184 changes = self.disks[idx]
7186 # Disk should not be recreated
7190 # update secondaries for disks, if needed
7191 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7192 # need to update the nodes and minors
7193 assert len(self.op.nodes) == 2
7194 assert len(disk.logical_id) == 6 # otherwise disk internals
7196 (_, _, old_port, _, _, old_secret) = disk.logical_id
7197 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7198 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7199 new_minors[0], new_minors[1], old_secret)
7200 assert len(disk.logical_id) == len(new_id)
7204 mods.append((idx, new_id, changes))
7206 # now that we have passed all asserts above, we can apply the mods
7207 # in a single run (to avoid partial changes)
7208 for idx, new_id, changes in mods:
7209 disk = instance.disks[idx]
7210 if new_id is not None:
7211 assert disk.dev_type == constants.LD_DRBD8
7212 disk.logical_id = new_id
7214 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7215 mode=changes.get(constants.IDISK_MODE, None))
7217 # change primary node, if needed
7219 instance.primary_node = self.op.nodes[0]
7220 self.LogWarning("Changing the instance's nodes, you will have to"
7221 " remove any disks left on the older nodes manually")
7224 self.cfg.Update(instance, feedback_fn)
7226 _CreateDisks(self, instance, to_skip=to_skip)
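
# Illustrative sketch (not used by this module): when LUInstanceRecreateDisks
# above moves a DRBD8 disk to new nodes, it keeps the old port and shared
# secret but substitutes the new node pair and freshly allocated minors in
# the six-element logical_id tuple. The helper below shows just that tuple
# surgery; the field order follows the unpacking used in Exec above.
def _example_new_drbd_logical_id(old_logical_id, new_nodes, new_minors):
  """Rebuilds a DRBD8 logical_id for a new node pair.

  @param old_logical_id: (node_a, node_b, port, minor_a, minor_b, secret)
  @param new_nodes: pair of replacement node names
  @param new_minors: pair of freshly allocated DRBD minors

  """
  (_, _, old_port, _, _, old_secret) = old_logical_id
  return (new_nodes[0], new_nodes[1], old_port,
          new_minors[0], new_minors[1], old_secret)
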
7229 class LUInstanceRename(LogicalUnit):
7230 """Rename an instance.
7233 HPATH = "instance-rename"
7234 HTYPE = constants.HTYPE_INSTANCE
7236 def CheckArguments(self):
7240 if self.op.ip_check and not self.op.name_check:
7241 # TODO: make the ip check more flexible and not depend on the name check
7242 raise errors.OpPrereqError("IP address check requires a name check",
7245 def BuildHooksEnv(self):
7248 This runs on master, primary and secondary nodes of the instance.
7251 env = _BuildInstanceHookEnvByObject(self, self.instance)
7252 env["INSTANCE_NEW_NAME"] = self.op.new_name
7255 def BuildHooksNodes(self):
7256 """Build hooks nodes.
7259 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7262 def CheckPrereq(self):
7263 """Check prerequisites.
7265 This checks that the instance is in the cluster and is not running.
7268 self.op.instance_name = _ExpandInstanceName(self.cfg,
7269 self.op.instance_name)
7270 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7271 assert instance is not None
7272 _CheckNodeOnline(self, instance.primary_node)
7273 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7274 msg="cannot rename")
7275 self.instance = instance
7277 new_name = self.op.new_name
7278 if self.op.name_check:
7279 hostname = netutils.GetHostname(name=new_name)
7280 if hostname.name != new_name:
7281 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7283 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7284 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7285 " same as given hostname '%s'") %
7286 (hostname.name, self.op.new_name),
7288 new_name = self.op.new_name = hostname.name
7289 if (self.op.ip_check and
7290 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7291 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7292 (hostname.ip, new_name),
7293 errors.ECODE_NOTUNIQUE)
7295 instance_list = self.cfg.GetInstanceList()
7296 if new_name in instance_list and new_name != instance.name:
7297 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7298 new_name, errors.ECODE_EXISTS)
7300 def Exec(self, feedback_fn):
7301 """Rename the instance.
7304 inst = self.instance
7305 old_name = inst.name
7307 rename_file_storage = False
7308 if (inst.disk_template in constants.DTS_FILEBASED and
7309 self.op.new_name != inst.name):
7310 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7311 rename_file_storage = True
7313 self.cfg.RenameInstance(inst.name, self.op.new_name)
7314 # Change the instance lock. This is definitely safe while we hold the BGL.
7315 # Otherwise the new lock would have to be added in acquired mode.
7317 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7318 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7320 # re-read the instance from the configuration after rename
7321 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7323 if rename_file_storage:
7324 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7325 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7326 old_file_storage_dir,
7327 new_file_storage_dir)
7328 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7329 " (but the instance has been renamed in Ganeti)" %
7330 (inst.primary_node, old_file_storage_dir,
7331 new_file_storage_dir))
7333 _StartInstanceDisks(self, inst, None)
7335 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7336 old_name, self.op.debug_level)
7337 msg = result.fail_msg
7339 msg = ("Could not run OS rename script for instance %s on node %s"
7340 " (but the instance has been renamed in Ganeti): %s" %
7341 (inst.name, inst.primary_node, msg))
7342 self.proc.LogWarning(msg)
7344 _ShutdownInstanceDisks(self, inst)
7349 class LUInstanceRemove(LogicalUnit):
7350 """Remove an instance.
7353 HPATH = "instance-remove"
7354 HTYPE = constants.HTYPE_INSTANCE
7357 def ExpandNames(self):
7358 self._ExpandAndLockInstance()
7359 self.needed_locks[locking.LEVEL_NODE] = []
7360 self.needed_locks[locking.LEVEL_NODE_RES] = []
7361 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7363 def DeclareLocks(self, level):
7364 if level == locking.LEVEL_NODE:
7365 self._LockInstancesNodes()
7366 elif level == locking.LEVEL_NODE_RES:
7368 self.needed_locks[locking.LEVEL_NODE_RES] = \
7369 self.needed_locks[locking.LEVEL_NODE][:]
7371 def BuildHooksEnv(self):
7374 This runs on master, primary and secondary nodes of the instance.
7377 env = _BuildInstanceHookEnvByObject(self, self.instance)
7378 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7381 def BuildHooksNodes(self):
7382 """Build hooks nodes.
7385 nl = [self.cfg.GetMasterNode()]
7386 nl_post = list(self.instance.all_nodes) + nl
7387 return (nl, nl_post)
7389 def CheckPrereq(self):
7390 """Check prerequisites.
7392 This checks that the instance is in the cluster.
7395 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7396 assert self.instance is not None, \
7397 "Cannot retrieve locked instance %s" % self.op.instance_name
7399 def Exec(self, feedback_fn):
7400 """Remove the instance.
7403 instance = self.instance
7404 logging.info("Shutting down instance %s on node %s",
7405 instance.name, instance.primary_node)
7407 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7408 self.op.shutdown_timeout)
7409 msg = result.fail_msg
7411 if self.op.ignore_failures:
7412 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7414 raise errors.OpExecError("Could not shutdown instance %s on"
7416 (instance.name, instance.primary_node, msg))
7418 assert (self.owned_locks(locking.LEVEL_NODE) ==
7419 self.owned_locks(locking.LEVEL_NODE_RES))
7420 assert not (set(instance.all_nodes) -
7421 self.owned_locks(locking.LEVEL_NODE)), \
7422 "Not owning correct locks"
7424 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7427 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7428 """Utility function to remove an instance.
7431 logging.info("Removing block devices for instance %s", instance.name)
7433 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7434 if not ignore_failures:
7435 raise errors.OpExecError("Can't remove instance's disks")
7436 feedback_fn("Warning: can't remove instance's disks")
7438 logging.info("Removing instance %s out of cluster config", instance.name)
7440 lu.cfg.RemoveInstance(instance.name)
7442 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7443 "Instance lock removal conflict"
7445 # Remove lock for the instance
7446 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7449 class LUInstanceQuery(NoHooksLU):
7450 """Logical unit for querying instances.
7453 # pylint: disable=W0142
7456 def CheckArguments(self):
7457 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7458 self.op.output_fields, self.op.use_locking)
7460 def ExpandNames(self):
7461 self.iq.ExpandNames(self)
7463 def DeclareLocks(self, level):
7464 self.iq.DeclareLocks(self, level)
7466 def Exec(self, feedback_fn):
7467 return self.iq.OldStyleQuery(self)
7470 class LUInstanceFailover(LogicalUnit):
7471 """Failover an instance.
7474 HPATH = "instance-failover"
7475 HTYPE = constants.HTYPE_INSTANCE
7478 def CheckArguments(self):
7479 """Check the arguments.
7482 self.iallocator = getattr(self.op, "iallocator", None)
7483 self.target_node = getattr(self.op, "target_node", None)
7485 def ExpandNames(self):
7486 self._ExpandAndLockInstance()
7488 if self.op.target_node is not None:
7489 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7491 self.needed_locks[locking.LEVEL_NODE] = []
7492 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7494 self.needed_locks[locking.LEVEL_NODE_RES] = []
7495 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7497 ignore_consistency = self.op.ignore_consistency
7498 shutdown_timeout = self.op.shutdown_timeout
7499 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7502 ignore_consistency=ignore_consistency,
7503 shutdown_timeout=shutdown_timeout,
7504 ignore_ipolicy=self.op.ignore_ipolicy)
7505 self.tasklets = [self._migrater]
7507 def DeclareLocks(self, level):
7508 if level == locking.LEVEL_NODE:
7509 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7510 if instance.disk_template in constants.DTS_EXT_MIRROR:
7511 if self.op.target_node is None:
7512 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7514 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7515 self.op.target_node]
7516 del self.recalculate_locks[locking.LEVEL_NODE]
7518 self._LockInstancesNodes()
7519 elif level == locking.LEVEL_NODE_RES:
7521 self.needed_locks[locking.LEVEL_NODE_RES] = \
7522 self.needed_locks[locking.LEVEL_NODE][:]
7524 def BuildHooksEnv(self):
7527 This runs on master, primary and secondary nodes of the instance.
7530 instance = self._migrater.instance
7531 source_node = instance.primary_node
7532 target_node = self.op.target_node
7534 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7535 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7536 "OLD_PRIMARY": source_node,
7537 "NEW_PRIMARY": target_node,
7540 if instance.disk_template in constants.DTS_INT_MIRROR:
7541 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7542 env["NEW_SECONDARY"] = source_node
7544 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7546 env.update(_BuildInstanceHookEnvByObject(self, instance))
7550 def BuildHooksNodes(self):
7551 """Build hooks nodes.
7554 instance = self._migrater.instance
7555 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7556 return (nl, nl + [instance.primary_node])
7559 class LUInstanceMigrate(LogicalUnit):
7560 """Migrate an instance.
7562 This is migration without shutting down, compared to the failover,
7563 which is done with shutdown.
7566 HPATH = "instance-migrate"
7567 HTYPE = constants.HTYPE_INSTANCE
7570 def ExpandNames(self):
7571 self._ExpandAndLockInstance()
7573 if self.op.target_node is not None:
7574 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7576 self.needed_locks[locking.LEVEL_NODE] = []
7577 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7579 self.needed_locks[locking.LEVEL_NODE_RES] = []
7580 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7583 TLMigrateInstance(self, self.op.instance_name,
7584 cleanup=self.op.cleanup,
7586 fallback=self.op.allow_failover,
7587 allow_runtime_changes=self.op.allow_runtime_changes,
7588 ignore_ipolicy=self.op.ignore_ipolicy)
7589 self.tasklets = [self._migrater]
7591 def DeclareLocks(self, level):
7592 if level == locking.LEVEL_NODE:
7593 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7594 if instance.disk_template in constants.DTS_EXT_MIRROR:
7595 if self.op.target_node is None:
7596 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7598 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7599 self.op.target_node]
7600 del self.recalculate_locks[locking.LEVEL_NODE]
7602 self._LockInstancesNodes()
7603 elif level == locking.LEVEL_NODE_RES:
7605 self.needed_locks[locking.LEVEL_NODE_RES] = \
7606 self.needed_locks[locking.LEVEL_NODE][:]
7608 def BuildHooksEnv(self):
7611 This runs on master, primary and secondary nodes of the instance.
7614 instance = self._migrater.instance
7615 source_node = instance.primary_node
7616 target_node = self.op.target_node
7617 env = _BuildInstanceHookEnvByObject(self, instance)
7619 "MIGRATE_LIVE": self._migrater.live,
7620 "MIGRATE_CLEANUP": self.op.cleanup,
7621 "OLD_PRIMARY": source_node,
7622 "NEW_PRIMARY": target_node,
7623 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7626 if instance.disk_template in constants.DTS_INT_MIRROR:
7627 env["OLD_SECONDARY"] = target_node
7628 env["NEW_SECONDARY"] = source_node
7630 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7634 def BuildHooksNodes(self):
7635 """Build hooks nodes.
7638 instance = self._migrater.instance
7639 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7640 return (nl, nl + [instance.primary_node])
7643 class LUInstanceMove(LogicalUnit):
7644 """Move an instance by data-copying.
7647 HPATH = "instance-move"
7648 HTYPE = constants.HTYPE_INSTANCE
7651 def ExpandNames(self):
7652 self._ExpandAndLockInstance()
7653 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7654 self.op.target_node = target_node
7655 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7656 self.needed_locks[locking.LEVEL_NODE_RES] = []
7657 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7659 def DeclareLocks(self, level):
7660 if level == locking.LEVEL_NODE:
7661 self._LockInstancesNodes(primary_only=True)
7662 elif level == locking.LEVEL_NODE_RES:
7664 self.needed_locks[locking.LEVEL_NODE_RES] = \
7665 self.needed_locks[locking.LEVEL_NODE][:]
7667 def BuildHooksEnv(self):
7670 This runs on master, primary and secondary nodes of the instance.
7674 "TARGET_NODE": self.op.target_node,
7675 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7677 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7680 def BuildHooksNodes(self):
7681 """Build hooks nodes.
7685 self.cfg.GetMasterNode(),
7686 self.instance.primary_node,
7687 self.op.target_node,
7691 def CheckPrereq(self):
7692 """Check prerequisites.
7694 This checks that the instance is in the cluster.
7697 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7698 assert self.instance is not None, \
7699 "Cannot retrieve locked instance %s" % self.op.instance_name
7701 node = self.cfg.GetNodeInfo(self.op.target_node)
7702 assert node is not None, \
7703 "Cannot retrieve locked node %s" % self.op.target_node
7705 self.target_node = target_node = node.name
7707 if target_node == instance.primary_node:
7708 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7709 (instance.name, target_node),
7712 bep = self.cfg.GetClusterInfo().FillBE(instance)
7714 for idx, dsk in enumerate(instance.disks):
7715 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7716 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7717 " cannot copy" % idx, errors.ECODE_STATE)
7719 _CheckNodeOnline(self, target_node)
7720 _CheckNodeNotDrained(self, target_node)
7721 _CheckNodeVmCapable(self, target_node)
7722 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7723 self.cfg.GetNodeGroup(node.group))
7724 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7725 ignore=self.op.ignore_ipolicy)
7727 if instance.admin_state == constants.ADMINST_UP:
7728 # check memory requirements on the target node
7729 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7730 instance.name, bep[constants.BE_MAXMEM],
7731 instance.hypervisor)
7733 self.LogInfo("Not checking memory on the target node as"
7734 " instance will not be started")
7736 # check bridge existence
7737 _CheckInstanceBridgesExist(self, instance, node=target_node)
7739 def Exec(self, feedback_fn):
7740 """Move an instance.
7742 The move is done by shutting it down on its present node, copying
7743 the data over (slow) and starting it on the new node.
7746 instance = self.instance
7748 source_node = instance.primary_node
7749 target_node = self.target_node
7751 self.LogInfo("Shutting down instance %s on source node %s",
7752 instance.name, source_node)
7754 assert (self.owned_locks(locking.LEVEL_NODE) ==
7755 self.owned_locks(locking.LEVEL_NODE_RES))
7757 result = self.rpc.call_instance_shutdown(source_node, instance,
7758 self.op.shutdown_timeout)
7759 msg = result.fail_msg
7761 if self.op.ignore_consistency:
7762 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7763 " Proceeding anyway. Please make sure node"
7764 " %s is down. Error details: %s",
7765 instance.name, source_node, source_node, msg)
7767 raise errors.OpExecError("Could not shutdown instance %s on"
7769 (instance.name, source_node, msg))
7771 # create the target disks
7773 _CreateDisks(self, instance, target_node=target_node)
7774 except errors.OpExecError:
7775 self.LogWarning("Device creation failed, reverting...")
7777 _RemoveDisks(self, instance, target_node=target_node)
7779 self.cfg.ReleaseDRBDMinors(instance.name)
7782 cluster_name = self.cfg.GetClusterInfo().cluster_name
7785 # activate, get path, copy the data over
7786 for idx, disk in enumerate(instance.disks):
7787 self.LogInfo("Copying data for disk %d", idx)
7788 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7789 instance.name, True, idx)
7791 self.LogWarning("Can't assemble newly created disk %d: %s",
7792 idx, result.fail_msg)
7793 errs.append(result.fail_msg)
7795 dev_path = result.payload
7796 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7797 target_node, dev_path,
7800 self.LogWarning("Can't copy data over for disk %d: %s",
7801 idx, result.fail_msg)
7802 errs.append(result.fail_msg)
7806 self.LogWarning("Some disks failed to copy, aborting")
7808 _RemoveDisks(self, instance, target_node=target_node)
7810 self.cfg.ReleaseDRBDMinors(instance.name)
7811 raise errors.OpExecError("Errors during disk copy: %s" %
7814 instance.primary_node = target_node
7815 self.cfg.Update(instance, feedback_fn)
7817 self.LogInfo("Removing the disks on the original node")
7818 _RemoveDisks(self, instance, target_node=source_node)
7820 # Only start the instance if it's marked as up
7821 if instance.admin_state == constants.ADMINST_UP:
7822 self.LogInfo("Starting instance %s on node %s",
7823 instance.name, target_node)
7825 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7826 ignore_secondaries=True)
7828 _ShutdownInstanceDisks(self, instance)
7829 raise errors.OpExecError("Can't activate the instance's disks")
7831 result = self.rpc.call_instance_start(target_node,
7832 (instance, None, None), False)
7833 msg = result.fail_msg
7835 _ShutdownInstanceDisks(self, instance)
7836 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7837 (instance.name, target_node, msg))
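# Illustrative sketch (not part of the original module): the per-disk copy
# loop of LUInstanceMove.Exec above, reduced to its control flow. `assemble`
# and `export` are hypothetical callables standing in for the
# blockdev_assemble / blockdev_export RPC calls; each returns an error string
# or None. Errors are collected so every disk is attempted before aborting.
def _copy_disks_sketch(disks, assemble, export):
  errs = []
  for idx, disk in enumerate(disks):
    err = assemble(disk)
    if err:
      errs.append("disk %d: assemble failed: %s" % (idx, err))
      continue
    err = export(disk)
    if err:
      errs.append("disk %d: copy failed: %s" % (idx, err))
  return errs

assert _copy_disks_sketch(["d0"], lambda d: None, lambda d: None) == []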
7840 class LUNodeMigrate(LogicalUnit):
7841 """Migrate all instances from a node.
7844 HPATH = "node-migrate"
7845 HTYPE = constants.HTYPE_NODE
7848 def CheckArguments(self):
7851 def ExpandNames(self):
7852 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7854 self.share_locks = _ShareAll()
7855 self.needed_locks = {
7856 locking.LEVEL_NODE: [self.op.node_name],
7859 def BuildHooksEnv(self):
7862 This runs on the master, the primary and all the secondaries.
7866 "NODE_NAME": self.op.node_name,
7867 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7870 def BuildHooksNodes(self):
7871 """Build hooks nodes.
7874 nl = [self.cfg.GetMasterNode()]
7877 def CheckPrereq(self):
7880 def Exec(self, feedback_fn):
7881 # Prepare jobs for migration instances
7882 allow_runtime_changes = self.op.allow_runtime_changes
7884 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7887 iallocator=self.op.iallocator,
7888 target_node=self.op.target_node,
7889 allow_runtime_changes=allow_runtime_changes,
7890 ignore_ipolicy=self.op.ignore_ipolicy)]
7891 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7894 # TODO: Run iallocator in this opcode and pass correct placement options to
7895 # OpInstanceMigrate. Since other jobs can modify the cluster between
7896 # running the iallocator and the actual migration, a good consistency model
7897 # will have to be found.
7899 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7900 frozenset([self.op.node_name]))
7902 return ResultWithJobs(jobs)
7905 class TLMigrateInstance(Tasklet):
7906 """Tasklet class for instance migration.
7909 @ivar live: whether the migration will be done live or non-live;
7910 this variable is initialized only after CheckPrereq has run
7911 @type cleanup: boolean
7912 @ivar cleanup: Whether we are cleaning up after a failed migration
7913 @type iallocator: string
7914 @ivar iallocator: The iallocator used to determine target_node
7915 @type target_node: string
7916 @ivar target_node: If given, the target_node to reallocate the instance to
7917 @type failover: boolean
7918 @ivar failover: Whether operation results in failover or migration
7919 @type fallback: boolean
7920 @ivar fallback: Whether fallback to failover is allowed if migration not
7922 @type ignore_consistency: boolean
7923 @ivar ignore_consistency: Whether we should ignore consistency between source
7925 @type shutdown_timeout: int
7926 @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
7927 @type ignore_ipolicy: bool
7928 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7933 _MIGRATION_POLL_INTERVAL = 1 # seconds
7934 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7936 def __init__(self, lu, instance_name, cleanup=False,
7937 failover=False, fallback=False,
7938 ignore_consistency=False,
7939 allow_runtime_changes=True,
7940 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7941 ignore_ipolicy=False):
7942 """Initializes this class.
7945 Tasklet.__init__(self, lu)
7948 self.instance_name = instance_name
7949 self.cleanup = cleanup
7950 self.live = False # will be overridden later
7951 self.failover = failover
7952 self.fallback = fallback
7953 self.ignore_consistency = ignore_consistency
7954 self.shutdown_timeout = shutdown_timeout
7955 self.ignore_ipolicy = ignore_ipolicy
7956 self.allow_runtime_changes = allow_runtime_changes
7958 def CheckPrereq(self):
7959 """Check prerequisites.
7961 This checks that the instance is in the cluster.
7964 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7965 instance = self.cfg.GetInstanceInfo(instance_name)
7966 assert instance is not None
7967 self.instance = instance
7968 cluster = self.cfg.GetClusterInfo()
7970 if (not self.cleanup and
7971 not instance.admin_state == constants.ADMINST_UP and
7972 not self.failover and self.fallback):
7973 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7974 " switching to failover")
7975 self.failover = True
7977 if instance.disk_template not in constants.DTS_MIRRORED:
7982 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7983 " %s" % (instance.disk_template, text),
7986 if instance.disk_template in constants.DTS_EXT_MIRROR:
7987 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7989 if self.lu.op.iallocator:
7990 self._RunAllocator()
7992 # We set self.target_node as it is required by
7994 self.target_node = self.lu.op.target_node
7996 # Check that the target node is correct in terms of instance policy
7997 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7998 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7999 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8000 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8001 ignore=self.ignore_ipolicy)
8003 # self.target_node is already populated, either directly or by the
8005 target_node = self.target_node
8006 if self.target_node == instance.primary_node:
8007 raise errors.OpPrereqError("Cannot migrate instance %s"
8008 " to its primary (%s)" %
8009 (instance.name, instance.primary_node))
8011 if len(self.lu.tasklets) == 1:
8012 # It is safe to release locks only when we're the only tasklet
8014 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8015 keep=[instance.primary_node, self.target_node])
8018 secondary_nodes = instance.secondary_nodes
8019 if not secondary_nodes:
8020 raise errors.ConfigurationError("No secondary node but using"
8021 " %s disk template" %
8022 instance.disk_template)
8023 target_node = secondary_nodes[0]
8024 if self.lu.op.iallocator or (self.lu.op.target_node and
8025 self.lu.op.target_node != target_node):
8027 text = "failed over"
8030 raise errors.OpPrereqError("Instances with disk template %s cannot"
8031 " be %s to arbitrary nodes"
8032 " (neither an iallocator nor a target"
8033 " node can be passed)" %
8034 (instance.disk_template, text),
8036 nodeinfo = self.cfg.GetNodeInfo(target_node)
8037 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8038 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8039 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8040 ignore=self.ignore_ipolicy)
8042 i_be = cluster.FillBE(instance)
8044 # check memory requirements on the secondary node
8045 if (not self.cleanup and
8046 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8047 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8048 "migrating instance %s" %
8050 i_be[constants.BE_MINMEM],
8051 instance.hypervisor)
8053 self.lu.LogInfo("Not checking memory on the secondary node as"
8054 " instance will not be started")
8056 # check if failover must be forced instead of migration
8057 if (not self.cleanup and not self.failover and
8058 i_be[constants.BE_ALWAYS_FAILOVER]):
8060 self.lu.LogInfo("Instance configured to always failover; fallback"
8062 self.failover = True
8064 raise errors.OpPrereqError("This instance has been configured to"
8065 " always failover, please allow failover",
8068 # check bridge existence
8069 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8071 if not self.cleanup:
8072 _CheckNodeNotDrained(self.lu, target_node)
8073 if not self.failover:
8074 result = self.rpc.call_instance_migratable(instance.primary_node,
8076 if result.fail_msg and self.fallback:
8077 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8079 self.failover = True
8081 result.Raise("Can't migrate, please use failover",
8082 prereq=True, ecode=errors.ECODE_STATE)
8084 assert not (self.failover and self.cleanup)
8086 if not self.failover:
8087 if self.lu.op.live is not None and self.lu.op.mode is not None:
8088 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8089 " parameters are accepted",
8091 if self.lu.op.live is not None:
8093 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8095 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8096 # reset the 'live' parameter to None so that repeated
8097 # invocations of CheckPrereq do not raise an exception
8098 self.lu.op.live = None
8099 elif self.lu.op.mode is None:
8100 # read the default value from the hypervisor
8101 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8102 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8104 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8106 # Failover is never live
8109 if not (self.failover or self.cleanup):
8110 remote_info = self.rpc.call_instance_info(instance.primary_node,
8112 instance.hypervisor)
8113 remote_info.Raise("Error checking instance on node %s" %
8114 instance.primary_node)
8115 instance_running = bool(remote_info.payload)
8116 if instance_running:
8117 self.current_mem = int(remote_info.payload["memory"])
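# Illustrative sketch (not part of the original module): the precedence used
# in CheckPrereq above to decide whether the migration is live. Argument
# names are illustrative; "live"/"nonlive" stand in for the HT_MIGRATION_*
# constants, and hv_default_mode for the hypervisor's HV_MIGRATION_MODE.
def _resolve_migration_mode_sketch(failover, live, mode, hv_default_mode):
  if failover:
    return False  # failover is never live
  if live is not None and mode is not None:
    raise ValueError("Only one of the 'live' and 'mode' parameters is accepted")
  if live is not None:
    mode = "live" if live else "nonlive"
  elif mode is None:
    mode = hv_default_mode  # fall back to the hypervisor default
  return mode == "live"

assert _resolve_migration_mode_sketch(False, None, None, "live") is True
assert _resolve_migration_mode_sketch(True, None, None, "live") is False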
8119 def _RunAllocator(self):
8120 """Run the allocator based on input opcode.
8123 # FIXME: add a self.ignore_ipolicy option
8124 ial = IAllocator(self.cfg, self.rpc,
8125 mode=constants.IALLOCATOR_MODE_RELOC,
8126 name=self.instance_name,
8127 relocate_from=[self.instance.primary_node],
8130 ial.Run(self.lu.op.iallocator)
8133 raise errors.OpPrereqError("Can't compute nodes using"
8134 " iallocator '%s': %s" %
8135 (self.lu.op.iallocator, ial.info),
8137 if len(ial.result) != ial.required_nodes:
8138 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8139 " of nodes (%s), required %s" %
8140 (self.lu.op.iallocator, len(ial.result),
8141 ial.required_nodes), errors.ECODE_FAULT)
8142 self.target_node = ial.result[0]
8143 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8144 self.instance_name, self.lu.op.iallocator,
8145 utils.CommaJoin(ial.result))
8147 def _WaitUntilSync(self):
8148 """Poll with custom rpc for disk sync.
8150 This uses our own step-based rpc call.
8153 self.feedback_fn("* wait until resync is done")
8157 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8159 (self.instance.disks,
8162 for node, nres in result.items():
8163 nres.Raise("Cannot resync disks on node %s" % node)
8164 node_done, node_percent = nres.payload
8165 all_done = all_done and node_done
8166 if node_percent is not None:
8167 min_percent = min(min_percent, node_percent)
8169 if min_percent < 100:
8170 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8173 def _EnsureSecondary(self, node):
8174 """Demote a node to secondary.
8177 self.feedback_fn("* switching node %s to secondary mode" % node)
8179 for dev in self.instance.disks:
8180 self.cfg.SetDiskID(dev, node)
8182 result = self.rpc.call_blockdev_close(node, self.instance.name,
8183 self.instance.disks)
8184 result.Raise("Cannot change disk to secondary on node %s" % node)
8186 def _GoStandalone(self):
8187 """Disconnect from the network.
8190 self.feedback_fn("* changing into standalone mode")
8191 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8192 self.instance.disks)
8193 for node, nres in result.items():
8194 nres.Raise("Cannot disconnect disks on node %s" % node)
8196 def _GoReconnect(self, multimaster):
8197 """Reconnect to the network.
8203 msg = "single-master"
8204 self.feedback_fn("* changing disks into %s mode" % msg)
8205 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8206 (self.instance.disks, self.instance),
8207 self.instance.name, multimaster)
8208 for node, nres in result.items():
8209 nres.Raise("Cannot change disks config on node %s" % node)
8211 def _ExecCleanup(self):
8212 """Try to cleanup after a failed migration.
8214 The cleanup is done by:
8215 - check that the instance is running only on one node
8216 (and update the config if needed)
8217 - change disks on its secondary node to secondary
8218 - wait until disks are fully synchronized
8219 - disconnect from the network
8220 - change disks into single-master mode
8221 - wait again until disks are fully synchronized
8224 instance = self.instance
8225 target_node = self.target_node
8226 source_node = self.source_node
8228 # check running on only one node
8229 self.feedback_fn("* checking where the instance actually runs"
8230 " (if this hangs, the hypervisor might be in"
8232 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8233 for node, result in ins_l.items():
8234 result.Raise("Can't contact node %s" % node)
8236 runningon_source = instance.name in ins_l[source_node].payload
8237 runningon_target = instance.name in ins_l[target_node].payload
8239 if runningon_source and runningon_target:
8240 raise errors.OpExecError("Instance seems to be running on two nodes,"
8241 " or the hypervisor is confused; you will have"
8242 " to ensure manually that it runs only on one"
8243 " and restart this operation")
8245 if not (runningon_source or runningon_target):
8246 raise errors.OpExecError("Instance does not seem to be running at all;"
8247 " in this case it's safer to repair by"
8248 " running 'gnt-instance stop' to ensure disk"
8249 " shutdown, and then restarting it")
8251 if runningon_target:
8252 # the migration has actually succeeded, we need to update the config
8253 self.feedback_fn("* instance running on secondary node (%s),"
8254 " updating config" % target_node)
8255 instance.primary_node = target_node
8256 self.cfg.Update(instance, self.feedback_fn)
8257 demoted_node = source_node
8259 self.feedback_fn("* instance confirmed to be running on its"
8260 " primary node (%s)" % source_node)
8261 demoted_node = target_node
8263 if instance.disk_template in constants.DTS_INT_MIRROR:
8264 self._EnsureSecondary(demoted_node)
8266 self._WaitUntilSync()
8267 except errors.OpExecError:
8268 # we ignore errors here, since if the device is standalone, it
8269 # won't be able to sync
8271 self._GoStandalone()
8272 self._GoReconnect(False)
8273 self._WaitUntilSync()
8275 self.feedback_fn("* done")
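# Illustrative sketch (not part of the original module): the case analysis
# _ExecCleanup performs after asking both nodes whether the instance is
# running there. It returns which node keeps the instance and which one is
# demoted to secondary; the two inconsistent cases abort the cleanup.
def _cleanup_decision_sketch(running_on_source, running_on_target):
  if running_on_source and running_on_target:
    raise RuntimeError("instance seems to be running on both nodes")
  if not (running_on_source or running_on_target):
    raise RuntimeError("instance does not seem to be running at all")
  if running_on_target:
    # the migration actually succeeded: the target becomes the primary
    return ("target", "source")
  # the migration never happened: the source stays the primary
  return ("source", "target")

assert _cleanup_decision_sketch(False, True) == ("target", "source")
assert _cleanup_decision_sketch(True, False) == ("source", "target")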
8277 def _RevertDiskStatus(self):
8278 """Try to revert the disk status after a failed migration.
8281 target_node = self.target_node
8282 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8286 self._EnsureSecondary(target_node)
8287 self._GoStandalone()
8288 self._GoReconnect(False)
8289 self._WaitUntilSync()
8290 except errors.OpExecError, err:
8291 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8292 " please try to recover the instance manually;"
8293 " error '%s'" % str(err))
8295 def _AbortMigration(self):
8296 """Call the hypervisor code to abort a started migration.
8299 instance = self.instance
8300 target_node = self.target_node
8301 source_node = self.source_node
8302 migration_info = self.migration_info
8304 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8308 abort_msg = abort_result.fail_msg
8310 logging.error("Aborting migration failed on target node %s: %s",
8311 target_node, abort_msg)
8312 # Don't raise an exception here, as we still have to try to revert the
8313 # disk status, even if this step failed.
8315 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8316 instance, False, self.live)
8317 abort_msg = abort_result.fail_msg
8319 logging.error("Aborting migration failed on source node %s: %s",
8320 source_node, abort_msg)
8322 def _ExecMigration(self):
8323 """Migrate an instance.
8325 The migration is done by:
8326 - change the disks into dual-master mode
8327 - wait until disks are fully synchronized again
8328 - migrate the instance
8329 - change disks on the new secondary node (the old primary) to secondary
8330 - wait until disks are fully synchronized
8331 - change disks into single-master mode
8334 instance = self.instance
8335 target_node = self.target_node
8336 source_node = self.source_node
8338 # Check for hypervisor version mismatch and warn the user.
8339 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8340 None, [self.instance.hypervisor])
8341 for ninfo in nodeinfo.values():
8342 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8344 (_, _, (src_info, )) = nodeinfo[source_node].payload
8345 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8347 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8348 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8349 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8350 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8351 if src_version != dst_version:
8352 self.feedback_fn("* warning: hypervisor version mismatch between"
8353 " source (%s) and target (%s) node" %
8354 (src_version, dst_version))
8356 self.feedback_fn("* checking disk consistency between source and target")
8357 for (idx, dev) in enumerate(instance.disks):
8358 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8359 raise errors.OpExecError("Disk %s is degraded or not fully"
8360 " synchronized on target node,"
8361 " aborting migration" % idx)
8363 if self.current_mem > self.tgt_free_mem:
8364 if not self.allow_runtime_changes:
8365 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8366 " free memory to fit instance %s on target"
8367 " node %s (have %dMB, need %dMB)" %
8368 (instance.name, target_node,
8369 self.tgt_free_mem, self.current_mem))
8370 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8371 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8374 rpcres.Raise("Cannot modify instance runtime memory")
8376 # First get the migration information from the remote node
8377 result = self.rpc.call_migration_info(source_node, instance)
8378 msg = result.fail_msg
8380 log_err = ("Failed fetching source migration information from %s: %s" %
8382 logging.error(log_err)
8383 raise errors.OpExecError(log_err)
8385 self.migration_info = migration_info = result.payload
8387 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8388 # Then switch the disks to master/master mode
8389 self._EnsureSecondary(target_node)
8390 self._GoStandalone()
8391 self._GoReconnect(True)
8392 self._WaitUntilSync()
8394 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8395 result = self.rpc.call_accept_instance(target_node,
8398 self.nodes_ip[target_node])
8400 msg = result.fail_msg
8402 logging.error("Instance pre-migration failed, trying to revert"
8403 " disk status: %s", msg)
8404 self.feedback_fn("Pre-migration failed, aborting")
8405 self._AbortMigration()
8406 self._RevertDiskStatus()
8407 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8408 (instance.name, msg))
8410 self.feedback_fn("* migrating instance to %s" % target_node)
8411 result = self.rpc.call_instance_migrate(source_node, instance,
8412 self.nodes_ip[target_node],
8414 msg = result.fail_msg
8416 logging.error("Instance migration failed, trying to revert"
8417 " disk status: %s", msg)
8418 self.feedback_fn("Migration failed, aborting")
8419 self._AbortMigration()
8420 self._RevertDiskStatus()
8421 raise errors.OpExecError("Could not migrate instance %s: %s" %
8422 (instance.name, msg))
8424 self.feedback_fn("* starting memory transfer")
8425 last_feedback = time.time()
8427 result = self.rpc.call_instance_get_migration_status(source_node,
8429 msg = result.fail_msg
8430 ms = result.payload # MigrationStatus instance
8431 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8432 logging.error("Instance migration failed, trying to revert"
8433 " disk status: %s", msg)
8434 self.feedback_fn("Migration failed, aborting")
8435 self._AbortMigration()
8436 self._RevertDiskStatus()
8437 raise errors.OpExecError("Could not migrate instance %s: %s" %
8438 (instance.name, msg))
8440 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8441 self.feedback_fn("* memory transfer complete")
8444 if (utils.TimeoutExpired(last_feedback,
8445 self._MIGRATION_FEEDBACK_INTERVAL) and
8446 ms.transferred_ram is not None):
8447 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8448 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8449 last_feedback = time.time()
8451 time.sleep(self._MIGRATION_POLL_INTERVAL)
8453 result = self.rpc.call_instance_finalize_migration_src(source_node,
8457 msg = result.fail_msg
8459 logging.error("Instance migration succeeded, but finalization failed"
8460 " on the source node: %s", msg)
8461 raise errors.OpExecError("Could not finalize instance migration: %s" %
8464 instance.primary_node = target_node
8466 # distribute new instance config to the other nodes
8467 self.cfg.Update(instance, self.feedback_fn)
8469 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8473 msg = result.fail_msg
8475 logging.error("Instance migration succeeded, but finalization failed"
8476 " on the target node: %s", msg)
8477 raise errors.OpExecError("Could not finalize instance migration: %s" %
8480 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8481 self._EnsureSecondary(source_node)
8482 self._WaitUntilSync()
8483 self._GoStandalone()
8484 self._GoReconnect(False)
8485 self._WaitUntilSync()
8487 # If the instance's disk template is `rbd' and there was a successful
8488 # migration, unmap the device from the source node.
8489 if self.instance.disk_template == constants.DT_RBD:
8490 disks = _ExpandCheckDisks(instance, instance.disks)
8491 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8493 result = self.rpc.call_blockdev_shutdown(source_node, disk)
8494 msg = result.fail_msg
8496 logging.error("Migration was successful, but couldn't unmap the"
8497 " block device %s on source node %s: %s",
8498 disk.iv_name, source_node, msg)
8499 logging.error("You need to unmap the device %s manually on %s",
8500 disk.iv_name, source_node)
8502 self.feedback_fn("* done")
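# Illustrative sketch (not part of the original module): the poll-often,
# report-rarely pattern used by the memory transfer loop above. `get_status`
# and `feedback` are hypothetical callables standing in for the migration
# status RPC and feedback_fn; the intervals mirror the tasklet's
# _MIGRATION_POLL_INTERVAL and _MIGRATION_FEEDBACK_INTERVAL attributes.
import time

def _poll_with_feedback_sketch(get_status, feedback,
                               poll_interval=1, feedback_interval=10):
  last_feedback = time.time()
  while True:
    status = get_status()
    if status["state"] != "active":  # transfer finished or failed
      return status
    now = time.time()
    if now - last_feedback >= feedback_interval and status.get("percent"):
      feedback("* memory transfer progress: %.2f %%" % status["percent"])
      last_feedback = now
    time.sleep(poll_interval)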
8504 def _ExecFailover(self):
8505 """Failover an instance.
8507 The failover is done by shutting it down on its present node and
8508 starting it on the secondary.
8511 instance = self.instance
8512 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8514 source_node = instance.primary_node
8515 target_node = self.target_node
8517 if instance.admin_state == constants.ADMINST_UP:
8518 self.feedback_fn("* checking disk consistency between source and target")
8519 for (idx, dev) in enumerate(instance.disks):
8520 # for drbd, these are drbd over lvm
8521 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8523 if primary_node.offline:
8524 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8526 (primary_node.name, idx, target_node))
8527 elif not self.ignore_consistency:
8528 raise errors.OpExecError("Disk %s is degraded on target node,"
8529 " aborting failover" % idx)
8531 self.feedback_fn("* not checking disk consistency as instance is not"
8534 self.feedback_fn("* shutting down instance on source node")
8535 logging.info("Shutting down instance %s on node %s",
8536 instance.name, source_node)
8538 result = self.rpc.call_instance_shutdown(source_node, instance,
8539 self.shutdown_timeout)
8540 msg = result.fail_msg
8542 if self.ignore_consistency or primary_node.offline:
8543 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8544 " proceeding anyway; please make sure node"
8545 " %s is down; error details: %s",
8546 instance.name, source_node, source_node, msg)
8548 raise errors.OpExecError("Could not shutdown instance %s on"
8550 (instance.name, source_node, msg))
8552 self.feedback_fn("* deactivating the instance's disks on source node")
8553 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8554 raise errors.OpExecError("Can't shut down the instance's disks")
8556 instance.primary_node = target_node
8557 # distribute new instance config to the other nodes
8558 self.cfg.Update(instance, self.feedback_fn)
8560 # Only start the instance if it's marked as up
8561 if instance.admin_state == constants.ADMINST_UP:
8562 self.feedback_fn("* activating the instance's disks on target node %s" %
8564 logging.info("Starting instance %s on node %s",
8565 instance.name, target_node)
8567 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8568 ignore_secondaries=True)
8570 _ShutdownInstanceDisks(self.lu, instance)
8571 raise errors.OpExecError("Can't activate the instance's disks")
8573 self.feedback_fn("* starting the instance on the target node %s" %
8575 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8577 msg = result.fail_msg
8579 _ShutdownInstanceDisks(self.lu, instance)
8580 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8581 (instance.name, target_node, msg))
8583 def Exec(self, feedback_fn):
8584 """Perform the migration.
8587 self.feedback_fn = feedback_fn
8588 self.source_node = self.instance.primary_node
8590 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8591 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8592 self.target_node = self.instance.secondary_nodes[0]
8593 # Otherwise self.target_node has been populated either
8594 # directly, or through an iallocator.
8596 self.all_nodes = [self.source_node, self.target_node]
8597 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8598 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8601 feedback_fn("Failover instance %s" % self.instance.name)
8602 self._ExecFailover()
8604 feedback_fn("Migrating instance %s" % self.instance.name)
8607 return self._ExecCleanup()
8609 return self._ExecMigration()
8612 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8614 """Wrapper around L{_CreateBlockDevInner}.
8616 This method annotates the root device first.
8619 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8620 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8624 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8626 """Create a tree of block devices on a given node.
8628 If this device type has to be created on secondaries, create it and
8631 If not, just recurse to children keeping the same 'force' value.
8633 @attention: The device has to be annotated already.
8635 @param lu: the lu on whose behalf we execute
8636 @param node: the node on which to create the device
8637 @type instance: L{objects.Instance}
8638 @param instance: the instance which owns the device
8639 @type device: L{objects.Disk}
8640 @param device: the device to create
8641 @type force_create: boolean
8642 @param force_create: whether to force creation of this device; this
8643 will be changed to True whenever we find a device which has
8644 the CreateOnSecondary() attribute
8645 @param info: the extra 'metadata' we should attach to the device
8646 (this will be represented as a LVM tag)
8647 @type force_open: boolean
8648 @param force_open: this parameter will be passed to the
8649 L{backend.BlockdevCreate} function where it specifies
8650 whether we run on primary or not, and it affects both
8651 the child assembly and the device's own Open() execution
8654 if device.CreateOnSecondary():
8658 for child in device.children:
8659 _CreateBlockDevInner(lu, node, instance, child, force_create,
8662 if not force_create:
8665 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8668 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8669 """Create a single block device on a given node.
8671 This will not recurse over children of the device, so they must be
8674 @param lu: the lu on whose behalf we execute
8675 @param node: the node on which to create the device
8676 @type instance: L{objects.Instance}
8677 @param instance: the instance which owns the device
8678 @type device: L{objects.Disk}
8679 @param device: the device to create
8680 @param info: the extra 'metadata' we should attach to the device
8681 (this will be represented as a LVM tag)
8682 @type force_open: boolean
8683 @param force_open: this parameter will be passed to the
8684 L{backend.BlockdevCreate} function where it specifies
8685 whether we run on primary or not, and it affects both
8686 the child assembly and the device's own Open() execution
8689 lu.cfg.SetDiskID(device, node)
8690 result = lu.rpc.call_blockdev_create(node, device, device.size,
8691 instance.name, force_open, info)
8692 result.Raise("Can't create block device %s on"
8693 " node %s for instance %s" % (device, node, instance.name))
8694 if device.physical_id is None:
8695 device.physical_id = result.payload
8698 def _GenerateUniqueNames(lu, exts):
8699 """Generate a suitable LV name.
8701 This will generate logical volume names (one per given extension) for the given instance.
8706 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8707 results.append("%s%s" % (new_id, val))
8711 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8712 iv_name, p_minor, s_minor):
8713 """Generate a drbd8 device complete with its children.
8716 assert len(vgnames) == len(names) == 2
8717 port = lu.cfg.AllocatePort()
8718 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8720 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8721 logical_id=(vgnames[0], names[0]),
8723 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8724 logical_id=(vgnames[1], names[1]),
8726 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8727 logical_id=(primary, secondary, port,
8730 children=[dev_data, dev_meta],
8731 iv_name=iv_name, params={})
8735 _DISK_TEMPLATE_NAME_PREFIX = {
8736 constants.DT_PLAIN: "",
8737 constants.DT_RBD: ".rbd",
8741 _DISK_TEMPLATE_DEVICE_TYPE = {
8742 constants.DT_PLAIN: constants.LD_LV,
8743 constants.DT_FILE: constants.LD_FILE,
8744 constants.DT_SHARED_FILE: constants.LD_FILE,
8745 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8746 constants.DT_RBD: constants.LD_RBD,
8750 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8751 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8752 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8753 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8754 """Generate the entire disk layout for a given template type.
8757 #TODO: compute space requirements
8759 vgname = lu.cfg.GetVGName()
8760 disk_count = len(disk_info)
8763 if template_name == constants.DT_DISKLESS:
8765 elif template_name == constants.DT_DRBD8:
8766 if len(secondary_nodes) != 1:
8767 raise errors.ProgrammerError("Wrong template configuration")
8768 remote_node = secondary_nodes[0]
8769 minors = lu.cfg.AllocateDRBDMinor(
8770 [primary_node, remote_node] * len(disk_info), instance_name)
8772 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8774 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8777 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8778 for i in range(disk_count)]):
8779 names.append(lv_prefix + "_data")
8780 names.append(lv_prefix + "_meta")
8781 for idx, disk in enumerate(disk_info):
8782 disk_index = idx + base_index
8783 data_vg = disk.get(constants.IDISK_VG, vgname)
8784 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8785 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8786 disk[constants.IDISK_SIZE],
8788 names[idx * 2:idx * 2 + 2],
8789 "disk/%d" % disk_index,
8790 minors[idx * 2], minors[idx * 2 + 1])
8791 disk_dev.mode = disk[constants.IDISK_MODE]
8792 disks.append(disk_dev)
8795 raise errors.ProgrammerError("Wrong template configuration")
8797 if template_name == constants.DT_FILE:
8799 elif template_name == constants.DT_SHARED_FILE:
8800 _req_shr_file_storage()
8802 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8803 if name_prefix is None:
8806 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8807 (name_prefix, base_index + i)
8808 for i in range(disk_count)])
8810 if template_name == constants.DT_PLAIN:
8811 def logical_id_fn(idx, _, disk):
8812 vg = disk.get(constants.IDISK_VG, vgname)
8813 return (vg, names[idx])
8814 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8816 lambda _, disk_index, disk: (file_driver,
8817 "%s/disk%d" % (file_storage_dir,
8819 elif template_name == constants.DT_BLOCK:
8821 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8822 disk[constants.IDISK_ADOPT])
8823 elif template_name == constants.DT_RBD:
8824 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8826 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8828 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8830 for idx, disk in enumerate(disk_info):
8831 disk_index = idx + base_index
8832 size = disk[constants.IDISK_SIZE]
8833 feedback_fn("* disk %s, size %s" %
8834 (disk_index, utils.FormatUnit(size, "h")))
8835 disks.append(objects.Disk(dev_type=dev_type, size=size,
8836 logical_id=logical_id_fn(idx, disk_index, disk),
8837 iv_name="disk/%d" % disk_index,
8838 mode=disk[constants.IDISK_MODE],
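# Illustrative sketch (not part of the original module): how the DRBD8 branch
# above pairs generated LV names and allocated minors per disk. For disk
# index idx the data/meta LVs are names[2 * idx] / names[2 * idx + 1] and the
# primary/secondary minors are minors[2 * idx] / minors[2 * idx + 1]. The
# example names below are made up.
def _drbd_pairs_sketch(names, minors):
  assert len(names) == len(minors)
  return [((names[2 * i], names[2 * i + 1]),    # (data LV, meta LV)
           (minors[2 * i], minors[2 * i + 1]))  # (primary, secondary minor)
          for i in range(len(names) // 2)]

pairs = _drbd_pairs_sketch(["uuid0.disk0_data", "uuid0.disk0_meta",
                            "uuid1.disk1_data", "uuid1.disk1_meta"],
                           [0, 1, 2, 3])
assert pairs[1] == (("uuid1.disk1_data", "uuid1.disk1_meta"), (2, 3))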
8844 def _GetInstanceInfoText(instance):
8845 """Compute that text that should be added to the disk's metadata.
8848 return "originstname+%s" % instance.name
8851 def _CalcEta(time_taken, written, total_size):
8852 """Calculates the ETA based on size written and total size.
8854 @param time_taken: The time taken so far
8855 @param written: amount written so far
8856 @param total_size: The total size of data to be written
8857 @return: The remaining time in seconds
8860 avg_time = time_taken / float(written)
8861 return (total_size - written) * avg_time
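# Illustrative sketch (not part of the original module): the ETA arithmetic
# of _CalcEta with hypothetical numbers. If 30 seconds were spent writing
# 256 MiB out of 1024 MiB, the average is 30 / 256 seconds per MiB, so the
# remaining 768 MiB need about 90 seconds.
def _calc_eta_sketch(time_taken, written, total_size):
  avg_time = time_taken / float(written)    # seconds per unit written so far
  return (total_size - written) * avg_time  # seconds left at the same rate

assert abs(_calc_eta_sketch(30.0, 256, 1024) - 90.0) < 1e-9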
8864 def _WipeDisks(lu, instance):
8865 """Wipes instance disks.
8867 @type lu: L{LogicalUnit}
8868 @param lu: the logical unit on whose behalf we execute
8869 @type instance: L{objects.Instance}
8870 @param instance: the instance whose disks we should wipe
8871 @return: the success of the wipe
8874 node = instance.primary_node
8876 for device in instance.disks:
8877 lu.cfg.SetDiskID(device, node)
8879 logging.info("Pause sync of instance %s disks", instance.name)
8880 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8881 (instance.disks, instance),
8884 for idx, success in enumerate(result.payload):
8886 logging.warn("pause-sync of instance %s for disk %d failed",
8890 for idx, device in enumerate(instance.disks):
8891 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk, but at
8892 # most MAX_WIPE_CHUNK (see the standalone sketch after this function)
8893 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8894 constants.MIN_WIPE_CHUNK_PERCENT)
8895 # we _must_ make this an int, otherwise rounding errors will
8897 wipe_chunk_size = int(wipe_chunk_size)
8899 lu.LogInfo("* Wiping disk %d", idx)
8900 logging.info("Wiping disk %d for instance %s, node %s using"
8901 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8906 start_time = time.time()
8908 while offset < size:
8909 wipe_size = min(wipe_chunk_size, size - offset)
8910 logging.debug("Wiping disk %d, offset %s, chunk %s",
8911 idx, offset, wipe_size)
8912 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
8914 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8915 (idx, offset, wipe_size))
8918 if now - last_output >= 60:
8919 eta = _CalcEta(now - start_time, offset, size)
8920 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8921 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8924 logging.info("Resume sync of instance %s disks", instance.name)
8926 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8927 (instance.disks, instance),
8930 for idx, success in enumerate(result.payload):
8932 lu.LogWarning("Resume sync of disk %d failed, please have a"
8933 " look at the status and troubleshoot the issue", idx)
8934 logging.warn("resume-sync of instance %s for disk %d failed",
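# Illustrative sketch (not part of the original module): how _WipeDisks picks
# its chunk size and walks the disk. The constant values below are assumed
# for the example only; the real ones live in ganeti.constants.
MAX_WIPE_CHUNK_SKETCH = 1024        # assumed cap, in MiB, per wipe call
MIN_WIPE_CHUNK_PERCENT_SKETCH = 10  # assumed minimum share of the disk

def _wipe_plan_sketch(disk_size):
  """Return the (offset, size) chunks a wipe of disk_size MiB would use."""
  chunk = int(min(MAX_WIPE_CHUNK_SKETCH,
                  disk_size / 100.0 * MIN_WIPE_CHUNK_PERCENT_SKETCH))
  offset = 0
  chunks = []
  while offset < disk_size:
    wipe_size = min(chunk, disk_size - offset)
    chunks.append((offset, wipe_size))
    offset += wipe_size
  return chunks

# A 2500 MiB disk is wiped in ten 250 MiB steps (10% of the disk, below cap).
assert _wipe_plan_sketch(2500) == [(i * 250, 250) for i in range(10)]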
8938 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8939 """Create all disks for an instance.
8941 This abstracts away some work from AddInstance.
8943 @type lu: L{LogicalUnit}
8944 @param lu: the logical unit on whose behalf we execute
8945 @type instance: L{objects.Instance}
8946 @param instance: the instance whose disks we should create
8948 @param to_skip: list of indices to skip
8949 @type target_node: string
8950 @param target_node: if passed, overrides the target node for creation
8952 @return: the success of the creation
8955 info = _GetInstanceInfoText(instance)
8956 if target_node is None:
8957 pnode = instance.primary_node
8958 all_nodes = instance.all_nodes
8963 if instance.disk_template in constants.DTS_FILEBASED:
8964 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8965 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8967 result.Raise("Failed to create directory '%s' on"
8968 " node %s" % (file_storage_dir, pnode))
8970 # Note: this needs to be kept in sync with adding of disks in
8971 # LUInstanceSetParams
8972 for idx, device in enumerate(instance.disks):
8973 if to_skip and idx in to_skip:
8975 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
8977 for node in all_nodes:
8978 f_create = node == pnode
8979 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8982 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
8983 """Remove all disks for an instance.
8985 This abstracts away some work from `AddInstance()` and
8986 `RemoveInstance()`. Note that in case some of the devices couldn't
8987 be removed, the removal will continue with the other ones (compare
8988 with `_CreateDisks()`).
8990 @type lu: L{LogicalUnit}
8991 @param lu: the logical unit on whose behalf we execute
8992 @type instance: L{objects.Instance}
8993 @param instance: the instance whose disks we should remove
8994 @type target_node: string
8995 @param target_node: used to override the node on which to remove the disks
8997 @return: the success of the removal
9000 logging.info("Removing block devices for instance %s", instance.name)
9003 ports_to_release = set()
9004 for (idx, device) in enumerate(instance.disks):
9006 edata = [(target_node, device)]
9008 edata = device.ComputeNodeTree(instance.primary_node)
9009 for node, disk in edata:
9010 lu.cfg.SetDiskID(disk, node)
9011 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
9013 lu.LogWarning("Could not remove disk %s on node %s,"
9014 " continuing anyway: %s", idx, node, msg)
9017 # if this is a DRBD disk, return its port to the pool
9018 if device.dev_type in constants.LDS_DRBD:
9019 ports_to_release.add(device.logical_id[2])
9021 if all_result or ignore_failures:
9022 for port in ports_to_release:
9023 lu.cfg.AddTcpUdpPort(port)
9025 if instance.disk_template == constants.DT_FILE:
9026 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9030 tgt = instance.primary_node
9031 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9033 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9034 file_storage_dir, instance.primary_node, result.fail_msg)
9040 def _ComputeDiskSizePerVG(disk_template, disks):
9041 """Compute disk size requirements in the volume group
9044 def _compute(disks, payload):
9045 """Universal algorithm.
9050 vgs[disk[constants.IDISK_VG]] = \
9051 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9055 # Required free disk space as a function of disk and swap space
9057 constants.DT_DISKLESS: {},
9058 constants.DT_PLAIN: _compute(disks, 0),
9059 # 128 MB are added for drbd metadata for each disk
9060 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9061 constants.DT_FILE: {},
9062 constants.DT_SHARED_FILE: {},
9065 if disk_template not in req_size_dict:
9066 raise errors.ProgrammerError("Disk template '%s' size requirement"
9067 " is unknown" % disk_template)
9069 return req_size_dict[disk_template]
9072 def _ComputeDiskSize(disk_template, disks):
9073 """Compute disk size requirements in the volume group
9076 # Required free disk space as a function of disk and swap space
9078 constants.DT_DISKLESS: None,
9079 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9080 # 128 MB are added for drbd metadata for each disk
9082 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9083 constants.DT_FILE: None,
9084 constants.DT_SHARED_FILE: 0,
9085 constants.DT_BLOCK: 0,
9086 constants.DT_RBD: 0,
9089 if disk_template not in req_size_dict:
9090 raise errors.ProgrammerError("Disk template '%s' size requirement"
9091 " is unknown" % disk_template)
9093 return req_size_dict[disk_template]
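# Illustrative sketch (not part of the original module): the size arithmetic
# of _ComputeDiskSize for the two LVM-backed templates. The 128 MiB per-disk
# metadata overhead matches the "128 MB are added for drbd metadata" comment
# above (DRBD_META_SIZE).
def _required_space_sketch(template, disk_sizes):
  if template == "plain":
    return sum(disk_sizes)
  if template == "drbd":
    return sum(size + 128 for size in disk_sizes)
  return None  # diskless/file/rbd templates need no volume group space here

# Two disks of 10 GiB and 2 GiB (sizes in MiB):
assert _required_space_sketch("plain", [10240, 2048]) == 12288
assert _required_space_sketch("drbd", [10240, 2048]) == 12288 + 2 * 128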
9096 def _FilterVmNodes(lu, nodenames):
9097 """Filters out non-vm_capable nodes from a list.
9099 @type lu: L{LogicalUnit}
9100 @param lu: the logical unit for which we check
9101 @type nodenames: list
9102 @param nodenames: the list of nodes on which we should check
9104 @return: the list of vm-capable nodes
9107 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9108 return [name for name in nodenames if name not in vm_nodes]
9111 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9112 """Hypervisor parameter validation.
9114 This function abstracts the hypervisor parameter validation to be
9115 used in both instance create and instance modify.
9117 @type lu: L{LogicalUnit}
9118 @param lu: the logical unit for which we check
9119 @type nodenames: list
9120 @param nodenames: the list of nodes on which we should check
9121 @type hvname: string
9122 @param hvname: the name of the hypervisor we should use
9123 @type hvparams: dict
9124 @param hvparams: the parameters which we need to check
9125 @raise errors.OpPrereqError: if the parameters are not valid
9128 nodenames = _FilterVmNodes(lu, nodenames)
9130 cluster = lu.cfg.GetClusterInfo()
9131 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9133 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9134 for node in nodenames:
9138 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9141 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9142 """OS parameters validation.
9144 @type lu: L{LogicalUnit}
9145 @param lu: the logical unit for which we check
9146 @type required: boolean
9147 @param required: whether the validation should fail if the OS is not
9149 @type nodenames: list
9150 @param nodenames: the list of nodes on which we should check
9151 @type osname: string
9152 @param osname: the name of the OS we should use
9153 @type osparams: dict
9154 @param osparams: the parameters which we need to check
9155 @raise errors.OpPrereqError: if the parameters are not valid
9158 nodenames = _FilterVmNodes(lu, nodenames)
9159 result = lu.rpc.call_os_validate(nodenames, required, osname,
9160 [constants.OS_VALIDATE_PARAMETERS],
9162 for node, nres in result.items():
9163 # we don't check for offline cases since this should be run only
9164 # against the master node and/or an instance's nodes
9165 nres.Raise("OS Parameters validation failed on node %s" % node)
9166 if not nres.payload:
9167 lu.LogInfo("OS %s not found on node %s, validation skipped",
9171 class LUInstanceCreate(LogicalUnit):
9172 """Create an instance.
9175 HPATH = "instance-add"
9176 HTYPE = constants.HTYPE_INSTANCE
9179 def CheckArguments(self):
9183 # do not require name_check to ease forward/backward compatibility
9185 if self.op.no_install and self.op.start:
9186 self.LogInfo("No-installation mode selected, disabling startup")
9187 self.op.start = False
9188 # validate/normalize the instance name
9189 self.op.instance_name = \
9190 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9192 if self.op.ip_check and not self.op.name_check:
9193 # TODO: make the ip check more flexible and not depend on the name check
9194 raise errors.OpPrereqError("Cannot do IP address check without a name"
9195 " check", errors.ECODE_INVAL)
9197 # check nics' parameter names
9198 for nic in self.op.nics:
9199 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9201 # check disks' parameter names and consistent adopt/no-adopt strategy
9202 has_adopt = has_no_adopt = False
9203 for disk in self.op.disks:
9204 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9205 if constants.IDISK_ADOPT in disk:
9209 if has_adopt and has_no_adopt:
9210 raise errors.OpPrereqError("Either all disks are adopted or none is",
9213 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9214 raise errors.OpPrereqError("Disk adoption is not supported for the"
9215 " '%s' disk template" %
9216 self.op.disk_template,
9218 if self.op.iallocator is not None:
9219 raise errors.OpPrereqError("Disk adoption not allowed with an"
9220 " iallocator script", errors.ECODE_INVAL)
9221 if self.op.mode == constants.INSTANCE_IMPORT:
9222 raise errors.OpPrereqError("Disk adoption not allowed for"
9223 " instance import", errors.ECODE_INVAL)
9225 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9226 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9227 " but no 'adopt' parameter given" %
9228 self.op.disk_template,
9231 self.adopt_disks = has_adopt
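# Illustrative sketch (not part of the original module): the all-or-none
# adoption rule enforced above. Disks are plain dicts; the "adopt" key stands
# in for constants.IDISK_ADOPT.
def _check_adoption_sketch(disks):
  has_adopt = any("adopt" in d for d in disks)
  has_no_adopt = any("adopt" not in d for d in disks)
  if has_adopt and has_no_adopt:
    raise ValueError("Either all disks are adopted or none is")
  return has_adopt

assert _check_adoption_sketch([{"adopt": "xenvg/vol1"}, {"adopt": "xenvg/vol2"}])
assert not _check_adoption_sketch([{"size": 1024}, {"size": 2048}])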
9233 # instance name verification
9234 if self.op.name_check:
9235 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9236 self.op.instance_name = self.hostname1.name
9237 # used in CheckPrereq for ip ping check
9238 self.check_ip = self.hostname1.ip
9240 self.check_ip = None
9242 # file storage checks
9243 if (self.op.file_driver and
9244 self.op.file_driver not in constants.FILE_DRIVER):
9245 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9246 self.op.file_driver, errors.ECODE_INVAL)
9248 if self.op.disk_template == constants.DT_FILE:
9249 opcodes.RequireFileStorage()
9250 elif self.op.disk_template == constants.DT_SHARED_FILE:
9251 opcodes.RequireSharedFileStorage()
9253 ### Node/iallocator related checks
9254 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9256 if self.op.pnode is not None:
9257 if self.op.disk_template in constants.DTS_INT_MIRROR:
9258 if self.op.snode is None:
9259 raise errors.OpPrereqError("The networked disk templates need"
9260 " a mirror node", errors.ECODE_INVAL)
9262 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9264 self.op.snode = None
9266 self._cds = _GetClusterDomainSecret()
9268 if self.op.mode == constants.INSTANCE_IMPORT:
9269 # On import force_variant must be True, because if we forced it at
9270 # initial install, our only chance when importing it back is that it
9272 self.op.force_variant = True
9274 if self.op.no_install:
9275 self.LogInfo("No-installation mode has no effect during import")
9277 elif self.op.mode == constants.INSTANCE_CREATE:
9278 if self.op.os_type is None:
9279 raise errors.OpPrereqError("No guest OS specified",
9281 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9282 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9283 " installation" % self.op.os_type,
9285 if self.op.disk_template is None:
9286 raise errors.OpPrereqError("No disk template specified",
9289 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9290 # Check handshake to ensure both clusters have the same domain secret
9291 src_handshake = self.op.source_handshake
9292 if not src_handshake:
9293 raise errors.OpPrereqError("Missing source handshake",
9296 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9299 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9302 # Load and check source CA
9303 self.source_x509_ca_pem = self.op.source_x509_ca
9304 if not self.source_x509_ca_pem:
9305 raise errors.OpPrereqError("Missing source X509 CA",
9309 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9311 except OpenSSL.crypto.Error, err:
9312 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9313 (err, ), errors.ECODE_INVAL)
9315 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9316 if errcode is not None:
9317 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9320 self.source_x509_ca = cert
9322 src_instance_name = self.op.source_instance_name
9323 if not src_instance_name:
9324 raise errors.OpPrereqError("Missing source instance name",
9327 self.source_instance_name = \
9328 netutils.GetHostname(name=src_instance_name).name
9331 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9332 self.op.mode, errors.ECODE_INVAL)
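# Summary of the creation modes validated above:
#   INSTANCE_CREATE        - needs a guest OS type and a disk template
#   INSTANCE_IMPORT        - import from a local export; force_variant is
#                            implied and no_install has no effect
#   INSTANCE_REMOTE_IMPORT - inter-cluster import; requires a valid
#                            handshake and source X509 CA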
9334 def ExpandNames(self):
9335 """ExpandNames for CreateInstance.
9337 Figure out the right locks for instance creation.
9340 self.needed_locks = {}
9342 instance_name = self.op.instance_name
9343 # this is just a preventive check, but someone might still add this
9344 # instance in the meantime, and creation will fail at lock-add time
9345 if instance_name in self.cfg.GetInstanceList():
9346 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9347 instance_name, errors.ECODE_EXISTS)
9349 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9351 if self.op.iallocator:
9352 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9353 # specifying a group on instance creation and then selecting nodes from
9354 # that group
9355 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9356 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9358 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9359 nodelist = [self.op.pnode]
9360 if self.op.snode is not None:
9361 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9362 nodelist.append(self.op.snode)
9363 self.needed_locks[locking.LEVEL_NODE] = nodelist
9364 # Lock resources of instance's primary and secondary nodes (copy to
9365 # prevent accidental modification)
9366 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9368 # in case of import lock the source node too
9369 if self.op.mode == constants.INSTANCE_IMPORT:
9370 src_node = self.op.src_node
9371 src_path = self.op.src_path
9373 if src_path is None:
9374 self.op.src_path = src_path = self.op.instance_name
9376 if src_node is None:
9377 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9378 self.op.src_node = None
9379 if os.path.isabs(src_path):
9380 raise errors.OpPrereqError("Importing an instance from a path"
9381 " requires a source node option",
9384 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9385 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9386 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9387 if not os.path.isabs(src_path):
9388 self.op.src_path = src_path = \
9389 utils.PathJoin(constants.EXPORT_DIR, src_path)
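# Locking strategy: with an iallocator the target nodes are not known yet,
# so all node (and node resource) locks are taken; otherwise only the
# primary/secondary nodes are locked, plus the source node for imports
# (or, when no source node was given, all nodes).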
9391 def _RunAllocator(self):
9392 """Run the allocator based on input opcode.
9395 nics = [n.ToDict() for n in self.nics]
9396 ial = IAllocator(self.cfg, self.rpc,
9397 mode=constants.IALLOCATOR_MODE_ALLOC,
9398 name=self.op.instance_name,
9399 disk_template=self.op.disk_template,
9402 vcpus=self.be_full[constants.BE_VCPUS],
9403 memory=self.be_full[constants.BE_MAXMEM],
9404 spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9407 hypervisor=self.op.hypervisor,
9410 ial.Run(self.op.iallocator)
9412 if not ial.success:
9413 raise errors.OpPrereqError("Can't compute nodes using"
9414 " iallocator '%s': %s" %
9415 (self.op.iallocator, ial.info),
9417 if len(ial.result) != ial.required_nodes:
9418 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9419 " of nodes (%s), required %s" %
9420 (self.op.iallocator, len(ial.result),
9421 ial.required_nodes), errors.ECODE_FAULT)
9422 self.op.pnode = ial.result[0]
9423 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9424 self.op.instance_name, self.op.iallocator,
9425 utils.CommaJoin(ial.result))
9426 if ial.required_nodes == 2:
9427 self.op.snode = ial.result[1]
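# ial.result is a list of node names chosen by the allocator: the first
# entry becomes the primary node and, when two nodes are required (as for
# the mirrored disk templates), the second becomes the secondary.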
9429 def BuildHooksEnv(self):
9432 This runs on master, primary and secondary nodes of the instance.
9436 "ADD_MODE": self.op.mode,
9438 if self.op.mode == constants.INSTANCE_IMPORT:
9439 env["SRC_NODE"] = self.op.src_node
9440 env["SRC_PATH"] = self.op.src_path
9441 env["SRC_IMAGES"] = self.src_images
9443 env.update(_BuildInstanceHookEnv(
9444 name=self.op.instance_name,
9445 primary_node=self.op.pnode,
9446 secondary_nodes=self.secondaries,
9447 status=self.op.start,
9448 os_type=self.op.os_type,
9449 minmem=self.be_full[constants.BE_MINMEM],
9450 maxmem=self.be_full[constants.BE_MAXMEM],
9451 vcpus=self.be_full[constants.BE_VCPUS],
9452 nics=_NICListToTuple(self, self.nics),
9453 disk_template=self.op.disk_template,
9454 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9455 for d in self.disks],
9458 hypervisor_name=self.op.hypervisor,
9464 def BuildHooksNodes(self):
9465 """Build hooks nodes.
9468 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9469 return nl, nl
9471 def _ReadExportInfo(self):
9472 """Reads the export information from disk.
9474 It will override the opcode source node and path with the actual
9475 information, if these two were not specified before.
9477 @return: the export information
9480 assert self.op.mode == constants.INSTANCE_IMPORT
9482 src_node = self.op.src_node
9483 src_path = self.op.src_path
9485 if src_node is None:
9486 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9487 exp_list = self.rpc.call_export_list(locked_nodes)
9489 for node in exp_list:
9490 if exp_list[node].fail_msg:
9492 if src_path in exp_list[node].payload:
9494 self.op.src_node = src_node = node
9495 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9499 raise errors.OpPrereqError("No export found for relative path %s" %
9500 src_path, errors.ECODE_INVAL)
9502 _CheckNodeOnline(self, src_node)
9503 result = self.rpc.call_export_info(src_node, src_path)
9504 result.Raise("No export or invalid export found in dir %s" % src_path)
9506 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9507 if not export_info.has_section(constants.INISECT_EXP):
9508 raise errors.ProgrammerError("Corrupted export config",
9509 errors.ECODE_ENVIRON)
9511 ei_version = export_info.get(constants.INISECT_EXP, "version")
9512 if (int(ei_version) != constants.EXPORT_VERSION):
9513 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9514 (ei_version, constants.EXPORT_VERSION),
9515 errors.ECODE_ENVIRON)
9517 return export_info
9518 def _ReadExportParams(self, einfo):
9519 """Use export parameters as defaults.
9521 In case the opcode doesn't specify (as in override) some instance
9522 parameters, then try to use them from the export information, if
9523 they declare them.
9526 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9528 if self.op.disk_template is None:
9529 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9530 self.op.disk_template = einfo.get(constants.INISECT_INS,
9532 if self.op.disk_template not in constants.DISK_TEMPLATES:
9533 raise errors.OpPrereqError("Disk template specified in configuration"
9534 " file is not one of the allowed values:"
9535 " %s" % " ".join(constants.DISK_TEMPLATES))
9537 raise errors.OpPrereqError("No disk template specified and the export"
9538 " is missing the disk_template information",
9541 if not self.op.disks:
9542 disks = []
9543 # TODO: import the disk iv_name too
9544 for idx in range(constants.MAX_DISKS):
9545 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9546 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9547 disks.append({constants.IDISK_SIZE: disk_sz})
9548 self.op.disks = disks
9549 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9550 raise errors.OpPrereqError("No disk info specified and the export"
9551 " is missing the disk information",
9554 if not self.op.nics:
9556 for idx in range(constants.MAX_NICS):
9557 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9559 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9560 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9567 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9568 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9570 if (self.op.hypervisor is None and
9571 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9572 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9574 if einfo.has_section(constants.INISECT_HYP):
9575 # use the export parameters but do not override the ones
9576 # specified by the user
9577 for name, value in einfo.items(constants.INISECT_HYP):
9578 if name not in self.op.hvparams:
9579 self.op.hvparams[name] = value
9581 if einfo.has_section(constants.INISECT_BEP):
9582 # use the parameters, without overriding
9583 for name, value in einfo.items(constants.INISECT_BEP):
9584 if name not in self.op.beparams:
9585 self.op.beparams[name] = value
9586 # Compatibility for the old "memory" be param
9587 if name == constants.BE_MEMORY:
9588 if constants.BE_MAXMEM not in self.op.beparams:
9589 self.op.beparams[constants.BE_MAXMEM] = value
9590 if constants.BE_MINMEM not in self.op.beparams:
9591 self.op.beparams[constants.BE_MINMEM] = value
9593 # try to read the parameters old style, from the main section
9594 for name in constants.BES_PARAMETERS:
9595 if (name not in self.op.beparams and
9596 einfo.has_option(constants.INISECT_INS, name)):
9597 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9599 if einfo.has_section(constants.INISECT_OSP):
9600 # use the parameters, without overriding
9601 for name, value in einfo.items(constants.INISECT_OSP):
9602 if name not in self.op.osparams:
9603 self.op.osparams[name] = value
9605 def _RevertToDefaults(self, cluster):
9606 """Revert the instance parameters to the default values.
9610 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9611 for name in self.op.hvparams.keys():
9612 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9613 del self.op.hvparams[name]
9615 be_defs = cluster.SimpleFillBE({})
9616 for name in self.op.beparams.keys():
9617 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9618 del self.op.beparams[name]
9620 nic_defs = cluster.SimpleFillNIC({})
9621 for nic in self.op.nics:
9622 for name in constants.NICS_PARAMETERS:
9623 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9624 del nic[name]
9626 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9627 for name in self.op.osparams.keys():
9628 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9629 del self.op.osparams[name]
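# Illustrative (hypothetical) example of the effect: if the cluster default
# for a hypervisor parameter is kernel_path=/boot/vmlinuz and the opcode
# passes the same value, the parameter is dropped from the opcode so the
# instance keeps tracking the cluster default instead of pinning it.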
9631 def _CalculateFileStorageDir(self):
9632 """Calculate final instance file storage dir.
9635 # file storage dir calculation/check
9636 self.instance_file_storage_dir = None
9637 if self.op.disk_template in constants.DTS_FILEBASED:
9638 # build the full file storage dir path
9639 joinargs = []
9641 if self.op.disk_template == constants.DT_SHARED_FILE:
9642 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9643 else:
9644 get_fsd_fn = self.cfg.GetFileStorageDir
9646 cfg_storagedir = get_fsd_fn()
9647 if not cfg_storagedir:
9648 raise errors.OpPrereqError("Cluster file storage dir not defined")
9649 joinargs.append(cfg_storagedir)
9651 if self.op.file_storage_dir is not None:
9652 joinargs.append(self.op.file_storage_dir)
9654 joinargs.append(self.op.instance_name)
9656 # pylint: disable=W0142
9657 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
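# The resulting path has the shape
#   <cluster file storage dir>[/<self.op.file_storage_dir>]/<instance name>
# e.g. (illustrative values only) /srv/ganeti/file-storage/web1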
9659 def CheckPrereq(self): # pylint: disable=R0914
9660 """Check prerequisites.
9663 self._CalculateFileStorageDir()
9665 if self.op.mode == constants.INSTANCE_IMPORT:
9666 export_info = self._ReadExportInfo()
9667 self._ReadExportParams(export_info)
9668 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9669 else:
9670 self._old_instance_name = None
9672 if (not self.cfg.GetVGName() and
9673 self.op.disk_template not in constants.DTS_NOT_LVM):
9674 raise errors.OpPrereqError("Cluster does not support lvm-based"
9675 " instances", errors.ECODE_STATE)
9677 if (self.op.hypervisor is None or
9678 self.op.hypervisor == constants.VALUE_AUTO):
9679 self.op.hypervisor = self.cfg.GetHypervisorType()
9681 cluster = self.cfg.GetClusterInfo()
9682 enabled_hvs = cluster.enabled_hypervisors
9683 if self.op.hypervisor not in enabled_hvs:
9684 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9685 " cluster (%s)" % (self.op.hypervisor,
9686 ",".join(enabled_hvs)),
9689 # Check tag validity
9690 for tag in self.op.tags:
9691 objects.TaggableObject.ValidateTag(tag)
9693 # check hypervisor parameter syntax (locally)
9694 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9695 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9696 self.op.hvparams)
9697 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9698 hv_type.CheckParameterSyntax(filled_hvp)
9699 self.hv_full = filled_hvp
9700 # check that we don't specify global parameters on an instance
9701 _CheckGlobalHvParams(self.op.hvparams)
9703 # fill and remember the beparams dict
9704 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9705 for param, value in self.op.beparams.iteritems():
9706 if value == constants.VALUE_AUTO:
9707 self.op.beparams[param] = default_beparams[param]
9708 objects.UpgradeBeParams(self.op.beparams)
9709 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9710 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9712 # build os parameters
9713 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9715 # now that hvp/bep are in final format, let's reset to defaults,
9716 # if told to do so
9717 if self.op.identify_defaults:
9718 self._RevertToDefaults(cluster)
9720 # NIC buildup
9721 self.nics = []
9722 for idx, nic in enumerate(self.op.nics):
9723 nic_mode_req = nic.get(constants.INIC_MODE, None)
9724 nic_mode = nic_mode_req
9725 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9726 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9728 # in routed mode, for the first nic, the default ip is 'auto'
9729 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9730 default_ip_mode = constants.VALUE_AUTO
9732 default_ip_mode = constants.VALUE_NONE
9734 # ip validity checks
9735 ip = nic.get(constants.INIC_IP, default_ip_mode)
9736 if ip is None or ip.lower() == constants.VALUE_NONE:
9737 nic_ip = None
9738 elif ip.lower() == constants.VALUE_AUTO:
9739 if not self.op.name_check:
9740 raise errors.OpPrereqError("IP address set to auto but name checks"
9741 " have been skipped",
9743 nic_ip = self.hostname1.ip
9745 if not netutils.IPAddress.IsValid(ip):
9746 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9750 # TODO: check the ip address for uniqueness
9751 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9752 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9755 # MAC address verification
9756 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9757 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9758 mac = utils.NormalizeAndValidateMac(mac)
9760 try:
9761 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9762 except errors.ReservationError:
9763 raise errors.OpPrereqError("MAC address %s already in use"
9764 " in cluster" % mac,
9765 errors.ECODE_NOTUNIQUE)
9767 # Build nic parameters
9768 link = nic.get(constants.INIC_LINK, None)
9769 if link == constants.VALUE_AUTO:
9770 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9773 nicparams[constants.NIC_MODE] = nic_mode
9775 nicparams[constants.NIC_LINK] = link
9777 check_params = cluster.SimpleFillNIC(nicparams)
9778 objects.NIC.CheckParameterSyntax(check_params)
9779 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9781 # disk checks/pre-build
9782 default_vg = self.cfg.GetVGName()
9783 self.disks = []
9784 for disk in self.op.disks:
9785 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9786 if mode not in constants.DISK_ACCESS_SET:
9787 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9788 mode, errors.ECODE_INVAL)
9789 size = disk.get(constants.IDISK_SIZE, None)
9790 if size is None:
9791 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9792 try:
9793 size = int(size)
9794 except (TypeError, ValueError):
9795 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9798 data_vg = disk.get(constants.IDISK_VG, default_vg)
9799 new_disk = {
9800 constants.IDISK_SIZE: size,
9801 constants.IDISK_MODE: mode,
9802 constants.IDISK_VG: data_vg,
9803 }
9804 if constants.IDISK_METAVG in disk:
9805 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9806 if constants.IDISK_ADOPT in disk:
9807 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9808 self.disks.append(new_disk)
9810 if self.op.mode == constants.INSTANCE_IMPORT:
9811 disk_images = []
9812 for idx in range(len(self.disks)):
9813 option = "disk%d_dump" % idx
9814 if export_info.has_option(constants.INISECT_INS, option):
9815 # FIXME: are the old os-es, disk sizes, etc. useful?
9816 export_name = export_info.get(constants.INISECT_INS, option)
9817 image = utils.PathJoin(self.op.src_path, export_name)
9818 disk_images.append(image)
9820 disk_images.append(False)
9822 self.src_images = disk_images
9824 if self.op.instance_name == self._old_instance_name:
9825 for idx, nic in enumerate(self.nics):
9826 if nic.mac == constants.VALUE_AUTO:
9827 nic_mac_ini = "nic%d_mac" % idx
9828 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9830 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9832 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9833 if self.op.ip_check:
9834 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9835 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9836 (self.check_ip, self.op.instance_name),
9837 errors.ECODE_NOTUNIQUE)
9839 #### mac address generation
9840 # By generating here the mac address both the allocator and the hooks get
9841 # the real final mac address rather than the 'auto' or 'generate' value.
9842 # There is a race condition between the generation and the instance object
9843 # creation, which means that we know the mac is valid now, but we're not
9844 # sure it will be when we actually add the instance. If things go bad
9845 # adding the instance will abort because of a duplicate mac, and the
9846 # creation job will fail.
9847 for nic in self.nics:
9848 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9849 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9853 if self.op.iallocator is not None:
9854 self._RunAllocator()
9856 # Release all unneeded node locks
9857 _ReleaseLocks(self, locking.LEVEL_NODE,
9858 keep=filter(None, [self.op.pnode, self.op.snode,
9859 self.op.src_node]))
9860 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9861 keep=filter(None, [self.op.pnode, self.op.snode,
9862 self.op.src_node]))
9864 #### node related checks
9866 # check primary node
9867 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9868 assert self.pnode is not None, \
9869 "Cannot retrieve locked node %s" % self.op.pnode
9870 if pnode.offline:
9871 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9872 pnode.name, errors.ECODE_STATE)
9873 if pnode.drained:
9874 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9875 pnode.name, errors.ECODE_STATE)
9876 if not pnode.vm_capable:
9877 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9878 " '%s'" % pnode.name, errors.ECODE_STATE)
9880 self.secondaries = []
9882 # mirror node verification
9883 if self.op.disk_template in constants.DTS_INT_MIRROR:
9884 if self.op.snode == pnode.name:
9885 raise errors.OpPrereqError("The secondary node cannot be the"
9886 " primary node", errors.ECODE_INVAL)
9887 _CheckNodeOnline(self, self.op.snode)
9888 _CheckNodeNotDrained(self, self.op.snode)
9889 _CheckNodeVmCapable(self, self.op.snode)
9890 self.secondaries.append(self.op.snode)
9892 snode = self.cfg.GetNodeInfo(self.op.snode)
9893 if pnode.group != snode.group:
9894 self.LogWarning("The primary and secondary nodes are in two"
9895 " different node groups; the disk parameters"
9896 " from the first disk's node group will be"
9897 " used")
9899 nodenames = [pnode.name] + self.secondaries
9901 # Verify instance specs
9902 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
9903 ispec = {
9904 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9905 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9906 constants.ISPEC_DISK_COUNT: len(self.disks),
9907 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9908 constants.ISPEC_NIC_COUNT: len(self.nics),
9909 constants.ISPEC_SPINDLE_USE: spindle_use,
9910 }
9912 group_info = self.cfg.GetNodeGroup(pnode.group)
9913 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9914 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9915 if not self.op.ignore_ipolicy and res:
9916 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9917 " policy: %s") % (pnode.group,
9918 utils.CommaJoin(res)),
9921 if not self.adopt_disks:
9922 if self.op.disk_template == constants.DT_RBD:
9923 # _CheckRADOSFreeSpace() is just a placeholder.
9924 # Any function that checks prerequisites can be placed here.
9925 # Check if there is enough space on the RADOS cluster.
9926 _CheckRADOSFreeSpace()
9928 # Check lv size requirements, if not adopting
9929 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9930 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9932 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9933 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9934 disk[constants.IDISK_ADOPT])
9935 for disk in self.disks])
9936 if len(all_lvs) != len(self.disks):
9937 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9939 for lv_name in all_lvs:
9940 try:
9941 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9942 # to ReserveLV uses the same syntax
9943 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9944 except errors.ReservationError:
9945 raise errors.OpPrereqError("LV named %s used by another instance" %
9946 lv_name, errors.ECODE_NOTUNIQUE)
9948 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9949 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9951 node_lvs = self.rpc.call_lv_list([pnode.name],
9952 vg_names.payload.keys())[pnode.name]
9953 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9954 node_lvs = node_lvs.payload
9956 delta = all_lvs.difference(node_lvs.keys())
9957 if delta:
9958 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9959 utils.CommaJoin(delta),
9961 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9962 if online_lvs:
9963 raise errors.OpPrereqError("Online logical volumes found, cannot"
9964 " adopt: %s" % utils.CommaJoin(online_lvs),
9966 # update the size of disk based on what is found
9967 for dsk in self.disks:
9968 dsk[constants.IDISK_SIZE] = \
9969 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9970 dsk[constants.IDISK_ADOPT])][0]))
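# The lv_list payload maps "vg/lv" names to per-LV tuples; judging from the
# indexing above, element 0 is the size (in MiB, as used elsewhere in this
# module) and element 2 the online flag, so adopted disks inherit their
# real size from the existing volume.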
9972 elif self.op.disk_template == constants.DT_BLOCK:
9973 # Normalize and de-duplicate device paths
9974 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9975 for disk in self.disks])
9976 if len(all_disks) != len(self.disks):
9977 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9979 baddisks = [d for d in all_disks
9980 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9981 if baddisks:
9982 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9983 " cannot be adopted" %
9984 (", ".join(baddisks),
9985 constants.ADOPTABLE_BLOCKDEV_ROOT),
9988 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9989 list(all_disks))[pnode.name]
9990 node_disks.Raise("Cannot get block device information from node %s" %
9991 pnode.name)
9992 node_disks = node_disks.payload
9993 delta = all_disks.difference(node_disks.keys())
9994 if delta:
9995 raise errors.OpPrereqError("Missing block device(s): %s" %
9996 utils.CommaJoin(delta),
9998 for dsk in self.disks:
9999 dsk[constants.IDISK_SIZE] = \
10000 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10002 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10004 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10005 # check OS parameters (remotely)
10006 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10008 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10010 # memory check on primary node
10011 #TODO(dynmem): use MINMEM for checking
10012 if self.op.start:
10013 _CheckNodeFreeMemory(self, self.pnode.name,
10014 "creating instance %s" % self.op.instance_name,
10015 self.be_full[constants.BE_MAXMEM],
10016 self.op.hypervisor)
10018 self.dry_run_result = list(nodenames)
10020 def Exec(self, feedback_fn):
10021 """Create and add the instance to the cluster.
10024 instance = self.op.instance_name
10025 pnode_name = self.pnode.name
10027 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10028 self.owned_locks(locking.LEVEL_NODE)), \
10029 "Node locks differ from node resource locks"
10031 ht_kind = self.op.hypervisor
10032 if ht_kind in constants.HTS_REQ_PORT:
10033 network_port = self.cfg.AllocatePort()
10035 network_port = None
10037 # This is ugly but we got a chicken-egg problem here
10038 # We can only take the group disk parameters, as the instance
10039 # has no disks yet (we are generating them right here).
10040 node = self.cfg.GetNodeInfo(pnode_name)
10041 nodegroup = self.cfg.GetNodeGroup(node.group)
10042 disks = _GenerateDiskTemplate(self,
10043 self.op.disk_template,
10044 instance, pnode_name,
10047 self.instance_file_storage_dir,
10048 self.op.file_driver,
10051 self.cfg.GetGroupDiskParams(nodegroup))
10053 iobj = objects.Instance(name=instance, os=self.op.os_type,
10054 primary_node=pnode_name,
10055 nics=self.nics, disks=disks,
10056 disk_template=self.op.disk_template,
10057 admin_state=constants.ADMINST_DOWN,
10058 network_port=network_port,
10059 beparams=self.op.beparams,
10060 hvparams=self.op.hvparams,
10061 hypervisor=self.op.hypervisor,
10062 osparams=self.op.osparams,
10066 for tag in self.op.tags:
10067 iobj.AddTag(tag)
10069 if self.adopt_disks:
10070 if self.op.disk_template == constants.DT_PLAIN:
10071 # rename LVs to the newly-generated names; we need to construct
10072 # 'fake' LV disks with the old data, plus the new unique_id
10073 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10074 rename_to = []
10075 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10076 rename_to.append(t_dsk.logical_id)
10077 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10078 self.cfg.SetDiskID(t_dsk, pnode_name)
10079 result = self.rpc.call_blockdev_rename(pnode_name,
10080 zip(tmp_disks, rename_to))
10081 result.Raise("Failed to rename adopted LVs")
10083 feedback_fn("* creating instance disks...")
10084 try:
10085 _CreateDisks(self, iobj)
10086 except errors.OpExecError:
10087 self.LogWarning("Device creation failed, reverting...")
10088 try:
10089 _RemoveDisks(self, iobj)
10090 finally:
10091 self.cfg.ReleaseDRBDMinors(instance)
10092 raise
10094 feedback_fn("adding instance %s to cluster config" % instance)
10096 self.cfg.AddInstance(iobj, self.proc.GetECId())
10098 # Declare that we don't want to remove the instance lock anymore, as we've
10099 # added the instance to the config
10100 del self.remove_locks[locking.LEVEL_INSTANCE]
10102 if self.op.mode == constants.INSTANCE_IMPORT:
10103 # Release unused nodes
10104 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10106 # Release all nodes
10107 _ReleaseLocks(self, locking.LEVEL_NODE)
10110 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10111 feedback_fn("* wiping instance disks...")
10112 try:
10113 _WipeDisks(self, iobj)
10114 except errors.OpExecError, err:
10115 logging.exception("Wiping disks failed")
10116 self.LogWarning("Wiping instance disks failed (%s)", err)
10120 # Something is already wrong with the disks, don't do anything else
10122 elif self.op.wait_for_sync:
10123 disk_abort = not _WaitForSync(self, iobj)
10124 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10125 # make sure the disks are not degraded (still sync-ing is ok)
10126 feedback_fn("* checking mirrors status")
10127 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10132 _RemoveDisks(self, iobj)
10133 self.cfg.RemoveInstance(iobj.name)
10134 # Make sure the instance lock gets removed
10135 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10136 raise errors.OpExecError("There are some degraded disks for"
10137 " this instance")
10139 # Release all node resource locks
10140 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10142 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10143 if self.op.mode == constants.INSTANCE_CREATE:
10144 if not self.op.no_install:
10145 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10146 not self.op.wait_for_sync)
10147 if pause_sync:
10148 feedback_fn("* pausing disk sync to install instance OS")
10149 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10152 for idx, success in enumerate(result.payload):
10153 if not success:
10154 logging.warn("pause-sync of instance %s for disk %d failed",
10157 feedback_fn("* running the instance OS create scripts...")
10158 # FIXME: pass debug option from opcode to backend
10159 os_add_result = \
10160 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10161 self.op.debug_level)
10162 if pause_sync:
10163 feedback_fn("* resuming disk sync")
10164 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10167 for idx, success in enumerate(result.payload):
10168 if not success:
10169 logging.warn("resume-sync of instance %s for disk %d failed",
10172 os_add_result.Raise("Could not add os for instance %s"
10173 " on node %s" % (instance, pnode_name))
10176 if self.op.mode == constants.INSTANCE_IMPORT:
10177 feedback_fn("* running the instance OS import scripts...")
10179 transfers = []
10181 for idx, image in enumerate(self.src_images):
10185 # FIXME: pass debug option from opcode to backend
10186 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10187 constants.IEIO_FILE, (image, ),
10188 constants.IEIO_SCRIPT,
10189 (iobj.disks[idx], idx),
10191 transfers.append(dt)
10193 import_result = \
10194 masterd.instance.TransferInstanceData(self, feedback_fn,
10195 self.op.src_node, pnode_name,
10196 self.pnode.secondary_ip,
10197 iobj, transfers)
10198 if not compat.all(import_result):
10199 self.LogWarning("Some disks for instance %s on node %s were not"
10200 " imported successfully" % (instance, pnode_name))
10202 rename_from = self._old_instance_name
10204 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10205 feedback_fn("* preparing remote import...")
10206 # The source cluster will stop the instance before attempting to make
10207 # a connection. In some cases stopping an instance can take a long
10208 # time, hence the shutdown timeout is added to the connection
10209 # timeout
10210 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10211 self.op.source_shutdown_timeout)
10212 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10214 assert iobj.primary_node == self.pnode.name
10215 disk_results = \
10216 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10217 self.source_x509_ca,
10218 self._cds, timeouts)
10219 if not compat.all(disk_results):
10220 # TODO: Should the instance still be started, even if some disks
10221 # failed to import (valid for local imports, too)?
10222 self.LogWarning("Some disks for instance %s on node %s were not"
10223 " imported successfully" % (instance, pnode_name))
10225 rename_from = self.source_instance_name
10227 else:
10228 # also checked in the prereq part
10229 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10230 % self.op.mode)
10232 # Run rename script on newly imported instance
10233 assert iobj.name == instance
10234 feedback_fn("Running rename script for %s" % instance)
10235 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10236 rename_from,
10237 self.op.debug_level)
10238 if result.fail_msg:
10239 self.LogWarning("Failed to run rename script for %s on node"
10240 " %s: %s" % (instance, pnode_name, result.fail_msg))
10242 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10244 if self.op.start:
10245 iobj.admin_state = constants.ADMINST_UP
10246 self.cfg.Update(iobj, feedback_fn)
10247 logging.info("Starting instance %s on node %s", instance, pnode_name)
10248 feedback_fn("* starting instance...")
10249 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10251 result.Raise("Could not start instance")
10253 return list(iobj.all_nodes)
10256 def _CheckRADOSFreeSpace():
10257 """Compute disk size requirements inside the RADOS cluster.
10260 # For the RADOS cluster we assume there is always enough space.
10261 pass
10264 class LUInstanceConsole(NoHooksLU):
10265 """Connect to an instance's console.
10267 This is somewhat special in that it returns the command line that
10268 you need to run on the master node in order to connect to the
10269 console.
10274 def ExpandNames(self):
10275 self.share_locks = _ShareAll()
10276 self._ExpandAndLockInstance()
10278 def CheckPrereq(self):
10279 """Check prerequisites.
10281 This checks that the instance is in the cluster.
10284 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10285 assert self.instance is not None, \
10286 "Cannot retrieve locked instance %s" % self.op.instance_name
10287 _CheckNodeOnline(self, self.instance.primary_node)
10289 def Exec(self, feedback_fn):
10290 """Connect to the console of an instance
10293 instance = self.instance
10294 node = instance.primary_node
10296 node_insts = self.rpc.call_instance_list([node],
10297 [instance.hypervisor])[node]
10298 node_insts.Raise("Can't get node information from %s" % node)
10300 if instance.name not in node_insts.payload:
10301 if instance.admin_state == constants.ADMINST_UP:
10302 state = constants.INSTST_ERRORDOWN
10303 elif instance.admin_state == constants.ADMINST_DOWN:
10304 state = constants.INSTST_ADMINDOWN
10306 state = constants.INSTST_ADMINOFFLINE
10307 raise errors.OpExecError("Instance %s is not running (state %s)" %
10308 (instance.name, state))
10310 logging.debug("Connecting to console of %s on %s", instance.name, node)
10312 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10315 def _GetInstanceConsole(cluster, instance):
10316 """Returns console information for an instance.
10318 @type cluster: L{objects.Cluster}
10319 @type instance: L{objects.Instance}
10323 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10324 # beparams and hvparams are passed separately, to avoid editing the
10325 # instance and then saving the defaults in the instance itself.
10326 hvparams = cluster.FillHV(instance)
10327 beparams = cluster.FillBE(instance)
10328 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10330 assert console.instance == instance.name
10331 assert console.Validate()
10333 return console.ToDict()
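# The returned dictionary is the serialized console object built by the
# hypervisor; callers (e.g. the command-line client) use it to construct
# the actual console command, this LU never opens the console itself.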
10336 class LUInstanceReplaceDisks(LogicalUnit):
10337 """Replace the disks of an instance.
10340 HPATH = "mirrors-replace"
10341 HTYPE = constants.HTYPE_INSTANCE
10342 REQ_BGL = False
10344 def CheckArguments(self):
10345 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10346 self.op.iallocator)
10348 def ExpandNames(self):
10349 self._ExpandAndLockInstance()
10351 assert locking.LEVEL_NODE not in self.needed_locks
10352 assert locking.LEVEL_NODE_RES not in self.needed_locks
10353 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10355 assert self.op.iallocator is None or self.op.remote_node is None, \
10356 "Conflicting options"
10358 if self.op.remote_node is not None:
10359 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10361 # Warning: do not remove the locking of the new secondary here
10362 # unless DRBD8.AddChildren is changed to work in parallel;
10363 # currently it doesn't since parallel invocations of
10364 # FindUnusedMinor will conflict
10365 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10366 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10367 else:
10368 self.needed_locks[locking.LEVEL_NODE] = []
10369 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10371 if self.op.iallocator is not None:
10372 # iallocator will select a new node in the same group
10373 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10375 self.needed_locks[locking.LEVEL_NODE_RES] = []
10377 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10378 self.op.iallocator, self.op.remote_node,
10379 self.op.disks, False, self.op.early_release,
10380 self.op.ignore_ipolicy)
10382 self.tasklets = [self.replacer]
10384 def DeclareLocks(self, level):
10385 if level == locking.LEVEL_NODEGROUP:
10386 assert self.op.remote_node is None
10387 assert self.op.iallocator is not None
10388 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10390 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10391 # Lock all groups used by instance optimistically; this requires going
10392 # via the node before it's locked, requiring verification later on
10393 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10394 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10396 elif level == locking.LEVEL_NODE:
10397 if self.op.iallocator is not None:
10398 assert self.op.remote_node is None
10399 assert not self.needed_locks[locking.LEVEL_NODE]
10401 # Lock member nodes of all locked groups
10402 self.needed_locks[locking.LEVEL_NODE] = [node_name
10403 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10404 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10406 self._LockInstancesNodes()
10407 elif level == locking.LEVEL_NODE_RES:
10409 self.needed_locks[locking.LEVEL_NODE_RES] = \
10410 self.needed_locks[locking.LEVEL_NODE]
10412 def BuildHooksEnv(self):
10413 """Build hooks env.
10415 This runs on the master, the primary and all the secondaries.
10418 instance = self.replacer.instance
10419 env = {
10420 "MODE": self.op.mode,
10421 "NEW_SECONDARY": self.op.remote_node,
10422 "OLD_SECONDARY": instance.secondary_nodes[0],
10423 }
10424 env.update(_BuildInstanceHookEnvByObject(self, instance))
10425 return env
10427 def BuildHooksNodes(self):
10428 """Build hooks nodes.
10431 instance = self.replacer.instance
10432 nl = [
10433 self.cfg.GetMasterNode(),
10434 instance.primary_node,
10435 ]
10436 if self.op.remote_node is not None:
10437 nl.append(self.op.remote_node)
10439 return nl, nl
10440 def CheckPrereq(self):
10441 """Check prerequisites.
10444 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10445 self.op.iallocator is None)
10447 # Verify if node group locks are still correct
10448 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10449 if owned_groups:
10450 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10452 return LogicalUnit.CheckPrereq(self)
10455 class TLReplaceDisks(Tasklet):
10456 """Replaces disks for an instance.
10458 Note: Locking is not within the scope of this class.
10461 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10462 disks, delay_iallocator, early_release, ignore_ipolicy):
10463 """Initializes this class.
10466 Tasklet.__init__(self, lu)
10469 self.instance_name = instance_name
10470 self.mode = mode
10471 self.iallocator_name = iallocator_name
10472 self.remote_node = remote_node
10473 self.disks = disks
10474 self.delay_iallocator = delay_iallocator
10475 self.early_release = early_release
10476 self.ignore_ipolicy = ignore_ipolicy
10479 self.instance = None
10480 self.new_node = None
10481 self.target_node = None
10482 self.other_node = None
10483 self.remote_node_info = None
10484 self.node_secondary_ip = None
10487 def CheckArguments(mode, remote_node, iallocator):
10488 """Helper function for users of this class.
10491 # check for valid parameter combination
10492 if mode == constants.REPLACE_DISK_CHG:
10493 if remote_node is None and iallocator is None:
10494 raise errors.OpPrereqError("When changing the secondary either an"
10495 " iallocator script must be used or the"
10496 " new node given", errors.ECODE_INVAL)
10498 if remote_node is not None and iallocator is not None:
10499 raise errors.OpPrereqError("Give either the iallocator or the new"
10500 " secondary, not both", errors.ECODE_INVAL)
10502 elif remote_node is not None or iallocator is not None:
10503 # Not replacing the secondary
10504 raise errors.OpPrereqError("The iallocator and new node options can"
10505 " only be used when changing the"
10506 " secondary node", errors.ECODE_INVAL)
10509 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10510 """Compute a new secondary node using an IAllocator.
10513 ial = IAllocator(lu.cfg, lu.rpc,
10514 mode=constants.IALLOCATOR_MODE_RELOC,
10515 name=instance_name,
10516 relocate_from=list(relocate_from))
10518 ial.Run(iallocator_name)
10520 if not ial.success:
10521 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10522 " %s" % (iallocator_name, ial.info),
10523 errors.ECODE_NORES)
10525 if len(ial.result) != ial.required_nodes:
10526 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10527 " of nodes (%s), required %s" %
10528 (iallocator_name,
10529 len(ial.result), ial.required_nodes),
10530 errors.ECODE_FAULT)
10532 remote_node_name = ial.result[0]
10534 lu.LogInfo("Selected new secondary for instance '%s': %s",
10535 instance_name, remote_node_name)
10537 return remote_node_name
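# In IALLOCATOR_MODE_RELOC the allocator is asked to move the instance away
# from its current secondary, so the node name returned here is used as the
# new secondary for the replace-disks operation.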
10539 def _FindFaultyDisks(self, node_name):
10540 """Wrapper for L{_FindFaultyInstanceDisks}.
10543 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10546 def _CheckDisksActivated(self, instance):
10547 """Checks if the instance disks are activated.
10549 @param instance: The instance to check disks
10550 @return: True if they are activated, False otherwise
10553 nodes = instance.all_nodes
10555 for idx, dev in enumerate(instance.disks):
10556 for node in nodes:
10557 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10558 self.cfg.SetDiskID(dev, node)
10560 result = _BlockdevFind(self, node, dev, instance)
10564 elif result.fail_msg or not result.payload:
10569 def CheckPrereq(self):
10570 """Check prerequisites.
10572 This checks that the instance is in the cluster.
10575 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10576 assert instance is not None, \
10577 "Cannot retrieve locked instance %s" % self.instance_name
10579 if instance.disk_template != constants.DT_DRBD8:
10580 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10581 " instances", errors.ECODE_INVAL)
10583 if len(instance.secondary_nodes) != 1:
10584 raise errors.OpPrereqError("The instance has a strange layout,"
10585 " expected one secondary but found %d" %
10586 len(instance.secondary_nodes),
10587 errors.ECODE_FAULT)
10589 if not self.delay_iallocator:
10590 self._CheckPrereq2()
10592 def _CheckPrereq2(self):
10593 """Check prerequisites, second part.
10595 This function should always be part of CheckPrereq. It was separated and is
10596 now called from Exec because during node evacuation iallocator was only
10597 called with an unmodified cluster model, not taking planned changes into
10601 instance = self.instance
10602 secondary_node = instance.secondary_nodes[0]
10604 if self.iallocator_name is None:
10605 remote_node = self.remote_node
10607 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10608 instance.name, instance.secondary_nodes)
10610 if remote_node is None:
10611 self.remote_node_info = None
10613 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10614 "Remote node '%s' is not locked" % remote_node
10616 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10617 assert self.remote_node_info is not None, \
10618 "Cannot retrieve locked node %s" % remote_node
10620 if remote_node == self.instance.primary_node:
10621 raise errors.OpPrereqError("The specified node is the primary node of"
10622 " the instance", errors.ECODE_INVAL)
10624 if remote_node == secondary_node:
10625 raise errors.OpPrereqError("The specified node is already the"
10626 " secondary node of the instance",
10627 errors.ECODE_INVAL)
10629 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10630 constants.REPLACE_DISK_CHG):
10631 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10632 errors.ECODE_INVAL)
10634 if self.mode == constants.REPLACE_DISK_AUTO:
10635 if not self._CheckDisksActivated(instance):
10636 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10637 " first" % self.instance_name,
10638 errors.ECODE_STATE)
10639 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10640 faulty_secondary = self._FindFaultyDisks(secondary_node)
10642 if faulty_primary and faulty_secondary:
10643 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10644 " one node and can not be repaired"
10645 " automatically" % self.instance_name,
10646 errors.ECODE_STATE)
10648 if faulty_primary:
10649 self.disks = faulty_primary
10650 self.target_node = instance.primary_node
10651 self.other_node = secondary_node
10652 check_nodes = [self.target_node, self.other_node]
10653 elif faulty_secondary:
10654 self.disks = faulty_secondary
10655 self.target_node = secondary_node
10656 self.other_node = instance.primary_node
10657 check_nodes = [self.target_node, self.other_node]
10663 # Non-automatic modes
10664 if self.mode == constants.REPLACE_DISK_PRI:
10665 self.target_node = instance.primary_node
10666 self.other_node = secondary_node
10667 check_nodes = [self.target_node, self.other_node]
10669 elif self.mode == constants.REPLACE_DISK_SEC:
10670 self.target_node = secondary_node
10671 self.other_node = instance.primary_node
10672 check_nodes = [self.target_node, self.other_node]
10674 elif self.mode == constants.REPLACE_DISK_CHG:
10675 self.new_node = remote_node
10676 self.other_node = instance.primary_node
10677 self.target_node = secondary_node
10678 check_nodes = [self.new_node, self.other_node]
10680 _CheckNodeNotDrained(self.lu, remote_node)
10681 _CheckNodeVmCapable(self.lu, remote_node)
10683 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10684 assert old_node_info is not None
10685 if old_node_info.offline and not self.early_release:
10686 # doesn't make sense to delay the release
10687 self.early_release = True
10688 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10689 " early-release mode", secondary_node)
10691 else:
10692 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10693 self.mode)
10695 # If not specified all disks should be replaced
10696 if not self.disks:
10697 self.disks = range(len(self.instance.disks))
10699 # TODO: This is ugly, but right now we can't distinguish between internal
10700 # submitted opcode and external one. We should fix that.
10701 if self.remote_node_info:
10702 # We change the node, lets verify it still meets instance policy
10703 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10704 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10705 new_group_info)
10706 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10707 ignore=self.ignore_ipolicy)
10709 for node in check_nodes:
10710 _CheckNodeOnline(self.lu, node)
10712 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10713 self.other_node,
10714 self.target_node]
10715 if node_name is not None)
10717 # Release unneeded node and node resource locks
10718 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10719 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10721 # Release any owned node group
10722 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10723 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10725 # Check whether disks are valid
10726 for disk_idx in self.disks:
10727 instance.FindDisk(disk_idx)
10729 # Get secondary node IP addresses
10730 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10731 in self.cfg.GetMultiNodeInfo(touched_nodes))
10733 def Exec(self, feedback_fn):
10734 """Execute disk replacement.
10736 This dispatches the disk replacement to the appropriate handler.
10739 if self.delay_iallocator:
10740 self._CheckPrereq2()
10743 # Verify owned locks before starting operation
10744 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10745 assert set(owned_nodes) == set(self.node_secondary_ip), \
10746 ("Incorrect node locks, owning %s, expected %s" %
10747 (owned_nodes, self.node_secondary_ip.keys()))
10748 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10749 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10751 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10752 assert list(owned_instances) == [self.instance_name], \
10753 "Instance '%s' not locked" % self.instance_name
10755 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10756 "Should not own any node group lock at this point"
10758 if not self.disks:
10759 feedback_fn("No disks need replacement")
10760 return
10762 feedback_fn("Replacing disk(s) %s for %s" %
10763 (utils.CommaJoin(self.disks), self.instance.name))
10765 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10767 # Activate the instance disks if we're replacing them on a down instance
10768 if activate_disks:
10769 _StartInstanceDisks(self.lu, self.instance, True)
10771 try:
10772 # Should we replace the secondary node?
10773 if self.new_node is not None:
10774 fn = self._ExecDrbd8Secondary
10775 else:
10776 fn = self._ExecDrbd8DiskOnly
10778 result = fn(feedback_fn)
10779 finally:
10780 # Deactivate the instance disks if we're replacing them on a
10781 # down instance
10782 if activate_disks:
10783 _SafeShutdownInstanceDisks(self.lu, self.instance)
10785 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10788 # Verify owned locks
10789 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10790 nodes = frozenset(self.node_secondary_ip)
10791 assert ((self.early_release and not owned_nodes) or
10792 (not self.early_release and not (set(owned_nodes) - nodes))), \
10793 ("Not owning the correct locks, early_release=%s, owned=%r,"
10794 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10796 return result
10798 def _CheckVolumeGroup(self, nodes):
10799 self.lu.LogInfo("Checking volume groups")
10801 vgname = self.cfg.GetVGName()
10803 # Make sure volume group exists on all involved nodes
10804 results = self.rpc.call_vg_list(nodes)
10805 if not results:
10806 raise errors.OpExecError("Can't list volume groups on the nodes")
10808 for node in nodes:
10809 res = results[node]
10810 res.Raise("Error checking node %s" % node)
10811 if vgname not in res.payload:
10812 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10813 (vgname, node))
10815 def _CheckDisksExistence(self, nodes):
10816 # Check disk existence
10817 for idx, dev in enumerate(self.instance.disks):
10818 if idx not in self.disks:
10819 continue
10821 for node in nodes:
10822 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10823 self.cfg.SetDiskID(dev, node)
10825 result = _BlockdevFind(self, node, dev, self.instance)
10827 msg = result.fail_msg
10828 if msg or not result.payload:
10829 if not msg:
10830 msg = "disk not found"
10831 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10832 (idx, node, msg))
10834 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10835 for idx, dev in enumerate(self.instance.disks):
10836 if idx not in self.disks:
10839 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10842 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10843 on_primary, ldisk=ldisk):
10844 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10845 " replace disks for instance %s" %
10846 (node_name, self.instance.name))
10848 def _CreateNewStorage(self, node_name):
10849 """Create new storage on the primary or secondary node.
10851 This is only used for same-node replaces, not for changing the
10852 secondary node, hence we don't want to modify the existing disk.
10856 iv_names = {}
10857 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10858 for idx, dev in enumerate(disks):
10859 if idx not in self.disks:
10860 continue
10862 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10864 self.cfg.SetDiskID(dev, node_name)
10866 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10867 names = _GenerateUniqueNames(self.lu, lv_names)
10869 (data_disk, meta_disk) = dev.children
10870 vg_data = data_disk.logical_id[0]
10871 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10872 logical_id=(vg_data, names[0]),
10873 params=data_disk.params)
10874 vg_meta = meta_disk.logical_id[0]
10875 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10876 logical_id=(vg_meta, names[1]),
10877 params=meta_disk.params)
10879 new_lvs = [lv_data, lv_meta]
10880 old_lvs = [child.Copy() for child in dev.children]
10881 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10883 # we pass force_create=True to force the LVM creation
10884 for new_lv in new_lvs:
10885 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10886 _GetInstanceInfoText(self.instance), False)
10888 return iv_names
10890 def _CheckDevices(self, node_name, iv_names):
10891 for name, (dev, _, _) in iv_names.iteritems():
10892 self.cfg.SetDiskID(dev, node_name)
10894 result = _BlockdevFind(self, node_name, dev, self.instance)
10896 msg = result.fail_msg
10897 if msg or not result.payload:
10898 if not msg:
10899 msg = "disk not found"
10900 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10901 (name, msg))
10903 if result.payload.is_degraded:
10904 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10906 def _RemoveOldStorage(self, node_name, iv_names):
10907 for name, (_, old_lvs, _) in iv_names.iteritems():
10908 self.lu.LogInfo("Remove logical volumes for %s" % name)
10910 for lv in old_lvs:
10911 self.cfg.SetDiskID(lv, node_name)
10913 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10914 if msg:
10915 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10916 hint="remove unused LVs manually")
10918 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10919 """Replace a disk on the primary or secondary for DRBD 8.
10921 The algorithm for replace is quite complicated:
10923 1. for each disk to be replaced:
10925 1. create new LVs on the target node with unique names
10926 1. detach old LVs from the drbd device
10927 1. rename old LVs to name_replaced.<time_t>
10928 1. rename new LVs to old LVs
10929 1. attach the new LVs (with the old names now) to the drbd device
10931 1. wait for sync across all devices
10933 1. for each modified disk:
10935 1. remove old LVs (which have the name name_replaced.<time_t>)
10937 Failures are not very well handled.
10940 steps_total = 6
10942 # Step: check device activation
10943 self.lu.LogStep(1, steps_total, "Check device existence")
10944 self._CheckDisksExistence([self.other_node, self.target_node])
10945 self._CheckVolumeGroup([self.target_node, self.other_node])
10947 # Step: check other node consistency
10948 self.lu.LogStep(2, steps_total, "Check peer consistency")
10949 self._CheckDisksConsistency(self.other_node,
10950 self.other_node == self.instance.primary_node,
10953 # Step: create new storage
10954 self.lu.LogStep(3, steps_total, "Allocate new storage")
10955 iv_names = self._CreateNewStorage(self.target_node)
10957 # Step: for each lv, detach+rename*2+attach
10958 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10959 for dev, old_lvs, new_lvs in iv_names.itervalues():
10960 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10962 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10963 old_lvs)
10964 result.Raise("Can't detach drbd from local storage on node"
10965 " %s for device %s" % (self.target_node, dev.iv_name))
10967 #cfg.Update(instance)
10969 # ok, we created the new LVs, so now we know we have the needed
10970 # storage; as such, we proceed on the target node to rename
10971 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10972 # using the assumption that logical_id == physical_id (which in
10973 # turn is the unique_id on that node)
10975 # FIXME(iustin): use a better name for the replaced LVs
10976 temp_suffix = int(time.time())
10977 ren_fn = lambda d, suff: (d.physical_id[0],
10978 d.physical_id[1] + "_replaced-%s" % suff)
10980 # Build the rename list based on what LVs exist on the node
10981 rename_old_to_new = []
10982 for to_ren in old_lvs:
10983 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10984 if not result.fail_msg and result.payload:
10986 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10988 self.lu.LogInfo("Renaming the old LVs on the target node")
10989 result = self.rpc.call_blockdev_rename(self.target_node,
10990 rename_old_to_new)
10991 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10993 # Now we rename the new LVs to the old LVs
10994 self.lu.LogInfo("Renaming the new LVs on the target node")
10995 rename_new_to_old = [(new, old.physical_id)
10996 for old, new in zip(old_lvs, new_lvs)]
10997 result = self.rpc.call_blockdev_rename(self.target_node,
10998 rename_new_to_old)
10999 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11001 # Intermediate steps of in memory modifications
11002 for old, new in zip(old_lvs, new_lvs):
11003 new.logical_id = old.logical_id
11004 self.cfg.SetDiskID(new, self.target_node)
11006 # We need to modify old_lvs so that removal later removes the
11007 # right LVs, not the newly added ones; note that old_lvs is a
11008 # copy here
11009 for disk in old_lvs:
11010 disk.logical_id = ren_fn(disk, temp_suffix)
11011 self.cfg.SetDiskID(disk, self.target_node)
11013 # Now that the new lvs have the old name, we can add them to the device
11014 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11015 result = self.rpc.call_blockdev_addchildren(self.target_node,
11016 (dev, self.instance), new_lvs)
11017 msg = result.fail_msg
11018 if msg:
11019 for new_lv in new_lvs:
11020 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11021 new_lv).fail_msg
11022 if msg2:
11023 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11024 hint=("cleanup manually the unused logical"
11026 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11028 cstep = itertools.count(5)
11030 if self.early_release:
11031 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11032 self._RemoveOldStorage(self.target_node, iv_names)
11033 # TODO: Check if releasing locks early still makes sense
11034 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11036 # Release all resource locks except those used by the instance
11037 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11038 keep=self.node_secondary_ip.keys())
11040 # Release all node locks while waiting for sync
11041 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11043 # TODO: Can the instance lock be downgraded here? Take the optional disk
11044 # shutdown in the caller into consideration.
11047 # This can fail as the old devices are degraded and _WaitForSync
11048 # does a combined result over all disks, so we don't check its return value
11049 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11050 _WaitForSync(self.lu, self.instance)
11052 # Check all devices manually
11053 self._CheckDevices(self.instance.primary_node, iv_names)
11055 # Step: remove old storage
11056 if not self.early_release:
11057 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11058 self._RemoveOldStorage(self.target_node, iv_names)
11060 def _ExecDrbd8Secondary(self, feedback_fn):
11061 """Replace the secondary node for DRBD 8.
11063 The algorithm for replace is quite complicated:
11064 - for all disks of the instance:
11065 - create new LVs on the new node with same names
11066 - shutdown the drbd device on the old secondary
11067 - disconnect the drbd network on the primary
11068 - create the drbd device on the new secondary
11069 - network attach the drbd on the primary, using an artifice:
11070 the drbd code for Attach() will connect to the network if it
11071 finds a device which is connected to the good local disks but
11072 not network enabled
11073 - wait for sync across all devices
11074 - remove all disks from the old secondary
11076 Failures are not very well handled.
11078 """
11079 steps_total = 6
11081 pnode = self.instance.primary_node
11083 # Step: check device activation
11084 self.lu.LogStep(1, steps_total, "Check device existence")
11085 self._CheckDisksExistence([self.instance.primary_node])
11086 self._CheckVolumeGroup([self.instance.primary_node])
11088 # Step: check other node consistency
11089 self.lu.LogStep(2, steps_total, "Check peer consistency")
11090 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11092 # Step: create new storage
11093 self.lu.LogStep(3, steps_total, "Allocate new storage")
11094 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11095 for idx, dev in enumerate(disks):
11096 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11097 (self.new_node, idx))
11098 # we pass force_create=True to force LVM creation
11099 for new_lv in dev.children:
11100 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11101 True, _GetInstanceInfoText(self.instance), False)
11103 # Step 4: drbd minors and drbd setup changes
11104 # after this, we must manually remove the drbd minors on both the
11105 # error and the success paths
11106 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11107 minors = self.cfg.AllocateDRBDMinor([self.new_node
11108 for dev in self.instance.disks],
11109 self.instance.name)
11110 logging.debug("Allocated minors %r", minors)
11113 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11114 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11115 (self.new_node, idx))
11116 # create new devices on new_node; note that we create two IDs:
11117 # one without port, so the drbd will be activated without
11118 # networking information on the new node at this stage, and one
11119 # with network, for the latter activation in step 4
11120 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11121 if self.instance.primary_node == o_node1:
11122 p_minor = o_minor1
11123 else:
11124 assert self.instance.primary_node == o_node2, "Three-node instance?"
11125 p_minor = o_minor2
11127 new_alone_id = (self.instance.primary_node, self.new_node, None,
11128 p_minor, new_minor, o_secret)
11129 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11130 p_minor, new_minor, o_secret)
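# Illustrative sketch of the two IDs built above (node names and numbers
# assumed): if dev.logical_id is ("nodeA", "nodeB", 11000, 0, 3, "secret"),
# nodeA is the primary and the newly allocated minor is 5, then
#   new_alone_id = ("nodeA", <new_node>, None, 0, 5, "secret")
#   new_net_id   = ("nodeA", <new_node>, 11000, 0, 5, "secret")
# so the drbd can first be created without networking and attached to the
# network only later.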
11132 iv_names[idx] = (dev, dev.children, new_net_id)
11133 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11134 new_net_id)
11135 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11136 logical_id=new_alone_id,
11137 children=dev.children,
11138 size=dev.size,
11139 params={})
11140 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11141 self.cfg)
11142 try:
11143 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11144 anno_new_drbd,
11145 _GetInstanceInfoText(self.instance), False)
11146 except errors.GenericError:
11147 self.cfg.ReleaseDRBDMinors(self.instance.name)
11148 raise
11150 # We have new devices, shutdown the drbd on the old secondary
11151 for idx, dev in enumerate(self.instance.disks):
11152 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11153 self.cfg.SetDiskID(dev, self.target_node)
11154 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
11155 if msg:
11156 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11157 " node: %s" % (idx, msg),
11158 hint=("Please cleanup this device manually as"
11159 " soon as possible"))
11161 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11162 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11163 self.instance.disks)[pnode]
11165 msg = result.fail_msg
11166 if msg:
11167 # detaches didn't succeed (unlikely)
11168 self.cfg.ReleaseDRBDMinors(self.instance.name)
11169 raise errors.OpExecError("Can't detach the disks from the network on"
11170 " old node: %s" % (msg,))
11172 # if we managed to detach at least one, we update all the disks of
11173 # the instance to point to the new secondary
11174 self.lu.LogInfo("Updating instance configuration")
11175 for dev, _, new_logical_id in iv_names.itervalues():
11176 dev.logical_id = new_logical_id
11177 self.cfg.SetDiskID(dev, self.instance.primary_node)
11179 self.cfg.Update(self.instance, feedback_fn)
11181 # Release all node locks (the configuration has been updated)
11182 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11184 # and now perform the drbd attach
11185 self.lu.LogInfo("Attaching primary drbds to new secondary"
11186 " (standalone => connected)")
11187 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11188 self.new_node],
11189 self.node_secondary_ip,
11190 (self.instance.disks, self.instance),
11191 self.instance.name,
11192 False)
11193 for to_node, to_result in result.items():
11194 msg = to_result.fail_msg
11195 if msg:
11196 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11197 to_node, msg,
11198 hint=("please do a gnt-instance info to see the"
11199 " status of disks"))
11201 cstep = itertools.count(5)
11203 if self.early_release:
11204 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11205 self._RemoveOldStorage(self.target_node, iv_names)
11206 # TODO: Check if releasing locks early still makes sense
11207 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11209 # Release all resource locks except those used by the instance
11210 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11211 keep=self.node_secondary_ip.keys())
11213 # TODO: Can the instance lock be downgraded here? Take the optional disk
11214 # shutdown in the caller into consideration.
11217 # This can fail as the old devices are degraded and _WaitForSync
11218 # does a combined result over all disks, so we don't check its return value
11219 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11220 _WaitForSync(self.lu, self.instance)
11222 # Check all devices manually
11223 self._CheckDevices(self.instance.primary_node, iv_names)
11225 # Step: remove old storage
11226 if not self.early_release:
11227 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11228 self._RemoveOldStorage(self.target_node, iv_names)
11231 class LURepairNodeStorage(NoHooksLU):
11232 """Repairs the volume group on a node.
11237 def CheckArguments(self):
11238 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11240 storage_type = self.op.storage_type
11242 if (constants.SO_FIX_CONSISTENCY not in
11243 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11244 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11245 " repaired" % storage_type,
11246 errors.ECODE_INVAL)
11248 def ExpandNames(self):
11249 self.needed_locks = {
11250 locking.LEVEL_NODE: [self.op.node_name],
11251 }
11253 def _CheckFaultyDisks(self, instance, node_name):
11254 """Ensure faulty disks abort the opcode or at least warn."""
11256 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11258 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11259 " node '%s'" % (instance.name, node_name),
11260 errors.ECODE_STATE)
11261 except errors.OpPrereqError, err:
11262 if self.op.ignore_consistency:
11263 self.proc.LogWarning(str(err.args[0]))
11267 def CheckPrereq(self):
11268 """Check prerequisites.
11271 # Check whether any instance on this node has faulty disks
11272 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11273 if inst.admin_state != constants.ADMINST_UP:
11274 continue
11275 check_nodes = set(inst.all_nodes)
11276 check_nodes.discard(self.op.node_name)
11277 for inst_node_name in check_nodes:
11278 self._CheckFaultyDisks(inst, inst_node_name)
11280 def Exec(self, feedback_fn):
11281 feedback_fn("Repairing storage unit '%s' on %s ..." %
11282 (self.op.name, self.op.node_name))
11284 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11285 result = self.rpc.call_storage_execute(self.op.node_name,
11286 self.op.storage_type, st_args,
11287 self.op.name,
11288 constants.SO_FIX_CONSISTENCY)
11289 result.Raise("Failed to repair storage unit '%s' on %s" %
11290 (self.op.name, self.op.node_name))
11293 class LUNodeEvacuate(NoHooksLU):
11294 """Evacuates instances off a list of nodes.
11299 _MODE2IALLOCATOR = {
11300 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11301 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11302 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11304 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11305 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11306 constants.IALLOCATOR_NEVAC_MODES)
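# How the mapping is used (a sketch, based on Exec below): an opcode with
# mode=constants.NODE_EVAC_SEC is translated into an iallocator request with
# evac_mode=constants.IALLOCATOR_NEVAC_SEC; the two asserts above make sure
# the translation covers every evacuation mode in both directions.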
11308 def CheckArguments(self):
11309 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11311 def ExpandNames(self):
11312 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11314 if self.op.remote_node is not None:
11315 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11316 assert self.op.remote_node
11318 if self.op.remote_node == self.op.node_name:
11319 raise errors.OpPrereqError("Can not use evacuated node as a new"
11320 " secondary node", errors.ECODE_INVAL)
11322 if self.op.mode != constants.NODE_EVAC_SEC:
11323 raise errors.OpPrereqError("Without the use of an iallocator only"
11324 " secondary instances can be evacuated",
11325 errors.ECODE_INVAL)
11328 self.share_locks = _ShareAll()
11329 self.needed_locks = {
11330 locking.LEVEL_INSTANCE: [],
11331 locking.LEVEL_NODEGROUP: [],
11332 locking.LEVEL_NODE: [],
11333 }
11335 # Determine nodes (via group) optimistically, needs verification once locks
11336 # have been acquired
11337 self.lock_nodes = self._DetermineNodes()
11339 def _DetermineNodes(self):
11340 """Gets the list of nodes to operate on.
11343 if self.op.remote_node is None:
11344 # Iallocator will choose any node(s) in the same group
11345 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11346 else:
11347 group_nodes = frozenset([self.op.remote_node])
11349 # Determine nodes to be locked
11350 return set([self.op.node_name]) | group_nodes
11352 def _DetermineInstances(self):
11353 """Builds list of instances to operate on.
11356 assert self.op.mode in constants.NODE_EVAC_MODES
11358 if self.op.mode == constants.NODE_EVAC_PRI:
11359 # Primary instances only
11360 inst_fn = _GetNodePrimaryInstances
11361 assert self.op.remote_node is None, \
11362 "Evacuating primary instances requires iallocator"
11363 elif self.op.mode == constants.NODE_EVAC_SEC:
11364 # Secondary instances only
11365 inst_fn = _GetNodeSecondaryInstances
11366 else:
11367 # All instances
11368 assert self.op.mode == constants.NODE_EVAC_ALL
11369 inst_fn = _GetNodeInstances
11370 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11372 raise errors.OpPrereqError("Due to an issue with the iallocator"
11373 " interface it is not possible to evacuate"
11374 " all instances at once; specify explicitly"
11375 " whether to evacuate primary or secondary"
11377 errors.ECODE_INVAL)
11379 return inst_fn(self.cfg, self.op.node_name)
11381 def DeclareLocks(self, level):
11382 if level == locking.LEVEL_INSTANCE:
11383 # Lock instances optimistically, needs verification once node and group
11384 # locks have been acquired
11385 self.needed_locks[locking.LEVEL_INSTANCE] = \
11386 set(i.name for i in self._DetermineInstances())
11388 elif level == locking.LEVEL_NODEGROUP:
11389 # Lock node groups for all potential target nodes optimistically, needs
11390 # verification once nodes have been acquired
11391 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11392 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11394 elif level == locking.LEVEL_NODE:
11395 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11397 def CheckPrereq(self):
11399 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11400 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11401 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11403 need_nodes = self._DetermineNodes()
11405 if not owned_nodes.issuperset(need_nodes):
11406 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11407 " locks were acquired, current nodes are"
11408 " are '%s', used to be '%s'; retry the"
11410 (self.op.node_name,
11411 utils.CommaJoin(need_nodes),
11412 utils.CommaJoin(owned_nodes)),
11413 errors.ECODE_STATE)
11415 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11416 if owned_groups != wanted_groups:
11417 raise errors.OpExecError("Node groups changed since locks were acquired,"
11418 " current groups are '%s', used to be '%s';"
11419 " retry the operation" %
11420 (utils.CommaJoin(wanted_groups),
11421 utils.CommaJoin(owned_groups)))
11423 # Determine affected instances
11424 self.instances = self._DetermineInstances()
11425 self.instance_names = [i.name for i in self.instances]
11427 if set(self.instance_names) != owned_instances:
11428 raise errors.OpExecError("Instances on node '%s' changed since locks"
11429 " were acquired, current instances are '%s',"
11430 " used to be '%s'; retry the operation" %
11431 (self.op.node_name,
11432 utils.CommaJoin(self.instance_names),
11433 utils.CommaJoin(owned_instances)))
11435 if self.instance_names:
11436 self.LogInfo("Evacuating instances from node '%s': %s",
11437 self.op.node_name,
11438 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11439 else:
11440 self.LogInfo("No instances to evacuate from node '%s'",
11441 self.op.node_name)
11443 if self.op.remote_node is not None:
11444 for i in self.instances:
11445 if i.primary_node == self.op.remote_node:
11446 raise errors.OpPrereqError("Node %s is the primary node of"
11447 " instance %s, cannot use it as"
11449 (self.op.remote_node, i.name),
11450 errors.ECODE_INVAL)
11452 def Exec(self, feedback_fn):
11453 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11455 if not self.instance_names:
11456 # No instances to evacuate
11457 jobs = []
11459 elif self.op.iallocator is not None:
11460 # TODO: Implement relocation to other group
11461 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11462 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11463 instances=list(self.instance_names))
11465 ial.Run(self.op.iallocator)
11467 if not ial.success:
11468 raise errors.OpPrereqError("Can't compute node evacuation using"
11469 " iallocator '%s': %s" %
11470 (self.op.iallocator, ial.info),
11471 errors.ECODE_NORES)
11473 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11475 elif self.op.remote_node is not None:
11476 assert self.op.mode == constants.NODE_EVAC_SEC
11477 jobs = [
11478 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11479 remote_node=self.op.remote_node,
11480 disks=[],
11481 mode=constants.REPLACE_DISK_CHG,
11482 early_release=self.op.early_release)]
11483 for instance_name in self.instance_names
11484 ]
11486 else:
11487 raise errors.ProgrammerError("No iallocator or remote node")
11489 return ResultWithJobs(jobs)
11492 def _SetOpEarlyRelease(early_release, op):
11493 """Sets C{early_release} flag on opcodes if available.
11497 op.early_release = early_release
11498 except AttributeError:
11499 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11504 def _NodeEvacDest(use_nodes, group, nodes):
11505 """Returns group or nodes depending on caller's choice.
11509 return utils.CommaJoin(nodes)
11514 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11515 """Unpacks the result of change-group and node-evacuate iallocator requests.
11517 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11518 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11520 @type lu: L{LogicalUnit}
11521 @param lu: Logical unit instance
11522 @type alloc_result: tuple/list
11523 @param alloc_result: Result from iallocator
11524 @type early_release: bool
11525 @param early_release: Whether to release locks early if possible
11526 @type use_nodes: bool
11527 @param use_nodes: Whether to display node names instead of groups
11529 """
11530 (moved, failed, jobs) = alloc_result
11532 if failed:
11533 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11534 for (name, reason) in failed)
11535 lu.LogWarning("Unable to evacuate instances %s", failreason)
11536 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11539 lu.LogInfo("Instances to be moved: %s",
11540 utils.CommaJoin("%s (to %s)" %
11541 (name, _NodeEvacDest(use_nodes, group, nodes))
11542 for (name, group, nodes) in moved))
11544 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11545 map(opcodes.OpCode.LoadOpCode, ops))
11546 for ops in jobs]
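# Illustrative shape of alloc_result as unpacked above (names assumed):
#   moved  = [("inst1.example.com", "group1", ["node3.example.com"])]
#   failed = [("inst2.example.com", "not enough memory")]
#   jobs   = [[<serialized opcode>, ...], ...]
# where each inner list is deserialized with opcodes.OpCode.LoadOpCode and
# submitted as one job.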
11549 class LUInstanceGrowDisk(LogicalUnit):
11550 """Grow a disk of an instance.
11553 HPATH = "disk-grow"
11554 HTYPE = constants.HTYPE_INSTANCE
11555 REQ_BGL = False
11557 def ExpandNames(self):
11558 self._ExpandAndLockInstance()
11559 self.needed_locks[locking.LEVEL_NODE] = []
11560 self.needed_locks[locking.LEVEL_NODE_RES] = []
11561 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11562 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11564 def DeclareLocks(self, level):
11565 if level == locking.LEVEL_NODE:
11566 self._LockInstancesNodes()
11567 elif level == locking.LEVEL_NODE_RES:
11569 self.needed_locks[locking.LEVEL_NODE_RES] = \
11570 self.needed_locks[locking.LEVEL_NODE][:]
11572 def BuildHooksEnv(self):
11573 """Build hooks env.
11575 This runs on the master, the primary and all the secondaries.
11579 "DISK": self.op.disk,
11580 "AMOUNT": self.op.amount,
11581 "ABSOLUTE": self.op.absolute,
11583 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
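# For example (values assumed), growing disk 0 by 1024 MB in relative mode
# results in the hook environment entries DISK=0, AMOUNT=1024 and
# ABSOLUTE=False on top of the usual per-instance variables.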
11586 def BuildHooksNodes(self):
11587 """Build hooks nodes.
11590 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11593 def CheckPrereq(self):
11594 """Check prerequisites.
11596 This checks that the instance is in the cluster.
11599 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11600 assert instance is not None, \
11601 "Cannot retrieve locked instance %s" % self.op.instance_name
11602 nodenames = list(instance.all_nodes)
11603 for node in nodenames:
11604 _CheckNodeOnline(self, node)
11606 self.instance = instance
11608 if instance.disk_template not in constants.DTS_GROWABLE:
11609 raise errors.OpPrereqError("Instance's disk layout does not support"
11610 " growing", errors.ECODE_INVAL)
11612 self.disk = instance.FindDisk(self.op.disk)
11614 if self.op.absolute:
11615 self.target = self.op.amount
11616 self.delta = self.target - self.disk.size
11617 if self.delta < 0:
11618 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11619 "current disk size (%s)" %
11620 (utils.FormatUnit(self.target, "h"),
11621 utils.FormatUnit(self.disk.size, "h")),
11622 errors.ECODE_STATE)
11623 else:
11624 self.delta = self.op.amount
11625 self.target = self.disk.size + self.delta
11626 if self.delta < 0:
11627 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11628 utils.FormatUnit(self.delta, "h"),
11629 errors.ECODE_INVAL)
11631 if instance.disk_template not in (constants.DT_FILE,
11632 constants.DT_SHARED_FILE,
11634 # TODO: check the free disk space for file, when that feature will be
11636 _CheckNodesFreeDiskPerVG(self, nodenames,
11637 self.disk.ComputeGrowth(self.delta))
11639 def Exec(self, feedback_fn):
11640 """Execute disk grow.
11643 instance = self.instance
11644 disk = self.disk
11646 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11647 assert (self.owned_locks(locking.LEVEL_NODE) ==
11648 self.owned_locks(locking.LEVEL_NODE_RES))
11650 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11651 if not disks_ok:
11652 raise errors.OpExecError("Cannot activate block device to grow")
11654 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11655 (self.op.disk, instance.name,
11656 utils.FormatUnit(self.delta, "h"),
11657 utils.FormatUnit(self.target, "h")))
11659 # First run all grow ops in dry-run mode
11660 for node in instance.all_nodes:
11661 self.cfg.SetDiskID(disk, node)
11662 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11663 True)
11664 result.Raise("Grow request failed to node %s" % node)
11666 # We know that (as far as we can test) operations across different
11667 # nodes will succeed, time to run it for real
11668 for node in instance.all_nodes:
11669 self.cfg.SetDiskID(disk, node)
11670 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11671 False)
11672 result.Raise("Grow request failed to node %s" % node)
11674 # TODO: Rewrite code to work properly
11675 # DRBD goes into sync mode for a short amount of time after executing the
11676 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11677 # calling "resize" in sync mode fails. Sleeping for a short amount of
11678 # time is a work-around.
11679 time.sleep(5)
11681 disk.RecordGrow(self.delta)
11682 self.cfg.Update(instance, feedback_fn)
11684 # Changes have been recorded, release node lock
11685 _ReleaseLocks(self, locking.LEVEL_NODE)
11687 # Downgrade lock while waiting for sync
11688 self.glm.downgrade(locking.LEVEL_INSTANCE)
11690 if self.op.wait_for_sync:
11691 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11692 if disk_abort:
11693 self.proc.LogWarning("Disk sync-ing has not returned a good"
11694 " status; please check the instance")
11695 if instance.admin_state != constants.ADMINST_UP:
11696 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11697 elif instance.admin_state != constants.ADMINST_UP:
11698 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11699 " not supposed to be running because no wait for"
11700 " sync mode was requested")
11702 assert self.owned_locks(locking.LEVEL_NODE_RES)
11703 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11706 class LUInstanceQueryData(NoHooksLU):
11707 """Query runtime instance data.
11712 def ExpandNames(self):
11713 self.needed_locks = {}
11715 # Use locking if requested or when non-static information is wanted
11716 if not (self.op.static or self.op.use_locking):
11717 self.LogWarning("Non-static data requested, locks need to be acquired")
11718 self.op.use_locking = True
11720 if self.op.instances or not self.op.use_locking:
11721 # Expand instance names right here
11722 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11724 # Will use acquired locks
11725 self.wanted_names = None
11727 if self.op.use_locking:
11728 self.share_locks = _ShareAll()
11730 if self.wanted_names is None:
11731 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11733 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11735 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11736 self.needed_locks[locking.LEVEL_NODE] = []
11737 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11739 def DeclareLocks(self, level):
11740 if self.op.use_locking:
11741 if level == locking.LEVEL_NODEGROUP:
11742 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11744 # Lock all groups used by instances optimistically; this requires going
11745 # via the node before it's locked, requiring verification later on
11746 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11747 frozenset(group_uuid
11748 for instance_name in owned_instances
11749 for group_uuid in
11750 self.cfg.GetInstanceNodeGroups(instance_name))
11752 elif level == locking.LEVEL_NODE:
11753 self._LockInstancesNodes()
11755 def CheckPrereq(self):
11756 """Check prerequisites.
11758 This only checks the optional instance list against the existing names.
11761 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11762 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11763 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11765 if self.wanted_names is None:
11766 assert self.op.use_locking, "Locking was not used"
11767 self.wanted_names = owned_instances
11769 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11771 if self.op.use_locking:
11772 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11773 None)
11774 else:
11775 assert not (owned_instances or owned_groups or owned_nodes)
11777 self.wanted_instances = instances.values()
11779 def _ComputeBlockdevStatus(self, node, instance, dev):
11780 """Returns the status of a block device
11782 """
11783 if self.op.static or not node:
11784 return None
11786 self.cfg.SetDiskID(dev, node)
11788 result = self.rpc.call_blockdev_find(node, dev)
11789 if result.offline:
11790 return None
11792 result.Raise("Can't compute disk status for %s" % instance.name)
11794 status = result.payload
11795 if status is None:
11796 return None
11798 return (status.dev_path, status.major, status.minor,
11799 status.sync_percent, status.estimated_time,
11800 status.is_degraded, status.ldisk_status)
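# Illustrative return value (all numbers assumed): a DRBD device that is
# still syncing could be reported as
#   ("/dev/drbd0", 147, 0, 80.5, 120, True, constants.LDS_OKAY)
# i.e. (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
# ldisk_status); static queries and offline nodes yield None instead.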
11802 def _ComputeDiskStatus(self, instance, snode, dev):
11803 """Compute block device status.
11806 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11808 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11810 def _ComputeDiskStatusInner(self, instance, snode, dev):
11811 """Compute block device status.
11813 @attention: The device has to be annotated already.
11816 if dev.dev_type in constants.LDS_DRBD:
11817 # we change the snode then (otherwise we use the one passed in)
11818 if dev.logical_id[0] == instance.primary_node:
11819 snode = dev.logical_id[1]
11821 snode = dev.logical_id[0]
11823 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11824 instance, dev)
11825 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11827 if dev.children:
11828 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11829 instance, snode),
11830 dev.children)
11831 else:
11832 dev_children = []
11834 return {
11835 "iv_name": dev.iv_name,
11836 "dev_type": dev.dev_type,
11837 "logical_id": dev.logical_id,
11838 "physical_id": dev.physical_id,
11839 "pstatus": dev_pstatus,
11840 "sstatus": dev_sstatus,
11841 "children": dev_children,
11842 "mode": dev.mode,
11843 "size": dev.size,
11844 }
11846 def Exec(self, feedback_fn):
11847 """Gather and return data"""
11850 cluster = self.cfg.GetClusterInfo()
11852 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11853 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
11855 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11856 for node in nodes.values()))
11858 group2name_fn = lambda uuid: groups[uuid].name
11860 for instance in self.wanted_instances:
11861 pnode = nodes[instance.primary_node]
11863 if self.op.static or pnode.offline:
11864 remote_state = None
11865 if pnode.offline:
11866 self.LogWarning("Primary node %s is marked offline, returning static"
11867 " information only for instance %s" %
11868 (pnode.name, instance.name))
11869 else:
11870 remote_info = self.rpc.call_instance_info(instance.primary_node,
11871 instance.name,
11872 instance.hypervisor)
11872 instance.hypervisor)
11873 remote_info.Raise("Error checking node %s" % instance.primary_node)
11874 remote_info = remote_info.payload
11875 if remote_info and "state" in remote_info:
11876 remote_state = "up"
11878 if instance.admin_state == constants.ADMINST_UP:
11879 remote_state = "down"
11881 remote_state = instance.admin_state
11883 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11884 instance.disks)
11886 snodes_group_uuids = [nodes[snode_name].group
11887 for snode_name in instance.secondary_nodes]
11889 result[instance.name] = {
11890 "name": instance.name,
11891 "config_state": instance.admin_state,
11892 "run_state": remote_state,
11893 "pnode": instance.primary_node,
11894 "pnode_group_uuid": pnode.group,
11895 "pnode_group_name": group2name_fn(pnode.group),
11896 "snodes": instance.secondary_nodes,
11897 "snodes_group_uuids": snodes_group_uuids,
11898 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
11900 # this happens to be the same format used for hooks
11901 "nics": _NICListToTuple(self, instance.nics),
11902 "disk_template": instance.disk_template,
11904 "hypervisor": instance.hypervisor,
11905 "network_port": instance.network_port,
11906 "hv_instance": instance.hvparams,
11907 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11908 "be_instance": instance.beparams,
11909 "be_actual": cluster.FillBE(instance),
11910 "os_instance": instance.osparams,
11911 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11912 "serial_no": instance.serial_no,
11913 "mtime": instance.mtime,
11914 "ctime": instance.ctime,
11915 "uuid": instance.uuid,
11921 def PrepareContainerMods(mods, private_fn):
11922 """Prepares a list of container modifications by adding a private data field.
11924 @type mods: list of tuples; (operation, index, parameters)
11925 @param mods: List of modifications
11926 @type private_fn: callable or None
11927 @param private_fn: Callable for constructing a private data field for a
11928 modification
11930 """
11932 if private_fn is None:
11933 fn = lambda: None
11934 else:
11935 fn = private_fn
11937 return [(op, idx, params, fn()) for (op, idx, params) in mods]
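# Minimal usage sketch (parameters assumed): with private_fn=None,
#   PrepareContainerMods([(constants.DDM_ADD, -1, {"size": 1024})], None)
# returns [(constants.DDM_ADD, -1, {"size": 1024}, None)]; a callable
# private_fn instead attaches a fresh private object to every modification.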
11940 #: Type description for changes as returned by L{ApplyContainerMods}'s
11942 _TApplyContModsCbChanges = \
11943 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
11944 ht.TNonEmptyString,
11945 ht.TAny,
11946 ])))
11949 def ApplyContainerMods(kind, container, chgdesc, mods,
11950 create_fn, modify_fn, remove_fn):
11951 """Applies descriptions in C{mods} to C{container}.
11954 @param kind: One-word item description
11955 @type container: list
11956 @param container: Container to modify
11957 @type chgdesc: None or list
11958 @param chgdesc: List of applied changes
11960 @param mods: Modifications as returned by L{PrepareContainerMods}
11961 @type create_fn: callable
11962 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
11963 receives absolute item index, parameters and private data object as added
11964 by L{PrepareContainerMods}, returns tuple containing new item and changes
11966 @type modify_fn: callable
11967 @param modify_fn: Callback for modifying an existing item
11968 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
11969 and private data object as added by L{PrepareContainerMods}, returns
11971 @type remove_fn: callable
11972 @param remove_fn: Callback on removing item; receives absolute item index,
11973 item and private data object as added by L{PrepareContainerMods}
11975 """
11976 for (op, idx, params, private) in mods:
11977 if idx == -1:
11978 # Append
11979 absidx = len(container) - 1
11980 elif idx < 0:
11981 raise IndexError("Not accepting negative indices other than -1")
11982 elif idx > len(container):
11983 raise IndexError("Got %s index %s, but there are only %s" %
11984 (kind, idx, len(container)))
11985 else:
11986 absidx = idx
11988 changes = None
11990 if op == constants.DDM_ADD:
11991 # Calculate where item will be added
11992 if idx == -1:
11993 addidx = len(container)
11994 else:
11995 addidx = idx
11997 if create_fn is None:
11998 item = params
11999 else:
12000 (item, changes) = create_fn(addidx, params, private)
12002 if idx == -1:
12003 container.append(item)
12004 else:
12006 assert idx <= len(container)
12007 # list.insert does so before the specified index
12008 container.insert(idx, item)
12009 else:
12010 # Retrieve existing item
12011 try:
12012 item = container[absidx]
12013 except IndexError:
12014 raise IndexError("Invalid %s index %s" % (kind, idx))
12016 if op == constants.DDM_REMOVE:
12017 assert not params
12019 if remove_fn is not None:
12020 remove_fn(absidx, item, private)
12022 changes = [("%s/%s" % (kind, absidx), "remove")]
12024 assert container[absidx] == item
12025 del container[absidx]
12026 elif op == constants.DDM_MODIFY:
12027 if modify_fn is not None:
12028 changes = modify_fn(absidx, item, params, private)
12029 else:
12030 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12032 assert _TApplyContModsCbChanges(changes)
12034 if not (chgdesc is None or changes is None):
12035 chgdesc.extend(changes)
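# Hypothetical call mirroring how LUInstanceSetParams uses this helper
# further below: ApplyContainerMods("disk", disks, None, diskmod,
# None, None, None) only checks the indices of the requested disk changes,
# while passing create/modify/remove callbacks additionally applies them and
# collects the change descriptions in chgdesc.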
12038 def _UpdateIvNames(base_index, disks):
12039 """Updates the C{iv_name} attribute of disks.
12041 @type disks: list of L{objects.Disk}
12044 for (idx, disk) in enumerate(disks):
12045 disk.iv_name = "disk/%s" % (base_index + idx, )
12048 class _InstNicModPrivate:
12049 """Data structure for network interface modifications.
12051 Used by L{LUInstanceSetParams}.
12054 def __init__(self):
12059 class LUInstanceSetParams(LogicalUnit):
12060 """Modifies an instances's parameters.
12063 HPATH = "instance-modify"
12064 HTYPE = constants.HTYPE_INSTANCE
12065 REQ_BGL = False
12067 @staticmethod
12068 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12069 assert ht.TList(mods)
12070 assert not mods or len(mods[0]) in (2, 3)
12072 if mods and len(mods[0]) == 2:
12073 result = []
12074 addremove = 0
12076 for op, params in mods:
12077 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12078 result.append((op, -1, params))
12079 addremove += 1
12081 if addremove > 1:
12082 raise errors.OpPrereqError("Only one %s add or remove operation is"
12083 " supported at a time" % kind,
12084 errors.ECODE_INVAL)
12086 result.append((constants.DDM_MODIFY, op, params))
12088 assert verify_fn(result)
12089 else:
12090 result = mods
12092 return result
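# Illustrative conversion (parameters assumed): the old-style two-element
# form [(constants.DDM_ADD, {"size": 1024})] becomes
# [(constants.DDM_ADD, -1, {"size": 1024})], while an index-based entry such
# as [(0, {"mode": "ro"})] becomes [(constants.DDM_MODIFY, 0, {"mode": "ro"})].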
12095 def _CheckMods(kind, mods, key_types, item_fn):
12096 """Ensures requested disk/NIC modifications are valid.
12099 for (op, _, params) in mods:
12100 assert ht.TDict(params)
12102 utils.ForceDictType(params, key_types)
12104 if op == constants.DDM_REMOVE:
12106 raise errors.OpPrereqError("No settings should be passed when"
12107 " removing a %s" % kind,
12108 errors.ECODE_INVAL)
12109 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12110 item_fn(op, params)
12112 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12115 def _VerifyDiskModification(op, params):
12116 """Verifies a disk modification.
12119 if op == constants.DDM_ADD:
12120 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12121 if mode not in constants.DISK_ACCESS_SET:
12122 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12123 errors.ECODE_INVAL)
12125 size = params.get(constants.IDISK_SIZE, None)
12126 if size is None:
12127 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12128 constants.IDISK_SIZE, errors.ECODE_INVAL)
12130 try:
12131 size = int(size)
12132 except (TypeError, ValueError), err:
12133 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12134 errors.ECODE_INVAL)
12136 params[constants.IDISK_SIZE] = size
12138 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12139 raise errors.OpPrereqError("Disk size change not possible, use"
12140 " grow-disk", errors.ECODE_INVAL)
12143 def _VerifyNicModification(op, params):
12144 """Verifies a network interface modification.
12147 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12148 ip = params.get(constants.INIC_IP, None)
12149 if ip is None:
12150 pass
12151 elif ip.lower() == constants.VALUE_NONE:
12152 params[constants.INIC_IP] = None
12153 elif not netutils.IPAddress.IsValid(ip):
12154 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12155 errors.ECODE_INVAL)
12157 bridge = params.get("bridge", None)
12158 link = params.get(constants.INIC_LINK, None)
12159 if bridge and link:
12160 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12161 " at the same time", errors.ECODE_INVAL)
12162 elif bridge and bridge.lower() == constants.VALUE_NONE:
12163 params["bridge"] = None
12164 elif link and link.lower() == constants.VALUE_NONE:
12165 params[constants.INIC_LINK] = None
12167 if op == constants.DDM_ADD:
12168 macaddr = params.get(constants.INIC_MAC, None)
12169 if macaddr is None:
12170 params[constants.INIC_MAC] = constants.VALUE_AUTO
12172 if constants.INIC_MAC in params:
12173 macaddr = params[constants.INIC_MAC]
12174 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12175 macaddr = utils.NormalizeAndValidateMac(macaddr)
12177 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12178 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12179 " modifying an existing NIC",
12180 errors.ECODE_INVAL)
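# A typical NIC addition could look like (values assumed)
#   {constants.INIC_IP: "192.0.2.10", constants.INIC_LINK: "br0",
#    constants.INIC_MAC: constants.VALUE_AUTO}
# "bridge" is still accepted as a legacy alias for the link, but not together
# with an explicit link value.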
12182 def CheckArguments(self):
12183 if not (self.op.nics or self.op.disks or self.op.disk_template or
12184 self.op.hvparams or self.op.beparams or self.op.os_name or
12185 self.op.offline is not None or self.op.runtime_mem):
12186 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12188 if self.op.hvparams:
12189 _CheckGlobalHvParams(self.op.hvparams)
12192 self._UpgradeDiskNicMods("disk", self.op.disks,
12193 opcodes.OpInstanceSetParams.TestDiskModifications)
12195 self._UpgradeDiskNicMods("NIC", self.op.nics,
12196 opcodes.OpInstanceSetParams.TestNicModifications)
12198 # Check disk modifications
12199 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12200 self._VerifyDiskModification)
12202 if self.op.disks and self.op.disk_template is not None:
12203 raise errors.OpPrereqError("Disk template conversion and other disk"
12204 " changes not supported at the same time",
12205 errors.ECODE_INVAL)
12207 if (self.op.disk_template and
12208 self.op.disk_template in constants.DTS_INT_MIRROR and
12209 self.op.remote_node is None):
12210 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12211 " one requires specifying a secondary node",
12212 errors.ECODE_INVAL)
12214 # Check NIC modifications
12215 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12216 self._VerifyNicModification)
12218 def ExpandNames(self):
12219 self._ExpandAndLockInstance()
12220 # Can't even acquire node locks in shared mode as upcoming changes in
12221 # Ganeti 2.6 will start to modify the node object on disk conversion
12222 self.needed_locks[locking.LEVEL_NODE] = []
12223 self.needed_locks[locking.LEVEL_NODE_RES] = []
12224 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12226 def DeclareLocks(self, level):
12227 # TODO: Acquire group lock in shared mode (disk parameters)
12228 if level == locking.LEVEL_NODE:
12229 self._LockInstancesNodes()
12230 if self.op.disk_template and self.op.remote_node:
12231 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12232 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12233 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12235 self.needed_locks[locking.LEVEL_NODE_RES] = \
12236 self.needed_locks[locking.LEVEL_NODE][:]
12238 def BuildHooksEnv(self):
12239 """Build hooks env.
12241 This runs on the master, primary and secondaries.
12245 if constants.BE_MINMEM in self.be_new:
12246 args["minmem"] = self.be_new[constants.BE_MINMEM]
12247 if constants.BE_MAXMEM in self.be_new:
12248 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12249 if constants.BE_VCPUS in self.be_new:
12250 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12251 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12252 # information at all.
12254 if self._new_nics is not None:
12257 for nic in self._new_nics:
12258 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12259 mode = nicparams[constants.NIC_MODE]
12260 link = nicparams[constants.NIC_LINK]
12261 nics.append((nic.ip, nic.mac, mode, link))
12263 args["nics"] = nics
12265 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12266 if self.op.disk_template:
12267 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12268 if self.op.runtime_mem:
12269 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12273 def BuildHooksNodes(self):
12274 """Build hooks nodes.
12277 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12280 def _PrepareNicModification(self, params, private, old_ip, old_params,
12281 cluster, pnode):
12282 update_params_dict = dict([(key, params[key])
12283 for key in constants.NICS_PARAMETERS
12284 if key in params])
12286 if "bridge" in params:
12287 update_params_dict[constants.NIC_LINK] = params["bridge"]
12289 new_params = _GetUpdatedParams(old_params, update_params_dict)
12290 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12292 new_filled_params = cluster.SimpleFillNIC(new_params)
12293 objects.NIC.CheckParameterSyntax(new_filled_params)
12295 new_mode = new_filled_params[constants.NIC_MODE]
12296 if new_mode == constants.NIC_MODE_BRIDGED:
12297 bridge = new_filled_params[constants.NIC_LINK]
12298 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12299 if msg:
12300 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12301 if self.op.force:
12302 self.warn.append(msg)
12303 else:
12304 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12306 elif new_mode == constants.NIC_MODE_ROUTED:
12307 ip = params.get(constants.INIC_IP, old_ip)
12308 if ip is None:
12309 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12310 " on a routed NIC", errors.ECODE_INVAL)
12312 if constants.INIC_MAC in params:
12313 mac = params[constants.INIC_MAC]
12315 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12316 errors.ECODE_INVAL)
12317 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12318 # otherwise generate the MAC address
12319 params[constants.INIC_MAC] = \
12320 self.cfg.GenerateMAC(self.proc.GetECId())
12321 else:
12322 # or validate/reserve the current one
12323 try:
12324 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12325 except errors.ReservationError:
12326 raise errors.OpPrereqError("MAC address '%s' already in use"
12327 " in cluster" % mac,
12328 errors.ECODE_NOTUNIQUE)
12330 private.params = new_params
12331 private.filled = new_filled_params
12333 return (None, None)
12335 def CheckPrereq(self):
12336 """Check prerequisites.
12338 This only checks the instance list against the existing names.
12341 # checking the new params on the primary/secondary nodes
12343 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12344 cluster = self.cluster = self.cfg.GetClusterInfo()
12345 assert self.instance is not None, \
12346 "Cannot retrieve locked instance %s" % self.op.instance_name
12347 pnode = instance.primary_node
12348 nodelist = list(instance.all_nodes)
12349 pnode_info = self.cfg.GetNodeInfo(pnode)
12350 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12352 # Prepare disk/NIC modifications
12353 self.diskmod = PrepareContainerMods(self.op.disks, None)
12354 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12357 if self.op.os_name and not self.op.force:
12358 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12359 self.op.force_variant)
12360 instance_os = self.op.os_name
12362 instance_os = instance.os
12364 assert not (self.op.disk_template and self.op.disks), \
12365 "Can't modify disk template and apply disk changes at the same time"
12367 if self.op.disk_template:
12368 if instance.disk_template == self.op.disk_template:
12369 raise errors.OpPrereqError("Instance already has disk template %s" %
12370 instance.disk_template, errors.ECODE_INVAL)
12372 if (instance.disk_template,
12373 self.op.disk_template) not in self._DISK_CONVERSIONS:
12374 raise errors.OpPrereqError("Unsupported disk template conversion from"
12375 " %s to %s" % (instance.disk_template,
12376 self.op.disk_template),
12377 errors.ECODE_INVAL)
12378 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12379 msg="cannot change disk template")
12380 if self.op.disk_template in constants.DTS_INT_MIRROR:
12381 if self.op.remote_node == pnode:
12382 raise errors.OpPrereqError("Given new secondary node %s is the same"
12383 " as the primary node of the instance" %
12384 self.op.remote_node, errors.ECODE_STATE)
12385 _CheckNodeOnline(self, self.op.remote_node)
12386 _CheckNodeNotDrained(self, self.op.remote_node)
12387 # FIXME: here we assume that the old instance type is DT_PLAIN
12388 assert instance.disk_template == constants.DT_PLAIN
12389 disks = [{constants.IDISK_SIZE: d.size,
12390 constants.IDISK_VG: d.logical_id[0]}
12391 for d in instance.disks]
12392 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12393 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12395 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12396 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12397 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12398 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12399 ignore=self.op.ignore_ipolicy)
12400 if pnode_info.group != snode_info.group:
12401 self.LogWarning("The primary and secondary nodes are in two"
12402 " different node groups; the disk parameters"
12403 " from the first disk's node group will be"
12406 # hvparams processing
12407 if self.op.hvparams:
12408 hv_type = instance.hypervisor
12409 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12410 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12411 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12414 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12415 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12416 self.hv_proposed = self.hv_new = hv_new # the new actual values
12417 self.hv_inst = i_hvdict # the new dict (without defaults)
12419 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12420 instance.hvparams)
12421 self.hv_new = self.hv_inst = {}
12423 # beparams processing
12424 if self.op.beparams:
12425 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12427 objects.UpgradeBeParams(i_bedict)
12428 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12429 be_new = cluster.SimpleFillBE(i_bedict)
12430 self.be_proposed = self.be_new = be_new # the new actual values
12431 self.be_inst = i_bedict # the new dict (without defaults)
12432 else:
12433 self.be_new = self.be_inst = {}
12434 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12435 be_old = cluster.FillBE(instance)
12437 # CPU param validation -- checking every time a parameter is
12438 # changed to cover all cases where either CPU mask or vcpus have
12440 if (constants.BE_VCPUS in self.be_proposed and
12441 constants.HV_CPU_MASK in self.hv_proposed):
12442 cpu_list = \
12443 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12444 # Verify mask is consistent with number of vCPUs. Can skip this
12445 # test if only 1 entry in the CPU mask, which means same mask
12446 # is applied to all vCPUs.
12447 if (len(cpu_list) > 1 and
12448 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12449 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12451 (self.be_proposed[constants.BE_VCPUS],
12452 self.hv_proposed[constants.HV_CPU_MASK]),
12453 errors.ECODE_INVAL)
12455 # Only perform this test if a new CPU mask is given
12456 if constants.HV_CPU_MASK in self.hv_new:
12457 # Calculate the largest CPU number requested
12458 max_requested_cpu = max(map(max, cpu_list))
12459 # Check that all of the instance's nodes have enough physical CPUs to
12460 # satisfy the requested CPU mask
12461 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12462 max_requested_cpu + 1, instance.hypervisor)
12464 # osparams processing
12465 if self.op.osparams:
12466 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12467 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12468 self.os_inst = i_osdict # the new dict (without defaults)
12469 else:
12470 self.os_inst = {}
12472 self.warn = []
12474 #TODO(dynmem): do the appropriate check involving MINMEM
12475 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12476 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12477 mem_check_list = [pnode]
12478 if be_new[constants.BE_AUTO_BALANCE]:
12479 # either we changed auto_balance to yes or it was from before
12480 mem_check_list.extend(instance.secondary_nodes)
12481 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12482 instance.hypervisor)
12483 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12484 [instance.hypervisor])
12485 pninfo = nodeinfo[pnode]
12486 msg = pninfo.fail_msg
12487 if msg:
12488 # Assume the primary node is unreachable and go ahead
12489 self.warn.append("Can't get info from primary node %s: %s" %
12490 (pnode, msg))
12491 else:
12492 (_, _, (pnhvinfo, )) = pninfo.payload
12493 if not isinstance(pnhvinfo.get("memory_free", None), int):
12494 self.warn.append("Node data from primary node %s doesn't contain"
12495 " free memory information" % pnode)
12496 elif instance_info.fail_msg:
12497 self.warn.append("Can't get instance runtime information: %s" %
12498 instance_info.fail_msg)
12499 else:
12500 if instance_info.payload:
12501 current_mem = int(instance_info.payload["memory"])
12502 else:
12503 # Assume instance not running
12504 # (there is a slight race condition here, but it's not very
12505 # probable, and we have no other way to check)
12506 # TODO: Describe race condition
12507 current_mem = 0
12508 #TODO(dynmem): do the appropriate check involving MINMEM
12509 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12510 pnhvinfo["memory_free"])
12512 raise errors.OpPrereqError("This change will prevent the instance"
12513 " from starting, due to %d MB of memory"
12514 " missing on its primary node" %
12516 errors.ECODE_NORES)
12518 if be_new[constants.BE_AUTO_BALANCE]:
12519 for node, nres in nodeinfo.items():
12520 if node not in instance.secondary_nodes:
12521 continue
12522 nres.Raise("Can't get info from secondary node %s" % node,
12523 prereq=True, ecode=errors.ECODE_STATE)
12524 (_, _, (nhvinfo, )) = nres.payload
12525 if not isinstance(nhvinfo.get("memory_free", None), int):
12526 raise errors.OpPrereqError("Secondary node %s didn't return free"
12527 " memory information" % node,
12528 errors.ECODE_STATE)
12529 #TODO(dynmem): do the appropriate check involving MINMEM
12530 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12531 raise errors.OpPrereqError("This change will prevent the instance"
12532 " from failover to its secondary node"
12533 " %s, due to not enough memory" % node,
12534 errors.ECODE_STATE)
12536 if self.op.runtime_mem:
12537 remote_info = self.rpc.call_instance_info(instance.primary_node,
12538 instance.name,
12539 instance.hypervisor)
12540 remote_info.Raise("Error checking node %s" % instance.primary_node)
12541 if not remote_info.payload: # not running already
12542 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12543 errors.ECODE_STATE)
12545 current_memory = remote_info.payload["memory"]
12546 if (not self.op.force and
12547 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12548 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12549 raise errors.OpPrereqError("Instance %s must have memory between %d"
12550 " and %d MB of memory unless --force is"
12551 " given" % (instance.name,
12552 self.be_proposed[constants.BE_MINMEM],
12553 self.be_proposed[constants.BE_MAXMEM]),
12554 errors.ECODE_INVAL)
12556 if self.op.runtime_mem > current_memory:
12557 _CheckNodeFreeMemory(self, instance.primary_node,
12558 "ballooning memory for instance %s" %
12559 instance.name,
12560 self.op.runtime_mem - current_memory,
12561 instance.hypervisor)
12563 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12564 raise errors.OpPrereqError("Disk operations not supported for"
12565 " diskless instances",
12566 errors.ECODE_INVAL)
12568 def _PrepareNicCreate(_, params, private):
12569 return self._PrepareNicModification(params, private, None, {},
12570 cluster, pnode)
12572 def _PrepareNicMod(_, nic, params, private):
12573 return self._PrepareNicModification(params, private, nic.ip,
12574 nic.nicparams, cluster, pnode)
12576 # Verify NIC changes (operating on copy)
12577 nics = instance.nics[:]
12578 ApplyContainerMods("NIC", nics, None, self.nicmod,
12579 _PrepareNicCreate, _PrepareNicMod, None)
12580 if len(nics) > constants.MAX_NICS:
12581 raise errors.OpPrereqError("Instance has too many network interfaces"
12582 " (%d), cannot add more" % constants.MAX_NICS,
12583 errors.ECODE_STATE)
12585 # Verify disk changes (operating on a copy)
12586 disks = instance.disks[:]
12587 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12588 if len(disks) > constants.MAX_DISKS:
12589 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12590 " more" % constants.MAX_DISKS,
12591 errors.ECODE_STATE)
12593 if self.op.offline is not None:
12594 if self.op.offline:
12595 msg = "can't change to offline"
12597 msg = "can't change to online"
12598 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12600 # Pre-compute NIC changes (necessary to use result in hooks)
12601 self._nic_chgdesc = []
12602 if self.nicmod:
12603 # Operate on copies as this is still in prereq
12604 nics = [nic.Copy() for nic in instance.nics]
12605 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12606 self._CreateNewNic, self._ApplyNicMods, None)
12607 self._new_nics = nics
12608 else:
12609 self._new_nics = None
12611 def _ConvertPlainToDrbd(self, feedback_fn):
12612 """Converts an instance from plain to drbd.
12615 feedback_fn("Converting template to drbd")
12616 instance = self.instance
12617 pnode = instance.primary_node
12618 snode = self.op.remote_node
12620 assert instance.disk_template == constants.DT_PLAIN
12622 # create a fake disk info for _GenerateDiskTemplate
12623 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12624 constants.IDISK_VG: d.logical_id[0]}
12625 for d in instance.disks]
12626 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12627 instance.name, pnode, [snode],
12628 disk_info, None, None, 0, feedback_fn,
12629 self.diskparams)
12630 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12631 self.diskparams)
12632 info = _GetInstanceInfoText(instance)
12633 feedback_fn("Creating additional volumes...")
12634 # first, create the missing data and meta devices
12635 for disk in anno_disks:
12636 # unfortunately this is... not too nice
12637 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12638 info, True)
12639 for child in disk.children:
12640 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12641 # at this stage, all new LVs have been created, we can rename the
12642 # old ones
12643 feedback_fn("Renaming original volumes...")
12644 rename_list = [(o, n.children[0].logical_id)
12645 for (o, n) in zip(instance.disks, new_disks)]
12646 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12647 result.Raise("Failed to rename original LVs")
12649 feedback_fn("Initializing DRBD devices...")
12650 # all child devices are in place, we can now create the DRBD devices
12651 for disk in anno_disks:
12652 for node in [pnode, snode]:
12653 f_create = node == pnode
12654 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12656 # at this point, the instance has been modified
12657 instance.disk_template = constants.DT_DRBD8
12658 instance.disks = new_disks
12659 self.cfg.Update(instance, feedback_fn)
12661 # Release node locks while waiting for sync
12662 _ReleaseLocks(self, locking.LEVEL_NODE)
12664 # disks are created, waiting for sync
12665 disk_abort = not _WaitForSync(self, instance,
12666 oneshot=not self.op.wait_for_sync)
12667 if disk_abort:
12668 raise errors.OpExecError("There are some degraded disks for"
12669 " this instance, please cleanup manually")
12671 # Node resource locks will be released by caller
12673 def _ConvertDrbdToPlain(self, feedback_fn):
12674 """Converts an instance from drbd to plain.
12677 instance = self.instance
12679 assert len(instance.secondary_nodes) == 1
12680 assert instance.disk_template == constants.DT_DRBD8
12682 pnode = instance.primary_node
12683 snode = instance.secondary_nodes[0]
12684 feedback_fn("Converting template to plain")
12686 old_disks = instance.disks
12687 new_disks = [d.children[0] for d in old_disks]
12689 # copy over size and mode
12690 for parent, child in zip(old_disks, new_disks):
12691 child.size = parent.size
12692 child.mode = parent.mode
12694 # this is a DRBD disk, return its port to the pool
12695 # NOTE: this must be done right before the call to cfg.Update!
12696 for disk in old_disks:
12697 tcp_port = disk.logical_id[2]
12698 self.cfg.AddTcpUdpPort(tcp_port)
12700 # update instance structure
12701 instance.disks = new_disks
12702 instance.disk_template = constants.DT_PLAIN
12703 self.cfg.Update(instance, feedback_fn)
12705 # Release locks in case removing disks takes a while
12706 _ReleaseLocks(self, locking.LEVEL_NODE)
12708 feedback_fn("Removing volumes on the secondary node...")
12709 for disk in old_disks:
12710 self.cfg.SetDiskID(disk, snode)
12711 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12712 if msg:
12713 self.LogWarning("Could not remove block device %s on node %s,"
12714 " continuing anyway: %s", disk.iv_name, snode, msg)
12716 feedback_fn("Removing unneeded volumes on the primary node...")
12717 for idx, disk in enumerate(old_disks):
12718 meta = disk.children[1]
12719 self.cfg.SetDiskID(meta, pnode)
12720 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12721 if msg:
12722 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12723 " continuing anyway: %s", idx, pnode, msg)
12725 def _CreateNewDisk(self, idx, params, _):
12726 """Creates a new disk.
12729 instance = self.instance
12732 if instance.disk_template in constants.DTS_FILEBASED:
12733 (file_driver, file_path) = instance.disks[0].logical_id
12734 file_path = os.path.dirname(file_path)
12735 else:
12736 file_driver = file_path = None
12738 disk = \
12739 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12740 instance.primary_node, instance.secondary_nodes,
12741 [params], file_path, file_driver, idx,
12742 self.Log, self.diskparams)[0]
12744 info = _GetInstanceInfoText(instance)
12746 logging.info("Creating volume %s for instance %s",
12747 disk.iv_name, instance.name)
12748 # Note: this needs to be kept in sync with _CreateDisks
12750 for node in instance.all_nodes:
12751 f_create = (node == instance.primary_node)
12752 try:
12753 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12754 except errors.OpExecError, err:
12755 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12756 disk.iv_name, disk, node, err)
12759 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12762 @staticmethod
12763 def _ModifyDisk(idx, disk, params, _):
12764 """Modifies a disk.
12767 disk.mode = params[constants.IDISK_MODE]
12770 ("disk.mode/%d" % idx, disk.mode),
12773 def _RemoveDisk(self, idx, root, _):
12777 for node, disk in root.ComputeNodeTree(self.instance.primary_node):
12778 self.cfg.SetDiskID(disk, node)
12779 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12780 if msg:
12781 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12782 " continuing anyway", idx, node, msg)
12784 # if this is a DRBD disk, return its port to the pool
12785 if root.dev_type in constants.LDS_DRBD:
12786 self.cfg.AddTcpUdpPort(root.logical_id[2])
12788 @staticmethod
12789 def _CreateNewNic(idx, params, private):
12790 """Creates data structure for a new network interface.
12793 mac = params[constants.INIC_MAC]
12794 ip = params.get(constants.INIC_IP, None)
12795 nicparams = private.params
12797 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12799 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12800 (mac, ip, private.filled[constants.NIC_MODE],
12801 private.filled[constants.NIC_LINK])),
12804 @staticmethod
12805 def _ApplyNicMods(idx, nic, params, private):
12806 """Modifies a network interface.
12810 changes = []
12811 for key in [constants.INIC_MAC, constants.INIC_IP]:
12812 if key in params:
12813 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12814 setattr(nic, key, params[key])
12816 if private.params:
12817 nic.nicparams = private.params
12819 for (key, val) in params.items():
12820 changes.append(("nic.%s/%d" % (key, idx), val))
12822 return changes
12824 def Exec(self, feedback_fn):
12825 """Modifies an instance.
12827 All parameters take effect only at the next restart of the instance.
12830 # Process here the warnings from CheckPrereq, as we don't have a
12831 # feedback_fn there.
12832 # TODO: Replace with self.LogWarning
12833 for warn in self.warn:
12834 feedback_fn("WARNING: %s" % warn)
12836 assert ((self.op.disk_template is None) ^
12837 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12838 "Not owning any node resource locks"
12840 result = []
12841 instance = self.instance
12844 if self.op.runtime_mem:
12845 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12846 instance,
12847 self.op.runtime_mem)
12848 rpcres.Raise("Cannot modify instance runtime memory")
12849 result.append(("runtime_memory", self.op.runtime_mem))
12851 # Apply disk changes
12852 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12853 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12854 _UpdateIvNames(0, instance.disks)
12856 if self.op.disk_template:
12858 check_nodes = set(instance.all_nodes)
12859 if self.op.remote_node:
12860 check_nodes.add(self.op.remote_node)
12861 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12862 owned = self.owned_locks(level)
12863 assert not (check_nodes - owned), \
12864 ("Not owning the correct locks, owning %r, expected at least %r" %
12865 (owned, check_nodes))
12867 r_shut = _ShutdownInstanceDisks(self, instance)
12868 if not r_shut:
12869 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12870 " proceed with disk template conversion")
12871 mode = (instance.disk_template, self.op.disk_template)
12873 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12875 self.cfg.ReleaseDRBDMinors(instance.name)
12877 result.append(("disk_template", self.op.disk_template))
12879 assert instance.disk_template == self.op.disk_template, \
12880 ("Expected disk template '%s', found '%s'" %
12881 (self.op.disk_template, instance.disk_template))
12883 # Release node and resource locks if there are any (they might already have
12884 # been released during disk conversion)
12885 _ReleaseLocks(self, locking.LEVEL_NODE)
12886 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12888 # Apply NIC changes
12889 if self._new_nics is not None:
12890 instance.nics = self._new_nics
12891 result.extend(self._nic_chgdesc)
12894 if self.op.hvparams:
12895 instance.hvparams = self.hv_inst
12896 for key, val in self.op.hvparams.iteritems():
12897 result.append(("hv/%s" % key, val))
12900 if self.op.beparams:
12901 instance.beparams = self.be_inst
12902 for key, val in self.op.beparams.iteritems():
12903 result.append(("be/%s" % key, val))
12906 if self.op.os_name:
12907 instance.os = self.op.os_name
12910 if self.op.osparams:
12911 instance.osparams = self.os_inst
12912 for key, val in self.op.osparams.iteritems():
12913 result.append(("os/%s" % key, val))
12915 if self.op.offline is None:
12916 # Ignore
12917 pass
12918 elif self.op.offline:
12919 # Mark instance as offline
12920 self.cfg.MarkInstanceOffline(instance.name)
12921 result.append(("admin_state", constants.ADMINST_OFFLINE))
12922 else:
12923 # Mark instance as online, but stopped
12924 self.cfg.MarkInstanceDown(instance.name)
12925 result.append(("admin_state", constants.ADMINST_DOWN))
12927 self.cfg.Update(instance, feedback_fn)
12929 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12930 self.owned_locks(locking.LEVEL_NODE)), \
12931 "All node locks should have been released by now"
12935 _DISK_CONVERSIONS = {
12936 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12937 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12938 }
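# Annotation (sketch, not original code): Exec() dispatches template
# conversions through this map, so only the listed (old, new) pairs are
# supported:
#
#   mode = (instance.disk_template, self.op.disk_template)
#   self._DISK_CONVERSIONS[mode](self, feedback_fn)   # missing key -> unsupported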
12941 class LUInstanceChangeGroup(LogicalUnit):
12942 HPATH = "instance-change-group"
12943 HTYPE = constants.HTYPE_INSTANCE
12946 def ExpandNames(self):
12947 self.share_locks = _ShareAll()
12948 self.needed_locks = {
12949 locking.LEVEL_NODEGROUP: [],
12950 locking.LEVEL_NODE: [],
12953 self._ExpandAndLockInstance()
12955 if self.op.target_groups:
12956 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12957 self.op.target_groups)
12958 else:
12959 self.req_target_uuids = None
12961 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12963 def DeclareLocks(self, level):
12964 if level == locking.LEVEL_NODEGROUP:
12965 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12967 if self.req_target_uuids:
12968 lock_groups = set(self.req_target_uuids)
12970 # Lock all groups used by instance optimistically; this requires going
12971 # via the node before it's locked, requiring verification later on
12972 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12973 lock_groups.update(instance_groups)
12974 else:
12975 # No target groups, need to lock all of them
12976 lock_groups = locking.ALL_SET
12978 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12980 elif level == locking.LEVEL_NODE:
12981 if self.req_target_uuids:
12982 # Lock all nodes used by instances
12983 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12984 self._LockInstancesNodes()
12986 # Lock all nodes in all potential target groups
12987 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12988 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12989 member_nodes = [node_name
12990 for group in lock_groups
12991 for node_name in self.cfg.GetNodeGroup(group).members]
12992 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12993 else:
12994 # Lock all nodes as all groups are potential targets
12995 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
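# Annotation on the optimistic locking pattern above (not original code):
# the group locks are computed from configuration data read *before* the
# locks are held, so CheckPrereq() has to re-validate that assumption once
# the locks are actually acquired, along the lines of:
#
#   owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
#   inst_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
#   assert owned_groups.issuperset(inst_groups), \
#     "Instance changed node groups while locks were being acquired"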
12997 def CheckPrereq(self):
12998 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12999 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13000 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13002 assert (self.req_target_uuids is None or
13003 owned_groups.issuperset(self.req_target_uuids))
13004 assert owned_instances == set([self.op.instance_name])
13006 # Get instance information
13007 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13009 # Check if node groups for locked instance are still correct
13010 assert owned_nodes.issuperset(self.instance.all_nodes), \
13011 ("Instance %s's nodes changed while we kept the lock" %
13012 self.op.instance_name)
13014 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13017 if self.req_target_uuids:
13018 # User requested specific target groups
13019 self.target_uuids = frozenset(self.req_target_uuids)
13021 # All groups except those used by the instance are potential targets
13022 self.target_uuids = owned_groups - inst_groups
13024 conflicting_groups = self.target_uuids & inst_groups
13025 if conflicting_groups:
13026 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13027 " used by the instance '%s'" %
13028 (utils.CommaJoin(conflicting_groups),
13029 self.op.instance_name),
13030 errors.ECODE_INVAL)
13032 if not self.target_uuids:
13033 raise errors.OpPrereqError("There are no possible target groups",
13034 errors.ECODE_INVAL)
13036 def BuildHooksEnv(self):
13037 """Build hooks env.
13040 assert self.target_uuids
13043 "TARGET_GROUPS": " ".join(self.target_uuids),
13046 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13050 def BuildHooksNodes(self):
13051 """Build hooks nodes.
13054 mn = self.cfg.GetMasterNode()
13055 return ([mn], [mn])
13057 def Exec(self, feedback_fn):
13058 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13060 assert instances == [self.op.instance_name], "Instance not locked"
13062 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13063 instances=instances, target_groups=list(self.target_uuids))
13065 ial.Run(self.op.iallocator)
13067 if not ial.success:
13068 raise errors.OpPrereqError("Can't compute solution for changing group of"
13069 " instance '%s' using iallocator '%s': %s" %
13070 (self.op.instance_name, self.op.iallocator,
13071 ial.info),
13072 errors.ECODE_NORES)
13074 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13076 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13077 " instance '%s'", len(jobs), self.op.instance_name)
13079 return ResultWithJobs(jobs)
13082 class LUBackupQuery(NoHooksLU):
13083 """Query the exports list
13088 def CheckArguments(self):
13089 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13090 ["node", "export"], self.op.use_locking)
13092 def ExpandNames(self):
13093 self.expq.ExpandNames(self)
13095 def DeclareLocks(self, level):
13096 self.expq.DeclareLocks(self, level)
13098 def Exec(self, feedback_fn):
13099 result = {}
13101 for (node, expname) in self.expq.OldStyleQuery(self):
13102 if expname is None:
13103 result[node] = False
13104 else:
13105 result.setdefault(node, []).append(expname)
13107 return result
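# Annotation (illustrative example of the old-style result built above):
#
#   {
#     "node1.example.com": ["instance1.example.com", "instance2.example.com"],
#     "node2.example.com": False,    # export list could not be retrieved
#   }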
13110 class _ExportQuery(_QueryBase):
13111 FIELDS = query.EXPORT_FIELDS
13113 #: The node name is not a unique key for this query
13114 SORT_FIELD = "node"
13116 def ExpandNames(self, lu):
13117 lu.needed_locks = {}
13119 # The following variables interact with _QueryBase._GetNames
13120 if self.names:
13121 self.wanted = _GetWantedNodes(lu, self.names)
13122 else:
13123 self.wanted = locking.ALL_SET
13125 self.do_locking = self.use_locking
13127 if self.do_locking:
13128 lu.share_locks = _ShareAll()
13129 lu.needed_locks = {
13130 locking.LEVEL_NODE: self.wanted,
13133 def DeclareLocks(self, lu, level):
13136 def _GetQueryData(self, lu):
13137 """Computes the list of nodes and their attributes.
13140 # Locking is not used
13142 assert not (compat.any(lu.glm.is_owned(level)
13143 for level in locking.LEVELS
13144 if level != locking.LEVEL_CLUSTER) or
13145 self.do_locking or self.use_locking)
13147 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13149 result = []
13151 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13152 if nres.fail_msg:
13153 result.append((node, None))
13154 else:
13155 result.extend((node, expname) for expname in nres.payload)
13157 return result
13160 class LUBackupPrepare(NoHooksLU):
13161 """Prepares an instance for an export and returns useful information.
13166 def ExpandNames(self):
13167 self._ExpandAndLockInstance()
13169 def CheckPrereq(self):
13170 """Check prerequisites.
13173 instance_name = self.op.instance_name
13175 self.instance = self.cfg.GetInstanceInfo(instance_name)
13176 assert self.instance is not None, \
13177 "Cannot retrieve locked instance %s" % self.op.instance_name
13178 _CheckNodeOnline(self, self.instance.primary_node)
13180 self._cds = _GetClusterDomainSecret()
13182 def Exec(self, feedback_fn):
13183 """Prepares an instance for an export.
13186 instance = self.instance
13188 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13189 salt = utils.GenerateSecret(8)
13191 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13192 result = self.rpc.call_x509_cert_create(instance.primary_node,
13193 constants.RIE_CERT_VALIDITY)
13194 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13196 (name, cert_pem) = result.payload
13198 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13202 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13203 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13205 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13211 class LUBackupExport(LogicalUnit):
13212 """Export an instance to an image in the cluster.
13215 HPATH = "instance-export"
13216 HTYPE = constants.HTYPE_INSTANCE
13219 def CheckArguments(self):
13220 """Check the arguments.
13223 self.x509_key_name = self.op.x509_key_name
13224 self.dest_x509_ca_pem = self.op.destination_x509_ca
13226 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13227 if not self.x509_key_name:
13228 raise errors.OpPrereqError("Missing X509 key name for encryption",
13229 errors.ECODE_INVAL)
13231 if not self.dest_x509_ca_pem:
13232 raise errors.OpPrereqError("Missing destination X509 CA",
13233 errors.ECODE_INVAL)
13235 def ExpandNames(self):
13236 self._ExpandAndLockInstance()
13238 # Lock all nodes for local exports
13239 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13240 # FIXME: lock only instance primary and destination node
13242 # Sad but true: for now we have to lock all nodes, as we don't know where
13243 # the previous export might be, and in this LU we search for it and
13244 # remove it from its current node. In the future we could fix this by:
13245 # - making a tasklet to search (share-lock all), then create the
13246 # new one, then one to remove, after
13247 # - removing the removal operation altogether
13248 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13250 def DeclareLocks(self, level):
13251 """Last minute lock declaration."""
13252 # All nodes are locked anyway, so nothing to do here.
13254 def BuildHooksEnv(self):
13255 """Build hooks env.
13257 This will run on the master, primary node and target node.
13261 "EXPORT_MODE": self.op.mode,
13262 "EXPORT_NODE": self.op.target_node,
13263 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13264 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13265 # TODO: Generic function for boolean env variables
13266 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13269 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13273 def BuildHooksNodes(self):
13274 """Build hooks nodes.
13277 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13279 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13280 nl.append(self.op.target_node)
13284 def CheckPrereq(self):
13285 """Check prerequisites.
13287 This checks that the instance and node names are valid.
13290 instance_name = self.op.instance_name
13292 self.instance = self.cfg.GetInstanceInfo(instance_name)
13293 assert self.instance is not None, \
13294 "Cannot retrieve locked instance %s" % self.op.instance_name
13295 _CheckNodeOnline(self, self.instance.primary_node)
13297 if (self.op.remove_instance and
13298 self.instance.admin_state == constants.ADMINST_UP and
13299 not self.op.shutdown):
13300 raise errors.OpPrereqError("Can not remove instance without shutting it"
13301 " down before")
13303 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13304 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13305 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13306 assert self.dst_node is not None
13308 _CheckNodeOnline(self, self.dst_node.name)
13309 _CheckNodeNotDrained(self, self.dst_node.name)
13312 self.dest_disk_info = None
13313 self.dest_x509_ca = None
13315 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13316 self.dst_node = None
13318 if len(self.op.target_node) != len(self.instance.disks):
13319 raise errors.OpPrereqError(("Received destination information for %s"
13320 " disks, but instance %s has %s disks") %
13321 (len(self.op.target_node), instance_name,
13322 len(self.instance.disks)),
13323 errors.ECODE_INVAL)
13325 cds = _GetClusterDomainSecret()
13327 # Check X509 key name
13329 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13330 except (TypeError, ValueError), err:
13331 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13333 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13334 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13335 errors.ECODE_INVAL)
13337 # Load and verify CA
13339 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13340 except OpenSSL.crypto.Error, err:
13341 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13342 (err, ), errors.ECODE_INVAL)
13344 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13345 if errcode is not None:
13346 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13347 (msg, ), errors.ECODE_INVAL)
13349 self.dest_x509_ca = cert
13351 # Verify target information
13353 for idx, disk_data in enumerate(self.op.target_node):
13355 (host, port, magic) = \
13356 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13357 except errors.GenericError, err:
13358 raise errors.OpPrereqError("Target info for disk %s: %s" %
13359 (idx, err), errors.ECODE_INVAL)
13361 disk_info.append((host, port, magic))
13363 assert len(disk_info) == len(self.op.target_node)
13364 self.dest_disk_info = disk_info
13366 else:
13367 raise errors.ProgrammerError("Unhandled export mode %r" %
13368 self.op.mode)
13370 # instance disk type verification
13371 # TODO: Implement export support for file-based disks
13372 for disk in self.instance.disks:
13373 if disk.dev_type == constants.LD_FILE:
13374 raise errors.OpPrereqError("Export not supported for instances with"
13375 " file-based disks", errors.ECODE_INVAL)
13377 def _CleanupExports(self, feedback_fn):
13378 """Removes exports of current instance from all other nodes.
13380 If an instance in a cluster with nodes A..D was exported to node C, its
13381 exports will be removed from the nodes A, B and D.
13384 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13386 nodelist = self.cfg.GetNodeList()
13387 nodelist.remove(self.dst_node.name)
13389 # on one-node clusters nodelist will be empty after the removal
13390 # if we proceed the backup would be removed because OpBackupQuery
13391 # substitutes an empty list with the full cluster node list.
13392 iname = self.instance.name
13393 if nodelist:
13394 feedback_fn("Removing old exports for instance %s" % iname)
13395 exportlist = self.rpc.call_export_list(nodelist)
13396 for node in exportlist:
13397 if exportlist[node].fail_msg:
13398 continue
13399 if iname in exportlist[node].payload:
13400 msg = self.rpc.call_export_remove(node, iname).fail_msg
13401 if msg:
13402 self.LogWarning("Could not remove older export for instance %s"
13403 " on node %s: %s", iname, node, msg)
13405 def Exec(self, feedback_fn):
13406 """Export an instance to an image in the cluster.
13409 assert self.op.mode in constants.EXPORT_MODES
13411 instance = self.instance
13412 src_node = instance.primary_node
13414 if self.op.shutdown:
13415 # shutdown the instance, but not the disks
13416 feedback_fn("Shutting down instance %s" % instance.name)
13417 result = self.rpc.call_instance_shutdown(src_node, instance,
13418 self.op.shutdown_timeout)
13419 # TODO: Maybe ignore failures if ignore_remove_failures is set
13420 result.Raise("Could not shutdown instance %s on"
13421 " node %s" % (instance.name, src_node))
13423 # set the disks ID correctly since call_instance_start needs the
13424 # correct drbd minor to create the symlinks
13425 for disk in instance.disks:
13426 self.cfg.SetDiskID(disk, src_node)
13428 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13430 if activate_disks:
13431 # Activate the instance disks if we're exporting a stopped instance
13432 feedback_fn("Activating disks for %s" % instance.name)
13433 _StartInstanceDisks(self, instance, None)
13436 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13439 helper.CreateSnapshots()
13441 if (self.op.shutdown and
13442 instance.admin_state == constants.ADMINST_UP and
13443 not self.op.remove_instance):
13444 assert not activate_disks
13445 feedback_fn("Starting instance %s" % instance.name)
13446 result = self.rpc.call_instance_start(src_node,
13447 (instance, None, None), False)
13448 msg = result.fail_msg
13449 if msg:
13450 feedback_fn("Failed to start instance: %s" % msg)
13451 _ShutdownInstanceDisks(self, instance)
13452 raise errors.OpExecError("Could not start instance: %s" % msg)
13454 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13455 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13456 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13457 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13458 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13460 (key_name, _, _) = self.x509_key_name
13462 dest_ca_pem = \
13463 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13464 self.dest_x509_ca)
13466 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13467 key_name, dest_ca_pem,
13468 timeouts)
13472 # Check for backwards compatibility
13473 assert len(dresults) == len(instance.disks)
13474 assert compat.all(isinstance(i, bool) for i in dresults), \
13475 "Not all results are boolean: %r" % dresults
13478 if activate_disks:
13479 feedback_fn("Deactivating disks for %s" % instance.name)
13480 _ShutdownInstanceDisks(self, instance)
13482 if not (compat.all(dresults) and fin_resu):
13483 failures = []
13484 if not fin_resu:
13485 failures.append("export finalization")
13486 if not compat.all(dresults):
13487 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13488 if not dsk)
13489 failures.append("disk export: disk(s) %s" % fdsk)
13491 raise errors.OpExecError("Export failed, errors in %s" %
13492 utils.CommaJoin(failures))
13494 # At this point, the export was successful, we can cleanup/finish
13496 # Remove instance if requested
13497 if self.op.remove_instance:
13498 feedback_fn("Removing instance %s" % instance.name)
13499 _RemoveInstance(self, feedback_fn, instance,
13500 self.op.ignore_remove_failures)
13502 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13503 self._CleanupExports(feedback_fn)
13505 return fin_resu, dresults
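# Annotation on the return value above (illustrative, not original code):
# fin_resu reports the export finalization and dresults carries one boolean
# per instance disk, e.g.
#
#   (True, [True, True])     # finalized, both disk dumps succeeded
#
# a False anywhere would already have raised OpExecError further up.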
13508 class LUBackupRemove(NoHooksLU):
13509 """Remove exports related to the named instance.
13514 def ExpandNames(self):
13515 self.needed_locks = {}
13516 # We need all nodes to be locked in order for RemoveExport to work, but we
13517 # don't need to lock the instance itself, as nothing will happen to it (and
13518 # we can remove exports also for a removed instance)
13519 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13521 def Exec(self, feedback_fn):
13522 """Remove any export.
13525 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13526 # If the instance was not found we'll try with the name that was passed in.
13527 # This will only work if it was an FQDN, though.
13528 fqdn_warn = False
13529 if not instance_name:
13530 fqdn_warn = True
13531 instance_name = self.op.instance_name
13533 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13534 exportlist = self.rpc.call_export_list(locked_nodes)
13535 found = False
13536 for node in exportlist:
13537 msg = exportlist[node].fail_msg
13538 if msg:
13539 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13540 continue
13541 if instance_name in exportlist[node].payload:
13542 found = True
13543 result = self.rpc.call_export_remove(node, instance_name)
13544 msg = result.fail_msg
13545 if msg:
13546 logging.error("Could not remove export for instance %s"
13547 " on node %s: %s", instance_name, node, msg)
13549 if fqdn_warn and not found:
13550 feedback_fn("Export not found. If trying to remove an export belonging"
13551 " to a deleted instance please use its Fully Qualified"
13555 class LUGroupAdd(LogicalUnit):
13556 """Logical unit for creating node groups.
13559 HPATH = "group-add"
13560 HTYPE = constants.HTYPE_GROUP
13563 def ExpandNames(self):
13564 # We need the new group's UUID here so that we can create and acquire the
13565 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13566 # that it should not check whether the UUID exists in the configuration.
13567 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13568 self.needed_locks = {}
13569 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13571 def CheckPrereq(self):
13572 """Check prerequisites.
13574 This checks that the given group name is not an existing node group
13578 try:
13579 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13580 except errors.OpPrereqError:
13581 pass
13582 else:
13583 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13584 " node group (UUID: %s)" %
13585 (self.op.group_name, existing_uuid),
13586 errors.ECODE_EXISTS)
13588 if self.op.ndparams:
13589 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13591 if self.op.hv_state:
13592 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13594 self.new_hv_state = None
13596 if self.op.disk_state:
13597 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13599 self.new_disk_state = None
13601 if self.op.diskparams:
13602 for templ in constants.DISK_TEMPLATES:
13603 if templ in self.op.diskparams:
13604 utils.ForceDictType(self.op.diskparams[templ],
13605 constants.DISK_DT_TYPES)
13606 self.new_diskparams = self.op.diskparams
13608 self.new_diskparams = {}
13610 if self.op.ipolicy:
13611 cluster = self.cfg.GetClusterInfo()
13612 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13614 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13615 except errors.ConfigurationError, err:
13616 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13617 errors.ECODE_INVAL)
13619 def BuildHooksEnv(self):
13620 """Build hooks env.
13624 "GROUP_NAME": self.op.group_name,
13627 def BuildHooksNodes(self):
13628 """Build hooks nodes.
13631 mn = self.cfg.GetMasterNode()
13632 return ([mn], [mn])
13634 def Exec(self, feedback_fn):
13635 """Add the node group to the cluster.
13638 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13639 uuid=self.group_uuid,
13640 alloc_policy=self.op.alloc_policy,
13641 ndparams=self.op.ndparams,
13642 diskparams=self.new_diskparams,
13643 ipolicy=self.op.ipolicy,
13644 hv_state_static=self.new_hv_state,
13645 disk_state_static=self.new_disk_state)
13647 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13648 del self.remove_locks[locking.LEVEL_NODEGROUP]
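# Usage sketch (hypothetical client-side example, not part of this module):
# the LU is normally reached through its opcode, e.g. from "gnt-group add":
#
#   op = opcodes.OpGroupAdd(group_name="rack1",
#                           alloc_policy=constants.ALLOC_POLICY_PREFERRED)
#   # submitted to the master daemon, which then runs LUGroupAdd for it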
13651 class LUGroupAssignNodes(NoHooksLU):
13652 """Logical unit for assigning nodes to groups.
13657 def ExpandNames(self):
13658 # These raise errors.OpPrereqError on their own:
13659 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13660 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13662 # We want to lock all the affected nodes and groups. We have readily
13663 # available the list of nodes, and the *destination* group. To gather the
13664 # list of "source" groups, we need to fetch node information later on.
13665 self.needed_locks = {
13666 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13667 locking.LEVEL_NODE: self.op.nodes,
13670 def DeclareLocks(self, level):
13671 if level == locking.LEVEL_NODEGROUP:
13672 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13674 # Try to get all affected nodes' groups without having the group or node
13675 # lock yet. Needs verification later in the code flow.
13676 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13678 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13680 def CheckPrereq(self):
13681 """Check prerequisites.
13684 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13685 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13686 frozenset(self.op.nodes))
13688 expected_locks = (set([self.group_uuid]) |
13689 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13690 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13691 if actual_locks != expected_locks:
13692 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13693 " current groups are '%s', used to be '%s'" %
13694 (utils.CommaJoin(expected_locks),
13695 utils.CommaJoin(actual_locks)))
13697 self.node_data = self.cfg.GetAllNodesInfo()
13698 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13699 instance_data = self.cfg.GetAllInstancesInfo()
13701 if self.group is None:
13702 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13703 (self.op.group_name, self.group_uuid))
13705 (new_splits, previous_splits) = \
13706 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13707 for node in self.op.nodes],
13708 self.node_data, instance_data)
13710 if new_splits:
13711 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13713 if not self.op.force:
13714 raise errors.OpExecError("The following instances get split by this"
13715 " change and --force was not given: %s" %
13716 fmt_new_splits)
13717 else:
13718 self.LogWarning("This operation will split the following instances: %s",
13719 fmt_new_splits)
13721 if previous_splits:
13722 self.LogWarning("In addition, these already-split instances continue"
13723 " to be split across groups: %s",
13724 utils.CommaJoin(utils.NiceSort(previous_splits)))
13726 def Exec(self, feedback_fn):
13727 """Assign nodes to a new group.
13730 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13732 self.cfg.AssignGroupNodes(mods)
13734 @staticmethod
13735 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13736 """Check for split instances after a node assignment.
13738 This method considers a series of node assignments as an atomic operation,
13739 and returns information about split instances after applying the set of
13742 In particular, it returns information about newly split instances, and
13743 instances that were already split, and remain so after the change.
13745 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13746 considered.
13748 @type changes: list of (node_name, new_group_uuid) pairs.
13749 @param changes: list of node assignments to consider.
13750 @param node_data: a dict with data for all nodes
13751 @param instance_data: a dict with all instances to consider
13752 @rtype: a two-tuple
13753 @return: a list of instances that were previously okay and result split as a
13754 consequence of this change, and a list of instances that were previously
13755 split and this change does not fix.
13758 changed_nodes = dict((node, group) for node, group in changes
13759 if node_data[node].group != group)
13761 all_split_instances = set()
13762 previously_split_instances = set()
13764 def InstanceNodes(instance):
13765 return [instance.primary_node] + list(instance.secondary_nodes)
13767 for inst in instance_data.values():
13768 if inst.disk_template not in constants.DTS_INT_MIRROR:
13771 instance_nodes = InstanceNodes(inst)
13773 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13774 previously_split_instances.add(inst.name)
13776 if len(set(changed_nodes.get(node, node_data[node].group)
13777 for node in instance_nodes)) > 1:
13778 all_split_instances.add(inst.name)
13780 return (list(all_split_instances - previously_split_instances),
13781 list(previously_split_instances & all_split_instances))
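# Worked example (annotation, not original code): a DRBD instance with its
# primary on node1 and its secondary on node2, both currently in group "g1",
# while node2 is being reassigned:
#
#   changes = [("node2", "g2")]
#   # node_data["node1"].group == node_data["node2"].group == "g1"
#   # -> the instance now spans g1/g2 and shows up in the first returned list
#   #    (newly split); instances that already spanned several groups before
#   #    the change end up in the second list instead.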
13784 class _GroupQuery(_QueryBase):
13785 FIELDS = query.GROUP_FIELDS
13787 def ExpandNames(self, lu):
13788 lu.needed_locks = {}
13790 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13791 self._cluster = lu.cfg.GetClusterInfo()
13792 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13794 if not self.names:
13795 self.wanted = [name_to_uuid[name]
13796 for name in utils.NiceSort(name_to_uuid.keys())]
13797 else:
13798 # Accept names to be either names or UUIDs.
13799 missing = []
13800 self.wanted = []
13801 all_uuid = frozenset(self._all_groups.keys())
13803 for name in self.names:
13804 if name in all_uuid:
13805 self.wanted.append(name)
13806 elif name in name_to_uuid:
13807 self.wanted.append(name_to_uuid[name])
13808 else:
13809 missing.append(name)
13811 if missing:
13812 raise errors.OpPrereqError("Some groups do not exist: %s" %
13813 utils.CommaJoin(missing),
13814 errors.ECODE_NOENT)
13816 def DeclareLocks(self, lu, level):
13819 def _GetQueryData(self, lu):
13820 """Computes the list of node groups and their attributes.
13823 do_nodes = query.GQ_NODE in self.requested_data
13824 do_instances = query.GQ_INST in self.requested_data
13826 group_to_nodes = None
13827 group_to_instances = None
13829 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13830 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13831 # latter GetAllInstancesInfo() is not enough, for we have to go through
13832 # instance->node. Hence, we will need to process nodes even if we only need
13833 # instance information.
13834 if do_nodes or do_instances:
13835 all_nodes = lu.cfg.GetAllNodesInfo()
13836 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13837 node_to_group = {}
13839 for node in all_nodes.values():
13840 if node.group in group_to_nodes:
13841 group_to_nodes[node.group].append(node.name)
13842 node_to_group[node.name] = node.group
13844 if do_instances:
13845 all_instances = lu.cfg.GetAllInstancesInfo()
13846 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13848 for instance in all_instances.values():
13849 node = instance.primary_node
13850 if node in node_to_group:
13851 group_to_instances[node_to_group[node]].append(instance.name)
13853 if not do_nodes:
13854 # Do not pass on node information if it was not requested.
13855 group_to_nodes = None
13857 return query.GroupQueryData(self._cluster,
13858 [self._all_groups[uuid]
13859 for uuid in self.wanted],
13860 group_to_nodes, group_to_instances,
13861 query.GQ_DISKPARAMS in self.requested_data)
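# Annotation (illustrative shape of the mappings built above):
#
#   group_to_nodes     = {"g1-uuid": ["node1", "node2"], "g2-uuid": []}
#   group_to_instances = {"g1-uuid": ["inst1.example.com"], "g2-uuid": []}
#
# either mapping stays None when the corresponding GQ_NODE/GQ_INST data was
# not requested.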
13864 class LUGroupQuery(NoHooksLU):
13865 """Logical unit for querying node groups.
13870 def CheckArguments(self):
13871 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13872 self.op.output_fields, False)
13874 def ExpandNames(self):
13875 self.gq.ExpandNames(self)
13877 def DeclareLocks(self, level):
13878 self.gq.DeclareLocks(self, level)
13880 def Exec(self, feedback_fn):
13881 return self.gq.OldStyleQuery(self)
13884 class LUGroupSetParams(LogicalUnit):
13885 """Modifies the parameters of a node group.
13888 HPATH = "group-modify"
13889 HTYPE = constants.HTYPE_GROUP
13892 def CheckArguments(self):
13893 all_changes = [
13894 self.op.ndparams,
13895 self.op.diskparams,
13896 self.op.alloc_policy,
13897 self.op.hv_state,
13898 self.op.disk_state,
13899 self.op.ipolicy,
13900 ]
13902 if all_changes.count(None) == len(all_changes):
13903 raise errors.OpPrereqError("Please pass at least one modification",
13904 errors.ECODE_INVAL)
13906 def ExpandNames(self):
13907 # This raises errors.OpPrereqError on its own:
13908 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13910 self.needed_locks = {
13911 locking.LEVEL_INSTANCE: [],
13912 locking.LEVEL_NODEGROUP: [self.group_uuid],
13915 self.share_locks[locking.LEVEL_INSTANCE] = 1
13917 def DeclareLocks(self, level):
13918 if level == locking.LEVEL_INSTANCE:
13919 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13921 # Lock instances optimistically, needs verification once group lock has
13923 self.needed_locks[locking.LEVEL_INSTANCE] = \
13924 self.cfg.GetNodeGroupInstances(self.group_uuid)
13926 @staticmethod
13927 def _UpdateAndVerifyDiskParams(old, new):
13928 """Updates and verifies disk parameters.
13931 new_params = _GetUpdatedParams(old, new)
13932 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
13933 return new_params
13935 def CheckPrereq(self):
13936 """Check prerequisites.
13939 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13941 # Check if locked instances are still correct
13942 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13944 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13945 cluster = self.cfg.GetClusterInfo()
13947 if self.group is None:
13948 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13949 (self.op.group_name, self.group_uuid))
13951 if self.op.ndparams:
13952 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13953 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13954 self.new_ndparams = new_ndparams
13956 if self.op.diskparams:
13957 diskparams = self.group.diskparams
13958 uavdp = self._UpdateAndVerifyDiskParams
13959 # For each disktemplate subdict update and verify the values
13960 new_diskparams = dict((dt,
13961 uavdp(diskparams.get(dt, {}),
13962 self.op.diskparams[dt]))
13963 for dt in constants.DISK_TEMPLATES
13964 if dt in self.op.diskparams)
13965 # As we've all subdicts of diskparams ready, lets merge the actual
13966 # dict with all updated subdicts
13967 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
13969 if self.op.hv_state:
13970 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13971 self.group.hv_state_static)
13973 if self.op.disk_state:
13974 self.new_disk_state = \
13975 _MergeAndVerifyDiskState(self.op.disk_state,
13976 self.group.disk_state_static)
13978 if self.op.ipolicy:
13979 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13983 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
13984 inst_filter = lambda inst: inst.name in owned_instances
13985 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
13986 violations = \
13987 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
13988 self.group),
13989 new_ipolicy, instances)
13991 if violations:
13992 self.LogWarning("After the ipolicy change the following instances"
13993 " violate them: %s",
13994 utils.CommaJoin(violations))
13996 def BuildHooksEnv(self):
13997 """Build hooks env.
14001 "GROUP_NAME": self.op.group_name,
14002 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14005 def BuildHooksNodes(self):
14006 """Build hooks nodes.
14009 mn = self.cfg.GetMasterNode()
14010 return ([mn], [mn])
14012 def Exec(self, feedback_fn):
14013 """Modifies the node group.
14017 result = []
14018 if self.op.ndparams:
14019 self.group.ndparams = self.new_ndparams
14020 result.append(("ndparams", str(self.group.ndparams)))
14022 if self.op.diskparams:
14023 self.group.diskparams = self.new_diskparams
14024 result.append(("diskparams", str(self.group.diskparams)))
14026 if self.op.alloc_policy:
14027 self.group.alloc_policy = self.op.alloc_policy
14029 if self.op.hv_state:
14030 self.group.hv_state_static = self.new_hv_state
14032 if self.op.disk_state:
14033 self.group.disk_state_static = self.new_disk_state
14035 if self.op.ipolicy:
14036 self.group.ipolicy = self.new_ipolicy
14038 self.cfg.Update(self.group, feedback_fn)
14040 return result
14042 class LUGroupRemove(LogicalUnit):
14043 HPATH = "group-remove"
14044 HTYPE = constants.HTYPE_GROUP
14047 def ExpandNames(self):
14048 # This raises errors.OpPrereqError on its own:
14049 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14050 self.needed_locks = {
14051 locking.LEVEL_NODEGROUP: [self.group_uuid],
14054 def CheckPrereq(self):
14055 """Check prerequisites.
14057 This checks that the given group name exists as a node group, that it is
14058 empty (i.e., contains no nodes), and that it is not the last group of the
14059 cluster.
14062 # Verify that the group is empty.
14063 group_nodes = [node.name
14064 for node in self.cfg.GetAllNodesInfo().values()
14065 if node.group == self.group_uuid]
14067 if group_nodes:
14068 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14069 " nodes: %s" %
14070 (self.op.group_name,
14071 utils.CommaJoin(utils.NiceSort(group_nodes))),
14072 errors.ECODE_STATE)
14074 # Verify the cluster would not be left group-less.
14075 if len(self.cfg.GetNodeGroupList()) == 1:
14076 raise errors.OpPrereqError("Group '%s' is the only group,"
14077 " cannot be removed" %
14078 self.op.group_name,
14079 errors.ECODE_STATE)
14081 def BuildHooksEnv(self):
14082 """Build hooks env.
14086 "GROUP_NAME": self.op.group_name,
14089 def BuildHooksNodes(self):
14090 """Build hooks nodes.
14093 mn = self.cfg.GetMasterNode()
14094 return ([mn], [mn])
14096 def Exec(self, feedback_fn):
14097 """Remove the node group.
14101 self.cfg.RemoveNodeGroup(self.group_uuid)
14102 except errors.ConfigurationError:
14103 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14104 (self.op.group_name, self.group_uuid))
14106 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14109 class LUGroupRename(LogicalUnit):
14110 HPATH = "group-rename"
14111 HTYPE = constants.HTYPE_GROUP
14114 def ExpandNames(self):
14115 # This raises errors.OpPrereqError on its own:
14116 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14118 self.needed_locks = {
14119 locking.LEVEL_NODEGROUP: [self.group_uuid],
14122 def CheckPrereq(self):
14123 """Check prerequisites.
14125 Ensures requested new name is not yet used.
14128 try:
14129 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14130 except errors.OpPrereqError:
14131 pass
14132 else:
14133 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14134 " node group (UUID: %s)" %
14135 (self.op.new_name, new_name_uuid),
14136 errors.ECODE_EXISTS)
14138 def BuildHooksEnv(self):
14139 """Build hooks env.
14143 "OLD_NAME": self.op.group_name,
14144 "NEW_NAME": self.op.new_name,
14147 def BuildHooksNodes(self):
14148 """Build hooks nodes.
14151 mn = self.cfg.GetMasterNode()
14153 all_nodes = self.cfg.GetAllNodesInfo()
14154 all_nodes.pop(mn, None)
14156 run_nodes = [mn]
14157 run_nodes.extend(node.name for node in all_nodes.values()
14158 if node.group == self.group_uuid)
14160 return (run_nodes, run_nodes)
14162 def Exec(self, feedback_fn):
14163 """Rename the node group.
14166 group = self.cfg.GetNodeGroup(self.group_uuid)
14168 if group is None:
14169 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14170 (self.op.group_name, self.group_uuid))
14172 group.name = self.op.new_name
14173 self.cfg.Update(group, feedback_fn)
14175 return self.op.new_name
14178 class LUGroupEvacuate(LogicalUnit):
14179 HPATH = "group-evacuate"
14180 HTYPE = constants.HTYPE_GROUP
14183 def ExpandNames(self):
14184 # This raises errors.OpPrereqError on its own:
14185 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14187 if self.op.target_groups:
14188 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14189 self.op.target_groups)
14190 else:
14191 self.req_target_uuids = []
14193 if self.group_uuid in self.req_target_uuids:
14194 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14195 " as a target group (targets are %s)" %
14196 (self.group_uuid,
14197 utils.CommaJoin(self.req_target_uuids)),
14198 errors.ECODE_INVAL)
14200 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14202 self.share_locks = _ShareAll()
14203 self.needed_locks = {
14204 locking.LEVEL_INSTANCE: [],
14205 locking.LEVEL_NODEGROUP: [],
14206 locking.LEVEL_NODE: [],
14209 def DeclareLocks(self, level):
14210 if level == locking.LEVEL_INSTANCE:
14211 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14213 # Lock instances optimistically, needs verification once node and group
14214 # locks have been acquired
14215 self.needed_locks[locking.LEVEL_INSTANCE] = \
14216 self.cfg.GetNodeGroupInstances(self.group_uuid)
14218 elif level == locking.LEVEL_NODEGROUP:
14219 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14221 if self.req_target_uuids:
14222 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14224 # Lock all groups used by instances optimistically; this requires going
14225 # via the node before it's locked, requiring verification later on
14226 lock_groups.update(group_uuid
14227 for instance_name in
14228 self.owned_locks(locking.LEVEL_INSTANCE)
14229 for group_uuid in
14230 self.cfg.GetInstanceNodeGroups(instance_name))
14231 else:
14232 # No target groups, need to lock all of them
14233 lock_groups = locking.ALL_SET
14235 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14237 elif level == locking.LEVEL_NODE:
14238 # This will only lock the nodes in the group to be evacuated which
14239 # contain actual instances
14240 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14241 self._LockInstancesNodes()
14243 # Lock all nodes in group to be evacuated and target groups
14244 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14245 assert self.group_uuid in owned_groups
14246 member_nodes = [node_name
14247 for group in owned_groups
14248 for node_name in self.cfg.GetNodeGroup(group).members]
14249 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14251 def CheckPrereq(self):
14252 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14253 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14254 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14256 assert owned_groups.issuperset(self.req_target_uuids)
14257 assert self.group_uuid in owned_groups
14259 # Check if locked instances are still correct
14260 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14262 # Get instance information
14263 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14265 # Check if node groups for locked instances are still correct
14266 _CheckInstancesNodeGroups(self.cfg, self.instances,
14267 owned_groups, owned_nodes, self.group_uuid)
14269 if self.req_target_uuids:
14270 # User requested specific target groups
14271 self.target_uuids = self.req_target_uuids
14273 # All groups except the one to be evacuated are potential targets
14274 self.target_uuids = [group_uuid for group_uuid in owned_groups
14275 if group_uuid != self.group_uuid]
14277 if not self.target_uuids:
14278 raise errors.OpPrereqError("There are no possible target groups",
14279 errors.ECODE_INVAL)
14281 def BuildHooksEnv(self):
14282 """Build hooks env.
14286 "GROUP_NAME": self.op.group_name,
14287 "TARGET_GROUPS": " ".join(self.target_uuids),
14290 def BuildHooksNodes(self):
14291 """Build hooks nodes.
14294 mn = self.cfg.GetMasterNode()
14296 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14298 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14300 return (run_nodes, run_nodes)
14302 def Exec(self, feedback_fn):
14303 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14305 assert self.group_uuid not in self.target_uuids
14307 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14308 instances=instances, target_groups=self.target_uuids)
14310 ial.Run(self.op.iallocator)
14312 if not ial.success:
14313 raise errors.OpPrereqError("Can't compute group evacuation using"
14314 " iallocator '%s': %s" %
14315 (self.op.iallocator, ial.info),
14316 errors.ECODE_NORES)
14318 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14320 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14321 len(jobs), self.op.group_name)
14323 return ResultWithJobs(jobs)
14326 class TagsLU(NoHooksLU): # pylint: disable=W0223
14327 """Generic tags LU.
14329 This is an abstract class which is the parent of all the other tags LUs.
14332 def ExpandNames(self):
14333 self.group_uuid = None
14334 self.needed_locks = {}
14336 if self.op.kind == constants.TAG_NODE:
14337 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14338 lock_level = locking.LEVEL_NODE
14339 lock_name = self.op.name
14340 elif self.op.kind == constants.TAG_INSTANCE:
14341 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14342 lock_level = locking.LEVEL_INSTANCE
14343 lock_name = self.op.name
14344 elif self.op.kind == constants.TAG_NODEGROUP:
14345 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14346 lock_level = locking.LEVEL_NODEGROUP
14347 lock_name = self.group_uuid
14348 else:
14349 lock_level = None
14350 lock_name = None
14352 if lock_level and getattr(self.op, "use_locking", True):
14353 self.needed_locks[lock_level] = lock_name
14355 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14356 # not possible to acquire the BGL based on opcode parameters)
14358 def CheckPrereq(self):
14359 """Check prerequisites.
14362 if self.op.kind == constants.TAG_CLUSTER:
14363 self.target = self.cfg.GetClusterInfo()
14364 elif self.op.kind == constants.TAG_NODE:
14365 self.target = self.cfg.GetNodeInfo(self.op.name)
14366 elif self.op.kind == constants.TAG_INSTANCE:
14367 self.target = self.cfg.GetInstanceInfo(self.op.name)
14368 elif self.op.kind == constants.TAG_NODEGROUP:
14369 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14371 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14372 str(self.op.kind), errors.ECODE_INVAL)
14375 class LUTagsGet(TagsLU):
14376 """Returns the tags of a given object.
14381 def ExpandNames(self):
14382 TagsLU.ExpandNames(self)
14384 # Share locks as this is only a read operation
14385 self.share_locks = _ShareAll()
14387 def Exec(self, feedback_fn):
14388 """Returns the tag list.
14391 return list(self.target.GetTags())
14394 class LUTagsSearch(NoHooksLU):
14395 """Searches the tags for a given pattern.
14400 def ExpandNames(self):
14401 self.needed_locks = {}
14403 def CheckPrereq(self):
14404 """Check prerequisites.
14406 This checks the pattern passed for validity by compiling it.
14409 try:
14410 self.re = re.compile(self.op.pattern)
14411 except re.error, err:
14412 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14413 (self.op.pattern, err), errors.ECODE_INVAL)
14415 def Exec(self, feedback_fn):
14416 """Returns the tag list.
14419 cfg = self.cfg
14420 tgts = [("/cluster", cfg.GetClusterInfo())]
14421 ilist = cfg.GetAllInstancesInfo().values()
14422 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14423 nlist = cfg.GetAllNodesInfo().values()
14424 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14425 tgts.extend(("/nodegroup/%s" % n.name, n)
14426 for n in cfg.GetAllNodeGroupsInfo().values())
14427 results = []
14428 for path, target in tgts:
14429 for tag in target.GetTags():
14430 if self.re.search(tag):
14431 results.append((path, tag))
14433 return results
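# Annotation (illustrative result format): for the pattern "^db" the search
# above could return, for example,
#
#   [("/cluster", "dbfarm"), ("/instances/inst1.example.com", "dbserver")]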
14435 class LUTagsSet(TagsLU):
14436 """Sets a tag on a given object.
14441 def CheckPrereq(self):
14442 """Check prerequisites.
14444 This checks the type and length of the tag name and value.
14447 TagsLU.CheckPrereq(self)
14448 for tag in self.op.tags:
14449 objects.TaggableObject.ValidateTag(tag)
14451 def Exec(self, feedback_fn):
14455 try:
14456 for tag in self.op.tags:
14457 self.target.AddTag(tag)
14458 except errors.TagError, err:
14459 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14460 self.cfg.Update(self.target, feedback_fn)
14463 class LUTagsDel(TagsLU):
14464 """Delete a list of tags from a given object.
14469 def CheckPrereq(self):
14470 """Check prerequisites.
14472 This checks that we have the given tag.
14475 TagsLU.CheckPrereq(self)
14476 for tag in self.op.tags:
14477 objects.TaggableObject.ValidateTag(tag)
14478 del_tags = frozenset(self.op.tags)
14479 cur_tags = self.target.GetTags()
14481 diff_tags = del_tags - cur_tags
14482 if diff_tags:
14483 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14484 raise errors.OpPrereqError("Tag(s) %s not found" %
14485 (utils.CommaJoin(diff_names), ),
14486 errors.ECODE_NOENT)
14488 def Exec(self, feedback_fn):
14489 """Remove the tag from the object.
14492 for tag in self.op.tags:
14493 self.target.RemoveTag(tag)
14494 self.cfg.Update(self.target, feedback_fn)
14497 class LUTestDelay(NoHooksLU):
14498 """Sleep for a specified amount of time.
14500 This LU sleeps on the master and/or nodes for a specified amount of
14506 def ExpandNames(self):
14507 """Expand names and set required locks.
14509 This expands the node list, if any.
14512 self.needed_locks = {}
14513 if self.op.on_nodes:
14514 # _GetWantedNodes can be used here, but is not always appropriate to use
14515 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14516 # more information.
14517 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14518 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14520 def _TestDelay(self):
14521 """Do the actual sleep.
14524 if self.op.on_master:
14525 if not utils.TestDelay(self.op.duration):
14526 raise errors.OpExecError("Error during master delay test")
14527 if self.op.on_nodes:
14528 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14529 for node, node_result in result.items():
14530 node_result.Raise("Failure during rpc call to node %s" % node)
14532 def Exec(self, feedback_fn):
14533 """Execute the test delay opcode, with the wanted repetitions.
14536 if self.op.repeat == 0:
14537 self._TestDelay()
14538 else:
14539 top_value = self.op.repeat - 1
14540 for i in range(self.op.repeat):
14541 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14542 self._TestDelay()
14545 class LUTestJqueue(NoHooksLU):
14546 """Utility LU to test some aspects of the job queue.
14551 # Must be lower than default timeout for WaitForJobChange to see whether it
14552 # notices changed jobs
14553 _CLIENT_CONNECT_TIMEOUT = 20.0
14554 _CLIENT_CONFIRM_TIMEOUT = 60.0
14557 def _NotifyUsingSocket(cls, cb, errcls):
14558 """Opens a Unix socket and waits for another program to connect.
14561 @param cb: Callback to send socket name to client
14562 @type errcls: class
14563 @param errcls: Exception class to use for errors
14566 # Using a temporary directory as there's no easy way to create temporary
14567 # sockets without writing a custom loop around tempfile.mktemp and
14569 tmpdir = tempfile.mkdtemp()
14571 tmpsock = utils.PathJoin(tmpdir, "sock")
14573 logging.debug("Creating temporary socket at %s", tmpsock)
14574 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14579 # Send details to client
14582 # Wait for client to connect before continuing
14583 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14584 try:
14585 (conn, _) = sock.accept()
14586 except socket.error, err:
14587 raise errcls("Client didn't connect in time (%s)" % err)
14591 # Remove as soon as client is connected
14592 shutil.rmtree(tmpdir)
14594 # Wait for client to close
14597 # pylint: disable=E1101
14598 # Instance of '_socketobject' has no ... member
14599 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14600 conn.recv(1)
14601 except socket.error, err:
14602 raise errcls("Client failed to confirm notification (%s)" % err)
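# Minimal sketch of the client side of this handshake (an assumption for
# illustration, not original code): the test client gets the socket path via
# the callback, connects, and keeps the connection open until it is ready to
# confirm:
#
#   client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#   client.connect(sockname)     # unblocks sock.accept() above
#   ...                          # wait until the notification is processed
#   client.close()               # ends the confirmation wait on the LU side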
14606 def _SendNotification(self, test, arg, sockname):
14607 """Sends a notification to the client.
14610 @param test: Test name
14611 @param arg: Test argument (depends on test)
14612 @type sockname: string
14613 @param sockname: Socket path
14616 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14618 def _Notify(self, prereq, test, arg):
14619 """Notifies the client of a test.
14622 @param prereq: Whether this is a prereq-phase test
14624 @param test: Test name
14625 @param arg: Test argument (depends on test)
14628 if prereq:
14629 errcls = errors.OpPrereqError
14630 else:
14631 errcls = errors.OpExecError
14633 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14637 def CheckArguments(self):
14638 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14639 self.expandnames_calls = 0
14641 def ExpandNames(self):
14642 checkargs_calls = getattr(self, "checkargs_calls", 0)
14643 if checkargs_calls < 1:
14644 raise errors.ProgrammerError("CheckArguments was not called")
14646 self.expandnames_calls += 1
14648 if self.op.notify_waitlock:
14649 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14651 self.LogInfo("Expanding names")
14653 # Get lock on master node (just to get a lock, not for a particular reason)
14654 self.needed_locks = {
14655 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the per-mode key list in _MODE_DATA
      are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc_runner, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc_runner
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = self.spindle_use = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)

    self._BuildInputData(compat.partial(fn, self), keydata)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      "ipolicy": cluster_info.ipolicy,
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.primary_hypervisor

    node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
                                        [hypervisor_name])
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    cluster = cfg.GetClusterInfo()
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng

  @staticmethod
  def _ComputeBasicNodeData(cfg, node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict of name: (node dict, node config)

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      "ndparams": cfg.GetNdParams(ninfo),
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    #TODO(dynmem): compute the right data on MAX and MIN memory
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = _MakeLegacyNodeInfo(nresult.payload)

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MAXMEM]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_state == constants.ADMINST_UP:
              i_p_up_mem += beinfo[constants.BE_MAXMEM]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_state": iinfo.admin_state,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MAXMEM],
        "spindle_use": beinfo[constants.BE_SPINDLE_USE],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data
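
  # For illustration only (all values below are made up), one entry of the
  # "instances" map built above might look like:
  #
  #   "web1.example.com": {
  #     "admin_state": "up",
  #     "vcpus": 2,
  #     "memory": 1024,
  #     "nics": [{"mac": "aa:00:00:12:34:56", "ip": None,
  #               "mode": "bridged", "link": "xen-br0",
  #               "bridge": "xen-br0"}],
  #     "disks": [{"size": 10240, "mode": "rw"}],
  #     "disk_template": "drbd",
  #     ...
  #   }
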
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "spindle_use": self.spindle_use,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
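
  # For illustration only (an abridged, hypothetical example): after
  # _BuildInputData the serialized input handed to the iallocator script has
  # roughly this shape:
  #
  #   {
  #     "cluster_name": "cluster.example.com",
  #     "cluster_tags": [],
  #     "nodegroups": {...}, "nodes": {...}, "instances": {...},
  #     "request": {"type": "allocate", "name": "new.example.com", ...}
  #   }
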
  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                  ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                  ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("spindle_use", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict
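
  # For illustration only (a hypothetical, minimal script reply): a valid
  # allocation answer parsed by _ValidateResult could look like:
  #
  #   {"success": true,
  #    "info": "allocation successful",
  #    "result": ["node2.example.com", "node3.example.com"]}
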
  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @param groups: Group information
    @param nodes: Node names

    """
    result = set()
    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)
    return sorted(result)
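
  # A small, hypothetical usage example of the helper above:
  #
  #   _NodesToGroups({"n1": "uuid-a", "n2": "uuid-b"},
  #                  {"uuid-a": {"name": "default"}},
  #                  ["n1", "n2", "unknown"])
  #   -> ["default", "uuid-b"]   # unknown node ignored, missing group -> UUID

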
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       spindle_use=self.op.spindle_use)
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from))
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
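
# A small, hypothetical usage example of the lookup helper above:
#
#   impl = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
#   _GetQueryImplementation("no-such-resource")         # raises OpPrereqError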