# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import copy
import itertools
import logging
import re

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

from ganeti import rpc
from ganeti import runtime

import ganeti.masterd.instance # pylint: disable=W0611


#: Size of DRBD meta block device
DRBD_META_SIZE = 128

INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))


class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcodes.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    assert "jobs" not in kwargs

    self.other = kwargs
    self.jobs = jobs
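

# Example (illustrative sketch, not part of the original module): an LU's Exec
# can hand follow-up work to the job queue by returning something like
#   return ResultWithJobs([[opcodes.OpClusterVerifyConfig()]])
# mcpu._ProcessResult then submits each inner list as one job and adds the
# resulting job IDs to the opcode result.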


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing them separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    #
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and "could
    # be a function" warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]
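

# Example (illustrative sketch of the pattern documented above, not original
# code): an LU operating on one instance and its nodes typically pairs the
# two helpers like this:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()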


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


class _QueryBase(object):
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  #: Field to sort by
  SORT_FIELD = "name"

  def __init__(self, qfilter, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
                             namefield=self.SORT_FIELD)
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


def _ShareAll():
  """Returns a dict declaring all lock levels shared.

  """
  return dict.fromkeys(locking.LEVELS, 1)


def _MakeLegacyNodeInfo(data):
  """Formats the data returned by L{rpc.RpcRunner.call_node_info}.

  Converts the data into a single dictionary. This is fine for most use cases,
  but some require information from more than one volume group or hypervisor.

  """
  (bootid, (vg_info, ), (hv_info, )) = data

  return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
    "bootid": bootid,
    })


def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of their children!)
  @param cfg: The config object
  @returns: The annotated disk copies
  @see L{rpc.AnnotateDiskParams}

  """
  return rpc.AnnotateDiskParams(instance.disk_template, devs,
                                cfg.GetInstanceDiskParams(instance))


def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
                              cur_group_uuid):
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  """
  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
                             primary_only=False):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  @type primary_only: boolean
  @param primary_only: Whether to check node groups for only the primary node

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups"
                               " are '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
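

# Example (illustrative only, derived from the documented semantics): with
#   old_params = {"a": 1, "b": 2}
#   update_dict = {"b": constants.VALUE_DEFAULT, "c": 3}
# _GetUpdatedParams returns {"a": 1, "c": 3}: "b" is reset to its default by
# being removed from the result, while "c" is added.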


def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.

  @param group_policy: whether this policy applies to a group and thus
      we should support removal of policy entries

  """
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
                                 errors.ECODE_INVAL)
    if key in constants.IPOLICY_ISPECS:
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_none=use_none,
                                       use_default=use_default)
    else:
      if (not value or value == [constants.VALUE_DEFAULT] or
          value == constants.VALUE_DEFAULT):
        if group_policy:
          del ipolicy[key]
        else:
          raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster" % key,
                                     errors.ECODE_INVAL)
      else:
        if key in constants.IPOLICY_PARAMETERS:
          # FIXME: we assume all such values are float
          try:
            ipolicy[key] = float(value)
          except (TypeError, ValueError), err:
            raise errors.OpPrereqError("Invalid value for attribute"
                                       " '%s': '%s', error: %s" %
                                       (key, value, err), errors.ECODE_INVAL)
        else:
          # FIXME: we assume all others are lists; this should be redone
          ipolicy[key] = list(value)
  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
  except errors.ConfigurationError, err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                               errors.ECODE_INVAL)
  return ipolicy


def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values

  """
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret
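

# Example (illustrative only): merging keyed sub-dicts. Assuming type_check
# accepts string values,
#   _UpdateAndVerifySubDict({"hv1": {"x": "a"}},
#                           {"hv1": {"y": "b"}}, type_check)
# yields {"hv1": {"x": "a", "y": "b"}}: each sub-dict is merged with
# _GetUpdatedParams and then type-checked with utils.ForceDictType.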


def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
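

# Example (illustrative only): once an LU has narrowed its work down to a
# single node, it can drop the node locks it no longer needs:
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
# or release a specific set instead:
#   _ReleaseLocks(self, locking.LEVEL_NODE, names=unused_nodes)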


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as
      value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
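

# Example (illustrative only): an instance "inst1.example.com" with a logical
# volume "xenvg/disk0" on node "node1.example.com" contributes the entry
#   ("node1.example.com", "xenvg/disk0") -> "inst1.example.com"
# to the resulting dictionary.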


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = static
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    if not lu.cfg.GetNodeInfo(pnode).offline:
      ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
      ins_l.Raise("Can't contact node %s for instance information" % pnode,
                  prereq=True, ecode=errors.ECODE_ENVIRON)
      if instance.name in ins_l.payload:
        raise errors.OpPrereqError("Instance %s is running, %s" %
                                   (instance.name, msg), errors.ECODE_STATE)
    else:
      lu.LogWarning("Primary node offline, ignoring check that instance"
                    " is down")


def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
      not just 'disk')
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None if the value is within policy, otherwise an error message
      naming the element not meeting the criteria

  """
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    if qualifier:
      fqn = "%s/%s" % (name, qualifier)
    else:
      fqn = name
    return ("%s value %s is not in range [%s, %s]" %
            (fqn, value, min_v, max_v))
  return None
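

# Example (illustrative only, with hypothetical policy values): for an ipolicy
# whose min/max for "memory-size" are 128 and 4096, a value of 8192 yields
# "memory-size value 8192 is not in range [128, 4096]", while 512 (in range),
# None and constants.VALUE_AUTO all yield None.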


def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]

  return filter(None,
                (_compute_fn(name, qualifier, ipolicy, value)
                 for (name, qualifier, value) in test_settings))


def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyInstanceSpecViolation(
  ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meet the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)


def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violate the new ipolicy but
      did not violate the old one

  """
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
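

# Example (illustrative only): _ExpandNodeName(cfg, "node1") resolves a short
# name to the fully qualified one known to the configuration, e.g.
# "node1.example.com", and raises errors.OpPrereqError if no such node exists.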


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
    }
  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if tags is None:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
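

# Example (illustrative only): for a one-NIC, one-disk instance the resulting
# environment contains keys such as INSTANCE_NAME, INSTANCE_PRIMARY,
# INSTANCE_NIC0_MAC, INSTANCE_NIC_COUNT, INSTANCE_DISK0_SIZE,
# INSTANCE_DISK_COUNT and one INSTANCE_BE_*/INSTANCE_HV_* entry per backend
# and hypervisor parameter; the hooks runner prefixes each key with "GANETI_".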


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
    }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
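

# Worked example (illustrative only): with candidate_pool_size = 10 and
# currently mc_now = 3 candidates where 3 are wanted, the node being added
# raises the target to min(3 + 1, 10) = 4; since 3 < 4, the new node should
# promote itself to master candidate.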


def _CalculateGroupIPolicy(cluster, group):
  """Calculate instance policy for group.

  """
  return cluster.SimpleFillIPolicy(group.ipolicy)


def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances that violate the given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
                                                                instance))
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)


def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
      apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data
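

# Example (illustrative only): the resulting list contains triples such as
#   ("cluster", "xen-pvm", {...cluster-level defaults...})
#   ("os debian-image", "xen-pvm", {...os-specific overrides...})
#   ("instance inst1.example.com", "kvm", {...fully filled parameters...})
# which LUClusterVerifyConfig._VerifyHVP then checks one by one.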


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors: # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if cond:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
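

# Example (illustrative only; exact field values depend on the CV_E* error
# definitions): with error_codes enabled a problem is reported in the
# machine-parseable form
#   ERROR:ENODESSH:node:node1.example.com:ssh communication with node failed
# and, without it, as the plain string
#   ERROR: node node1.example.com: ssh communication with node failed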


class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend(
      [opcodes.OpClusterVerifyGroup(group_name=group,
                                    ignore_errors=self.op.ignore_errors,
                                    depends=depends_fn())]
      for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
1945 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1946 """Verifies the cluster config.
1951 def _VerifyHVP(self, hvp_data):
1952 """Verifies locally the syntax of the hypervisor parameters.
1955 for item, hv_name, hv_params in hvp_data:
1956 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1959 hv_class = hypervisor.GetHypervisor(hv_name)
1960 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1961 hv_class.CheckParameterSyntax(hv_params)
1962 except errors.GenericError, err:
1963 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
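  # Editor's note, illustrative only: hvp_data is a list of
  # (source, hypervisor, parameters) triples, e.g. (hypothetical values)
  #
  #   [("cluster", "xen-pvm", {"kernel_path": "/boot/vmlinuz"}),
  #    ("os debootstrap", "xen-pvm", {...}),
  #    ("instance inst1.example.com", "xen-pvm", {...})]
  #
  # so a syntax error is reported together with the level that supplied the
  # offending parameters.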
1965 def ExpandNames(self):
1966 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1967 self.share_locks = _ShareAll()
1969 def CheckPrereq(self):
1970 """Check prerequisites.
    """
    # Retrieve all information
1974 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1975 self.all_node_info = self.cfg.GetAllNodesInfo()
1976 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1978 def Exec(self, feedback_fn):
1979 """Verify integrity of cluster, performing various test on nodes.
1983 self._feedback_fn = feedback_fn
1985 feedback_fn("* Verifying cluster config")
1987 for msg in self.cfg.VerifyConfig():
1988 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1990 feedback_fn("* Verifying cluster certificate files")
1992 for cert_filename in constants.ALL_CERT_FILES:
1993 (errcode, msg) = _VerifyCertificate(cert_filename)
1994 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1996 feedback_fn("* Verifying hypervisor parameters")
1998 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1999 self.all_inst_info.values()))
2001 feedback_fn("* Verifying all nodes belong to an existing group")
2003 # We do this verification here because, should this bogus circumstance
2004 # occur, it would never be caught by VerifyGroup, which only acts on
2005 # nodes/instances reachable from existing node groups.
2007 dangling_nodes = set(node.name for node in self.all_node_info.values()
2008 if node.group not in self.all_group_info)
2010 dangling_instances = {}
2011 no_node_instances = []
2013 for inst in self.all_inst_info.values():
2014 if inst.primary_node in dangling_nodes:
2015 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2016 elif inst.primary_node not in self.all_node_info:
2017 no_node_instances.append(inst.name)
    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]
2026 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2028 "the following nodes (and their instances) belong to a non"
2029 " existing group: %s", utils.CommaJoin(pretty_dangling))
2031 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2033 "the following instances have a non-existing primary-node:"
2034 " %s", utils.CommaJoin(no_node_instances))
2039 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2040 """Verifies the status of a node group.
2043 HPATH = "cluster-verify"
2044 HTYPE = constants.HTYPE_CLUSTER
2047 _HOOKS_INDENT_RE = re.compile("^", re.M)
2049 class NodeImage(object):
2050 """A class representing the logical and physical status of a node.
2053 @ivar name: the node name to which this object refers
2054 @ivar volumes: a structure as returned from
2055 L{ganeti.backend.GetVolumeList} (runtime)
2056 @ivar instances: a list of running instances (runtime)
2057 @ivar pinst: list of configured primary instances (config)
2058 @ivar sinst: list of configured secondary instances (config)
2059 @ivar sbp: dictionary of {primary-node: list of instances} for all
2060 instances for which this node is secondary (config)
2061 @ivar mfree: free memory, as reported by hypervisor (runtime)
2062 @ivar dfree: free disk, as reported by the node (runtime)
2063 @ivar offline: the offline status (config)
2064 @type rpc_fail: boolean
2065 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2066 not whether the individual keys were correct) (runtime)
2067 @type lvm_fail: boolean
2068 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2069 @type hyp_fail: boolean
2070 @ivar hyp_fail: whether the RPC call didn't return the instance list
2071 @type ghost: boolean
2072 @ivar ghost: whether this is a known node or not (config)
2073 @type os_fail: boolean
2074 @ivar os_fail: whether the RPC call didn't return valid OS data
2076 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2077 @type vm_capable: boolean
2078 @ivar vm_capable: whether the node can host instances
    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}
2099 def ExpandNames(self):
2100 # This raises errors.OpPrereqError on its own:
2101 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2103 # Get instances in node group; this is unsafe and needs verification later
    inst_names = \
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2107 self.needed_locks = {
2108 locking.LEVEL_INSTANCE: inst_names,
2109 locking.LEVEL_NODEGROUP: [self.group_uuid],
2110 locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()
2115 def DeclareLocks(self, level):
2116 if level == locking.LEVEL_NODE:
2117 # Get members of node group; this is unsafe and needs verification later
2118 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2120 all_inst_info = self.cfg.GetAllInstancesInfo()
2122 # In Exec(), we warn about mirrored instances that have primary and
2123 # secondary living in separate node groups. To fully verify that
2124 # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2128 # Important: access only the instances whose lock is owned
2129 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2130 nodes.update(all_inst_info[inst].secondary_nodes)
2132 self.needed_locks[locking.LEVEL_NODE] = nodes
2134 def CheckPrereq(self):
2135 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2136 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2138 group_nodes = set(self.group_info.members)
    group_instances = \
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)

    unlocked_nodes = \
      group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2145 unlocked_instances = \
2146 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes),
                                 errors.ECODE_STATE)
2153 if unlocked_instances:
2154 raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances),
                                 errors.ECODE_STATE)
2158 self.all_node_info = self.cfg.GetAllNodesInfo()
2159 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2161 self.my_node_names = utils.NiceSort(group_nodes)
2162 self.my_inst_names = utils.NiceSort(group_instances)
2164 self.my_node_info = dict((name, self.all_node_info[name])
2165 for name in self.my_node_names)
2167 self.my_inst_info = dict((name, self.all_inst_info[name])
2168 for name in self.my_inst_names)
2170 # We detect here the nodes that will need the extra RPC calls for verifying
2171 # split LV volumes; they should be locked.
2172 extra_lv_nodes = set()
2174 for inst in self.my_inst_info.values():
2175 if inst.disk_template in constants.DTS_INT_MIRROR:
2176 for nname in inst.all_nodes:
2177 if self.all_node_info[nname].group != self.group_uuid:
2178 extra_lv_nodes.add(nname)
2180 unlocked_lv_nodes = \
2181 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2183 if unlocked_lv_nodes:
2184 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes),
                                 errors.ECODE_STATE)
2187 self.extra_lv_nodes = list(extra_lv_nodes)
2189 def _VerifyNode(self, ninfo, nresult):
2190 """Perform some basic validation on data returned from a node.
    - check the result data structure is well formed and has all the
      mandatory fields
    - check ganeti version
2196 @type ninfo: L{objects.Node}
2197 @param ninfo: the node to check
2198 @param nresult: the results from the node
2200 @return: whether overall this call was successful (and we can expect
        reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2207 # main result, nresult should be a non-empty dict
2208 test = not nresult or not isinstance(nresult, dict)
2209 _ErrorIf(test, constants.CV_ENODERPC, node,
2210 "unable to verify node: no data returned")
2214 # compares ganeti version
2215 local_version = constants.PROTOCOL_VERSION
2216 remote_version = nresult.get("version", None)
2217 test = not (remote_version and
2218 isinstance(remote_version, (list, tuple)) and
2219 len(remote_version) == 2)
2220 _ErrorIf(test, constants.CV_ENODERPC, node,
2221 "connection to node returned invalid data")
2225 test = local_version != remote_version[0]
2226 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2227 "incompatible protocol versions: master %s,"
2228 " node %s", local_version, remote_version[0])
2232 # node seems compatible, we can actually try to look into its results
2234 # full package version
2235 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2236 constants.CV_ENODEVERSION, node,
2237 "software version mismatch: master %s, node %s",
2238 constants.RELEASE_VERSION, remote_version[1],
2239 code=self.ETYPE_WARNING)
2241 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2242 if ninfo.vm_capable and isinstance(hyp_result, dict):
2243 for hv_name, hv_result in hyp_result.iteritems():
2244 test = hv_result is not None
2245 _ErrorIf(test, constants.CV_ENODEHV, node,
2246 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2248 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2249 if ninfo.vm_capable and isinstance(hvp_result, list):
2250 for item, hv_name, hv_result in hvp_result:
2251 _ErrorIf(True, constants.CV_ENODEHV, node,
2252 "hypervisor %s parameter verify failure (source %s): %s",
2253 hv_name, item, hv_result)
2255 test = nresult.get(constants.NV_NODESETUP,
2256 ["Missing NODESETUP results"])
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True
2262 def _VerifyNodeTime(self, ninfo, nresult,
2263 nvinfo_starttime, nvinfo_endtime):
2264 """Check the node time.
2266 @type ninfo: L{objects.Node}
2267 @param ninfo: the node to check
2268 @param nresult: the remote results for the node
2269 @param nvinfo_starttime: the start time of the RPC call
2270 @param nvinfo_endtime: the end time of the RPC call
    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
      return
2283 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2284 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2285 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2286 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
2294 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2295 """Check the node LVM results.
2297 @type ninfo: L{objects.Node}
2298 @param ninfo: the node to check
2299 @param nresult: the remote results for the node
2300 @param vg_name: the configured VG name
    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2309 # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = vglist is None
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PVs)
      for _, pvname, owner_vg in pvlist:
2327 test = ":" in pvname
2328 _ErrorIf(test, constants.CV_ENODELVM, node,
2329 "Invalid character ':' in PV '%s' of VG '%s'",
2332 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2333 """Check the node bridges.
2335 @type ninfo: L{objects.Node}
2336 @param ninfo: the node to check
2337 @param nresult: the remote results for the node
2338 @param bridges: the expected list of bridges
    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2347 missing = nresult.get(constants.NV_BRIDGES, None)
2348 test = not isinstance(missing, list)
2349 _ErrorIf(test, constants.CV_ENODENET, node,
2350 "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2353 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2355 def _VerifyNodeUserScripts(self, ninfo, nresult):
2356 """Check the results of user scripts presence and executability on the node
2358 @type ninfo: L{objects.Node}
2359 @param ninfo: the node to check
2360 @param nresult: the remote results for the node
    """
    node = ninfo.name

    test = not constants.NV_USERSCRIPTS in nresult
2366 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2367 "did not return user scripts information")
2369 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2371 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2372 "user scripts not present or not executable: %s" %
2373 utils.CommaJoin(sorted(broken_scripts)))
2375 def _VerifyNodeNetwork(self, ninfo, nresult):
2376 """Check the node network connectivity results.
2378 @type ninfo: L{objects.Node}
2379 @param ninfo: the node to check
2380 @param nresult: the remote results for the node
    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2386 test = constants.NV_NODELIST not in nresult
2387 _ErrorIf(test, constants.CV_ENODESSH, node,
2388 "node hasn't returned node ssh connectivity data")
2390 if nresult[constants.NV_NODELIST]:
2391 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2392 _ErrorIf(True, constants.CV_ENODESSH, node,
2393 "ssh communication with node '%s': %s", a_node, a_msg)
2395 test = constants.NV_NODENETTEST not in nresult
2396 _ErrorIf(test, constants.CV_ENODENET, node,
2397 "node hasn't returned node tcp connectivity data")
2399 if nresult[constants.NV_NODENETTEST]:
2400 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
      for anode in nlist:
        _ErrorIf(True, constants.CV_ENODENET, node,
2403 "tcp communication with node '%s': %s",
2404 anode, nresult[constants.NV_NODENETTEST][anode])
2406 test = constants.NV_MASTERIP not in nresult
2407 _ErrorIf(test, constants.CV_ENODENET, node,
2408 "node hasn't returned node master IP reachability data")
2410 if not nresult[constants.NV_MASTERIP]:
2411 if node == self.master_node:
2412 msg = "the master node cannot reach the master IP (not configured?)"
      else:
        msg = "cannot reach the master IP"
2415 _ErrorIf(True, constants.CV_ENODENET, node, msg)
  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
2419 """Verify an instance.
2421 This function checks to see if the required block devices are
2422 available on the instance's node.
    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2426 node_current = instanceconfig.primary_node
2428 node_vol_should = {}
2429 instanceconfig.MapLVsByNode(node_vol_should)
2431 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2432 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2433 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2435 for node in node_vol_should:
2436 n_img = node_image[node]
2437 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
2441 test = volume not in n_img.volumes
2442 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2443 "volume %s missing on node %s", volume, node)
2445 if instanceconfig.admin_state == constants.ADMINST_UP:
2446 pri_img = node_image[node_current]
2447 test = instance not in pri_img.instances and not pri_img.offline
2448 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2449 "instance not running on its primary node %s",
2452 diskdata = [(nname, success, status, idx)
2453 for (nname, disks) in diskstatus.items()
2454 for idx, (success, status) in enumerate(disks)]
2456 for nname, success, bdev_status, idx in diskdata:
2457 # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
2460 bad_snode = snode.ghost or snode.offline
2461 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2462 not success and not bad_snode,
2463 constants.CV_EINSTANCEFAULTYDISK, instance,
2464 "couldn't retrieve status for disk/%s on %s: %s",
2465 idx, nname, bdev_status)
2466 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2467 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2468 constants.CV_EINSTANCEFAULTYDISK, instance,
2469 "disk/%s on %s is faulty", idx, nname)
2471 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2472 """Verify if there are any unknown volumes in the cluster.
2474 The .os, .swap and backup volumes are ignored. All other volumes are
2475 reported as unknown.
2477 @type reserved: L{ganeti.utils.FieldSet}
2478 @param reserved: a FieldSet of reserved volume names
    """
    for node, n_img in node_image.items():
2482 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2483 self.all_node_info[node].group != self.group_uuid):
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
2487 test = ((node not in node_vol_should or
2488 volume not in node_vol_should[node]) and
2489 not reserved.Matches(volume))
2490 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2491 "volume %s is unknown", volume)
2493 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2494 """Verify N+1 Memory Resilience.
2496 Check that if one single node dies we can still start all the
2497 instances it was primary for.
    """
    cluster_info = self.cfg.GetClusterInfo()
2501 for node, n_img in node_image.items():
2502 # This code checks that every node which is now listed as
2503 # secondary has enough memory to host all instances it is
2504 # supposed to should a single other node in the cluster fail.
2505 # FIXME: not ready for failover to an arbitrary node
2506 # FIXME: does not support file-backed instances
2507 # WARNING: we currently take into account down instances as well
2508 # as up ones, considering that even if they're down someone
2509 # might want to start them even in the event of a node failure.
2510 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2511 # we're skipping nodes marked offline and nodes in other groups from
2512 # the N+1 warning, since most likely we don't have good memory
      # information from them; we already list instances living on such
2514 # nodes, and that's enough warning
        continue
      #TODO(dynmem): also consider ballooning out other instances
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
2520 bep = cluster_info.FillBE(instance_cfg[instance])
2521 if bep[constants.BE_AUTO_BALANCE]:
2522 needed_mem += bep[constants.BE_MINMEM]
2523 test = n_img.mfree < needed_mem
2524 self._ErrorIf(test, constants.CV_ENODEN1, node,
2525 "not enough memory to accomodate instance failovers"
2526 " should node %s fail (%dMiB needed, %dMiB available)",
2527 prinode, needed_mem, n_img.mfree)
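  # Editor's note, a numeric sketch (hypothetical values): if this node is
  # secondary for inst1 (BE_MINMEM=1024) and inst2 (BE_MINMEM=2048), both
  # auto-balanced with primary node1, then needed_mem for prinode "node1" is
  # 3072 MiB; an mfree below that triggers a CV_ENODEN1 report for this node.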
  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2531 (files_all, files_opt, files_mc, files_vm)):
2532 """Verifies file checksums collected from all nodes.
2534 @param errorif: Callback for reporting errors
2535 @param nodeinfo: List of L{objects.Node} objects
2536 @param master_node: Name of master node
2537 @param all_nvinfo: RPC results
    # Define functions determining which nodes to consider for a file
    files2nodefn = [
      (files_all, None),
      (files_mc, lambda node: (node.master_candidate or
                               node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),
      ]

    # Build mapping from filename to list of nodes which should have the file
    nodefiles = {}
    for (files, fn) in files2nodefn:
      if fn is None:
        filenodes = nodeinfo
      else:
        filenodes = filter(fn, nodeinfo)
2555 nodefiles.update((filename,
2556 frozenset(map(operator.attrgetter("name"), filenodes)))
2557 for filename in files)
2559 assert set(nodefiles) == (files_all | files_mc | files_vm)
2561 fileinfo = dict((filename, {}) for filename in nodefiles)
2562 ignore_nodes = set()
2564 for node in nodeinfo:
      if node.offline:
        ignore_nodes.add(node.name)
        continue
2569 nresult = all_nvinfo[node.name]
      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2576 test = not (node_files and isinstance(node_files, dict))
2577 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2578 "Node did not return file checksum data")
      if test:
        ignore_nodes.add(node.name)
        continue
2583 # Build per-checksum mapping from filename to nodes having it
2584 for (filename, checksum) in node_files.items():
2585 assert filename in nodefiles
2586 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2588 for (filename, checksums) in fileinfo.items():
2589 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2591 # Nodes having the file
2592 with_file = frozenset(node_name
2593 for nodes in fileinfo[filename].values()
2594 for node_name in nodes) - ignore_nodes
2596 expected_nodes = nodefiles[filename] - ignore_nodes
2598 # Nodes missing file
2599 missing_file = expected_nodes - with_file
      if filename in files_opt:
        # All or no nodes
        errorif(missing_file and missing_file != expected_nodes,
2604 constants.CV_ECLUSTERFILECHECK, None,
2605 "File %s is optional, but it must exist on all or no"
2606 " nodes (not found on %s)",
2607 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2610 "File %s is missing from node(s) %s", filename,
2611 utils.CommaJoin(utils.NiceSort(missing_file)))
2613 # Warn if a node has a file it shouldn't
      unexpected = with_file - expected_nodes
      errorif(unexpected,
              constants.CV_ECLUSTERFILECHECK, None,
2617 "File %s should not exist on node(s) %s",
2618 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2620 # See if there are multiple versions of the file
2621 test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
2624 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2625 for (idx, (checksum, nodes)) in
2626 enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2631 "File %s found with %s different checksums (%s)",
2632 filename, len(checksums), "; ".join(variants))
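  # Editor's note, illustrative only: fileinfo maps each filename to a dict
  # of checksum -> set of node names, e.g. (hypothetical values)
  #
  #   fileinfo["/var/lib/ganeti/config.data"] = {
  #     "0abc...": set(["node1", "node2"]),
  #     "1def...": set(["node3"]),   # a second checksum is flagged as variant
  #   }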
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.
2638 @type ninfo: L{objects.Node}
2639 @param ninfo: the node to check
2640 @param nresult: the remote results for the node
2641 @param instanceinfo: the dict of instances
2642 @param drbd_helper: the configured DRBD usermode helper
2643 @param drbd_map: the DRBD map as returned by
2644 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2652 test = (helper_result is None)
2653 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2654 "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2659 "drbd usermode helper check unsuccessful: %s", payload)
2660 test = status and (payload != drbd_helper)
2661 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2662 "wrong drbd usermode helper: %s", payload)
2664 # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
2667 test = instance not in instanceinfo
2668 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2669 "ghost instance '%s' in temporary DRBD map", instance)
2670 # ghost instance should not be running, but otherwise we
2671 # don't give double warnings (both ghost instance and
2672 # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
2677 node_drbd[minor] = (instance.name,
2678 instance.admin_state == constants.ADMINST_UP)
2680 # and now check them
2681 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2682 test = not isinstance(used_minors, (tuple, list))
2683 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2684 "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return
2689 for minor, (iname, must_exist) in node_drbd.items():
2690 test = minor not in used_minors and must_exist
2691 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2692 "drbd minor %d of instance %s is not active", minor, iname)
2693 for minor in used_minors:
2694 test = minor not in node_drbd
2695 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2696 "unallocated drbd minor %d is in use", minor)
2698 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2699 """Builds the node OS structures.
2701 @type ninfo: L{objects.Node}
2702 @param ninfo: the node to check
2703 @param nresult: the remote results for the node
2704 @param nimg: the node image object
    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2710 remote_os = nresult.get(constants.NV_OSLIST, None)
2711 test = (not isinstance(remote_os, list) or
2712 not compat.all(isinstance(v, list) and len(v) == 7
2713 for v in remote_os))
2715 _ErrorIf(test, constants.CV_ENODEOS, node,
2716 "node hasn't returned valid OS data")
2725 for (name, os_path, status, diagnose,
2726 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
      if name not in os_dict:
        os_dict[name] = []
2731 # parameters is a list of lists instead of list of tuples due to
2732 # JSON lacking a real tuple type, fix it:
2733 parameters = [tuple(v) for v in parameters]
2734 os_dict[name].append((os_path, status, diagnose,
2735 set(variants), set(parameters), set(api_ver)))
2737 nimg.oslist = os_dict
2739 def _VerifyNodeOS(self, ninfo, nimg, base):
2740 """Verifies the node OS list.
2742 @type ninfo: L{objects.Node}
2743 @param ninfo: the node to check
2744 @param nimg: the node image object
2745 @param base: the 'template' node we match against (e.g. from the master)
    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2751 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2753 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2754 for os_name, os_data in nimg.oslist.items():
2755 assert os_data, "Empty OS status for OS %s?!" % os_name
2756 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2757 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2758 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2759 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2760 "OS '%s' has multiple entries (first one shadows the rest): %s",
2761 os_name, utils.CommaJoin([v[0] for v in os_data]))
2762 # comparisons with the 'base' image
2763 test = os_name not in base.oslist
2764 _ErrorIf(test, constants.CV_ENODEOS, node,
2765 "Extra OS %s not present on reference node (%s)",
2769 assert base.oslist[os_name], "Base node has empty OS status?"
2770 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
2774 for kind, a, b in [("API version", f_api, b_api),
2775 ("variants list", f_var, b_var),
2776 ("parameters", beautify_params(f_param),
2777 beautify_params(b_param))]:
2778 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2779 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2780 kind, os_name, base.name,
2781 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2783 # check any missing OSes
2784 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2785 _ErrorIf(missing, constants.CV_ENODEOS, node,
2786 "OSes present on reference node %s but missing on this node: %s",
2787 base.name, utils.CommaJoin(missing))
2789 def _VerifyOob(self, ninfo, nresult):
2790 """Verifies out of band functionality of a node.
2792 @type ninfo: L{objects.Node}
2793 @param ninfo: the node to check
2794 @param nresult: the remote results for the node
    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
2799 # as the oob helper is invoked on the master
2800 if ((ninfo.master_candidate or ninfo.master_capable) and
2801 constants.NV_OOB_PATHS in nresult):
2802 for path_result in nresult[constants.NV_OOB_PATHS]:
2803 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2805 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2806 """Verifies and updates the node volume data.
2808 This function will update a L{NodeImage}'s internal structures
2809 with data from the remote call.
2811 @type ninfo: L{objects.Node}
2812 @param ninfo: the node to check
2813 @param nresult: the remote results for the node
2814 @param nimg: the node image object
2815 @param vg_name: the configured VG name
    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
2826 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2827 utils.SafeEncode(lvdata))
2828 elif not isinstance(lvdata, dict):
2829 _ErrorIf(True, constants.CV_ENODELVM, node,
2830 "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False
2835 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2836 """Verifies and updates the node instance list.
2838 If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list.
2842 @type ninfo: L{objects.Node}
2843 @param ninfo: the node to check
2844 @param nresult: the remote results for the node
2845 @param nimg: the node image object
    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2849 test = not isinstance(idata, list)
2850 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2851 "rpc call to node failed (instancelist): %s",
2852 utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata
2858 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2859 """Verifies and computes a node information map
2861 @type ninfo: L{objects.Node}
2862 @param ninfo: the node to check
2863 @param nresult: the remote results for the node
2864 @param nimg: the node image object
2865 @param vg_name: the configured VG name
    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2871 # try to read free memory (from the hypervisor)
2872 hv_info = nresult.get(constants.NV_HVINFO, None)
2873 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2874 _ErrorIf(test, constants.CV_ENODEHV, node,
2875 "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
2879 except (ValueError, TypeError):
2880 _ErrorIf(True, constants.CV_ENODERPC, node,
2881 "node returned invalid nodeinfo, check hypervisor")
2883 # FIXME: devise a free space model for file based instances as well
2884 if vg_name is not None:
2885 test = (constants.NV_VGLIST not in nresult or
2886 vg_name not in nresult[constants.NV_VGLIST])
2887 _ErrorIf(test, constants.CV_ENODELVM, node,
2888 "node didn't return data for the volume group '%s'"
2889 " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2893 except (ValueError, TypeError):
2894 _ErrorIf(True, constants.CV_ENODERPC, node,
2895 "node returned invalid LVM info, check LVM status")
2897 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2898 """Gets per-disk status information for all instances.
2900 @type nodelist: list of strings
2901 @param nodelist: Node names
2902 @type node_image: dict of (name, L{objects.Node})
2903 @param node_image: Node objects
2904 @type instanceinfo: dict of (name, L{objects.Instance})
2905 @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
2907 @return: a dictionary of per-instance dictionaries with nodes as
2908 keys and disk information as values; the disk information is a
2909 list of tuples (success, payload)
    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    node_disks = {}
    node_disks_devonly = {}
2916 diskless_instances = set()
2917 diskless = constants.DT_DISKLESS
2919 for nname in nodelist:
2920 node_instances = list(itertools.chain(node_image[nname].pinst,
2921 node_image[nname].sinst))
2922 diskless_instances.update(inst for inst in node_instances
2923 if instanceinfo[inst].disk_template == diskless)
2924 disks = [(inst, disk)
2925 for inst in node_instances
2926 for disk in instanceinfo[inst].disks]
      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks
      # _AnnotateDiskParams already makes copies of the disks
      devonly = []
      for (inst, dev) in disks:
2937 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2938 self.cfg.SetDiskID(anno_disk, nname)
2939 devonly.append(anno_disk)
2941 node_disks_devonly[nname] = devonly
2943 assert len(node_disks) == len(node_disks_devonly)
2945 # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
2954 disks = node_disks[nname]
      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
2962 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
2968 for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))
2976 for ((inst, _), status) in zip(disks, data):
2977 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2979 # Add empty entries for diskless instances.
2980 for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}
2984 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2985 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2986 compat.all(isinstance(s, (tuple, list)) and
2987 len(s) == 2 for s in statuses)
2988 for inst, nnames in instdisk.items()
2989 for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk
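  # Editor's note, illustrative only: the returned structure, for a single
  # DRBD instance with one disk mirrored over node1/node2, would be
  # (hypothetical payloads):
  #
  #   instdisk = {
  #     "inst1": {"node1": [(True, status1)],
  #               "node2": [(True, status2)]},
  #   }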
  @staticmethod
  def _SshNodeSelector(group_uuid, all_nodes):
2996 """Create endless iterators for all potential SSH check hosts.
    """
    nodes = [node for node in all_nodes
             if (node.group != group_uuid and
                 not node.offline)]
    keyfunc = operator.attrgetter("group")
3004 return map(itertools.cycle,
3005 [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
                                                  keyfunc)])

  @classmethod
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3011 """Choose which nodes should talk to which other nodes.
    We will make nodes contact all nodes in their group, and one node from
    every other group.

    @warning: This algorithm has a known issue if one node group is much
3017 smaller than others (e.g. just one node). In such a case all other
3018 nodes will talk to the single node.
    """
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3022 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3024 return (online_nodes,
3025 dict((name, sorted([i.next() for i in sel]))
3026 for name in online_nodes))
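  # Editor's note, illustrative only: with the verified group containing
  # nodes A and B and another group containing C and D, the mapping built
  # above could be {"A": ["C"], "B": ["D"]} (hypothetical names): each online
  # node is assigned one node per foreign group, round-robin, so the
  # cross-group SSH checks are spread over the remote groups.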
  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run in the post phase; their failure is logged
    in the verify output and makes the verification fail.

    """
3036 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3039 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3040 for node in self.my_node_info.values())
    return env

  def BuildHooksNodes(self):
3045 """Build hooks nodes.
    """
    return ([], self.my_node_names)
3050 def Exec(self, feedback_fn):
3051 """Verify integrity of the node group, performing various test on nodes.
3054 # This method has too many local variables. pylint: disable=R0914
3055 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3057 if not self.my_node_names:
3059 feedback_fn("* Empty node group, skipping verification")
3063 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3064 verbose = self.op.verbose
3065 self._feedback_fn = feedback_fn
3067 vg_name = self.cfg.GetVGName()
3068 drbd_helper = self.cfg.GetDRBDHelper()
3069 cluster = self.cfg.GetClusterInfo()
3070 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3071 hypervisors = cluster.enabled_hypervisors
3072 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3074 i_non_redundant = [] # Non redundant instances
3075 i_non_a_balanced = [] # Non auto-balanced instances
3076 i_offline = 0 # Count of offline instances
3077 n_offline = 0 # Count of offline nodes
3078 n_drained = 0 # Count of nodes being drained
3079 node_vol_should = {}
3081 # FIXME: verify OS list
    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)
3086 # do local checksums
3087 master_node = self.master_node = self.cfg.GetMasterNode()
3088 master_ip = self.cfg.GetMasterIP()
3090 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
    user_scripts = []
    if self.cfg.GetUseExternalMipScript():
3094 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3096 node_verify_param = {
3097 constants.NV_FILELIST:
3098 utils.UniqueSequence(filename
3099 for files in filemap
3100 for filename in files),
3101 constants.NV_NODELIST:
3102 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3103 self.all_node_info.values()),
3104 constants.NV_HYPERVISOR: hypervisors,
3105 constants.NV_HVPARAMS:
3106 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3107 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3108 for node in node_data_list
3109 if not node.offline],
3110 constants.NV_INSTANCELIST: hypervisors,
3111 constants.NV_VERSION: None,
3112 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3113 constants.NV_NODESETUP: None,
3114 constants.NV_TIME: None,
3115 constants.NV_MASTERIP: (master_node, master_ip),
3116 constants.NV_OSLIST: None,
3117 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3118 constants.NV_USERSCRIPTS: user_scripts,
      }

    if vg_name is not None:
3122 node_verify_param[constants.NV_VGLIST] = None
3123 node_verify_param[constants.NV_LVLIST] = vg_name
3124 node_verify_param[constants.NV_PVLIST] = [vg_name]
3125 node_verify_param[constants.NV_DRBDLIST] = None
    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3134 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3135 bridges.add(default_nicpp[constants.NIC_LINK])
3136 for instance in self.my_inst_info.values():
3137 for nic in instance.nics:
3138 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3139 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3140 bridges.add(full_nic[constants.NIC_LINK])
    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
3145 # Build our expected cluster state
3146 node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
3149 for node in node_data_list)
    oob_paths = []
    for node in self.all_node_info.values():
3154 path = _SupportsOob(self.cfg, node)
3155 if path and path not in oob_paths:
3156 oob_paths.append(path)
    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3161 for instance in self.my_inst_names:
3162 inst_config = self.my_inst_info[instance]
      if inst_config.admin_state == constants.ADMINST_OFFLINE:
        i_offline += 1

      for nname in inst_config.all_nodes:
3167 if nname not in node_image:
3168 gnode = self.NodeImage(name=nname)
3169 gnode.ghost = (nname not in self.all_node_info)
3170 node_image[nname] = gnode
3172 inst_config.MapLVsByNode(node_vol_should)
3174 pnode = inst_config.primary_node
3175 node_image[pnode].pinst.append(instance)
3177 for snode in inst_config.secondary_nodes:
3178 nimg = node_image[snode]
3179 nimg.sinst.append(instance)
3180 if pnode not in nimg.sbp:
3181 nimg.sbp[pnode] = []
3182 nimg.sbp[pnode].append(instance)
3184 # At this point, we have the in-memory data structures complete,
3185 # except for the runtime information, which we'll gather next
3187 # Due to the way our RPC system works, exact response times cannot be
3188 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
3192 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
3195 nvinfo_endtime = time.time()
3197 if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
3200 {constants.NV_LVLIST: vg_name},
3201 self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}
3205 all_drbd_map = self.cfg.ComputeDRBDMap()
3207 feedback_fn("* Gathering disk information (%s nodes)" %
3208 len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)
3212 feedback_fn("* Verifying configuration file consistency")
3214 # If not all nodes are being checked, we need to make sure the master node
3215 # and a non-checked vm_capable node are in the list.
3216 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
3219 vf_node_info = list(self.my_node_info.values())
3220 additional_nodes = []
3221 if master_node not in self.my_node_info:
3222 additional_nodes.append(master_node)
3223 vf_node_info.append(self.all_node_info[master_node])
3224 # Add the first vm_capable node we find which is not included,
3225 # excluding the master node (which we already have)
3226 for node in absent_nodes:
3227 nodeinfo = self.all_node_info[node]
3228 if (nodeinfo.vm_capable and not nodeinfo.offline and
3229 node != master_node):
3230 additional_nodes.append(node)
3231 vf_node_info.append(self.all_node_info[node])
3233 key = constants.NV_FILELIST
3234 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3235 {key: node_verify_param[key]},
3236 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()
3241 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3243 feedback_fn("* Verifying node status")
    refos_img = None

    for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]
3253 feedback_fn("* Skipping offline node %s" % (node,))
      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3269 msg = all_nvinfo[node].fail_msg
3270 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
               msg)
      if msg:
        nimg.rpc_fail = True
        continue
3276 nresult = all_nvinfo[node].payload
3278 nimg.call_ok = self._VerifyNode(node_i, nresult)
3279 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3280 self._VerifyNodeNetwork(node_i, nresult)
3281 self._VerifyNodeUserScripts(node_i, nresult)
3282 self._VerifyOob(node_i, nresult)
      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             all_drbd_map)
3289 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3290 self._UpdateNodeInstances(node_i, nresult, nimg)
3291 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3292 self._UpdateNodeOS(node_i, nresult, nimg)
3294 if not nimg.os_fail:
3295 if refos_img is None:
          refos_img = nimg
        self._VerifyNodeOS(node_i, nimg, refos_img)
3298 self._VerifyNodeBridges(node_i, nresult, bridges)
      # Check whether all running instances are primary for the node. (This
3301 # can no longer be done from _VerifyInstance below, since some of the
3302 # wrong instances could be from other node groups.)
3303 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3305 for inst in non_primary_inst:
3306 test = inst in self.all_inst_info
3307 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3308 "instance should not run on node %s", node_i.name)
3309 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3310 "node is running unknown instance %s", inst)
3312 for node, result in extra_lv_nvinfo.items():
3313 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3314 node_image[node], vg_name)
3316 feedback_fn("* Verifying instance status")
3317 for instance in self.my_inst_names:
3319 feedback_fn("* Verifying instance %s" % instance)
3320 inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
3323 inst_nodes_offline = []
3325 pnode = inst_config.primary_node
3326 pnode_img = node_image[pnode]
3327 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3328 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3329 " primary node failed", instance)
      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
               pnode_img.offline,
               constants.CV_EINSTANCEBADNODE, instance,
3334 "instance is marked as running and lives on offline node %s",
3335 inst_config.primary_node)
3337 # If the instance is non-redundant we cannot survive losing its primary
3338 # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
3342 if not inst_config.secondary_nodes:
3343 i_non_redundant.append(instance)
3345 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3346 constants.CV_EINSTANCELAYOUT,
3347 instance, "instance has multiple secondary nodes: %s",
3348 utils.CommaJoin(inst_config.secondary_nodes),
3349 code=self.ETYPE_WARNING)
3351 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3352 pnode = inst_config.primary_node
3353 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3354 instance_groups = {}
3356 for node in instance_nodes:
3357 instance_groups.setdefault(self.all_node_info[node].group,
3361 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3362 # Sort so that we always list the primary node first.
3363 for group, nodes in sorted(instance_groups.items(),
3364 key=lambda (_, nodes): pnode in nodes,
                                   reverse=True)]

        self._ErrorIf(len(instance_groups) > 1,
3368 constants.CV_EINSTANCESPLITGROUPS,
3369 instance, "instance has primary and secondary nodes in"
3370 " different groups: %s", utils.CommaJoin(pretty_list),
3371 code=self.ETYPE_WARNING)
3373 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3374 i_non_a_balanced.append(instance)
3376 for snode in inst_config.secondary_nodes:
3377 s_img = node_image[snode]
3378 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3379 snode, "instance %s, connection to secondary node failed",
                 instance)
        if s_img.offline:
          inst_nodes_offline.append(snode)
3385 # warn that the instance lives on offline nodes
3386 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3387 "instance has offline secondary node(s) %s",
3388 utils.CommaJoin(inst_nodes_offline))
3389 # ... or ghost/non-vm_capable nodes
3390 for node in inst_config.all_nodes:
3391 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3392 instance, "instance lives on ghost node %s", node)
3393 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3394 instance, "instance lives on non-vm_capable node %s", node)
3396 feedback_fn("* Verifying orphan volumes")
3397 reserved = utils.FieldSet(*cluster.reserved_lvs)
3399 # We will get spurious "unknown volume" warnings if any node of this group
3400 # is secondary for an instance whose primary is in another group. To avoid
3401 # them, we find these instances and add their volumes to node_vol_should.
3402 for inst in self.all_inst_info.values():
3403 for secondary in inst.secondary_nodes:
3404 if (secondary in self.my_node_info
3405 and inst.name not in self.my_inst_info):
3406 inst.MapLVsByNode(node_vol_should)
3409 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3411 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3412 feedback_fn("* Verifying N+1 Memory redundancy")
3413 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3415 feedback_fn("* Other Notes")
3417 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3418 % len(i_non_redundant))
3420 if i_non_a_balanced:
3421 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3422 % len(i_non_a_balanced))
    if i_offline:
      feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
    if n_offline:
      feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
    if n_drained:
      feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3436 """Analyze the post-hooks' result
3438 This method analyses the hook result, handles it, and sends some
3439 nicely-formatted feedback back to the user.
3441 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3442 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3443 @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
3445 @param lu_result: previous Exec result
3446 @return: the new Exec result, based on the previous result
    """
    # We only really run POST phase hooks, only for non-empty groups,
3451 # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
3456 # Used to change hooks' output to proper indentation
3457 feedback_fn("* Hooks Results")
3458 assert hooks_results, "invalid result from hooks"
3460 for node_name in hooks_results:
3461 res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
3464 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3465 "Communication failure in hooks execution: %s", msg)
3466 if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error.
          continue

        for script, hkr, output in res.payload:
3471 test = hkr == constants.HKR_FAIL
3472 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3473 "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = False

      return lu_result
3482 class LUClusterVerifyDisks(NoHooksLU):
3483 """Verifies the cluster disks status.
3488 def ExpandNames(self):
3489 self.share_locks = _ShareAll()
3490 self.needed_locks = {
3491 locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
3495 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3497 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3498 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3499 for group in group_names])
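  # Editor's note, illustrative only: on a cluster with node groups
  # "default" and "backup", the result above wraps
  #
  #   [[OpGroupVerifyDisks(group_name="default")],
  #    [OpGroupVerifyDisks(group_name="backup")]]
  #
  # i.e. one single-opcode job per node group, submitted via ResultWithJobs.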
3502 class LUGroupVerifyDisks(NoHooksLU):
3503 """Verifies the status of all disks in a node group.
3508 def ExpandNames(self):
3509 # Raises errors.OpPrereqError on its own if group can't be found
3510 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3512 self.share_locks = _ShareAll()
3513 self.needed_locks = {
3514 locking.LEVEL_INSTANCE: [],
3515 locking.LEVEL_NODEGROUP: [],
3516 locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
3520 if level == locking.LEVEL_INSTANCE:
3521 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3523 # Lock instances optimistically, needs verification once node and group
3524 # locks have been acquired
3525 self.needed_locks[locking.LEVEL_INSTANCE] = \
3526 self.cfg.GetNodeGroupInstances(self.group_uuid)
3528 elif level == locking.LEVEL_NODEGROUP:
3529 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3531 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3532 set([self.group_uuid] +
3533 # Lock all groups used by instances optimistically; this requires
3534 # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3538 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3540 elif level == locking.LEVEL_NODE:
3541 # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3544 self._LockInstancesNodes()
3546 # Lock all nodes in group to be verified
3547 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3548 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3549 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3551 def CheckPrereq(self):
3552 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3553 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3554 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3556 assert self.group_uuid in owned_groups
3558 # Check if locked instances are still correct
3559 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3561 # Get instance information
3562 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3564 # Check if node groups for locked instances are still correct
3565 _CheckInstancesNodeGroups(self.cfg, self.instances,
3566 owned_groups, owned_nodes, self.group_uuid)
3568 def Exec(self, feedback_fn):
3569 """Verify integrity of cluster disks.
3571 @rtype: tuple of three items
3572 @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}
3581 nv_dict = _MapInstanceDisksToNodes(
3582 [inst for inst in self.instances.values()
3583 if inst.admin_state == constants.ADMINST_UP])
    if nv_dict:
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))
3589 node_lvs = self.rpc.call_lv_list(nodes, [])
3591 for (node, node_res) in node_lvs.items():
3592 if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
3597 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

        for lv_name, (_, _, lv_online) in node_res.payload.items():
3602 inst = nv_dict.pop((node, lv_name), None)
3603 if not (lv_online or inst is None):
3604 res_instances.add(inst)
      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.iteritems():
3609 res_missing.setdefault(inst, []).append(list(key))
3611 return (res_nodes, list(res_instances), res_missing)
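  # Editor's note, illustrative only: a possible return value (hypothetical
  # names) is
  #
  #   ({"node3": "Error while enumerating LVs"},     # per-node errors
  #    ["inst1"],                                    # need activate-disks
  #    {"inst2": [["node2", "xenvg/disk0"]]})        # missing volumes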
3614 class LUClusterRepairDiskSizes(NoHooksLU):
3615 """Verifies the cluster disks sizes.
3620 def ExpandNames(self):
3621 if self.op.instances:
3622 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3623 self.needed_locks = {
3624 locking.LEVEL_NODE_RES: [],
3625 locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
3630 self.needed_locks = {
3631 locking.LEVEL_NODE_RES: locking.ALL_SET,
3632 locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = {
3635 locking.LEVEL_NODE_RES: 1,
3636 locking.LEVEL_INSTANCE: 0,
      }

  def DeclareLocks(self, level):
3640 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3641 self._LockInstancesNodes(primary_only=True, level=level)
3643 def CheckPrereq(self):
3644 """Check prerequisites.
3646 This only checks the optional instance list against the existing names.
    """
    if self.wanted_names is None:
3650 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3652 self.wanted_instances = \
3653 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3655 def _EnsureChildSizes(self, disk):
3656 """Ensure children of the disk have the needed disk size.
3658 This is valid mainly for DRBD8 and fixes an issue where the
3659 children have smaller disk size.
3661 @param disk: an L{ganeti.objects.Disk} object
    """
    if disk.dev_type == constants.LD_DRBD8:
3665 assert disk.children, "Empty children for DRBD8?"
3666 fchild = disk.children[0]
3667 mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3670 fchild.size, disk.size)
3671 fchild.size = disk.size
3673 # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False
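  # Editor's note, a numeric sketch (hypothetical values): for a DRBD8 disk
  # of size 10240 MiB whose data child reports 10112 MiB, the child is grown
  # to 10240 and True is returned, so the caller knows to write the updated
  # configuration back.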
3678 def Exec(self, feedback_fn):
3679 """Verify the size of cluster disks.
    """
    # TODO: check child disks too
3683 # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
3686 pnode = instance.primary_node
3687 if pnode not in per_node_disks:
3688 per_node_disks[pnode] = []
3689 for idx, disk in enumerate(instance.disks):
3690 per_node_disks[pnode].append((instance, idx, disk))
3692 assert not (frozenset(per_node_disks.keys()) -
3693 self.owned_locks(locking.LEVEL_NODE_RES)), \
3694 "Not owning correct locks"
3695 assert not self.owned_locks(locking.LEVEL_NODE)
    changed = []
    for node, dskl in per_node_disks.items():
3699 newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
3702 result = self.rpc.call_blockdev_getsize(node, newl)
3704 self.LogWarning("Failure in blockdev_getsize call to node"
3705 " %s, ignoring", node)
3707 if len(result.payload) != len(dskl):
3708 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3709 " result.payload=%s", node, len(dskl), result.payload)
3710 self.LogWarning("Invalid result from node %s, ignoring node results",
3713 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3715 self.LogWarning("Disk %d of instance %s did not return size"
3716 " information, ignoring", idx, instance.name)
3718 if not isinstance(size, (int, long)):
3719 self.LogWarning("Disk %d of instance %s did not return valid"
3720 " size information, ignoring", idx, instance.name)
3723 if size != disk.size:
3724 self.LogInfo("Disk %d of instance %s has mismatched size,"
3725 " correcting: recorded %d, actual %d", idx,
3726 instance.name, disk.size, size)
3728 self.cfg.Update(instance, feedback_fn)
3729 changed.append((instance.name, idx, size))
3730 if self._EnsureChildSizes(disk):
3731 self.cfg.Update(instance, feedback_fn)
3732 changed.append((instance.name, idx, disk.size))
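

# Usage sketch (illustrative, not part of the module): the LU above is
# driven by OpClusterRepairDiskSizes; from Python it could be submitted
# roughly as follows (assumes a running master daemon):
#
#   from ganeti import opcodes, cli
#   changed = cli.SubmitOpCode(opcodes.OpClusterRepairDiskSizes(instances=[]))
#   # "changed" lists (instance_name, disk_index, new_size) tuples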
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master_params.name)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername
def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family, errors.ECODE_INVAL)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                               (netmask), errors.ECODE_INVAL)
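

# Example (illustrative, not part of the module): the netmask here is a CIDR
# prefix length, so on an IPv4 cluster _ValidateNetmask(cfg, 24) passes while
# _ValidateNetmask(cfg, 33) raises OpPrereqError; on IPv6 the valid range
# extends accordingly.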
class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

    if self.op.master_netmask is not None:
      _ValidateNetmask(self.cfg, self.op.master_netmask)

    if self.op.diskparams:
      for dt_params in self.op.diskparams.values():
        utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
      try:
        utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
      except errors.OpPrereqError, err:
        raise errors.OpPrereqError("While verifying diskparams options: %s" %
                                   err, errors.ECODE_INVAL)
  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }
    self.share_locks = {
      locking.LEVEL_NODE: 1,
      locking.LEVEL_INSTANCE: 1,
      locking.LEVEL_NODEGROUP: 1,
      }

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])
  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.owned_locks(locking.LEVEL_NODE)

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)
    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      objects.UpgradeBeParams(self.op.beparams)
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.hv_state:
      new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                            self.cluster.hv_state_static)
      self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
                               for hv, values in new_hv_state.items())

    if self.op.disk_state:
      new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
                                                self.cluster.disk_state_static)
      self.new_disk_state = \
        dict((storage, dict((name, cluster.SimpleFillDiskState(values))
                            for name, values in svalues.items()))
             for storage, svalues in new_disk_state.items())

    if self.op.ipolicy:
      self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
                                            group_policy=False)

      all_instances = self.cfg.GetAllInstancesInfo().values()
      violations = set()
      for group in self.cfg.GetAllNodeGroupsInfo().values():
        instances = frozenset([inst for inst in all_instances
                               if compat.any(node in group.members
                                             for node in inst.all_nodes)])
        new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
        new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
                                                                   group),
                                            new_ipolicy, instances)
        violations.update(new)

      if violations:
        self.LogWarning("After the ipolicy change the following instances"
                        " violate them: %s",
                        utils.CommaJoin(utils.NiceSort(violations)))
    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors), errors.ECODE_INVAL)
    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)
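
    # Merge-semantics note (illustrative, not upstream code): FillDict
    # returns a copy of its first argument updated with the second, so the
    # per-hypervisor update above only overrides the keys the user passed:
    #
    #   objects.FillDict({"kernel_path": "/boot/k", "acpi": True},
    #                    {"acpi": False})
    #   # -> {"kernel_path": "/boot/k", "acpi": False}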
    # disk template parameters
    self.new_diskparams = objects.FillDict(cluster.diskparams, {})
    if self.op.diskparams:
      for dt_name, dt_params in self.op.diskparams.items():
        if dt_name not in self.new_diskparams:
          self.new_diskparams[dt_name] = dt_params
        else:
          self.new_diskparams[dt_name].update(dt_params)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])
    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)
  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.ipolicy:
      self.cluster.ipolicy = self.new_ipolicy
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams
    if self.op.diskparams:
      self.cluster.diskparams = self.new_diskparams
    if self.op.hv_state:
      self.cluster.hv_state_static = self.new_hv_state
    if self.op.disk_state:
      self.cluster.disk_state_static = self.new_disk_state

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      if self.op.maintain_node_health and not constants.ENABLE_CONFD:
        feedback_fn("Note: CONFD was disabled at build time, node health"
                    " maintenance is not useful (still enabling it)")
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    if self.op.use_external_mip_script is not None:
      self.cluster.use_external_mip_script = self.op.use_external_mip_script

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      ems = self.cfg.GetUseExternalMipScript()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params, ems)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (master_params.netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    if self.op.master_netmask:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
      result = self.rpc.call_node_change_master_netmask(master_params.name,
                                                        master_params.netmask,
                                                        self.op.master_netmask,
                                                        master_params.ip,
                                                        master_params.netdev)
      if result.fail_msg:
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
        feedback_fn(msg)

      self.cluster.master_netmask = self.op.master_netmask

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      ems = self.cfg.GetUseExternalMipScript()
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)
def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  if cluster.use_external_mip_script:
    files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()

  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(
    filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(
    filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          sum(map(len, [files_all, files_mc, files_vm]))), \
    "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
    "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)
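

# Invariant sketch (illustrative, not upstream code): the two assertions
# above mean the three required categories are pairwise disjoint, with
# files_opt acting as a marker subset of their union:
#
#   (files_all, files_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(cluster, False)
#   assert not (files_all & files_mc) and not (files_all & files_vm)
#   assert files_opt <= (files_all | files_mc | files_vm)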
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  online_set = frozenset(online_nodes)
  vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                   master_params, ems)
    result.Raise("Could not activate the master IP")


class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not deactivate the master IP")
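

# Usage sketch (illustrative, not part of the module): these two LUs back
# the "gnt-cluster activate-master-ip" and "gnt-cluster deactivate-master-ip"
# commands and take no parameters, e.g.:
#
#   from ganeti import opcodes, cli
#   cli.SubmitOpCode(opcodes.OpClusterActivateMasterIp())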
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disks to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)

  return not cumul_degraded
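

# Behaviour note (illustrative, not upstream code): the loop above polls once
# per sleep interval, tolerates up to 10 failed RPCs six seconds apart, and
# when syncing looks "done but degraded" it forces up to degr_retries extra
# one-second passes to distinguish a transient state from a real problem.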
def _BlockdevFind(lu, node, dev, instance):
  """Wrapper around call_blockdev_find to annotate diskparams.

  @param lu: A reference to the lu object
  @param node: The node to call out
  @param dev: The device to find
  @param instance: The instance object the device belongs to
  @returns The result of the rpc call

  """
  (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
  return lu.rpc.call_blockdev_find(node, disk)


def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
  """Wrapper around L{_CheckDiskConsistencyInner}.

  """
  (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
  return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
                                    ldisk=ldisk)


def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
                               ldisk=False):
  """Check that mirrors are not degraded.

  @attention: The device has to be annotated already.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
                                                     on_primary)

  return result
class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s"
                    % type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s"
                               % utils.CommaJoin(errs))
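

# Usage sketch (illustrative, not part of the module): this LU backs the
# "gnt-node power" and "gnt-node health" commands; a power-status query
# could be submitted from Python roughly as:
#
#   from ganeti import constants, opcodes, cli
#   op = opcodes.OpOobCommand(node_names=["node1"],
#                             command=constants.OOB_POWER_STATUS)
#   cli.SubmitOpCode(op)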
class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of OSes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not osl:
          continue

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)
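

# Filter sketch (illustrative, not upstream code): with no names given and
# no status fields requested, _BuildFilter returns just the status filter,
# shown here with the qlang operator symbols "&", "!" and "?":
#
#   LUOsDiagnose._BuildFilter(["name"], None)
#   # -> ["&", ["!", ["?", "hidden"]], ["!", ["?", "blacklisted"]],
#   #     ["?", "valid"]]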
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      pass
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)
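

# Usage sketch (illustrative, not part of the module): node removal is
# normally driven by "gnt-node remove"; the equivalent opcode submission is
# roughly:
#
#   from ganeti import opcodes, cli
#   cli.SubmitOpCode(opcodes.OpNodeRemove(node_name="node3"))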
class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
                                        [lu.cfg.GetHypervisorType()])
      live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = None

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result
class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
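

# Query sketch (illustrative, not part of the module): LUQuery drives the
# generic query interface; a filtered node query could be submitted roughly
# as:
#
#   from ganeti import constants, opcodes, cli
#   op = opcodes.OpQuery(what=constants.QR_NODE, fields=["name", "drained"],
#                        qfilter=["=", "name", "node1"])
#   cli.SubmitOpCode(op)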
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies a storage volume on a node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)
5547 def CheckPrereq(self):
5548 """Check prerequisites.
5551 - the new node is not already in the config
5553 - its parameters (single/dual homed) matches the cluster
5555 Any errors are signaled by raising errors.OpPrereqError.
5559 hostname = self.hostname
5560 node = hostname.name
5561 primary_ip = self.op.primary_ip = hostname.ip
5562 if self.op.secondary_ip is None:
5563 if self.primary_ip_family == netutils.IP6Address.family:
5564 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5565 " IPv4 address must be given as secondary",
5567 self.op.secondary_ip = primary_ip
5569 secondary_ip = self.op.secondary_ip
5570 if not netutils.IP4Address.IsValid(secondary_ip):
5571 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5572 " address" % secondary_ip, errors.ECODE_INVAL)
5574 node_list = cfg.GetNodeList()
5575 if not self.op.readd and node in node_list:
5576 raise errors.OpPrereqError("Node %s is already in the configuration" %
5577 node, errors.ECODE_EXISTS)
5578 elif self.op.readd and node not in node_list:
5579 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5582 self.changed_primary_ip = False
5584 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5585 if self.op.readd and node == existing_node_name:
5586 if existing_node.secondary_ip != secondary_ip:
5587 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5588 " address configuration as before",
5590 if existing_node.primary_ip != primary_ip:
5591 self.changed_primary_ip = True
5595 if (existing_node.primary_ip == primary_ip or
5596 existing_node.secondary_ip == primary_ip or
5597 existing_node.primary_ip == secondary_ip or
5598 existing_node.secondary_ip == secondary_ip):
5599 raise errors.OpPrereqError("New node ip address(es) conflict with"
5600 " existing node %s" % existing_node.name,
5601 errors.ECODE_NOTUNIQUE)
5603 # After this 'if' block, None is no longer a valid value for the
5604 # _capable op attributes
5606 old_node = self.cfg.GetNodeInfo(node)
5607 assert old_node is not None, "Can't retrieve locked node %s" % node
5608 for attr in self._NFLAGS:
5609 if getattr(self.op, attr) is None:
5610 setattr(self.op, attr, getattr(old_node, attr))
5612 for attr in self._NFLAGS:
5613 if getattr(self.op, attr) is None:
5614 setattr(self.op, attr, True)
5616 if self.op.readd and not self.op.vm_capable:
5617 pri, sec = cfg.GetNodeInstances(node)
5619 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5620 " flag set to false, but it already holds"
5621 " instances" % node,
5624 # check that the type of the node (single versus dual homed) is the
5625 # same as for the master
5626 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5627 master_singlehomed = myself.secondary_ip == myself.primary_ip
5628 newbie_singlehomed = secondary_ip == primary_ip
5629 if master_singlehomed != newbie_singlehomed:
5630 if master_singlehomed:
5631 raise errors.OpPrereqError("The master has no secondary ip but the"
5632 " new node has one",
5635 raise errors.OpPrereqError("The master has a secondary ip but the"
5636 " new node doesn't have one",
5639 # checks reachability
5640 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5641 raise errors.OpPrereqError("Node not reachable by ping",
5642 errors.ECODE_ENVIRON)
5644 if not newbie_singlehomed:
5645 # check reachability from my secondary ip to newbie's secondary ip
5646 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5647 source=myself.secondary_ip):
5648 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5649 " based ping to node daemon port",
5650 errors.ECODE_ENVIRON)
5657 if self.op.master_capable:
5658 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5660 self.master_candidate = False
5663 self.new_node = old_node
5665 node_group = cfg.LookupNodeGroup(self.op.group)
5666 self.new_node = objects.Node(name=node,
5667 primary_ip=primary_ip,
5668 secondary_ip=secondary_ip,
5669 master_candidate=self.master_candidate,
5670 offline=False, drained=False,
5671 group=node_group, ndparams={})
5673 if self.op.ndparams:
5674 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5676 if self.op.hv_state:
5677 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5679 if self.op.disk_state:
5680 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5682 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5683 # it a property on the base class.
5684 result = rpc.DnsOnlyRunner().call_version([node])[node]
5685 result.Raise("Can't get version information from node %s" % node)
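# Added note (assumption, not in the original source): a DNS-only runner is
# used here because the node being added is not yet fully part of the cluster
# configuration, so its address cannot be resolved from cluster data.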
5686 if constants.PROTOCOL_VERSION == result.payload:
5687 logging.info("Communication to node %s fine, sw version %s match",
5688 node, result.payload)
5689 else:
5690 raise errors.OpPrereqError("Version mismatch master version %s,"
5691 " node version %s" %
5692 (constants.PROTOCOL_VERSION, result.payload),
5693 errors.ECODE_ENVIRON)
5695 def Exec(self, feedback_fn):
5696 """Adds the new node to the cluster.
5699 new_node = self.new_node
5700 node = new_node.name
5702 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5703 "Not owning BGL"
5705 # We are adding a new node, so we assume it is powered
5706 new_node.powered = True
5708 # for re-adds, reset the offline/drained/master-candidate flags;
5709 # we need to reset here, otherwise offline would prevent RPC calls
5710 # later in the procedure; this also means that if the re-add
5711 # fails, we are left with a non-offlined, broken node
5712 if self.op.readd:
5713 new_node.drained = new_node.offline = False # pylint: disable=W0201
5714 self.LogInfo("Readding a node, the offline/drained flags were reset")
5715 # if we demote the node, we do cleanup later in the procedure
5716 new_node.master_candidate = self.master_candidate
5717 if self.changed_primary_ip:
5718 new_node.primary_ip = self.op.primary_ip
5720 # copy the master/vm_capable flags
5721 for attr in self._NFLAGS:
5722 setattr(new_node, attr, getattr(self.op, attr))
5724 # notify the user about any possible mc promotion
5725 if new_node.master_candidate:
5726 self.LogInfo("Node will be a master candidate")
5728 if self.op.ndparams:
5729 new_node.ndparams = self.op.ndparams
5731 new_node.ndparams = {}
5733 if self.op.hv_state:
5734 new_node.hv_state_static = self.new_hv_state
5736 if self.op.disk_state:
5737 new_node.disk_state_static = self.new_disk_state
5739 # Add node to our /etc/hosts, and add key to known_hosts
5740 if self.cfg.GetClusterInfo().modify_etc_hosts:
5741 master_node = self.cfg.GetMasterNode()
5742 result = self.rpc.call_etc_hosts_modify(master_node,
5743 constants.ETC_HOSTS_ADD,
5744 new_node.name,
5745 new_node.primary_ip)
5746 result.Raise("Can't update hosts file with new host data")
5748 if new_node.secondary_ip != new_node.primary_ip:
5749 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5750 False)
5752 node_verify_list = [self.cfg.GetMasterNode()]
5753 node_verify_param = {
5754 constants.NV_NODELIST: ([node], {}),
5755 # TODO: do a node-net-test as well?
5756 }
5758 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5759 self.cfg.GetClusterName())
5760 for verifier in node_verify_list:
5761 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5762 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5763 if nl_payload:
5764 for failed in nl_payload:
5765 feedback_fn("ssh/hostname verification failed"
5766 " (checking from %s): %s" %
5767 (verifier, nl_payload[failed]))
5768 raise errors.OpExecError("ssh/hostname verification failed")
5770 if self.op.readd:
5771 _RedistributeAncillaryFiles(self)
5772 self.context.ReaddNode(new_node)
5773 # make sure we redistribute the config
5774 self.cfg.Update(new_node, feedback_fn)
5775 # and make sure the new node will not have old files around
5776 if not new_node.master_candidate:
5777 result = self.rpc.call_node_demote_from_mc(new_node.name)
5778 msg = result.fail_msg
5779 if msg:
5780 self.LogWarning("Node failed to demote itself from master"
5781 " candidate status: %s" % msg)
5782 else:
5783 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5784 additional_vm=self.op.vm_capable)
5785 self.context.AddNode(new_node, self.proc.GetECId())
5788 class LUNodeSetParams(LogicalUnit):
5789 """Modifies the parameters of a node.
5791 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5792 to the node role (as _ROLE_*)
5793 @cvar _R2F: a dictionary from node role to tuples of flags
5794 @cvar _FLAGS: a list of attribute names corresponding to the flags
5797 HPATH = "node-modify"
5798 HTYPE = constants.HTYPE_NODE
5800 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5801 _F2R = {
5802 (True, False, False): _ROLE_CANDIDATE,
5803 (False, True, False): _ROLE_DRAINED,
5804 (False, False, True): _ROLE_OFFLINE,
5805 (False, False, False): _ROLE_REGULAR,
5806 }
5807 _R2F = dict((v, k) for k, v in _F2R.items())
5808 _FLAGS = ["master_candidate", "drained", "offline"]
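# Added illustration (not in the original source): the tuples are keyed as
# (master_candidate, drained, offline), so for example
# _F2R[(True, False, False)] == _ROLE_CANDIDATE and
# _R2F[_ROLE_OFFLINE] == (False, False, True); _FLAGS gives the attribute
# order of those tuples.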
5810 def CheckArguments(self):
5811 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5812 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5813 self.op.master_capable, self.op.vm_capable,
5814 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5815 self.op.disk_state]
5816 if all_mods.count(None) == len(all_mods):
5817 raise errors.OpPrereqError("Please pass at least one modification",
5818 errors.ECODE_INVAL)
5819 if all_mods.count(True) > 1:
5820 raise errors.OpPrereqError("Can't set the node into more than one"
5821 " state at the same time",
5822 errors.ECODE_INVAL)
5824 # Boolean value that tells us whether we might be demoting from MC
5825 self.might_demote = (self.op.master_candidate is False or
5826 self.op.offline is True or
5827 self.op.drained is True or
5828 self.op.master_capable is False)
5830 if self.op.secondary_ip:
5831 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5832 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5833 " address" % self.op.secondary_ip,
5834 errors.ECODE_INVAL)
5836 self.lock_all = self.op.auto_promote and self.might_demote
5837 self.lock_instances = self.op.secondary_ip is not None
5839 def _InstanceFilter(self, instance):
5840 """Filter for getting affected instances.
5843 return (instance.disk_template in constants.DTS_INT_MIRROR and
5844 self.op.node_name in instance.all_nodes)
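# Added note (not in the original source): only internally mirrored disk
# templates (DTS_INT_MIRROR, e.g. DRBD) keep data on a secondary node, so the
# filter above never matches plain or file-based instances.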
5846 def ExpandNames(self):
5847 if self.lock_all:
5848 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5849 else:
5850 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5852 # Since modifying a node can have severe effects on currently running
5853 # operations, the resource lock is at least acquired in shared mode
5854 self.needed_locks[locking.LEVEL_NODE_RES] = \
5855 self.needed_locks[locking.LEVEL_NODE]
5857 # Get node resource and instance locks in shared mode; they are not used
5858 # for anything but read-only access
5859 self.share_locks[locking.LEVEL_NODE_RES] = 1
5860 self.share_locks[locking.LEVEL_INSTANCE] = 1
5862 if self.lock_instances:
5863 self.needed_locks[locking.LEVEL_INSTANCE] = \
5864 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5866 def BuildHooksEnv(self):
5867 """Build hooks env.
5869 This runs on the master node.
5871 """
5872 return {
5873 "OP_TARGET": self.op.node_name,
5874 "MASTER_CANDIDATE": str(self.op.master_candidate),
5875 "OFFLINE": str(self.op.offline),
5876 "DRAINED": str(self.op.drained),
5877 "MASTER_CAPABLE": str(self.op.master_capable),
5878 "VM_CAPABLE": str(self.op.vm_capable),
5879 }
5881 def BuildHooksNodes(self):
5882 """Build hooks nodes.
5885 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5886 return (nl, nl)
5888 def CheckPrereq(self):
5889 """Check prerequisites.
5891 This only checks the instance list against the existing names.
5894 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5896 if self.lock_instances:
5897 affected_instances = \
5898 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5900 # Verify instance locks
5901 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5902 wanted_instances = frozenset(affected_instances.keys())
5903 if wanted_instances - owned_instances:
5904 raise errors.OpPrereqError("Instances affected by changing node %s's"
5905 " secondary IP address have changed since"
5906 " locks were acquired, wanted '%s', have"
5907 " '%s'; retry the operation" %
5908 (self.op.node_name,
5909 utils.CommaJoin(wanted_instances),
5910 utils.CommaJoin(owned_instances)),
5911 errors.ECODE_STATE)
5912 else:
5913 affected_instances = None
5915 if (self.op.master_candidate is not None or
5916 self.op.drained is not None or
5917 self.op.offline is not None):
5918 # we can't change the master's node flags
5919 if self.op.node_name == self.cfg.GetMasterNode():
5920 raise errors.OpPrereqError("The master role can be changed"
5921 " only via master-failover",
5922 errors.ECODE_INVAL)
5924 if self.op.master_candidate and not node.master_capable:
5925 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5926 " it a master candidate" % node.name,
5927 errors.ECODE_STATE)
5929 if self.op.vm_capable is False:
5930 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5931 if ipri or isec:
5932 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5933 " the vm_capable flag" % node.name,
5934 errors.ECODE_STATE)
5936 if node.master_candidate and self.might_demote and not self.lock_all:
5937 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5938 # check if after removing the current node, we're missing master
5939 # candidates
5940 (mc_remaining, mc_should, _) = \
5941 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5942 if mc_remaining < mc_should:
5943 raise errors.OpPrereqError("Not enough master candidates, please"
5944 " pass auto promote option to allow"
5945 " promotion (--auto-promote or RAPI"
5946 " auto_promote=True)", errors.ECODE_STATE)
5948 self.old_flags = old_flags = (node.master_candidate,
5949 node.drained, node.offline)
5950 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5951 self.old_role = old_role = self._F2R[old_flags]
5953 # Check for ineffective changes
5954 for attr in self._FLAGS:
5955 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
5956 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5957 setattr(self.op, attr, None)
5959 # Past this point, any flag change to False means a transition
5960 # away from the respective state, as only real changes are kept
5962 # TODO: We might query the real power state if it supports OOB
5963 if _SupportsOob(self.cfg, node):
5964 if self.op.offline is False and not (node.powered or
5965 self.op.powered is True):
5966 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5967 " offline status can be reset") %
5968 self.op.node_name, errors.ECODE_STATE)
5969 elif self.op.powered is not None:
5970 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5971 " as it does not support out-of-band"
5972 " handling") % self.op.node_name,
5973 errors.ECODE_STATE)
5975 # If we're being deofflined/drained, we'll MC ourselves if needed
5976 if (self.op.drained is False or self.op.offline is False or
5977 (self.op.master_capable and not node.master_capable)):
5978 if _DecideSelfPromotion(self):
5979 self.op.master_candidate = True
5980 self.LogInfo("Auto-promoting node to master candidate")
5982 # If we're no longer master capable, we'll demote ourselves from MC
5983 if self.op.master_capable is False and node.master_candidate:
5984 self.LogInfo("Demoting from master candidate")
5985 self.op.master_candidate = False
5988 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5989 if self.op.master_candidate:
5990 new_role = self._ROLE_CANDIDATE
5991 elif self.op.drained:
5992 new_role = self._ROLE_DRAINED
5993 elif self.op.offline:
5994 new_role = self._ROLE_OFFLINE
5995 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5996 # False is still in new flags, which means we're un-setting (the
5997 # offline/drained/master-candidate flags)
5998 new_role = self._ROLE_REGULAR
5999 else: # no new flags, nothing, keep old role
6000 new_role = old_role
6002 self.new_role = new_role
6004 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6005 # Trying to transition out of offline status
6006 result = self.rpc.call_version([node.name])[node.name]
6007 if result.fail_msg:
6008 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6009 " to report its version: %s" %
6010 (node.name, result.fail_msg),
6011 errors.ECODE_STATE)
6012 else:
6013 self.LogWarning("Transitioning node from offline to online state"
6014 " without using re-add. Please make sure the node"
6015 " is healthy!")
6017 # When changing the secondary ip, verify if this is a single-homed to
6018 # multi-homed transition or vice versa, and apply the relevant
6019 # restrictions.
6020 if self.op.secondary_ip:
6021 # Ok even without locking, because this can't be changed by any LU
6022 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6023 master_singlehomed = master.secondary_ip == master.primary_ip
6024 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6025 if self.op.force and node.name == master.name:
6026 self.LogWarning("Transitioning from single-homed to multi-homed"
6027 " cluster. All nodes will require a secondary ip.")
6028 else:
6029 raise errors.OpPrereqError("Changing the secondary ip on a"
6030 " single-homed cluster requires the"
6031 " --force option to be passed, and the"
6032 " target node to be the master",
6033 errors.ECODE_INVAL)
6034 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6035 if self.op.force and node.name == master.name:
6036 self.LogWarning("Transitioning from multi-homed to single-homed"
6037 " cluster. Secondary IPs will have to be removed.")
6038 else:
6039 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6040 " same as the primary IP on a multi-homed"
6041 " cluster, unless the --force option is"
6042 " passed, and the target node is the"
6043 " master", errors.ECODE_INVAL)
6045 assert not (frozenset(affected_instances) -
6046 self.owned_locks(locking.LEVEL_INSTANCE))
6048 if node.offline:
6049 if affected_instances:
6050 msg = ("Cannot change secondary IP address: offline node has"
6051 " instances (%s) configured to use it" %
6052 utils.CommaJoin(affected_instances.keys()))
6053 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6054 else:
6055 # On online nodes, check that no instances are running, and that
6056 # the node has the new ip and we can reach it.
6057 for instance in affected_instances.values():
6058 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6059 msg="cannot change secondary ip")
6061 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6062 if master.name != node.name:
6063 # check reachability from master secondary ip to new secondary ip
6064 if not netutils.TcpPing(self.op.secondary_ip,
6065 constants.DEFAULT_NODED_PORT,
6066 source=master.secondary_ip):
6067 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6068 " based ping to node daemon port",
6069 errors.ECODE_ENVIRON)
6071 if self.op.ndparams:
6072 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6073 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6074 self.new_ndparams = new_ndparams
6076 if self.op.hv_state:
6077 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6078 self.node.hv_state_static)
6080 if self.op.disk_state:
6081 self.new_disk_state = \
6082 _MergeAndVerifyDiskState(self.op.disk_state,
6083 self.node.disk_state_static)
6085 def Exec(self, feedback_fn):
6086 """Modifies a node.
6088 """
6089 node = self.node
6090 old_role = self.old_role
6091 new_role = self.new_role
6093 result = []
6095 if self.op.ndparams:
6096 node.ndparams = self.new_ndparams
6098 if self.op.powered is not None:
6099 node.powered = self.op.powered
6101 if self.op.hv_state:
6102 node.hv_state_static = self.new_hv_state
6104 if self.op.disk_state:
6105 node.disk_state_static = self.new_disk_state
6107 for attr in ["master_capable", "vm_capable"]:
6108 val = getattr(self.op, attr)
6109 if val is not None:
6110 setattr(node, attr, val)
6111 result.append((attr, str(val)))
6113 if new_role != old_role:
6114 # Tell the node to demote itself, if no longer MC and not offline
6115 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6116 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6117 if msg:
6118 self.LogWarning("Node failed to demote itself: %s", msg)
6120 new_flags = self._R2F[new_role]
6121 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6122 if of != nf:
6123 result.append((desc, str(nf)))
6124 (node.master_candidate, node.drained, node.offline) = new_flags
6126 # we locked all nodes, we adjust the CP before updating this node
6127 if self.lock_all:
6128 _AdjustCandidatePool(self, [node.name])
6130 if self.op.secondary_ip:
6131 node.secondary_ip = self.op.secondary_ip
6132 result.append(("secondary_ip", self.op.secondary_ip))
6134 # this will trigger configuration file update, if needed
6135 self.cfg.Update(node, feedback_fn)
6137 # this will trigger job queue propagation or cleanup if the mc
6138 # flag changed
6139 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6140 self.context.ReaddNode(node)
6142 return result
6145 class LUNodePowercycle(NoHooksLU):
6146 """Powercycles a node.
6148 """
6149 REQ_BGL = False
6151 def CheckArguments(self):
6152 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6153 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6154 raise errors.OpPrereqError("The node is the master and the force"
6155 " parameter was not set",
6156 errors.ECODE_INVAL)
6158 def ExpandNames(self):
6159 """Locking for PowercycleNode.
6161 This is a last-resort option and shouldn't block on other
6162 jobs. Therefore, we grab no locks.
6165 self.needed_locks = {}
6167 def Exec(self, feedback_fn):
6168 """Reboots a node.
6170 """
6171 result = self.rpc.call_node_powercycle(self.op.node_name,
6172 self.cfg.GetHypervisorType())
6173 result.Raise("Failed to schedule the reboot")
6174 return result.payload
6177 class LUClusterQuery(NoHooksLU):
6178 """Query cluster configuration.
6180 """
6181 REQ_BGL = False
6183 def ExpandNames(self):
6184 self.needed_locks = {}
6186 def Exec(self, feedback_fn):
6187 """Return cluster config.
6190 cluster = self.cfg.GetClusterInfo()
6191 os_hvp = {}
6193 # Filter just for enabled hypervisors
6194 for os_name, hv_dict in cluster.os_hvp.items():
6195 os_hvp[os_name] = {}
6196 for hv_name, hv_params in hv_dict.items():
6197 if hv_name in cluster.enabled_hypervisors:
6198 os_hvp[os_name][hv_name] = hv_params
6200 # Convert ip_family to ip_version
6201 primary_ip_version = constants.IP4_VERSION
6202 if cluster.primary_ip_family == netutils.IP6Address.family:
6203 primary_ip_version = constants.IP6_VERSION
6205 result = {
6206 "software_version": constants.RELEASE_VERSION,
6207 "protocol_version": constants.PROTOCOL_VERSION,
6208 "config_version": constants.CONFIG_VERSION,
6209 "os_api_version": max(constants.OS_API_VERSIONS),
6210 "export_version": constants.EXPORT_VERSION,
6211 "architecture": runtime.GetArchInfo(),
6212 "name": cluster.cluster_name,
6213 "master": cluster.master_node,
6214 "default_hypervisor": cluster.primary_hypervisor,
6215 "enabled_hypervisors": cluster.enabled_hypervisors,
6216 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6217 for hypervisor_name in cluster.enabled_hypervisors]),
6218 "os_hvp": os_hvp,
6219 "beparams": cluster.beparams,
6220 "osparams": cluster.osparams,
6221 "ipolicy": cluster.ipolicy,
6222 "nicparams": cluster.nicparams,
6223 "ndparams": cluster.ndparams,
6224 "diskparams": cluster.diskparams,
6225 "candidate_pool_size": cluster.candidate_pool_size,
6226 "master_netdev": cluster.master_netdev,
6227 "master_netmask": cluster.master_netmask,
6228 "use_external_mip_script": cluster.use_external_mip_script,
6229 "volume_group_name": cluster.volume_group_name,
6230 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6231 "file_storage_dir": cluster.file_storage_dir,
6232 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6233 "maintain_node_health": cluster.maintain_node_health,
6234 "ctime": cluster.ctime,
6235 "mtime": cluster.mtime,
6236 "uuid": cluster.uuid,
6237 "tags": list(cluster.GetTags()),
6238 "uid_pool": cluster.uid_pool,
6239 "default_iallocator": cluster.default_iallocator,
6240 "reserved_lvs": cluster.reserved_lvs,
6241 "primary_ip_version": primary_ip_version,
6242 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6243 "hidden_os": cluster.hidden_os,
6244 "blacklisted_os": cluster.blacklisted_os,
6245 }
6247 return result
6250 class LUClusterConfigQuery(NoHooksLU):
6251 """Return configuration values.
6253 """
6254 REQ_BGL = False
6256 def CheckArguments(self):
6257 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6259 def ExpandNames(self):
6260 self.cq.ExpandNames(self)
6262 def DeclareLocks(self, level):
6263 self.cq.DeclareLocks(self, level)
6265 def Exec(self, feedback_fn):
6266 result = self.cq.OldStyleQuery(self)
6268 assert len(result) == 1
6270 return result[0]
6273 class _ClusterQuery(_QueryBase):
6274 FIELDS = query.CLUSTER_FIELDS
6276 #: Do not sort (there is only one item)
6277 SORT_FIELD = None
6279 def ExpandNames(self, lu):
6280 lu.needed_locks = {}
6282 # The following variables interact with _QueryBase._GetNames
6283 self.wanted = locking.ALL_SET
6284 self.do_locking = self.use_locking
6286 if self.do_locking:
6287 raise errors.OpPrereqError("Can not use locking for cluster queries",
6288 errors.ECODE_INVAL)
6290 def DeclareLocks(self, lu, level):
6291 pass
6293 def _GetQueryData(self, lu):
6294 """Computes the list of nodes and their attributes.
6297 # Locking is not used
6298 assert not (compat.any(lu.glm.is_owned(level)
6299 for level in locking.LEVELS
6300 if level != locking.LEVEL_CLUSTER) or
6301 self.do_locking or self.use_locking)
6303 if query.CQ_CONFIG in self.requested_data:
6304 cluster = lu.cfg.GetClusterInfo()
6305 else:
6306 cluster = NotImplemented
6308 if query.CQ_QUEUE_DRAINED in self.requested_data:
6309 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6310 else:
6311 drain_flag = NotImplemented
6313 if query.CQ_WATCHER_PAUSE in self.requested_data:
6314 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6315 else:
6316 watcher_pause = NotImplemented
6318 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
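# Added note (not in the original source): data that was not requested is
# passed to ClusterQueryData as NotImplemented, which lets the query layer
# distinguish "not requested" from a legitimately empty value.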
6321 class LUInstanceActivateDisks(NoHooksLU):
6322 """Bring up an instance's disks.
6324 """
6325 REQ_BGL = False
6327 def ExpandNames(self):
6328 self._ExpandAndLockInstance()
6329 self.needed_locks[locking.LEVEL_NODE] = []
6330 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6332 def DeclareLocks(self, level):
6333 if level == locking.LEVEL_NODE:
6334 self._LockInstancesNodes()
6336 def CheckPrereq(self):
6337 """Check prerequisites.
6339 This checks that the instance is in the cluster.
6342 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6343 assert self.instance is not None, \
6344 "Cannot retrieve locked instance %s" % self.op.instance_name
6345 _CheckNodeOnline(self, self.instance.primary_node)
6347 def Exec(self, feedback_fn):
6348 """Activate the disks.
6351 disks_ok, disks_info = \
6352 _AssembleInstanceDisks(self, self.instance,
6353 ignore_size=self.op.ignore_size)
6354 if not disks_ok:
6355 raise errors.OpExecError("Cannot activate block devices")
6357 if self.op.wait_for_sync:
6358 if not _WaitForSync(self, self.instance):
6359 raise errors.OpExecError("Some disks of the instance are degraded!")
6361 return disks_info
6364 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6365 ignore_size=False):
6366 """Prepare the block devices for an instance.
6368 This sets up the block devices on all nodes.
6370 @type lu: L{LogicalUnit}
6371 @param lu: the logical unit on whose behalf we execute
6372 @type instance: L{objects.Instance}
6373 @param instance: the instance for whose disks we assemble
6374 @type disks: list of L{objects.Disk} or None
6375 @param disks: which disks to assemble (or all, if None)
6376 @type ignore_secondaries: boolean
6377 @param ignore_secondaries: if true, errors on secondary nodes
6378 won't result in an error return from the function
6379 @type ignore_size: boolean
6380 @param ignore_size: if true, the current known size of the disk
6381 will not be used during the disk activation, useful for cases
6382 when the size is wrong
6383 @return: False if the operation failed, otherwise a list of
6384 (host, instance_visible_name, node_visible_name)
6385 with the mapping from node devices to instance devices
6387 """
6388 device_info = []
6389 disks_ok = True
6390 iname = instance.name
6391 disks = _ExpandCheckDisks(instance, disks)
6393 # With the two-pass mechanism we try to reduce the window of
6394 # opportunity for the race condition of switching DRBD to primary
6395 # before handshaking occurred, but we do not eliminate it
6397 # The proper fix would be to wait (with some limits) until the
6398 # connection has been made and drbd transitions from WFConnection
6399 # into any other network-connected state (Connected, SyncTarget,
6400 # SyncSource, etc.)
6402 # 1st pass, assemble on all nodes in secondary mode
6403 for idx, inst_disk in enumerate(disks):
6404 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6405 if ignore_size:
6406 node_disk = node_disk.Copy()
6407 node_disk.UnsetSize()
6408 lu.cfg.SetDiskID(node_disk, node)
6409 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6410 False, idx)
6411 msg = result.fail_msg
6412 if msg:
6413 is_offline_secondary = (node in instance.secondary_nodes and
6414 result.offline)
6415 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6416 " (is_primary=False, pass=1): %s",
6417 inst_disk.iv_name, node, msg)
6418 if not (ignore_secondaries or is_offline_secondary):
6419 disks_ok = False
6421 # FIXME: race condition on drbd migration to primary
6423 # 2nd pass, do only the primary node
6424 for idx, inst_disk in enumerate(disks):
6425 dev_path = None
6427 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6428 if node != instance.primary_node:
6429 continue
6430 if ignore_size:
6431 node_disk = node_disk.Copy()
6432 node_disk.UnsetSize()
6433 lu.cfg.SetDiskID(node_disk, node)
6434 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6435 True, idx)
6436 msg = result.fail_msg
6437 if msg:
6438 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6439 " (is_primary=True, pass=2): %s",
6440 inst_disk.iv_name, node, msg)
6441 disks_ok = False
6442 else:
6443 dev_path = result.payload
6445 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6447 # leave the disks configured for the primary node
6448 # this is a workaround that would be fixed better by
6449 # improving the logical/physical id handling
6450 for disk in disks:
6451 lu.cfg.SetDiskID(disk, instance.primary_node)
6453 return disks_ok, device_info
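# Minimal usage sketch (added, not in the original source; compare
# _StartInstanceDisks below for the real caller):
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     _ShutdownInstanceDisks(lu, instance)  # roll back the partial assembly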
6456 def _StartInstanceDisks(lu, instance, force):
6457 """Start the disks of an instance.
6460 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6461 ignore_secondaries=force)
6462 if not disks_ok:
6463 _ShutdownInstanceDisks(lu, instance)
6464 if force is not None and not force:
6465 lu.proc.LogWarning("", hint="If the message above refers to a"
6466 " secondary node,"
6467 " you can retry the operation using '--force'.")
6468 raise errors.OpExecError("Disk consistency error")
6471 class LUInstanceDeactivateDisks(NoHooksLU):
6472 """Shutdown an instance's disks.
6474 """
6475 REQ_BGL = False
6477 def ExpandNames(self):
6478 self._ExpandAndLockInstance()
6479 self.needed_locks[locking.LEVEL_NODE] = []
6480 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6482 def DeclareLocks(self, level):
6483 if level == locking.LEVEL_NODE:
6484 self._LockInstancesNodes()
6486 def CheckPrereq(self):
6487 """Check prerequisites.
6489 This checks that the instance is in the cluster.
6492 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6493 assert self.instance is not None, \
6494 "Cannot retrieve locked instance %s" % self.op.instance_name
6496 def Exec(self, feedback_fn):
6497 """Deactivate the disks
6500 instance = self.instance
6501 if self.op.force:
6502 _ShutdownInstanceDisks(self, instance)
6503 else:
6504 _SafeShutdownInstanceDisks(self, instance)
6507 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6508 """Shutdown block devices of an instance.
6510 This function checks if an instance is running, before calling
6511 _ShutdownInstanceDisks.
6514 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6515 _ShutdownInstanceDisks(lu, instance, disks=disks)
6518 def _ExpandCheckDisks(instance, disks):
6519 """Return the instance disks selected by the disks list
6521 @type disks: list of L{objects.Disk} or None
6522 @param disks: selected disks
6523 @rtype: list of L{objects.Disk}
6524 @return: selected instance disks to act on
6526 """
6527 if disks is None:
6528 return instance.disks
6529 else:
6530 if not set(disks).issubset(instance.disks):
6531 raise errors.ProgrammerError("Can only act on disks belonging to the"
6532 " target instance")
6534 return disks
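# Added example (not in the original source): _ExpandCheckDisks(instance,
# None) returns all of the instance's disks, while passing a disk object
# that does not belong to the instance raises ProgrammerError.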
6536 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6537 """Shutdown block devices of an instance.
6539 This does the shutdown on all nodes of the instance.
6541 If ignore_primary is false, errors on the primary node are not
6542 ignored: they make the function return C{False}.
6544 """
6545 all_result = True
6546 disks = _ExpandCheckDisks(instance, disks)
6548 for disk in disks:
6549 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6550 lu.cfg.SetDiskID(top_disk, node)
6551 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6552 msg = result.fail_msg
6553 if msg:
6554 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6555 disk.iv_name, node, msg)
6556 if ((node == instance.primary_node and not ignore_primary) or
6557 (node != instance.primary_node and not result.offline)):
6558 all_result = False
6560 return all_result
6562 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6563 """Checks if a node has enough free memory.
6565 This function checks if a given node has the needed amount of free
6566 memory. In case the node has less memory or we cannot get the
6567 information from the node, this function raises an OpPrereqError
6568 exception.
6570 @type lu: C{LogicalUnit}
6571 @param lu: a logical unit from which we get configuration data
6573 @param node: the node to check
6574 @type reason: C{str}
6575 @param reason: string to use in the error message
6576 @type requested: C{int}
6577 @param requested: the amount of memory in MiB to check for
6578 @type hypervisor_name: C{str}
6579 @param hypervisor_name: the hypervisor to ask for memory stats
6581 @return: node current free memory
6582 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6583 we cannot check the node
6586 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6587 nodeinfo[node].Raise("Can't get data from node %s" % node,
6588 prereq=True, ecode=errors.ECODE_ENVIRON)
6589 (_, _, (hv_info, )) = nodeinfo[node].payload
6591 free_mem = hv_info.get("memory_free", None)
6592 if not isinstance(free_mem, int):
6593 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6594 " was '%s'" % (node, free_mem),
6595 errors.ECODE_ENVIRON)
6596 if requested > free_mem:
6597 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6598 " needed %s MiB, available %s MiB" %
6599 (node, reason, requested, free_mem),
6600 errors.ECODE_NORES)
6602 return free_mem
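# Minimal usage sketch (added, not in the original source; mirrors the call
# in LUInstanceStartup.CheckPrereq):
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MINMEM], instance.hypervisor)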
6604 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6605 """Checks if nodes have enough free disk space in all the VGs.
6607 This function checks if all given nodes have the needed amount of
6608 free disk. In case any node has less disk or we cannot get the
6609 information from the node, this function raises an OpPrereqError
6610 exception.
6612 @type lu: C{LogicalUnit}
6613 @param lu: a logical unit from which we get configuration data
6614 @type nodenames: C{list}
6615 @param nodenames: the list of node names to check
6616 @type req_sizes: C{dict}
6617 @param req_sizes: the hash of vg and corresponding amount of disk in
6618 MiB to check for
6619 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6620 or we cannot check the node
6623 for vg, req_size in req_sizes.items():
6624 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
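# Added example (not in the original source): req_sizes maps volume group
# names to MiB, e.g. {"xenvg": 2048, "datavg": 512} verifies 2 GiB free in
# "xenvg" and 512 MiB free in "datavg" on every node in nodenames.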
6627 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6628 """Checks if nodes have enough free disk space in the specified VG.
6630 This function checks if all given nodes have the needed amount of
6631 free disk. In case any node has less disk or we cannot get the
6632 information from the node, this function raises an OpPrereqError
6633 exception.
6635 @type lu: C{LogicalUnit}
6636 @param lu: a logical unit from which we get configuration data
6637 @type nodenames: C{list}
6638 @param nodenames: the list of node names to check
6640 @param vg: the volume group to check
6641 @type requested: C{int}
6642 @param requested: the amount of disk in MiB to check for
6643 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6644 or we cannot check the node
6647 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6648 for node in nodenames:
6649 info = nodeinfo[node]
6650 info.Raise("Cannot get current information from node %s" % node,
6651 prereq=True, ecode=errors.ECODE_ENVIRON)
6652 (_, (vg_info, ), _) = info.payload
6653 vg_free = vg_info.get("vg_free", None)
6654 if not isinstance(vg_free, int):
6655 raise errors.OpPrereqError("Can't compute free disk space on node"
6656 " %s for vg %s, result was '%s'" %
6657 (node, vg, vg_free), errors.ECODE_ENVIRON)
6658 if requested > vg_free:
6659 raise errors.OpPrereqError("Not enough disk space on target node %s"
6660 " vg %s: required %d MiB, available %d MiB" %
6661 (node, vg, requested, vg_free),
6662 errors.ECODE_NORES)
6665 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6666 """Checks if nodes have enough physical CPUs
6668 This function checks if all given nodes have the needed number of
6669 physical CPUs. In case any node has fewer CPUs or we cannot get the
6670 information from the node, this function raises an OpPrereqError
6671 exception.
6673 @type lu: C{LogicalUnit}
6674 @param lu: a logical unit from which we get configuration data
6675 @type nodenames: C{list}
6676 @param nodenames: the list of node names to check
6677 @type requested: C{int}
6678 @param requested: the minimum acceptable number of physical CPUs
6679 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6680 or we cannot check the node
6683 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6684 for node in nodenames:
6685 info = nodeinfo[node]
6686 info.Raise("Cannot get current information from node %s" % node,
6687 prereq=True, ecode=errors.ECODE_ENVIRON)
6688 (_, _, (hv_info, )) = info.payload
6689 num_cpus = hv_info.get("cpu_total", None)
6690 if not isinstance(num_cpus, int):
6691 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6692 " on node %s, result was '%s'" %
6693 (node, num_cpus), errors.ECODE_ENVIRON)
6694 if requested > num_cpus:
6695 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6696 "required" % (node, num_cpus, requested),
6697 errors.ECODE_NORES)
6700 class LUInstanceStartup(LogicalUnit):
6701 """Starts an instance.
6704 HPATH = "instance-start"
6705 HTYPE = constants.HTYPE_INSTANCE
6706 REQ_BGL = False
6708 def CheckArguments(self):
6710 if self.op.beparams:
6711 # fill the beparams dict
6712 objects.UpgradeBeParams(self.op.beparams)
6713 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6715 def ExpandNames(self):
6716 self._ExpandAndLockInstance()
6717 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6719 def DeclareLocks(self, level):
6720 if level == locking.LEVEL_NODE_RES:
6721 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6723 def BuildHooksEnv(self):
6724 """Build hooks env.
6726 This runs on master, primary and secondary nodes of the instance.
6728 """
6729 env = {
6730 "FORCE": self.op.force,
6731 }
6733 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6735 return env
6737 def BuildHooksNodes(self):
6738 """Build hooks nodes.
6741 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6742 return (nl, nl)
6744 def CheckPrereq(self):
6745 """Check prerequisites.
6747 This checks that the instance is in the cluster.
6750 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6751 assert self.instance is not None, \
6752 "Cannot retrieve locked instance %s" % self.op.instance_name
6755 if self.op.hvparams:
6756 # check hypervisor parameter syntax (locally)
6757 cluster = self.cfg.GetClusterInfo()
6758 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6759 filled_hvp = cluster.FillHV(instance)
6760 filled_hvp.update(self.op.hvparams)
6761 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6762 hv_type.CheckParameterSyntax(filled_hvp)
6763 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6765 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6767 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6769 if self.primary_offline and self.op.ignore_offline_nodes:
6770 self.proc.LogWarning("Ignoring offline primary node")
6772 if self.op.hvparams or self.op.beparams:
6773 self.proc.LogWarning("Overridden parameters are ignored")
6774 else:
6775 _CheckNodeOnline(self, instance.primary_node)
6777 bep = self.cfg.GetClusterInfo().FillBE(instance)
6778 bep.update(self.op.beparams)
6780 # check bridges existence
6781 _CheckInstanceBridgesExist(self, instance)
6783 remote_info = self.rpc.call_instance_info(instance.primary_node,
6784 instance.name,
6785 instance.hypervisor)
6786 remote_info.Raise("Error checking node %s" % instance.primary_node,
6787 prereq=True, ecode=errors.ECODE_ENVIRON)
6788 if not remote_info.payload: # not running already
6789 _CheckNodeFreeMemory(self, instance.primary_node,
6790 "starting instance %s" % instance.name,
6791 bep[constants.BE_MINMEM], instance.hypervisor)
6793 def Exec(self, feedback_fn):
6794 """Start the instance.
6797 instance = self.instance
6798 force = self.op.force
6800 if not self.op.no_remember:
6801 self.cfg.MarkInstanceUp(instance.name)
6803 if self.primary_offline:
6804 assert self.op.ignore_offline_nodes
6805 self.proc.LogInfo("Primary node offline, marked instance as started")
6807 node_current = instance.primary_node
6809 _StartInstanceDisks(self, instance, force)
6811 result = \
6812 self.rpc.call_instance_start(node_current,
6813 (instance, self.op.hvparams,
6814 self.op.beparams),
6815 self.op.startup_paused)
6816 msg = result.fail_msg
6817 if msg:
6818 _ShutdownInstanceDisks(self, instance)
6819 raise errors.OpExecError("Could not start instance: %s" % msg)
6822 class LUInstanceReboot(LogicalUnit):
6823 """Reboot an instance.
6826 HPATH = "instance-reboot"
6827 HTYPE = constants.HTYPE_INSTANCE
6828 REQ_BGL = False
6830 def ExpandNames(self):
6831 self._ExpandAndLockInstance()
6833 def BuildHooksEnv(self):
6834 """Build hooks env.
6836 This runs on master, primary and secondary nodes of the instance.
6838 """
6839 env = {
6840 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6841 "REBOOT_TYPE": self.op.reboot_type,
6842 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6843 }
6845 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6847 return env
6849 def BuildHooksNodes(self):
6850 """Build hooks nodes.
6853 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6854 return (nl, nl)
6856 def CheckPrereq(self):
6857 """Check prerequisites.
6859 This checks that the instance is in the cluster.
6862 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6863 assert self.instance is not None, \
6864 "Cannot retrieve locked instance %s" % self.op.instance_name
6865 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6866 _CheckNodeOnline(self, instance.primary_node)
6868 # check bridges existence
6869 _CheckInstanceBridgesExist(self, instance)
6871 def Exec(self, feedback_fn):
6872 """Reboot the instance.
6875 instance = self.instance
6876 ignore_secondaries = self.op.ignore_secondaries
6877 reboot_type = self.op.reboot_type
6879 remote_info = self.rpc.call_instance_info(instance.primary_node,
6880 instance.name,
6881 instance.hypervisor)
6882 remote_info.Raise("Error checking node %s" % instance.primary_node)
6883 instance_running = bool(remote_info.payload)
6885 node_current = instance.primary_node
6887 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6888 constants.INSTANCE_REBOOT_HARD]:
6889 for disk in instance.disks:
6890 self.cfg.SetDiskID(disk, node_current)
6891 result = self.rpc.call_instance_reboot(node_current, instance,
6892 reboot_type,
6893 self.op.shutdown_timeout)
6894 result.Raise("Could not reboot instance")
6895 else:
6896 if instance_running:
6897 result = self.rpc.call_instance_shutdown(node_current, instance,
6898 self.op.shutdown_timeout)
6899 result.Raise("Could not shutdown instance for full reboot")
6900 _ShutdownInstanceDisks(self, instance)
6901 else:
6902 self.LogInfo("Instance %s was already stopped, starting now",
6903 instance.name)
6904 _StartInstanceDisks(self, instance, ignore_secondaries)
6905 result = self.rpc.call_instance_start(node_current,
6906 (instance, None, None), False)
6907 msg = result.fail_msg
6908 if msg:
6909 _ShutdownInstanceDisks(self, instance)
6910 raise errors.OpExecError("Could not start instance for"
6911 " full reboot: %s" % msg)
6913 self.cfg.MarkInstanceUp(instance.name)
6916 class LUInstanceShutdown(LogicalUnit):
6917 """Shutdown an instance.
6920 HPATH = "instance-stop"
6921 HTYPE = constants.HTYPE_INSTANCE
6922 REQ_BGL = False
6924 def ExpandNames(self):
6925 self._ExpandAndLockInstance()
6927 def BuildHooksEnv(self):
6928 """Build hooks env.
6930 This runs on master, primary and secondary nodes of the instance.
6932 """
6933 env = _BuildInstanceHookEnvByObject(self, self.instance)
6934 env["TIMEOUT"] = self.op.timeout
6935 return env
6937 def BuildHooksNodes(self):
6938 """Build hooks nodes.
6941 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6942 return (nl, nl)
6944 def CheckPrereq(self):
6945 """Check prerequisites.
6947 This checks that the instance is in the cluster.
6950 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6951 assert self.instance is not None, \
6952 "Cannot retrieve locked instance %s" % self.op.instance_name
6954 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6956 self.primary_offline = \
6957 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6959 if self.primary_offline and self.op.ignore_offline_nodes:
6960 self.proc.LogWarning("Ignoring offline primary node")
6961 else:
6962 _CheckNodeOnline(self, self.instance.primary_node)
6964 def Exec(self, feedback_fn):
6965 """Shutdown the instance.
6968 instance = self.instance
6969 node_current = instance.primary_node
6970 timeout = self.op.timeout
6972 if not self.op.no_remember:
6973 self.cfg.MarkInstanceDown(instance.name)
6975 if self.primary_offline:
6976 assert self.op.ignore_offline_nodes
6977 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6978 else:
6979 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6980 msg = result.fail_msg
6981 if msg:
6982 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6984 _ShutdownInstanceDisks(self, instance)
6987 class LUInstanceReinstall(LogicalUnit):
6988 """Reinstall an instance.
6991 HPATH = "instance-reinstall"
6992 HTYPE = constants.HTYPE_INSTANCE
6993 REQ_BGL = False
6995 def ExpandNames(self):
6996 self._ExpandAndLockInstance()
6998 def BuildHooksEnv(self):
6999 """Build hooks env.
7001 This runs on master, primary and secondary nodes of the instance.
7003 """
7004 return _BuildInstanceHookEnvByObject(self, self.instance)
7006 def BuildHooksNodes(self):
7007 """Build hooks nodes.
7010 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7011 return (nl, nl)
7013 def CheckPrereq(self):
7014 """Check prerequisites.
7016 This checks that the instance is in the cluster and is not running.
7019 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7020 assert instance is not None, \
7021 "Cannot retrieve locked instance %s" % self.op.instance_name
7022 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7023 " offline, cannot reinstall")
7025 if instance.disk_template == constants.DT_DISKLESS:
7026 raise errors.OpPrereqError("Instance '%s' has no disks" %
7027 self.op.instance_name,
7028 errors.ECODE_INVAL)
7029 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7031 if self.op.os_type is not None:
7033 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7034 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7035 instance_os = self.op.os_type
7036 else:
7037 instance_os = instance.os
7039 nodelist = list(instance.all_nodes)
7041 if self.op.osparams:
7042 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7043 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7044 self.os_inst = i_osdict # the new dict (without defaults)
7045 else:
7046 self.os_inst = {}
7048 self.instance = instance
7050 def Exec(self, feedback_fn):
7051 """Reinstall the instance.
7054 inst = self.instance
7056 if self.op.os_type is not None:
7057 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7058 inst.os = self.op.os_type
7059 # Write to configuration
7060 self.cfg.Update(inst, feedback_fn)
7062 _StartInstanceDisks(self, inst, None)
7063 try:
7064 feedback_fn("Running the instance OS create scripts...")
7065 # FIXME: pass debug option from opcode to backend
7066 result = self.rpc.call_instance_os_add(inst.primary_node,
7067 (inst, self.os_inst), True,
7068 self.op.debug_level)
7069 result.Raise("Could not install OS for instance %s on node %s" %
7070 (inst.name, inst.primary_node))
7071 finally:
7072 _ShutdownInstanceDisks(self, inst)
7075 class LUInstanceRecreateDisks(LogicalUnit):
7076 """Recreate an instance's missing disks.
7079 HPATH = "instance-recreate-disks"
7080 HTYPE = constants.HTYPE_INSTANCE
7081 REQ_BGL = False
7083 _MODIFYABLE = frozenset([
7084 constants.IDISK_SIZE,
7085 constants.IDISK_MODE,
7086 ])
7088 # New or changed disk parameters may have different semantics
7089 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7090 constants.IDISK_ADOPT,
7092 # TODO: Implement support changing VG while recreating
7093 constants.IDISK_VG,
7094 constants.IDISK_METAVG,
7095 ]))
7097 def _RunAllocator(self):
7098 """Run the allocator based on input opcode.
7101 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7104 # The allocator should actually run in "relocate" mode, but current
7105 # allocators don't support relocating all the nodes of an instance at
7106 # the same time. As a workaround we use "allocate" mode, but this is
7107 # suboptimal for two reasons:
7108 # - The instance name passed to the allocator is present in the list of
7109 # existing instances, so there could be a conflict within the
7110 # internal structures of the allocator. This doesn't happen with the
7111 # current allocators, but it's a liability.
7112 # - The allocator counts the resources used by the instance twice: once
7113 # because the instance exists already, and once because it tries to
7114 # allocate a new instance.
7115 # The allocator could choose some of the nodes on which the instance is
7116 # running, but that's not a problem. If the instance nodes are broken,
7117 # they should already be marked as drained or offline, and hence
7118 # skipped by the allocator. If instance disks have been lost for other
7119 # reasons, then recreating the disks on the same nodes should be fine.
7120 ial = IAllocator(self.cfg, self.rpc,
7121 mode=constants.IALLOCATOR_MODE_ALLOC,
7122 name=self.op.instance_name,
7123 disk_template=self.instance.disk_template,
7124 tags=list(self.instance.GetTags()),
7125 os=self.instance.os,
7126 nics=[{}],
7127 vcpus=be_full[constants.BE_VCPUS],
7128 memory=be_full[constants.BE_MAXMEM],
7129 spindle_use=be_full[constants.BE_SPINDLE_USE],
7130 disks=[{constants.IDISK_SIZE: d.size,
7131 constants.IDISK_MODE: d.mode}
7132 for d in self.instance.disks],
7133 hypervisor=self.instance.hypervisor)
7135 assert ial.required_nodes == len(self.instance.all_nodes)
7137 ial.Run(self.op.iallocator)
7139 if not ial.success:
7140 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7141 " %s" % (self.op.iallocator, ial.info),
7142 errors.ECODE_NORES)
7144 if len(ial.result) != ial.required_nodes:
7145 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7146 " of nodes (%s), required %s" %
7147 (self.op.iallocator, len(ial.result),
7148 ial.required_nodes), errors.ECODE_FAULT)
7150 self.op.nodes = ial.result
7151 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7152 self.op.instance_name, self.op.iallocator,
7153 utils.CommaJoin(ial.result))
7155 def CheckArguments(self):
7156 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7157 # Normalize and convert deprecated list of disk indices
7158 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
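# Added example (not in the original source): a deprecated opcode value
# such as [2, 0] is normalized to [(0, {}), (2, {})] by the line above.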
7160 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7161 if duplicates:
7162 raise errors.OpPrereqError("Some disks have been specified more than"
7163 " once: %s" % utils.CommaJoin(duplicates),
7164 errors.ECODE_INVAL)
7166 if self.op.iallocator and self.op.nodes:
7167 raise errors.OpPrereqError("Give either the iallocator or the new"
7168 " nodes, not both", errors.ECODE_INVAL)
7170 for (idx, params) in self.op.disks:
7171 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7172 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7173 if unsupported:
7174 raise errors.OpPrereqError("Parameters for disk %s try to change"
7175 " unmodifiable parameter(s): %s" %
7176 (idx, utils.CommaJoin(unsupported)),
7177 errors.ECODE_INVAL)
7179 def ExpandNames(self):
7180 self._ExpandAndLockInstance()
7181 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7182 if self.op.nodes:
7183 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7184 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7185 else:
7186 self.needed_locks[locking.LEVEL_NODE] = []
7187 if self.op.iallocator:
7188 # iallocator will select a new node in the same group
7189 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7190 self.needed_locks[locking.LEVEL_NODE_RES] = []
7192 def DeclareLocks(self, level):
7193 if level == locking.LEVEL_NODEGROUP:
7194 assert self.op.iallocator is not None
7195 assert not self.op.nodes
7196 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7197 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7198 # Lock the primary group used by the instance optimistically; this
7199 # requires going via the node before it's locked, requiring
7200 # verification later on
7201 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7202 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7204 elif level == locking.LEVEL_NODE:
7205 # If an allocator is used, then we lock all the nodes in the current
7206 # instance group, as we don't know yet which ones will be selected;
7207 # if we replace the nodes without using an allocator, we only need to
7208 # lock the old primary for doing RPCs (FIXME: we don't lock nodes for
7209 # RPC anymore), otherwise we need to lock all the instance nodes for
7210 # disk re-creation
7211 if self.op.iallocator:
7212 assert not self.op.nodes
7213 assert not self.needed_locks[locking.LEVEL_NODE]
7214 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7216 # Lock member nodes of the group of the primary node
7217 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7218 self.needed_locks[locking.LEVEL_NODE].extend(
7219 self.cfg.GetNodeGroup(group_uuid).members)
7221 primary_only = bool(self.op.nodes)
7222 self._LockInstancesNodes(primary_only=primary_only)
7223 elif level == locking.LEVEL_NODE_RES:
7225 self.needed_locks[locking.LEVEL_NODE_RES] = \
7226 self.needed_locks[locking.LEVEL_NODE][:]
7228 def BuildHooksEnv(self):
7229 """Build hooks env.
7231 This runs on master, primary and secondary nodes of the instance.
7233 """
7234 return _BuildInstanceHookEnvByObject(self, self.instance)
7236 def BuildHooksNodes(self):
7237 """Build hooks nodes.
7240 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7241 return (nl, nl)
7243 def CheckPrereq(self):
7244 """Check prerequisites.
7246 This checks that the instance is in the cluster and is not running.
7249 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7250 assert instance is not None, \
7251 "Cannot retrieve locked instance %s" % self.op.instance_name
7252 if self.op.nodes:
7253 if len(self.op.nodes) != len(instance.all_nodes):
7254 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7255 " %d replacement nodes were specified" %
7256 (instance.name, len(instance.all_nodes),
7257 len(self.op.nodes)),
7258 errors.ECODE_INVAL)
7259 assert instance.disk_template != constants.DT_DRBD8 or \
7260 len(self.op.nodes) == 2
7261 assert instance.disk_template != constants.DT_PLAIN or \
7262 len(self.op.nodes) == 1
7263 primary_node = self.op.nodes[0]
7264 else:
7265 primary_node = instance.primary_node
7266 if not self.op.iallocator:
7267 _CheckNodeOnline(self, primary_node)
7269 if instance.disk_template == constants.DT_DISKLESS:
7270 raise errors.OpPrereqError("Instance '%s' has no disks" %
7271 self.op.instance_name, errors.ECODE_INVAL)
7273 # Verify if node group locks are still correct
7274 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7275 if owned_groups:
7276 # Node group locks are acquired only for the primary node (and only
7277 # when the allocator is used)
7278 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7279 primary_only=True)
7281 # if we replace nodes *and* the old primary is offline, we don't
7282 # check the instance state
7283 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7284 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7285 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7286 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7287 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7288 msg="cannot recreate disks")
7290 if self.op.disks:
7291 self.disks = dict(self.op.disks)
7292 else:
7293 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7295 maxidx = max(self.disks.keys())
7296 if maxidx >= len(instance.disks):
7297 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7298 errors.ECODE_INVAL)
7300 if ((self.op.nodes or self.op.iallocator) and
7301 sorted(self.disks.keys()) != range(len(instance.disks))):
7302 raise errors.OpPrereqError("Can't recreate disks partially and"
7303 " change the nodes at the same time",
7304 errors.ECODE_INVAL)
7306 self.instance = instance
7308 if self.op.iallocator:
7309 self._RunAllocator()
7311 # Release unneeded node and node resource locks
7312 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7313 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7315 def Exec(self, feedback_fn):
7316 """Recreate the disks.
7319 instance = self.instance
7321 assert (self.owned_locks(locking.LEVEL_NODE) ==
7322 self.owned_locks(locking.LEVEL_NODE_RES))
7324 to_skip = []
7325 mods = [] # keeps track of needed changes
7327 for idx, disk in enumerate(instance.disks):
7328 try:
7329 changes = self.disks[idx]
7330 except KeyError:
7331 # Disk should not be recreated
7332 to_skip.append(idx)
7333 continue
7335 # update secondaries for disks, if needed
7336 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7337 # need to update the nodes and minors
7338 assert len(self.op.nodes) == 2
7339 assert len(disk.logical_id) == 6 # otherwise disk internals
7341 (_, _, old_port, _, _, old_secret) = disk.logical_id
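# Added note (derived from the unpacking above, not in the original source):
# a DRBD8 logical_id is the 6-tuple
# (node_a, node_b, port, minor_a, minor_b, secret).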
7342 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7343 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7344 new_minors[0], new_minors[1], old_secret)
7345 assert len(disk.logical_id) == len(new_id)
7346 else:
7347 new_id = None
7349 mods.append((idx, new_id, changes))
7351 # now that we have passed all asserts above, we can apply the mods
7352 # in a single run (to avoid partial changes)
7353 for idx, new_id, changes in mods:
7354 disk = instance.disks[idx]
7355 if new_id is not None:
7356 assert disk.dev_type == constants.LD_DRBD8
7357 disk.logical_id = new_id
7358 if changes:
7359 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7360 mode=changes.get(constants.IDISK_MODE, None))
7362 # change primary node, if needed
7363 if self.op.nodes:
7364 instance.primary_node = self.op.nodes[0]
7365 self.LogWarning("Changing the instance's nodes, you will have to"
7366 " remove any disks left on the older nodes manually")
7368 if self.op.nodes:
7369 self.cfg.Update(instance, feedback_fn)
7371 _CreateDisks(self, instance, to_skip=to_skip)
7374 class LUInstanceRename(LogicalUnit):
7375 """Rename an instance.
7378 HPATH = "instance-rename"
7379 HTYPE = constants.HTYPE_INSTANCE
7381 def CheckArguments(self):
7385 if self.op.ip_check and not self.op.name_check:
7386 # TODO: make the ip check more flexible and not depend on the name check
7387 raise errors.OpPrereqError("IP address check requires a name check",
7388 errors.ECODE_INVAL)
7390 def BuildHooksEnv(self):
7391 """Build hooks env.
7393 This runs on master, primary and secondary nodes of the instance.
7395 """
7396 env = _BuildInstanceHookEnvByObject(self, self.instance)
7397 env["INSTANCE_NEW_NAME"] = self.op.new_name
7398 return env
7400 def BuildHooksNodes(self):
7401 """Build hooks nodes.
7404 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7405 return (nl, nl)
7407 def CheckPrereq(self):
7408 """Check prerequisites.
7410 This checks that the instance is in the cluster and is not running.
7413 self.op.instance_name = _ExpandInstanceName(self.cfg,
7414 self.op.instance_name)
7415 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7416 assert instance is not None
7417 _CheckNodeOnline(self, instance.primary_node)
7418 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7419 msg="cannot rename")
7420 self.instance = instance
7422 new_name = self.op.new_name
7423 if self.op.name_check:
7424 hostname = netutils.GetHostname(name=new_name)
7425 if hostname.name != new_name:
7426 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7427 hostname.name)
7428 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7429 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7430 " same as given hostname '%s'") %
7431 (hostname.name, self.op.new_name),
7432 errors.ECODE_NOTUNIQUE)
7433 new_name = self.op.new_name = hostname.name
7434 if (self.op.ip_check and
7435 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7436 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7437 (hostname.ip, new_name),
7438 errors.ECODE_NOTUNIQUE)
7440 instance_list = self.cfg.GetInstanceList()
7441 if new_name in instance_list and new_name != instance.name:
7442 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7443 new_name, errors.ECODE_EXISTS)
7445 def Exec(self, feedback_fn):
7446 """Rename the instance.
7449 inst = self.instance
7450 old_name = inst.name
7452 rename_file_storage = False
7453 if (inst.disk_template in constants.DTS_FILEBASED and
7454 self.op.new_name != inst.name):
7455 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7456 rename_file_storage = True
7458 self.cfg.RenameInstance(inst.name, self.op.new_name)
7459 # Change the instance lock. This is definitely safe while we hold the BGL.
7460 # Otherwise the new lock would have to be added in acquired mode.
7462 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7463 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7465 # re-read the instance from the configuration after rename
7466 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7468 if rename_file_storage:
7469 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7470 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7471 old_file_storage_dir,
7472 new_file_storage_dir)
7473 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7474 " (but the instance has been renamed in Ganeti)" %
7475 (inst.primary_node, old_file_storage_dir,
7476 new_file_storage_dir))
7478 _StartInstanceDisks(self, inst, None)
7479 try:
7480 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7481 old_name, self.op.debug_level)
7482 msg = result.fail_msg
7483 if msg:
7484 msg = ("Could not run OS rename script for instance %s on node %s"
7485 " (but the instance has been renamed in Ganeti): %s" %
7486 (inst.name, inst.primary_node, msg))
7487 self.proc.LogWarning(msg)
7488 finally:
7489 _ShutdownInstanceDisks(self, inst)
7491 return inst.name
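# In day-to-day use this LU is reached through OpInstanceRename, e.g.
# from the command line (illustrative invocation; the flags mirror the
# name/IP checks performed in CheckPrereq above):
#
#   gnt-instance rename --no-ip-check old-name.example.com new-name.example.com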
7494 class LUInstanceRemove(LogicalUnit):
7495 """Remove an instance.
7498 HPATH = "instance-remove"
7499 HTYPE = constants.HTYPE_INSTANCE
7502 def ExpandNames(self):
7503 self._ExpandAndLockInstance()
7504 self.needed_locks[locking.LEVEL_NODE] = []
7505 self.needed_locks[locking.LEVEL_NODE_RES] = []
7506 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7508 def DeclareLocks(self, level):
7509 if level == locking.LEVEL_NODE:
7510 self._LockInstancesNodes()
7511 elif level == locking.LEVEL_NODE_RES:
7513 self.needed_locks[locking.LEVEL_NODE_RES] = \
7514 self.needed_locks[locking.LEVEL_NODE][:]
7516 def BuildHooksEnv(self):
7517 """Build hooks env.
7519 This runs on master, primary and secondary nodes of the instance.
7521 """
7522 env = _BuildInstanceHookEnvByObject(self, self.instance)
7523 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7524 return env
7526 def BuildHooksNodes(self):
7527 """Build hooks nodes.
7530 nl = [self.cfg.GetMasterNode()]
7531 nl_post = list(self.instance.all_nodes) + nl
7532 return (nl, nl_post)
7534 def CheckPrereq(self):
7535 """Check prerequisites.
7537 This checks that the instance is in the cluster.
7539 """
7540 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7541 assert self.instance is not None, \
7542 "Cannot retrieve locked instance %s" % self.op.instance_name
7544 def Exec(self, feedback_fn):
7545 """Remove the instance.
7548 instance = self.instance
7549 logging.info("Shutting down instance %s on node %s",
7550 instance.name, instance.primary_node)
7552 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7553 self.op.shutdown_timeout)
7554 msg = result.fail_msg
7555 if msg:
7556 if self.op.ignore_failures:
7557 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7558 else:
7559 raise errors.OpExecError("Could not shutdown instance %s on"
7560 " node %s: %s" %
7561 (instance.name, instance.primary_node, msg))
7563 assert (self.owned_locks(locking.LEVEL_NODE) ==
7564 self.owned_locks(locking.LEVEL_NODE_RES))
7565 assert not (set(instance.all_nodes) -
7566 self.owned_locks(locking.LEVEL_NODE)), \
7567 "Not owning correct locks"
7569 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7572 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7573 """Utility function to remove an instance.
7576 logging.info("Removing block devices for instance %s", instance.name)
7578 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7579 if not ignore_failures:
7580 raise errors.OpExecError("Can't remove instance's disks")
7581 feedback_fn("Warning: can't remove instance's disks")
7583 logging.info("Removing instance %s out of cluster config", instance.name)
7585 lu.cfg.RemoveInstance(instance.name)
7587 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7588 "Instance lock removal conflict"
7590 # Remove lock for the instance
7591 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7594 class LUInstanceQuery(NoHooksLU):
7595 """Logical unit for querying instances.
7598 # pylint: disable=W0142
7601 def CheckArguments(self):
7602 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7603 self.op.output_fields, self.op.use_locking)
7605 def ExpandNames(self):
7606 self.iq.ExpandNames(self)
7608 def DeclareLocks(self, level):
7609 self.iq.DeclareLocks(self, level)
7611 def Exec(self, feedback_fn):
7612 return self.iq.OldStyleQuery(self)
7615 class LUInstanceFailover(LogicalUnit):
7616 """Failover an instance.
7619 HPATH = "instance-failover"
7620 HTYPE = constants.HTYPE_INSTANCE
7623 def CheckArguments(self):
7624 """Check the arguments.
7627 self.iallocator = getattr(self.op, "iallocator", None)
7628 self.target_node = getattr(self.op, "target_node", None)
7630 def ExpandNames(self):
7631 self._ExpandAndLockInstance()
7633 if self.op.target_node is not None:
7634 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7636 self.needed_locks[locking.LEVEL_NODE] = []
7637 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7639 self.needed_locks[locking.LEVEL_NODE_RES] = []
7640 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7642 ignore_consistency = self.op.ignore_consistency
7643 shutdown_timeout = self.op.shutdown_timeout
7644 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7645 cleanup=False,
7646 failover=True,
7647 ignore_consistency=ignore_consistency,
7648 shutdown_timeout=shutdown_timeout,
7649 ignore_ipolicy=self.op.ignore_ipolicy)
7650 self.tasklets = [self._migrater]
7652 def DeclareLocks(self, level):
7653 if level == locking.LEVEL_NODE:
7654 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7655 if instance.disk_template in constants.DTS_EXT_MIRROR:
7656 if self.op.target_node is None:
7657 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7658 else:
7659 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7660 self.op.target_node]
7661 del self.recalculate_locks[locking.LEVEL_NODE]
7662 else:
7663 self._LockInstancesNodes()
7664 elif level == locking.LEVEL_NODE_RES:
7666 self.needed_locks[locking.LEVEL_NODE_RES] = \
7667 self.needed_locks[locking.LEVEL_NODE][:]
7669 def BuildHooksEnv(self):
7670 """Build hooks env.
7672 This runs on master, primary and secondary nodes of the instance.
7674 """
7675 instance = self._migrater.instance
7676 source_node = instance.primary_node
7677 target_node = self.op.target_node
7678 env = {
7679 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7680 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7681 "OLD_PRIMARY": source_node,
7682 "NEW_PRIMARY": target_node,
7683 }
7685 if instance.disk_template in constants.DTS_INT_MIRROR:
7686 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7687 env["NEW_SECONDARY"] = source_node
7688 else:
7689 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7691 env.update(_BuildInstanceHookEnvByObject(self, instance))
7693 return env
7695 def BuildHooksNodes(self):
7696 """Build hooks nodes.
7699 instance = self._migrater.instance
7700 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7701 return (nl, nl + [instance.primary_node])
7704 class LUInstanceMigrate(LogicalUnit):
7705 """Migrate an instance.
7707 This is migration without shutting down, compared to the failover,
7708 which is done with shutdown.
7710 """
7711 HPATH = "instance-migrate"
7712 HTYPE = constants.HTYPE_INSTANCE
7715 def ExpandNames(self):
7716 self._ExpandAndLockInstance()
7718 if self.op.target_node is not None:
7719 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7721 self.needed_locks[locking.LEVEL_NODE] = []
7722 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7724 self.needed_locks[locking.LEVEL_NODE_RES] = []
7725 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7727 self._migrater = \
7728 TLMigrateInstance(self, self.op.instance_name,
7729 cleanup=self.op.cleanup,
7730 failover=False,
7731 fallback=self.op.allow_failover,
7732 allow_runtime_changes=self.op.allow_runtime_changes,
7733 ignore_ipolicy=self.op.ignore_ipolicy)
7734 self.tasklets = [self._migrater]
7736 def DeclareLocks(self, level):
7737 if level == locking.LEVEL_NODE:
7738 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7739 if instance.disk_template in constants.DTS_EXT_MIRROR:
7740 if self.op.target_node is None:
7741 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7742 else:
7743 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7744 self.op.target_node]
7745 del self.recalculate_locks[locking.LEVEL_NODE]
7746 else:
7747 self._LockInstancesNodes()
7748 elif level == locking.LEVEL_NODE_RES:
7750 self.needed_locks[locking.LEVEL_NODE_RES] = \
7751 self.needed_locks[locking.LEVEL_NODE][:]
7753 def BuildHooksEnv(self):
7754 """Build hooks env.
7756 This runs on master, primary and secondary nodes of the instance.
7758 """
7759 instance = self._migrater.instance
7760 source_node = instance.primary_node
7761 target_node = self.op.target_node
7762 env = _BuildInstanceHookEnvByObject(self, instance)
7763 env.update({
7764 "MIGRATE_LIVE": self._migrater.live,
7765 "MIGRATE_CLEANUP": self.op.cleanup,
7766 "OLD_PRIMARY": source_node,
7767 "NEW_PRIMARY": target_node,
7768 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7769 })
7771 if instance.disk_template in constants.DTS_INT_MIRROR:
7772 env["OLD_SECONDARY"] = target_node
7773 env["NEW_SECONDARY"] = source_node
7774 else:
7775 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7777 return env
7779 def BuildHooksNodes(self):
7780 """Build hooks nodes.
7783 instance = self._migrater.instance
7784 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7785 return (nl, nl + [instance.primary_node])
7788 class LUInstanceMove(LogicalUnit):
7789 """Move an instance by data-copying.
7792 HPATH = "instance-move"
7793 HTYPE = constants.HTYPE_INSTANCE
7796 def ExpandNames(self):
7797 self._ExpandAndLockInstance()
7798 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7799 self.op.target_node = target_node
7800 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7801 self.needed_locks[locking.LEVEL_NODE_RES] = []
7802 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7804 def DeclareLocks(self, level):
7805 if level == locking.LEVEL_NODE:
7806 self._LockInstancesNodes(primary_only=True)
7807 elif level == locking.LEVEL_NODE_RES:
7809 self.needed_locks[locking.LEVEL_NODE_RES] = \
7810 self.needed_locks[locking.LEVEL_NODE][:]
7812 def BuildHooksEnv(self):
7813 """Build hooks env.
7815 This runs on master, primary and secondary nodes of the instance.
7817 """
7818 env = {
7819 "TARGET_NODE": self.op.target_node,
7820 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7821 }
7822 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7823 return env
7825 def BuildHooksNodes(self):
7826 """Build hooks nodes.
7830 self.cfg.GetMasterNode(),
7831 self.instance.primary_node,
7832 self.op.target_node,
7836 def CheckPrereq(self):
7837 """Check prerequisites.
7839 This checks that the instance is in the cluster.
7841 """
7842 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7843 assert self.instance is not None, \
7844 "Cannot retrieve locked instance %s" % self.op.instance_name
7846 node = self.cfg.GetNodeInfo(self.op.target_node)
7847 assert node is not None, \
7848 "Cannot retrieve locked node %s" % self.op.target_node
7850 self.target_node = target_node = node.name
7852 if target_node == instance.primary_node:
7853 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7854 (instance.name, target_node),
7855 errors.ECODE_STATE)
7857 bep = self.cfg.GetClusterInfo().FillBE(instance)
7859 for idx, dsk in enumerate(instance.disks):
7860 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7861 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7862 " cannot copy" % idx, errors.ECODE_STATE)
7864 _CheckNodeOnline(self, target_node)
7865 _CheckNodeNotDrained(self, target_node)
7866 _CheckNodeVmCapable(self, target_node)
7867 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7868 self.cfg.GetNodeGroup(node.group))
7869 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7870 ignore=self.op.ignore_ipolicy)
7872 if instance.admin_state == constants.ADMINST_UP:
7873 # check memory requirements on the secondary node
7874 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7875 instance.name, bep[constants.BE_MAXMEM],
7876 instance.hypervisor)
7877 else:
7878 self.LogInfo("Not checking memory on the secondary node as"
7879 " instance will not be started")
7881 # check bridge existence
7882 _CheckInstanceBridgesExist(self, instance, node=target_node)
7884 def Exec(self, feedback_fn):
7885 """Move an instance.
7887 The move is done by shutting it down on its present node, copying
7888 the data over (slow) and starting it on the new node.
7890 """
7891 instance = self.instance
7893 source_node = instance.primary_node
7894 target_node = self.target_node
7896 self.LogInfo("Shutting down instance %s on source node %s",
7897 instance.name, source_node)
7899 assert (self.owned_locks(locking.LEVEL_NODE) ==
7900 self.owned_locks(locking.LEVEL_NODE_RES))
7902 result = self.rpc.call_instance_shutdown(source_node, instance,
7903 self.op.shutdown_timeout)
7904 msg = result.fail_msg
7905 if msg:
7906 if self.op.ignore_consistency:
7907 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7908 " Proceeding anyway. Please make sure node"
7909 " %s is down. Error details: %s",
7910 instance.name, source_node, source_node, msg)
7911 else:
7912 raise errors.OpExecError("Could not shutdown instance %s on"
7913 " node %s: %s" %
7914 (instance.name, source_node, msg))
7916 # create the target disks
7917 try:
7918 _CreateDisks(self, instance, target_node=target_node)
7919 except errors.OpExecError:
7920 self.LogWarning("Device creation failed, reverting...")
7921 try:
7922 _RemoveDisks(self, instance, target_node=target_node)
7923 finally:
7924 self.cfg.ReleaseDRBDMinors(instance.name)
7925 raise
7927 cluster_name = self.cfg.GetClusterInfo().cluster_name
7929 errs = []
7930 # activate, get path, copy the data over
7931 for idx, disk in enumerate(instance.disks):
7932 self.LogInfo("Copying data for disk %d", idx)
7933 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7934 instance.name, True, idx)
7935 if result.fail_msg:
7936 self.LogWarning("Can't assemble newly created disk %d: %s",
7937 idx, result.fail_msg)
7938 errs.append(result.fail_msg)
7939 break
7940 dev_path = result.payload
7941 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7942 target_node, dev_path,
7943 cluster_name)
7944 if result.fail_msg:
7945 self.LogWarning("Can't copy data over for disk %d: %s",
7946 idx, result.fail_msg)
7947 errs.append(result.fail_msg)
7948 break
7951 self.LogWarning("Some disks failed to copy, aborting")
7953 _RemoveDisks(self, instance, target_node=target_node)
7955 self.cfg.ReleaseDRBDMinors(instance.name)
7956 raise errors.OpExecError("Errors during disk copy: %s" %
7959 instance.primary_node = target_node
7960 self.cfg.Update(instance, feedback_fn)
7962 self.LogInfo("Removing the disks on the original node")
7963 _RemoveDisks(self, instance, target_node=source_node)
7965 # Only start the instance if it's marked as up
7966 if instance.admin_state == constants.ADMINST_UP:
7967 self.LogInfo("Starting instance %s on node %s",
7968 instance.name, target_node)
7970 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7971 ignore_secondaries=True)
7972 if not disks_ok:
7973 _ShutdownInstanceDisks(self, instance)
7974 raise errors.OpExecError("Can't activate the instance's disks")
7976 result = self.rpc.call_instance_start(target_node,
7977 (instance, None, None), False)
7978 msg = result.fail_msg
7979 if msg:
7980 _ShutdownInstanceDisks(self, instance)
7981 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7982 (instance.name, target_node, msg))
7985 class LUNodeMigrate(LogicalUnit):
7986 """Migrate all instances from a node.
7989 HPATH = "node-migrate"
7990 HTYPE = constants.HTYPE_NODE
7993 def CheckArguments(self):
7994 pass
7996 def ExpandNames(self):
7997 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7999 self.share_locks = _ShareAll()
8000 self.needed_locks = {
8001 locking.LEVEL_NODE: [self.op.node_name],
8002 }
8004 def BuildHooksEnv(self):
8005 """Build hooks env.
8007 This runs on the master, the primary and all the secondaries.
8009 """
8010 return {
8011 "NODE_NAME": self.op.node_name,
8012 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8013 }
8015 def BuildHooksNodes(self):
8016 """Build hooks nodes.
8019 nl = [self.cfg.GetMasterNode()]
8022 def CheckPrereq(self):
8023 pass
8025 def Exec(self, feedback_fn):
8026 # Prepare jobs for migration instances
8027 allow_runtime_changes = self.op.allow_runtime_changes
8028 jobs = [
8029 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8030 mode=self.op.mode,
8031 live=self.op.live,
8032 iallocator=self.op.iallocator,
8033 target_node=self.op.target_node,
8034 allow_runtime_changes=allow_runtime_changes,
8035 ignore_ipolicy=self.op.ignore_ipolicy)]
8036 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
8037 ]
8039 # TODO: Run iallocator in this opcode and pass correct placement options to
8040 # OpInstanceMigrate. Since other jobs can modify the cluster between
8041 # running the iallocator and the actual migration, a good consistency model
8042 # will have to be found.
8044 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8045 frozenset([self.op.node_name]))
8047 return ResultWithJobs(jobs)
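# The value returned above is a list of single-opcode jobs, one per
# primary instance on the drained node, e.g. (hypothetical instance
# names):
#
#   jobs = [
#     [opcodes.OpInstanceMigrate(instance_name="inst1.example.com", ...)],
#     [opcodes.OpInstanceMigrate(instance_name="inst2.example.com", ...)],
#     ]
#
# Keeping each migration in its own job lets them be scheduled and fail
# independently of each other.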
8050 class TLMigrateInstance(Tasklet):
8051 """Tasklet class for instance migration.
8054 @ivar live: whether the migration will be done live or non-live;
8055 this variable is initalized only after CheckPrereq has run
8056 @type cleanup: boolean
8057 @ivar cleanup: Wheater we cleanup from a failed migration
8058 @type iallocator: string
8059 @ivar iallocator: The iallocator used to determine target_node
8060 @type target_node: string
8061 @ivar target_node: If given, the target_node to reallocate the instance to
8062 @type failover: boolean
8063 @ivar failover: Whether operation results in failover or migration
8064 @type fallback: boolean
8065 @ivar fallback: Whether fallback to failover is allowed if migration not
8066 possible
8067 @type ignore_consistency: boolean
8068 @ivar ignore_consistency: Whether we should ignore consistency between source
8069 and target node
8070 @type shutdown_timeout: int
8071 @ivar shutdown_timeout: In case of failover timeout of the shutdown
8072 @type ignore_ipolicy: bool
8073 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8075 """
8078 _MIGRATION_POLL_INTERVAL = 1 # seconds
8079 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8081 def __init__(self, lu, instance_name, cleanup=False,
8082 failover=False, fallback=False,
8083 ignore_consistency=False,
8084 allow_runtime_changes=True,
8085 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
8086 ignore_ipolicy=False):
8087 """Initializes this class.
8090 Tasklet.__init__(self, lu)
8093 self.instance_name = instance_name
8094 self.cleanup = cleanup
8095 self.live = False # will be overridden later
8096 self.failover = failover
8097 self.fallback = fallback
8098 self.ignore_consistency = ignore_consistency
8099 self.shutdown_timeout = shutdown_timeout
8100 self.ignore_ipolicy = ignore_ipolicy
8101 self.allow_runtime_changes = allow_runtime_changes
8103 def CheckPrereq(self):
8104 """Check prerequisites.
8106 This checks that the instance is in the cluster.
8108 """
8109 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8110 instance = self.cfg.GetInstanceInfo(instance_name)
8111 assert instance is not None
8112 self.instance = instance
8113 cluster = self.cfg.GetClusterInfo()
8115 if (not self.cleanup and
8116 not instance.admin_state == constants.ADMINST_UP and
8117 not self.failover and self.fallback):
8118 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8119 " switching to failover")
8120 self.failover = True
8122 if instance.disk_template not in constants.DTS_MIRRORED:
8123 if self.failover:
8124 text = "failovers"
8125 else:
8126 text = "migrations"
8127 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8128 " %s" % (instance.disk_template, text),
8129 errors.ECODE_STATE)
8131 if instance.disk_template in constants.DTS_EXT_MIRROR:
8132 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8134 if self.lu.op.iallocator:
8135 self._RunAllocator()
8136 else:
8137 # We set self.target_node as it is required by
8138 # BuildHooksEnv
8139 self.target_node = self.lu.op.target_node
8141 # Check that the target node is correct in terms of instance policy
8142 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8143 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8144 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8145 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8146 ignore=self.ignore_ipolicy)
8148 # self.target_node is already populated, either directly or by the
8149 # iallocator run
8150 target_node = self.target_node
8151 if self.target_node == instance.primary_node:
8152 raise errors.OpPrereqError("Cannot migrate instance %s"
8153 " to its primary (%s)" %
8154 (instance.name, instance.primary_node),
8155 errors.ECODE_STATE)
8157 if len(self.lu.tasklets) == 1:
8158 # It is safe to release locks only when we're the only tasklet
8160 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8161 keep=[instance.primary_node, self.target_node])
8163 else:
8164 secondary_nodes = instance.secondary_nodes
8165 if not secondary_nodes:
8166 raise errors.ConfigurationError("No secondary node but using"
8167 " %s disk template" %
8168 instance.disk_template)
8169 target_node = secondary_nodes[0]
8170 if self.lu.op.iallocator or (self.lu.op.target_node and
8171 self.lu.op.target_node != target_node):
8172 if self.failover:
8173 text = "failed over"
8174 else:
8175 text = "migrated"
8176 raise errors.OpPrereqError("Instances with disk template %s cannot"
8177 " be %s to arbitrary nodes"
8178 " (neither an iallocator nor a target"
8179 " node can be passed)" %
8180 (instance.disk_template, text),
8181 errors.ECODE_INVAL)
8182 nodeinfo = self.cfg.GetNodeInfo(target_node)
8183 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8184 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8185 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8186 ignore=self.ignore_ipolicy)
8188 i_be = cluster.FillBE(instance)
8190 # check memory requirements on the secondary node
8191 if (not self.cleanup and
8192 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8193 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8194 "migrating instance %s" %
8196 i_be[constants.BE_MINMEM],
8197 instance.hypervisor)
8199 self.lu.LogInfo("Not checking memory on the secondary node as"
8200 " instance will not be started")
8202 # check if failover must be forced instead of migration
8203 if (not self.cleanup and not self.failover and
8204 i_be[constants.BE_ALWAYS_FAILOVER]):
8205 if self.fallback:
8206 self.lu.LogInfo("Instance configured to always failover; fallback"
8207 " to failover")
8208 self.failover = True
8209 else:
8210 raise errors.OpPrereqError("This instance has been configured to"
8211 " always failover, please allow failover",
8212 errors.ECODE_STATE)
8214 # check bridge existence
8215 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8217 if not self.cleanup:
8218 _CheckNodeNotDrained(self.lu, target_node)
8219 if not self.failover:
8220 result = self.rpc.call_instance_migratable(instance.primary_node,
8221 instance)
8222 if result.fail_msg and self.fallback:
8223 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8224 " failover")
8225 self.failover = True
8226 else:
8227 result.Raise("Can't migrate, please use failover",
8228 prereq=True, ecode=errors.ECODE_STATE)
8230 assert not (self.failover and self.cleanup)
8232 if not self.failover:
8233 if self.lu.op.live is not None and self.lu.op.mode is not None:
8234 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8235 " parameters are accepted",
8237 if self.lu.op.live is not None:
8239 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8241 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8242 # reset the 'live' parameter to None so that repeated
8243 # invocations of CheckPrereq do not raise an exception
8244 self.lu.op.live = None
8245 elif self.lu.op.mode is None:
8246 # read the default value from the hypervisor
8247 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8248 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8250 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8251 else:
8252 # Failover is never live
8253 self.live = False
8255 if not (self.failover or self.cleanup):
8256 remote_info = self.rpc.call_instance_info(instance.primary_node,
8257 instance.name,
8258 instance.hypervisor)
8259 remote_info.Raise("Error checking instance on node %s" %
8260 instance.primary_node)
8261 instance_running = bool(remote_info.payload)
8262 if instance_running:
8263 self.current_mem = int(remote_info.payload["memory"])
8265 def _RunAllocator(self):
8266 """Run the allocator based on input opcode.
8269 # FIXME: add a self.ignore_ipolicy option
8270 ial = IAllocator(self.cfg, self.rpc,
8271 mode=constants.IALLOCATOR_MODE_RELOC,
8272 name=self.instance_name,
8273 relocate_from=[self.instance.primary_node],
8274 )
8276 ial.Run(self.lu.op.iallocator)
8278 if not ial.success:
8279 raise errors.OpPrereqError("Can't compute nodes using"
8280 " iallocator '%s': %s" %
8281 (self.lu.op.iallocator, ial.info),
8282 errors.ECODE_NORES)
8283 if len(ial.result) != ial.required_nodes:
8284 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8285 " of nodes (%s), required %s" %
8286 (self.lu.op.iallocator, len(ial.result),
8287 ial.required_nodes), errors.ECODE_FAULT)
8288 self.target_node = ial.result[0]
8289 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8290 self.instance_name, self.lu.op.iallocator,
8291 utils.CommaJoin(ial.result))
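# For a relocation request the allocator must return exactly
# ial.required_nodes (here 1) node names; a successful run would look
# roughly like this (hypothetical values):
#
#   ial.success        -> True
#   ial.required_nodes -> 1
#   ial.result         -> ["node3.example.com"]
#
# after which ial.result[0] becomes self.target_node.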
8293 def _WaitUntilSync(self):
8294 """Poll with custom rpc for disk sync.
8296 This uses our own step-based rpc call.
8299 self.feedback_fn("* wait until resync is done")
8303 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8305 (self.instance.disks,
8308 for node, nres in result.items():
8309 nres.Raise("Cannot resync disks on node %s" % node)
8310 node_done, node_percent = nres.payload
8311 all_done = all_done and node_done
8312 if node_percent is not None:
8313 min_percent = min(min_percent, node_percent)
8315 if min_percent < 100:
8316 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8319 def _EnsureSecondary(self, node):
8320 """Demote a node to secondary.
8323 self.feedback_fn("* switching node %s to secondary mode" % node)
8325 for dev in self.instance.disks:
8326 self.cfg.SetDiskID(dev, node)
8328 result = self.rpc.call_blockdev_close(node, self.instance.name,
8329 self.instance.disks)
8330 result.Raise("Cannot change disk to secondary on node %s" % node)
8332 def _GoStandalone(self):
8333 """Disconnect from the network.
8336 self.feedback_fn("* changing into standalone mode")
8337 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8338 self.instance.disks)
8339 for node, nres in result.items():
8340 nres.Raise("Cannot disconnect disks node %s" % node)
8342 def _GoReconnect(self, multimaster):
8343 """Reconnect to the network.
8349 msg = "single-master"
8350 self.feedback_fn("* changing disks into %s mode" % msg)
8351 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8352 (self.instance.disks, self.instance),
8353 self.instance.name, multimaster)
8354 for node, nres in result.items():
8355 nres.Raise("Cannot change disks config on node %s" % node)
8357 def _ExecCleanup(self):
8358 """Try to cleanup after a failed migration.
8360 The cleanup is done by:
8361 - check that the instance is running only on one node
8362 (and update the config if needed)
8363 - change disks on its secondary node to secondary
8364 - wait until disks are fully synchronized
8365 - disconnect from the network
8366 - change disks into single-master mode
8367 - wait again until disks are fully synchronized
8369 """
8370 instance = self.instance
8371 target_node = self.target_node
8372 source_node = self.source_node
8374 # check running on only one node
8375 self.feedback_fn("* checking where the instance actually runs"
8376 " (if this hangs, the hypervisor might be in"
8378 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8379 for node, result in ins_l.items():
8380 result.Raise("Can't contact node %s" % node)
8382 runningon_source = instance.name in ins_l[source_node].payload
8383 runningon_target = instance.name in ins_l[target_node].payload
8385 if runningon_source and runningon_target:
8386 raise errors.OpExecError("Instance seems to be running on two nodes,"
8387 " or the hypervisor is confused; you will have"
8388 " to ensure manually that it runs only on one"
8389 " and restart this operation")
8391 if not (runningon_source or runningon_target):
8392 raise errors.OpExecError("Instance does not seem to be running at all;"
8393 " in this case it's safer to repair by"
8394 " running 'gnt-instance stop' to ensure disk"
8395 " shutdown, and then restarting it")
8397 if runningon_target:
8398 # the migration has actually succeeded, we need to update the config
8399 self.feedback_fn("* instance running on secondary node (%s),"
8400 " updating config" % target_node)
8401 instance.primary_node = target_node
8402 self.cfg.Update(instance, self.feedback_fn)
8403 demoted_node = source_node
8405 self.feedback_fn("* instance confirmed to be running on its"
8406 " primary node (%s)" % source_node)
8407 demoted_node = target_node
8409 if instance.disk_template in constants.DTS_INT_MIRROR:
8410 self._EnsureSecondary(demoted_node)
8411 try:
8412 self._WaitUntilSync()
8413 except errors.OpExecError:
8414 # we ignore here errors, since if the device is standalone, it
8415 # won't be able to sync
8416 pass
8417 self._GoStandalone()
8418 self._GoReconnect(False)
8419 self._WaitUntilSync()
8421 self.feedback_fn("* done")
8423 def _RevertDiskStatus(self):
8424 """Try to revert the disk status after a failed migration.
8427 target_node = self.target_node
8428 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8432 self._EnsureSecondary(target_node)
8433 self._GoStandalone()
8434 self._GoReconnect(False)
8435 self._WaitUntilSync()
8436 except errors.OpExecError, err:
8437 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8438 " please try to recover the instance manually;"
8439 " error '%s'" % str(err))
8441 def _AbortMigration(self):
8442 """Call the hypervisor code to abort a started migration.
8445 instance = self.instance
8446 target_node = self.target_node
8447 source_node = self.source_node
8448 migration_info = self.migration_info
8450 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8451 instance,
8452 migration_info,
8453 False)
8454 abort_msg = abort_result.fail_msg
8455 if abort_msg:
8456 logging.error("Aborting migration failed on target node %s: %s",
8457 target_node, abort_msg)
8458 # Don't raise an exception here, as we still have to try to revert the
8459 # disk status, even if this step failed.
8461 abort_result = self.rpc.call_instance_finalize_migration_src(
8462 source_node, instance, False, self.live)
8463 abort_msg = abort_result.fail_msg
8464 if abort_msg:
8465 logging.error("Aborting migration failed on source node %s: %s",
8466 source_node, abort_msg)
8468 def _ExecMigration(self):
8469 """Migrate an instance.
8471 The migrate is done by:
8472 - change the disks into dual-master mode
8473 - wait until disks are fully synchronized again
8474 - migrate the instance
8475 - change disks on the new secondary node (the old primary) to secondary
8476 - wait until disks are fully synchronized
8477 - change disks into single-master mode
8479 """
8480 instance = self.instance
8481 target_node = self.target_node
8482 source_node = self.source_node
8484 # Check for hypervisor version mismatch and warn the user.
8485 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8486 None, [self.instance.hypervisor])
8487 for ninfo in nodeinfo.values():
8488 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8490 (_, _, (src_info, )) = nodeinfo[source_node].payload
8491 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8493 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8494 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8495 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8496 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8497 if src_version != dst_version:
8498 self.feedback_fn("* warning: hypervisor version mismatch between"
8499 " source (%s) and target (%s) node" %
8500 (src_version, dst_version))
8502 self.feedback_fn("* checking disk consistency between source and target")
8503 for (idx, dev) in enumerate(instance.disks):
8504 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8505 raise errors.OpExecError("Disk %s is degraded or not fully"
8506 " synchronized on target node,"
8507 " aborting migration" % idx)
8509 if self.current_mem > self.tgt_free_mem:
8510 if not self.allow_runtime_changes:
8511 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8512 " free memory to fit instance %s on target"
8513 " node %s (have %dMB, need %dMB)" %
8514 (instance.name, target_node,
8515 self.tgt_free_mem, self.current_mem))
8516 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8517 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8518 instance,
8519 self.tgt_free_mem)
8520 rpcres.Raise("Cannot modify instance runtime memory")
8522 # First get the migration information from the remote node
8523 result = self.rpc.call_migration_info(source_node, instance)
8524 msg = result.fail_msg
8525 if msg:
8526 log_err = ("Failed fetching source migration information from %s: %s" %
8527 (source_node, msg))
8528 logging.error(log_err)
8529 raise errors.OpExecError(log_err)
8531 self.migration_info = migration_info = result.payload
8533 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8534 # Then switch the disks to master/master mode
8535 self._EnsureSecondary(target_node)
8536 self._GoStandalone()
8537 self._GoReconnect(True)
8538 self._WaitUntilSync()
8540 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8541 result = self.rpc.call_accept_instance(target_node,
8542 instance,
8543 migration_info,
8544 self.nodes_ip[target_node])
8546 msg = result.fail_msg
8547 if msg:
8548 logging.error("Instance pre-migration failed, trying to revert"
8549 " disk status: %s", msg)
8550 self.feedback_fn("Pre-migration failed, aborting")
8551 self._AbortMigration()
8552 self._RevertDiskStatus()
8553 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8554 (instance.name, msg))
8556 self.feedback_fn("* migrating instance to %s" % target_node)
8557 result = self.rpc.call_instance_migrate(source_node, instance,
8558 self.nodes_ip[target_node],
8559 self.live)
8560 msg = result.fail_msg
8561 if msg:
8562 logging.error("Instance migration failed, trying to revert"
8563 " disk status: %s", msg)
8564 self.feedback_fn("Migration failed, aborting")
8565 self._AbortMigration()
8566 self._RevertDiskStatus()
8567 raise errors.OpExecError("Could not migrate instance %s: %s" %
8568 (instance.name, msg))
8570 self.feedback_fn("* starting memory transfer")
8571 last_feedback = time.time()
8572 while True:
8573 result = self.rpc.call_instance_get_migration_status(source_node,
8574 instance)
8575 msg = result.fail_msg
8576 ms = result.payload # MigrationStatus instance
8577 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8578 logging.error("Instance migration failed, trying to revert"
8579 " disk status: %s", msg)
8580 self.feedback_fn("Migration failed, aborting")
8581 self._AbortMigration()
8582 self._RevertDiskStatus()
8583 raise errors.OpExecError("Could not migrate instance %s: %s" %
8584 (instance.name, msg))
8586 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8587 self.feedback_fn("* memory transfer complete")
8590 if (utils.TimeoutExpired(last_feedback,
8591 self._MIGRATION_FEEDBACK_INTERVAL) and
8592 ms.transferred_ram is not None):
8593 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8594 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8595 last_feedback = time.time()
8597 time.sleep(self._MIGRATION_POLL_INTERVAL)
8599 result = self.rpc.call_instance_finalize_migration_src(source_node,
8600 instance,
8601 True,
8602 self.live)
8603 msg = result.fail_msg
8604 if msg:
8605 logging.error("Instance migration succeeded, but finalization failed"
8606 " on the source node: %s", msg)
8607 raise errors.OpExecError("Could not finalize instance migration: %s" %
8610 instance.primary_node = target_node
8612 # distribute new instance config to the other nodes
8613 self.cfg.Update(instance, self.feedback_fn)
8615 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8616 instance,
8617 migration_info,
8618 True)
8619 msg = result.fail_msg
8620 if msg:
8621 logging.error("Instance migration succeeded, but finalization failed"
8622 " on the target node: %s", msg)
8623 raise errors.OpExecError("Could not finalize instance migration: %s" %
8626 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8627 self._EnsureSecondary(source_node)
8628 self._WaitUntilSync()
8629 self._GoStandalone()
8630 self._GoReconnect(False)
8631 self._WaitUntilSync()
8633 # If the instance's disk template is `rbd' and there was a successful
8634 # migration, unmap the device from the source node.
8635 if self.instance.disk_template == constants.DT_RBD:
8636 disks = _ExpandCheckDisks(instance, instance.disks)
8637 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8639 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8640 msg = result.fail_msg
8642 logging.error("Migration was successful, but couldn't unmap the"
8643 " block device %s on source node %s: %s",
8644 disk.iv_name, source_node, msg)
8645 logging.error("You need to unmap the device %s manually on %s",
8646 disk.iv_name, source_node)
8648 self.feedback_fn("* done")
8650 def _ExecFailover(self):
8651 """Failover an instance.
8653 The failover is done by shutting it down on its present node and
8654 starting it on the secondary.
8656 """
8657 instance = self.instance
8658 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8660 source_node = instance.primary_node
8661 target_node = self.target_node
8663 if instance.admin_state == constants.ADMINST_UP:
8664 self.feedback_fn("* checking disk consistency between source and target")
8665 for (idx, dev) in enumerate(instance.disks):
8666 # for drbd, these are drbd over lvm
8667 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8668 False):
8669 if primary_node.offline:
8670 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8671 " target node %s" %
8672 (primary_node.name, idx, target_node))
8673 elif not self.ignore_consistency:
8674 raise errors.OpExecError("Disk %s is degraded on target node,"
8675 " aborting failover" % idx)
8676 else:
8677 self.feedback_fn("* not checking disk consistency as instance is not"
8678 " running")
8680 self.feedback_fn("* shutting down instance on source node")
8681 logging.info("Shutting down instance %s on node %s",
8682 instance.name, source_node)
8684 result = self.rpc.call_instance_shutdown(source_node, instance,
8685 self.shutdown_timeout)
8686 msg = result.fail_msg
8687 if msg:
8688 if self.ignore_consistency or primary_node.offline:
8689 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8690 " proceeding anyway; please make sure node"
8691 " %s is down; error details: %s",
8692 instance.name, source_node, source_node, msg)
8693 else:
8694 raise errors.OpExecError("Could not shutdown instance %s on"
8695 " node %s: %s" %
8696 (instance.name, source_node, msg))
8698 self.feedback_fn("* deactivating the instance's disks on source node")
8699 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8700 raise errors.OpExecError("Can't shut down the instance's disks")
8702 instance.primary_node = target_node
8703 # distribute new instance config to the other nodes
8704 self.cfg.Update(instance, self.feedback_fn)
8706 # Only start the instance if it's marked as up
8707 if instance.admin_state == constants.ADMINST_UP:
8708 self.feedback_fn("* activating the instance's disks on target node %s" %
8710 logging.info("Starting instance %s on node %s",
8711 instance.name, target_node)
8713 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8714 ignore_secondaries=True)
8715 if not disks_ok:
8716 _ShutdownInstanceDisks(self.lu, instance)
8717 raise errors.OpExecError("Can't activate the instance's disks")
8719 self.feedback_fn("* starting the instance on the target node %s" %
8721 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8723 msg = result.fail_msg
8725 _ShutdownInstanceDisks(self.lu, instance)
8726 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8727 (instance.name, target_node, msg))
8729 def Exec(self, feedback_fn):
8730 """Perform the migration.
8733 self.feedback_fn = feedback_fn
8734 self.source_node = self.instance.primary_node
8736 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8737 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8738 self.target_node = self.instance.secondary_nodes[0]
8739 # Otherwise self.target_node has been populated either
8740 # directly, or through an iallocator.
8742 self.all_nodes = [self.source_node, self.target_node]
8743 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8744 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8747 feedback_fn("Failover instance %s" % self.instance.name)
8748 self._ExecFailover()
8750 feedback_fn("Migrating instance %s" % self.instance.name)
8753 return self._ExecCleanup()
8755 return self._ExecMigration()
8758 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8759 force_open):
8760 """Wrapper around L{_CreateBlockDevInner}.
8762 This method annotates the root device first.
8764 """
8765 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8766 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8767 force_open)
8770 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8771 info, force_open):
8772 """Create a tree of block devices on a given node.
8774 If this device type has to be created on secondaries, create it and
8777 If not, just recurse to children keeping the same 'force' value.
8779 @attention: The device has to be annotated already.
8781 @param lu: the lu on whose behalf we execute
8782 @param node: the node on which to create the device
8783 @type instance: L{objects.Instance}
8784 @param instance: the instance which owns the device
8785 @type device: L{objects.Disk}
8786 @param device: the device to create
8787 @type force_create: boolean
8788 @param force_create: whether to force creation of this device; this
8789 will be changed to True whenever we find a device which has
8790 CreateOnSecondary() attribute
8791 @param info: the extra 'metadata' we should attach to the device
8792 (this will be represented as a LVM tag)
8793 @type force_open: boolean
8794 @param force_open: this parameter will be passed to the
8795 L{backend.BlockdevCreate} function where it specifies
8796 whether we run on primary or not, and it affects both
8797 the child assembly and the device's own Open() execution
8799 """
8800 if device.CreateOnSecondary():
8801 force_create = True
8803 if device.children:
8804 for child in device.children:
8805 _CreateBlockDevInner(lu, node, instance, child, force_create,
8806 info, force_open)
8808 if not force_create:
8809 return
8811 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8814 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8815 """Create a single block device on a given node.
8817 This will not recurse over children of the device, so they must be
8818 created in advance.
8820 @param lu: the lu on whose behalf we execute
8821 @param node: the node on which to create the device
8822 @type instance: L{objects.Instance}
8823 @param instance: the instance which owns the device
8824 @type device: L{objects.Disk}
8825 @param device: the device to create
8826 @param info: the extra 'metadata' we should attach to the device
8827 (this will be represented as a LVM tag)
8828 @type force_open: boolean
8829 @param force_open: this parameter will be passed to the
8830 L{backend.BlockdevCreate} function where it specifies
8831 whether we run on primary or not, and it affects both
8832 the child assembly and the device's own Open() execution
8834 """
8835 lu.cfg.SetDiskID(device, node)
8836 result = lu.rpc.call_blockdev_create(node, device, device.size,
8837 instance.name, force_open, info)
8838 result.Raise("Can't create block device %s on"
8839 " node %s for instance %s" % (device, node, instance.name))
8840 if device.physical_id is None:
8841 device.physical_id = result.payload
8844 def _GenerateUniqueNames(lu, exts):
8845 """Generate a suitable LV name.
8847 This will generate a logical volume name for the given instance.
8849 """
8850 results = []
8851 for val in exts:
8852 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8853 results.append("%s%s" % (new_id, val))
8854 return results
8857 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8858 iv_name, p_minor, s_minor):
8859 """Generate a drbd8 device complete with its children.
8862 assert len(vgnames) == len(names) == 2
8863 port = lu.cfg.AllocatePort()
8864 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8866 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8867 logical_id=(vgnames[0], names[0]),
8868 params={})
8869 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8870 logical_id=(vgnames[1], names[1]),
8871 params={})
8872 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8873 logical_id=(primary, secondary, port,
8874 p_minor, s_minor,
8875 shared_secret),
8876 children=[dev_data, dev_meta],
8877 iv_name=iv_name, params={})
8878 return drbd_dev
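# The returned object is a small device tree: one LD_DRBD8 device whose
# children are the data LV and a DRBD_META_SIZE metadata LV, roughly
# (hypothetical names, port and minors):
#
#   LD_DRBD8  logical_id=(primary, secondary, 11000, 0, 0, shared_secret)
#     +- LD_LV  logical_id=("xenvg", "<uuid>.disk0_data")  size=size
#     +- LD_LV  logical_id=("xenvg", "<uuid>.disk0_meta")  size=DRBD_META_SIZE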
8881 _DISK_TEMPLATE_NAME_PREFIX = {
8882 constants.DT_PLAIN: "",
8883 constants.DT_RBD: ".rbd",
8887 _DISK_TEMPLATE_DEVICE_TYPE = {
8888 constants.DT_PLAIN: constants.LD_LV,
8889 constants.DT_FILE: constants.LD_FILE,
8890 constants.DT_SHARED_FILE: constants.LD_FILE,
8891 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8892 constants.DT_RBD: constants.LD_RBD,
8893 }
8896 def _GenerateDiskTemplate(
8897 lu, template_name, instance_name, primary_node, secondary_nodes,
8898 disk_info, file_storage_dir, file_driver, base_index,
8899 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8900 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8901 """Generate the entire disk layout for a given template type.
8904 #TODO: compute space requirements
8906 vgname = lu.cfg.GetVGName()
8907 disk_count = len(disk_info)
8910 if template_name == constants.DT_DISKLESS:
8912 elif template_name == constants.DT_DRBD8:
8913 if len(secondary_nodes) != 1:
8914 raise errors.ProgrammerError("Wrong template configuration")
8915 remote_node = secondary_nodes[0]
8916 minors = lu.cfg.AllocateDRBDMinor(
8917 [primary_node, remote_node] * len(disk_info), instance_name)
8919 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8920 full_disk_params)
8921 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8923 names = []
8924 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8925 for i in range(disk_count)]):
8926 names.append(lv_prefix + "_data")
8927 names.append(lv_prefix + "_meta")
8928 for idx, disk in enumerate(disk_info):
8929 disk_index = idx + base_index
8930 data_vg = disk.get(constants.IDISK_VG, vgname)
8931 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8932 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8933 disk[constants.IDISK_SIZE],
8934 [data_vg, meta_vg],
8935 names[idx * 2:idx * 2 + 2],
8936 "disk/%d" % disk_index,
8937 minors[idx * 2], minors[idx * 2 + 1])
8938 disk_dev.mode = disk[constants.IDISK_MODE]
8939 disks.append(disk_dev)
8940 else:
8941 if secondary_nodes:
8942 raise errors.ProgrammerError("Wrong template configuration")
8944 if template_name == constants.DT_FILE:
8945 _req_file_storage()
8946 elif template_name == constants.DT_SHARED_FILE:
8947 _req_shr_file_storage()
8949 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8950 if name_prefix is None:
8951 names = None
8952 else:
8953 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8954 (name_prefix, base_index + i)
8955 for i in range(disk_count)])
8957 if template_name == constants.DT_PLAIN:
8958 def logical_id_fn(idx, _, disk):
8959 vg = disk.get(constants.IDISK_VG, vgname)
8960 return (vg, names[idx])
8961 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8962 logical_id_fn = \
8963 lambda _, disk_index, disk: (file_driver,
8964 "%s/disk%d" % (file_storage_dir,
8965 disk_index))
8966 elif template_name == constants.DT_BLOCK:
8967 logical_id_fn = \
8968 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8969 disk[constants.IDISK_ADOPT])
8970 elif template_name == constants.DT_RBD:
8971 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8973 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8975 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8977 for idx, disk in enumerate(disk_info):
8978 disk_index = idx + base_index
8979 size = disk[constants.IDISK_SIZE]
8980 feedback_fn("* disk %s, size %s" %
8981 (disk_index, utils.FormatUnit(size, "h")))
8982 disks.append(objects.Disk(dev_type=dev_type, size=size,
8983 logical_id=logical_id_fn(idx, disk_index, disk),
8984 iv_name="disk/%d" % disk_index,
8985 mode=disk[constants.IDISK_MODE],
8986 params={}))
8988 return disks
8991 def _GetInstanceInfoText(instance):
8992 """Compute that text that should be added to the disk's metadata.
8995 return "originstname+%s" % instance.name
8998 def _CalcEta(time_taken, written, total_size):
8999 """Calculates the ETA based on size written and total size.
9001 @param time_taken: The time taken so far
9002 @param written: amount written so far
9003 @param total_size: The total size of data to be written
9004 @return: The remaining time in seconds
9006 """
9007 avg_time = time_taken / float(written)
9008 return (total_size - written) * avg_time
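# Worked example of the linear extrapolation above, with made-up
# numbers: after 50 seconds, 250 of 1000 units are written, so
# avg_time = 50 / 250.0 = 0.2 seconds per unit and the returned ETA is
# (1000 - 250) * 0.2 = 150 seconds.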
9011 def _WipeDisks(lu, instance):
9012 """Wipes instance disks.
9014 @type lu: L{LogicalUnit}
9015 @param lu: the logical unit on whose behalf we execute
9016 @type instance: L{objects.Instance}
9017 @param instance: the instance whose disks we should create
9018 @return: the success of the wipe
9020 """
9021 node = instance.primary_node
9023 for device in instance.disks:
9024 lu.cfg.SetDiskID(device, node)
9026 logging.info("Pause sync of instance %s disks", instance.name)
9027 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9028 (instance.disks, instance),
9029 True)
9030 result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)
9032 for idx, success in enumerate(result.payload):
9033 if not success:
9034 logging.warn("pause-sync of instance %s for disks %d failed",
9035 instance.name, idx)
9037 try:
9038 for idx, device in enumerate(instance.disks):
9039 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9040 # MAX_WIPE_CHUNK at max
9041 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
9042 constants.MIN_WIPE_CHUNK_PERCENT)
9043 # we _must_ make this an int, otherwise rounding errors will
9044 # occur
9045 wipe_chunk_size = int(wipe_chunk_size)
9047 lu.LogInfo("* Wiping disk %d", idx)
9048 logging.info("Wiping disk %d for instance %s, node %s using"
9049 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9051 offset = 0
9052 size = device.size
9053 last_output = 0
9054 start_time = time.time()
9056 while offset < size:
9057 wipe_size = min(wipe_chunk_size, size - offset)
9058 logging.debug("Wiping disk %d, offset %s, chunk %s",
9059 idx, offset, wipe_size)
9060 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9061 wipe_size)
9062 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9063 (idx, offset, wipe_size))
9064 offset += wipe_size
9065 now = time.time()
9066 if now - last_output >= 60:
9067 eta = _CalcEta(now - start_time, offset, size)
9068 lu.LogInfo(" - done: %.1f%% ETA: %s" %
9069 (offset / float(size) * 100, utils.FormatSeconds(eta)))
9070 last_output = now
9071 finally:
9072 logging.info("Resume sync of instance %s disks", instance.name)
9074 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9075 (instance.disks, instance),
9076 False)
9078 if result.fail_msg:
9079 lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
9080 " please have a look at the status and troubleshoot"
9081 " the issue: %s", node, result.fail_msg)
9082 else:
9083 for idx, success in enumerate(result.payload):
9084 if not success:
9085 lu.LogWarning("Resume sync of disk %d failed, please have a"
9086 " look at the status and troubleshoot the issue", idx)
9087 logging.warn("resume-sync of instance %s for disks %d failed",
9088 instance.name, idx)
9091 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9092 """Create all disks for an instance.
9094 This abstracts away some work from AddInstance.
9096 @type lu: L{LogicalUnit}
9097 @param lu: the logical unit on whose behalf we execute
9098 @type instance: L{objects.Instance}
9099 @param instance: the instance whose disks we should create
9100 @type to_skip: list
9101 @param to_skip: list of indices to skip
9102 @type target_node: string
9103 @param target_node: if passed, overrides the target node for creation
9105 @return: the success of the creation
9107 """
9108 info = _GetInstanceInfoText(instance)
9109 if target_node is None:
9110 pnode = instance.primary_node
9111 all_nodes = instance.all_nodes
9112 else:
9113 pnode = target_node
9114 all_nodes = [pnode]
9116 if instance.disk_template in constants.DTS_FILEBASED:
9117 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9118 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9120 result.Raise("Failed to create directory '%s' on"
9121 " node %s" % (file_storage_dir, pnode))
9123 # Note: this needs to be kept in sync with adding of disks in
9124 # LUInstanceSetParams
9125 for idx, device in enumerate(instance.disks):
9126 if to_skip and idx in to_skip:
9127 continue
9128 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9130 for node in all_nodes:
9131 f_create = node == pnode
9132 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9135 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9136 """Remove all disks for an instance.
9138 This abstracts away some work from `AddInstance()` and
9139 `RemoveInstance()`. Note that in case some of the devices couldn't
9140 be removed, the removal will continue with the other ones (compare
9141 with `_CreateDisks()`).
9143 @type lu: L{LogicalUnit}
9144 @param lu: the logical unit on whose behalf we execute
9145 @type instance: L{objects.Instance}
9146 @param instance: the instance whose disks we should remove
9147 @type target_node: string
9148 @param target_node: used to override the node on which to remove the disks
9150 @return: the success of the removal
9152 """
9153 logging.info("Removing block devices for instance %s", instance.name)
9155 all_result = True
9156 ports_to_release = set()
9157 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9158 for (idx, device) in enumerate(anno_disks):
9159 if target_node:
9160 edata = [(target_node, device)]
9161 else:
9162 edata = device.ComputeNodeTree(instance.primary_node)
9163 for node, disk in edata:
9164 lu.cfg.SetDiskID(disk, node)
9165 result = lu.rpc.call_blockdev_remove(node, disk)
9166 if result.fail_msg:
9167 lu.LogWarning("Could not remove disk %s on node %s,"
9168 " continuing anyway: %s", idx, node, result.fail_msg)
9169 if not (result.offline and node != instance.primary_node):
9170 all_result = False
9172 # if this is a DRBD disk, return its port to the pool
9173 if device.dev_type in constants.LDS_DRBD:
9174 ports_to_release.add(device.logical_id[2])
9176 if all_result or ignore_failures:
9177 for port in ports_to_release:
9178 lu.cfg.AddTcpUdpPort(port)
9180 if instance.disk_template == constants.DT_FILE:
9181 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9182 if target_node:
9183 tgt = target_node
9184 else:
9185 tgt = instance.primary_node
9186 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9187 if result.fail_msg:
9188 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9189 file_storage_dir, instance.primary_node, result.fail_msg)
9190 all_result = False
9192 return all_result
9195 def _ComputeDiskSizePerVG(disk_template, disks):
9196 """Compute disk size requirements in the volume group
9199 def _compute(disks, payload):
9200 """Universal algorithm.
9205 vgs[disk[constants.IDISK_VG]] = \
9206 vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
9210 # Required free disk space as a function of disk and swap space
9211 req_size_dict = {
9212 constants.DT_DISKLESS: {},
9213 constants.DT_PLAIN: _compute(disks, 0),
9214 # 128 MB are added for drbd metadata for each disk
9215 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9216 constants.DT_FILE: {},
9217 constants.DT_SHARED_FILE: {},
9218 }
9220 if disk_template not in req_size_dict:
9221 raise errors.ProgrammerError("Disk template '%s' size requirement"
9222 " is unknown" % disk_template)
9224 return req_size_dict[disk_template]
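# Worked example (illustrative values): two 1024 MiB DRBD8 disks in
# volume group "xenvg", with DRBD_META_SIZE = 128, give
#
#   _ComputeDiskSizePerVG(constants.DT_DRBD8, disks)
#     -> {"xenvg": (1024 + 128) + (1024 + 128)} == {"xenvg": 2304}
#
# i.e. every disk contributes its own size plus one metadata volume.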
9227 def _ComputeDiskSize(disk_template, disks):
9228 """Compute disk size requirements according to disk template
9231 # Required free disk space as a function of disk and swap space
9233 constants.DT_DISKLESS: None,
9234 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9235 # 128 MB are added for drbd metadata for each disk
9237 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9238 constants.DT_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9239 constants.DT_SHARED_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9240 constants.DT_BLOCK: 0,
9241 constants.DT_RBD: sum(d[constants.IDISK_SIZE] for d in disks),
9242 }
9244 if disk_template not in req_size_dict:
9245 raise errors.ProgrammerError("Disk template '%s' size requirement"
9246 " is unknown" % disk_template)
9248 return req_size_dict[disk_template]
9251 def _FilterVmNodes(lu, nodenames):
9252 """Filters out non-vm_capable nodes from a list.
9254 @type lu: L{LogicalUnit}
9255 @param lu: the logical unit for which we check
9256 @type nodenames: list
9257 @param nodenames: the list of nodes on which we should check
9259 @return: the list of vm-capable nodes
9261 """
9262 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9263 return [name for name in nodenames if name not in vm_nodes]
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)

  cluster = lu.cfg.GetClusterInfo()
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)

  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
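
# Illustrative sketch (editor's example; the dictionaries are hypothetical):
# objects.FillDict, as used above, layers the per-call overrides on top of
# the cluster-wide defaults, so only explicitly overridden keys change:
#
#   defaults = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/xvda1"}
#   override = {"root_path": "/dev/xvda2"}
#   filled = objects.FillDict(defaults, override)
#   # filled == {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/xvda2"}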
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should check
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)
class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
9334 def CheckArguments(self):
9338 # do not require name_check to ease forward/backward compatibility
9340 if self.op.no_install and self.op.start:
9341 self.LogInfo("No-installation mode selected, disabling startup")
9342 self.op.start = False
9343 # validate/normalize the instance name
9344 self.op.instance_name = \
9345 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9347 if self.op.ip_check and not self.op.name_check:
9348 # TODO: make the ip check more flexible and not depend on the name check
9349 raise errors.OpPrereqError("Cannot do IP address check without a name"
9350 " check", errors.ECODE_INVAL)
9352 # check nics' parameter names
9353 for nic in self.op.nics:
9354 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt
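
    # Illustrative sketch (editor's note; the LV name is hypothetical): with
    # adoption, each disk entry names an existing volume in addition to its
    # nominal size, e.g.
    #   {constants.IDISK_SIZE: 10240, constants.IDISK_ADOPT: "existing-lv"}
    # and the real size is read back from the node in CheckPrereq.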
9388 # instance name verification
9389 if self.op.name_check:
9390 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9391 self.op.instance_name = self.hostname1.name
9392 # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None
9397 # file storage checks
9398 if (self.op.file_driver and
9399 not self.op.file_driver in constants.FILE_DRIVER):
9400 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9401 self.op.file_driver, errors.ECODE_INVAL)
9403 if self.op.disk_template == constants.DT_FILE:
9404 opcodes.RequireFileStorage()
9405 elif self.op.disk_template == constants.DT_SHARED_FILE:
9406 opcodes.RequireSharedFileStorage()
9408 ### Node/iallocator related checks
9409 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9411 if self.op.pnode is not None:
9412 if self.op.disk_template in constants.DTS_INT_MIRROR:
9413 if self.op.snode is None:
9414 raise errors.OpPrereqError("The networked disk templates need"
9415 " a mirror node", errors.ECODE_INVAL)
9417 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9419 self.op.snode = None
9421 self._cds = _GetClusterDomainSecret()
9423 if self.op.mode == constants.INSTANCE_IMPORT:
9424 # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True
9429 if self.op.no_install:
9430 self.LogInfo("No-installation mode has no effect during import")
    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_INVAL)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
        netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)
9489 def ExpandNames(self):
9490 """ExpandNames for CreateInstance.
    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}
9497 instance_name = self.op.instance_name
9498 # this is just a preventive check, but someone might still add this
9499 # instance in the meantime, and creation will fail at lock-add time
9500 if instance_name in self.cfg.GetInstanceList():
9501 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9502 instance_name, errors.ECODE_EXISTS)
9504 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9506 if self.op.iallocator:
9507 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
      # specifying a group on instance creation and then selecting nodes from
      # that group
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9514 nodelist = [self.op.pnode]
9515 if self.op.snode is not None:
9516 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9517 nodelist.append(self.op.snode)
9518 self.needed_locks[locking.LEVEL_NODE] = nodelist
9519 # Lock resources of instance's primary and secondary nodes (copy to
    # prevent accidental modification)
9521 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9523 # in case of import lock the source node too
9524 if self.op.mode == constants.INSTANCE_IMPORT:
9525 src_node = self.op.src_node
9526 src_path = self.op.src_path
9528 if src_path is None:
9529 self.op.src_path = src_path = self.op.instance_name
9531 if src_node is None:
9532 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9533 self.op.src_node = None
9534 if os.path.isabs(src_path):
9535 raise errors.OpPrereqError("Importing an instance from a path"
9536 " requires a source node option",
9539 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9540 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9541 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9542 if not os.path.isabs(src_path):
9543 self.op.src_path = src_path = \
9544 utils.PathJoin(constants.EXPORT_DIR, src_path)
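
    # Illustrative sketch (editor's example with hypothetical values): a
    # relative src_path such as "inst1.example.com" is resolved against the
    # cluster export directory via
    #   utils.PathJoin(constants.EXPORT_DIR, "inst1.example.com")
    # while an absolute path is only accepted together with an explicit
    # source node.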
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=self.op.tags,
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MAXMEM],
                     spindle_use=self.be_full[constants.BE_SPINDLE_USE],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
9573 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9574 " of nodes (%s), required %s" %
9575 (self.op.iallocator, len(ial.result),
9576 ial.required_nodes), errors.ECODE_FAULT)
9577 self.op.pnode = ial.result[0]
9578 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9579 self.op.instance_name, self.op.iallocator,
9580 utils.CommaJoin(ial.result))
9581 if ial.required_nodes == 2:
9582 self.op.snode = ial.result[1]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      minmem=self.be_full[constants.BE_MINMEM],
      maxmem=self.be_full[constants.BE_MAXMEM],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
      ))

    return env
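
  # Illustrative sketch (editor's note): for a DRBD-based instance the
  # resulting hooks environment carries entries along the lines of
  #   INSTANCE_NAME=inst1.example.com, INSTANCE_PRIMARY=node1.example.com,
  #   INSTANCE_DISK_TEMPLATE=drbd, INSTANCE_DISK_COUNT=1, ...
  # the exact variable names are defined by _BuildInstanceHookEnv.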
9619 def BuildHooksNodes(self):
9620 """Build hooks nodes.
9623 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9626 def _ReadExportInfo(self):
9627 """Reads the export information from disk.
9629 It will override the opcode source node and path with the actual
9630 information, if these two were not specified before.
    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT
9637 src_node = self.op.src_node
9638 src_path = self.op.src_path
9640 if src_node is None:
9641 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9642 exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)
9657 _CheckNodeOnline(self, src_node)
9658 result = self.rpc.call_export_info(src_node, src_path)
9659 result.Raise("No export or invalid export found in dir %s" % src_path)
9661 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9662 if not export_info.has_section(constants.INISECT_EXP):
9663 raise errors.ProgrammerError("Corrupted export config",
9664 errors.ECODE_ENVIRON)
    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if int(ei_version) != constants.EXPORT_VERSION:
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)

    return export_info
9673 def _ReadExportParams(self, einfo):
9674 """Use export parameters as defaults.
9676 In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9683 if self.op.disk_template is None:
9684 if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
        if self.op.disk_template not in constants.DISK_TEMPLATES:
          raise errors.OpPrereqError("Disk template specified in"
                                     " configuration file is not one of the"
                                     " allowed values: %s" %
                                     " ".join(constants.DISK_TEMPLATES),
                                     errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)
    if not self.op.disks:
      disks = []
      # TODO: import the disk iv_name too
      for idx in range(constants.MAX_DISKS):
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
      self.op.disks = disks
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if not self.op.nics:
      nics = []
      for idx in range(constants.MAX_NICS):
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
          ndict = {}
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
            ndict[name] = v
          nics.append(ndict)
        else:
          break
      self.op.nics = nics
9724 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9725 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9727 if (self.op.hypervisor is None and
9728 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9729 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9731 if einfo.has_section(constants.INISECT_HYP):
9732 # use the export parameters but do not override the ones
9733 # specified by the user
9734 for name, value in einfo.items(constants.INISECT_HYP):
9735 if name not in self.op.hvparams:
9736 self.op.hvparams[name] = value
9738 if einfo.has_section(constants.INISECT_BEP):
9739 # use the parameters, without overriding
9740 for name, value in einfo.items(constants.INISECT_BEP):
9741 if name not in self.op.beparams:
9742 self.op.beparams[name] = value
9743 # Compatibility for the old "memory" be param
9744 if name == constants.BE_MEMORY:
9745 if constants.BE_MAXMEM not in self.op.beparams:
9746 self.op.beparams[constants.BE_MAXMEM] = value
9747 if constants.BE_MINMEM not in self.op.beparams:
            self.op.beparams[constants.BE_MINMEM] = value
    else:
      # try to read the parameters old style, from the main section
9751 for name in constants.BES_PARAMETERS:
9752 if (name not in self.op.beparams and
9753 einfo.has_option(constants.INISECT_INS, name)):
9754 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
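
    # Illustrative sketch (editor's example): an export carrying only the
    # legacy "memory" backend parameter, say memory=512, ends up as
    # beparams == {"memory": 512, "maxmem": 512, "minmem": 512}, unless the
    # opcode already supplied explicit maxmem/minmem values.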
9756 if einfo.has_section(constants.INISECT_OSP):
9757 # use the parameters, without overriding
9758 for name, value in einfo.items(constants.INISECT_OSP):
9759 if name not in self.op.osparams:
9760 self.op.osparams[name] = value
9762 def _RevertToDefaults(self, cluster):
9763 """Revert the instance parameters to the default values.
9767 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9768 for name in self.op.hvparams.keys():
9769 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9770 del self.op.hvparams[name]
9772 be_defs = cluster.SimpleFillBE({})
9773 for name in self.op.beparams.keys():
9774 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9775 del self.op.beparams[name]
9777 nic_defs = cluster.SimpleFillNIC({})
9778 for nic in self.op.nics:
9779 for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]

    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9784 for name in self.op.osparams.keys():
9785 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9786 del self.op.osparams[name]
9788 def _CalculateFileStorageDir(self):
9789 """Calculate final instance file storage dir.
9792 # file storage dir calculation/check
9793 self.instance_file_storage_dir = None
9794 if self.op.disk_template in constants.DTS_FILEBASED:
9795 # build the full file storage dir path
9798 if self.op.disk_template == constants.DT_SHARED_FILE:
9799 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9801 get_fsd_fn = self.cfg.GetFileStorageDir
9803 cfg_storagedir = get_fsd_fn()
9804 if not cfg_storagedir:
9805 raise errors.OpPrereqError("Cluster file storage dir not defined",
9807 joinargs.append(cfg_storagedir)
9809 if self.op.file_storage_dir is not None:
9810 joinargs.append(self.op.file_storage_dir)
9812 joinargs.append(self.op.instance_name)
9814 # pylint: disable=W0142
9815 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
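
  # Illustrative sketch (editor's example with hypothetical values): with a
  # cluster file storage dir of "/srv/ganeti/file-storage", an opcode-level
  # subdirectory "web" and an instance named "inst1.example.com", the code
  # above yields "/srv/ganeti/file-storage/web/inst1.example.com".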
9817 def CheckPrereq(self): # pylint: disable=R0914
9818 """Check prerequisites.
9821 self._CalculateFileStorageDir()
9823 if self.op.mode == constants.INSTANCE_IMPORT:
9824 export_info = self._ReadExportInfo()
9825 self._ReadExportParams(export_info)
      self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
    else:
      self._old_instance_name = None
9830 if (not self.cfg.GetVGName() and
9831 self.op.disk_template not in constants.DTS_NOT_LVM):
9832 raise errors.OpPrereqError("Cluster does not support lvm-based"
9833 " instances", errors.ECODE_STATE)
9835 if (self.op.hypervisor is None or
9836 self.op.hypervisor == constants.VALUE_AUTO):
9837 self.op.hypervisor = self.cfg.GetHypervisorType()
9839 cluster = self.cfg.GetClusterInfo()
9840 enabled_hvs = cluster.enabled_hypervisors
9841 if self.op.hypervisor not in enabled_hvs:
9842 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9844 (self.op.hypervisor, ",".join(enabled_hvs)),
9847 # Check tag validity
9848 for tag in self.op.tags:
9849 objects.TaggableObject.ValidateTag(tag)
9851 # check hypervisor parameter syntax (locally)
9852 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9856 hv_type.CheckParameterSyntax(filled_hvp)
9857 self.hv_full = filled_hvp
9858 # check that we don't specify global parameters on an instance
9859 _CheckGlobalHvParams(self.op.hvparams)
9861 # fill and remember the beparams dict
9862 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9863 for param, value in self.op.beparams.iteritems():
9864 if value == constants.VALUE_AUTO:
9865 self.op.beparams[param] = default_beparams[param]
9866 objects.UpgradeBeParams(self.op.beparams)
9867 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9868 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9870 # build os parameters
9871 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
9881 nic_mode_req = nic.get(constants.INIC_MODE, None)
9882 nic_mode = nic_mode_req
9883 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9884 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9886 # in routed mode, for the first nic, the default ip is 'auto'
      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip
9908 # TODO: check the ip address for uniqueness
9909 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9910 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9913 # MAC address verification
9914 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9915 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
9920 except errors.ReservationError:
9921 raise errors.OpPrereqError("MAC address %s already in use"
9922 " in cluster" % mac,
9923 errors.ECODE_NOTUNIQUE)
9925 # Build nic parameters
9926 link = nic.get(constants.INIC_LINK, None)
9927 if link == constants.VALUE_AUTO:
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode
      if link:
        nicparams[constants.NIC_LINK] = link
9935 check_params = cluster.SimpleFillNIC(nicparams)
9936 objects.NIC.CheckParameterSyntax(check_params)
9937 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
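
    # Illustrative sketch (editor's note): per the defaults above, a first
    # NIC requested as {"mode": "routed"} with name checks enabled gets
    # ip="auto" and resolves to the instance's own resolved address, while a
    # bridged NIC defaults to no IP and the cluster default link.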
    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)

      data_vg = disk.get(constants.IDISK_VG, default_vg)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
        }
      if constants.IDISK_METAVG in disk:
        new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)
    if self.op.mode == constants.INSTANCE_IMPORT:
      disk_images = []
      for idx in range(len(self.disks)):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images
9982 if self.op.instance_name == self._old_instance_name:
9983 for idx, nic in enumerate(self.nics):
9984 if nic.mac == constants.VALUE_AUTO:
9985 nic_mac_ini = "nic%d_mac" % idx
9986 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9988 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9990 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9991 if self.op.ip_check:
9992 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9993 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9994 (self.check_ip, self.op.instance_name),
9995 errors.ECODE_NOTUNIQUE)
9997 #### mac address generation
9998 # By generating here the mac address both the allocator and the hooks get
9999 # the real final mac address rather than the 'auto' or 'generate' value.
10000 # There is a race condition between the generation and the instance object
10001 # creation, which means that we know the mac is valid now, but we're not
10002 # sure it will be when we actually add the instance. If things go bad
10003 # adding the instance will abort because of a duplicate mac, and the
10004 # creation job will fail.
10005 for nic in self.nics:
10006 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10007 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
10011 if self.op.iallocator is not None:
10012 self._RunAllocator()
10014 # Release all unneeded node locks
10015 _ReleaseLocks(self, locking.LEVEL_NODE,
10016 keep=filter(None, [self.op.pnode, self.op.snode,
10017 self.op.src_node]))
10018 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
10019 keep=filter(None, [self.op.pnode, self.op.snode,
10020 self.op.src_node]))
10022 #### node related checks
10024 # check primary node
10025 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10026 assert self.pnode is not None, \
10027 "Cannot retrieve locked node %s" % self.op.pnode
10029 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10030 pnode.name, errors.ECODE_STATE)
10032 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10033 pnode.name, errors.ECODE_STATE)
10034 if not pnode.vm_capable:
10035 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10036 " '%s'" % pnode.name, errors.ECODE_STATE)
10038 self.secondaries = []
10040 # mirror node verification
10041 if self.op.disk_template in constants.DTS_INT_MIRROR:
10042 if self.op.snode == pnode.name:
10043 raise errors.OpPrereqError("The secondary node cannot be the"
10044 " primary node", errors.ECODE_INVAL)
10045 _CheckNodeOnline(self, self.op.snode)
10046 _CheckNodeNotDrained(self, self.op.snode)
10047 _CheckNodeVmCapable(self, self.op.snode)
10048 self.secondaries.append(self.op.snode)
10050 snode = self.cfg.GetNodeInfo(self.op.snode)
10051 if pnode.group != snode.group:
10052 self.LogWarning("The primary and secondary nodes are in two"
10053 " different node groups; the disk parameters"
10054 " from the first disk's node group will be"
10057 nodenames = [pnode.name] + self.secondaries
10059 # Verify instance specs
    spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
    ispec = {
      constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
      constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
      constants.ISPEC_DISK_COUNT: len(self.disks),
      constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
      constants.ISPEC_NIC_COUNT: len(self.nics),
      constants.ISPEC_SPINDLE_USE: spindle_use,
      }
10070 group_info = self.cfg.GetNodeGroup(pnode.group)
10071 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
10072 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10073 if not self.op.ignore_ipolicy and res:
10074 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10075 " policy: %s") % (pnode.group,
10076 utils.CommaJoin(res)),
10077 errors.ECODE_INVAL)
10079 if not self.adopt_disks:
10080 if self.op.disk_template == constants.DT_RBD:
10081 # _CheckRADOSFreeSpace() is just a placeholder.
10082 # Any function that checks prerequisites can be placed here.
10083 # Check if there is enough space on the RADOS cluster.
10084 _CheckRADOSFreeSpace()
10086 # Check lv size requirements, if not adopting
10087 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10088 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10090 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10091 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10092 disk[constants.IDISK_ADOPT])
10093 for disk in self.disks])
10094 if len(all_lvs) != len(self.disks):
10095 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10096 errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
10100 # to ReserveLV uses the same syntax
10101 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10102 except errors.ReservationError:
10103 raise errors.OpPrereqError("LV named %s used by another instance" %
10104 lv_name, errors.ECODE_NOTUNIQUE)
10106 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10107 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10109 node_lvs = self.rpc.call_lv_list([pnode.name],
10110 vg_names.payload.keys())[pnode.name]
10111 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10112 node_lvs = node_lvs.payload
      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
10117 utils.CommaJoin(delta),
10118 errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
10122 " adopt: %s" % utils.CommaJoin(online_lvs),
10123 errors.ECODE_STATE)
10124 # update the size of disk based on what is found
10125 for dsk in self.disks:
10126 dsk[constants.IDISK_SIZE] = \
10127 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10128 dsk[constants.IDISK_ADOPT])][0]))
10130 elif self.op.disk_template == constants.DT_BLOCK:
10131 # Normalize and de-duplicate device paths
10132 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10133 for disk in self.disks])
10134 if len(all_disks) != len(self.disks):
10135 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10136 errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10141 " cannot be adopted" %
10142 (", ".join(baddisks),
10143 constants.ADOPTABLE_BLOCKDEV_ROOT),
10144 errors.ECODE_INVAL)
10146 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10147 list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
10154 utils.CommaJoin(delta),
10155 errors.ECODE_INVAL)
10156 for dsk in self.disks:
10157 dsk[constants.IDISK_SIZE] = \
10158 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10160 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10162 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10163 # check OS parameters (remotely)
10164 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10166 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10168 # memory check on primary node
    # TODO(dynmem): use MINMEM for checking
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
10172 "creating instance %s" % self.op.instance_name,
10173 self.be_full[constants.BE_MAXMEM],
10174 self.op.hypervisor)
10176 self.dry_run_result = list(nodenames)
10178 def Exec(self, feedback_fn):
10179 """Create and add the instance to the cluster.
10182 instance = self.op.instance_name
10183 pnode_name = self.pnode.name
10185 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10186 self.owned_locks(locking.LEVEL_NODE)), \
10187 "Node locks differ from node resource locks"
10189 ht_kind = self.op.hypervisor
10190 if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None
10195 # This is ugly but we got a chicken-egg problem here
10196 # We can only take the group disk parameters, as the instance
10197 # has no disks yet (we are generating them right here).
10198 node = self.cfg.GetNodeInfo(pnode_name)
10199 nodegroup = self.cfg.GetNodeGroup(node.group)
10200 disks = _GenerateDiskTemplate(self,
10201 self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn,
                                  self.cfg.GetGroupDiskParams(nodegroup))
10211 iobj = objects.Instance(name=instance, os=self.op.os_type,
10212 primary_node=pnode_name,
10213 nics=self.nics, disks=disks,
10214 disk_template=self.op.disk_template,
10215 admin_state=constants.ADMINST_DOWN,
10216 network_port=network_port,
10217 beparams=self.op.beparams,
10218 hvparams=self.op.hvparams,
10219 hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)
10227 if self.adopt_disks:
10228 if self.op.disk_template == constants.DT_PLAIN:
10229 # rename LVs to the newly-generated names; we need to construct
10230 # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10234 rename_to.append(t_dsk.logical_id)
10235 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10236 self.cfg.SetDiskID(t_dsk, pnode_name)
10237 result = self.rpc.call_blockdev_rename(pnode_name,
10238 zip(tmp_disks, rename_to))
10239 result.Raise("Failed to rename adoped LVs")
10241 feedback_fn("* creating instance disks...")
10243 _CreateDisks(self, iobj)
10244 except errors.OpExecError:
10245 self.LogWarning("Device creation failed, reverting...")
10247 _RemoveDisks(self, iobj)
10249 self.cfg.ReleaseDRBDMinors(instance)
10252 feedback_fn("adding instance %s to cluster config" % instance)
10254 self.cfg.AddInstance(iobj, self.proc.GetECId())
10256 # Declare that we don't want to remove the instance lock anymore, as we've
10257 # added the instance to the config
10258 del self.remove_locks[locking.LEVEL_INSTANCE]
10260 if self.op.mode == constants.INSTANCE_IMPORT:
10261 # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)
    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
10282 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10283 # make sure the disks are not degraded (still sync-ing is ok)
10284 feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")
10297 # Release all node resource locks
10298 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10300 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10301 # we need to set the disks ID to the primary node, since the
10302 # preceding code might or might have not done it, depending on
10303 # disk template and other options
10304 for disk in iobj.disks:
10305 self.cfg.SetDiskID(disk, pnode_name)
    if self.op.mode == constants.INSTANCE_CREATE:
      if not self.op.no_install:
        pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                      not self.op.wait_for_sync)
        if pause_sync:
          feedback_fn("* pausing disk sync to install instance OS")
          result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                            (iobj.disks,
                                                             iobj), True)
          for idx, success in enumerate(result.payload):
            if not success:
              logging.warn("pause-sync of instance %s for disk %d failed",
                           instance, idx)

        feedback_fn("* running the instance OS create scripts...")
        # FIXME: pass debug option from opcode to backend
        os_add_result = \
          self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
                                        self.op.debug_level)
        if pause_sync:
          feedback_fn("* resuming disk sync")
          result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                            (iobj.disks,
                                                             iobj), False)
          for idx, success in enumerate(result.payload):
            if not success:
              logging.warn("resume-sync of instance %s for disk %d failed",
                           instance, idx)

        os_add_result.Raise("Could not add os for instance %s"
                            " on node %s" % (instance, pnode_name))
    if self.op.mode == constants.INSTANCE_IMPORT:
      feedback_fn("* running the instance OS import scripts...")

      transfers = []

      for idx, image in enumerate(self.src_images):
        if not image:
          continue

        # FIXME: pass debug option from opcode to backend
        dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                           constants.IEIO_FILE, (image, ),
                                           constants.IEIO_SCRIPT,
                                           (iobj.disks[idx], idx),
                                           None)
        transfers.append(dt)

      import_result = \
        masterd.instance.TransferInstanceData(self, feedback_fn,
                                              self.op.src_node, pnode_name,
                                              self.pnode.secondary_ip,
                                              iobj, transfers)
      if not compat.all(import_result):
10362 self.LogWarning("Some disks for instance %s on node %s were not"
10363 " imported successfully" % (instance, pnode_name))
10365 rename_from = self._old_instance_name
10367 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10368 feedback_fn("* preparing remote import...")
10369 # The source cluster will stop the instance before attempting to make
10370 # a connection. In some cases stopping an instance can take a long
      # time, hence the shutdown timeout is added to the connection
      # timeout.
      connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                         self.op.source_shutdown_timeout)
      timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

      assert iobj.primary_node == self.pnode.name
      disk_results = \
        masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                      self.source_x509_ca,
                                      self._cds, timeouts)
10382 if not compat.all(disk_results):
10383 # TODO: Should the instance still be started, even if some disks
10384 # failed to import (valid for local imports, too)?
10385 self.LogWarning("Some disks for instance %s on node %s were not"
10386 " imported successfully" % (instance, pnode_name))
10388 rename_from = self.source_instance_name
    else:
      # also checked in the prereq part
      raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                   % self.op.mode)
    if self.op.mode != constants.INSTANCE_CREATE:
      # Run rename script on newly imported instance
      assert iobj.name == instance
      feedback_fn("Running rename script for %s" % instance)
      result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                 rename_from,
                                                 self.op.debug_level)
10401 if result.fail_msg:
10402 self.LogWarning("Failed to run rename script for %s on node"
10403 " %s: %s" % (instance, pnode_name, result.fail_msg))
    assert not self.owned_locks(locking.LEVEL_NODE_RES)

    if self.op.start:
      iobj.admin_state = constants.ADMINST_UP
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
                                            False)
      result.Raise("Could not start instance")
10416 return list(iobj.all_nodes)
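
# Illustrative sketch (editor's example; all field values are hypothetical):
# a minimal plain-LVM creation as a client would submit it looks roughly like
#
#   op = opcodes.OpInstanceCreate(instance_name="inst1.example.com",
#                                 mode=constants.INSTANCE_CREATE,
#                                 disk_template=constants.DT_PLAIN,
#                                 disks=[{constants.IDISK_SIZE: 10240}],
#                                 nics=[{}], os_type="debian-image",
#                                 pnode="node1.example.com")
#
# and LUInstanceCreate then runs CheckArguments, ExpandNames, CheckPrereq and
# Exec in that order under the locking manager.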
10419 def _CheckRADOSFreeSpace():
10420 """Compute disk size requirements inside the RADOS cluster.
10423 # For the RADOS cluster we assume there is always enough space.
10427 class LUInstanceConsole(NoHooksLU):
10428 """Connect to an instance's console.
10430 This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False
10438 self.share_locks = _ShareAll()
10439 self._ExpandAndLockInstance()
10441 def CheckPrereq(self):
10442 """Check prerequisites.
    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10448 assert self.instance is not None, \
10449 "Cannot retrieve locked instance %s" % self.op.instance_name
10450 _CheckNodeOnline(self, self.instance.primary_node)
10452 def Exec(self, feedback_fn):
10453 """Connect to the console of an instance
10456 instance = self.instance
10457 node = instance.primary_node
10459 node_insts = self.rpc.call_instance_list([node],
10460 [instance.hypervisor])[node]
10461 node_insts.Raise("Can't get node information from %s" % node)
10463 if instance.name not in node_insts.payload:
10464 if instance.admin_state == constants.ADMINST_UP:
10465 state = constants.INSTST_ERRORDOWN
10466 elif instance.admin_state == constants.ADMINST_DOWN:
        state = constants.INSTST_ADMINDOWN
      else:
        state = constants.INSTST_ADMINOFFLINE
10470 raise errors.OpExecError("Instance %s is not running (state %s)" %
10471 (instance.name, state))
10473 logging.debug("Connecting to console of %s on %s", instance.name, node)
10475 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10478 def _GetInstanceConsole(cluster, instance):
10479 """Returns console information for an instance.
10481 @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
10487 # beparams and hvparams are passed separately, to avoid editing the
10488 # instance and then saving the defaults in the instance itself.
10489 hvparams = cluster.FillHV(instance)
10490 beparams = cluster.FillBE(instance)
10491 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10493 assert console.instance == instance.name
10494 assert console.Validate()
10496 return console.ToDict()
10499 class LUInstanceReplaceDisks(LogicalUnit):
10500 """Replace the disks of an instance.
10503 HPATH = "mirrors-replace"
10504 HTYPE = constants.HTYPE_INSTANCE
10507 def CheckArguments(self):
10508 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10509 self.op.iallocator)
10511 def ExpandNames(self):
10512 self._ExpandAndLockInstance()
10514 assert locking.LEVEL_NODE not in self.needed_locks
10515 assert locking.LEVEL_NODE_RES not in self.needed_locks
10516 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10518 assert self.op.iallocator is None or self.op.remote_node is None, \
10519 "Conflicting options"
10521 if self.op.remote_node is not None:
10522 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10524 # Warning: do not remove the locking of the new secondary here
10525 # unless DRBD8.AddChildren is changed to work in parallel;
10526 # currently it doesn't since parallel invocations of
10527 # FindUnusedMinor will conflict
10528 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
10532 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10534 if self.op.iallocator is not None:
10535 # iallocator will select a new node in the same group
10536 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10538 self.needed_locks[locking.LEVEL_NODE_RES] = []
10540 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10541 self.op.iallocator, self.op.remote_node,
10542 self.op.disks, False, self.op.early_release,
10543 self.op.ignore_ipolicy)
10545 self.tasklets = [self.replacer]
10547 def DeclareLocks(self, level):
10548 if level == locking.LEVEL_NODEGROUP:
10549 assert self.op.remote_node is None
10550 assert self.op.iallocator is not None
10551 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10553 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10554 # Lock all groups used by instance optimistically; this requires going
10555 # via the node before it's locked, requiring verification later on
10556 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10557 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10559 elif level == locking.LEVEL_NODE:
10560 if self.op.iallocator is not None:
10561 assert self.op.remote_node is None
10562 assert not self.needed_locks[locking.LEVEL_NODE]
        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = \
            [node_name
             for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
             for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Reuse node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE]
10576 def BuildHooksEnv(self):
10577 """Build hooks env.
10579 This runs on the master, the primary and all the secondaries.
10582 instance = self.replacer.instance
10584 "MODE": self.op.mode,
10585 "NEW_SECONDARY": self.op.remote_node,
10586 "OLD_SECONDARY": instance.secondary_nodes[0],
10588 env.update(_BuildInstanceHookEnvByObject(self, instance))
10591 def BuildHooksNodes(self):
10592 """Build hooks nodes.
10595 instance = self.replacer.instance
10597 self.cfg.GetMasterNode(),
10598 instance.primary_node,
10600 if self.op.remote_node is not None:
10601 nl.append(self.op.remote_node)
10604 def CheckPrereq(self):
10605 """Check prerequisites.
10608 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10609 self.op.iallocator is None)
10611 # Verify if node group locks are still correct
10612 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10614 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10616 return LogicalUnit.CheckPrereq(self)
10619 class TLReplaceDisks(Tasklet):
10620 """Replaces disks for an instance.
  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10626 disks, delay_iallocator, early_release, ignore_ipolicy):
10627 """Initializes this class.
10630 Tasklet.__init__(self, lu)
10633 self.instance_name = instance_name
10635 self.iallocator_name = iallocator_name
10636 self.remote_node = remote_node
10638 self.delay_iallocator = delay_iallocator
10639 self.early_release = early_release
10640 self.ignore_ipolicy = ignore_ipolicy
10643 self.instance = None
10644 self.new_node = None
10645 self.target_node = None
10646 self.other_node = None
10647 self.remote_node_info = None
10648 self.node_secondary_ip = None
  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
10656 if mode == constants.REPLACE_DISK_CHG:
10657 if remote_node is None and iallocator is None:
10658 raise errors.OpPrereqError("When changing the secondary either an"
10659 " iallocator script must be used or the"
10660 " new node given", errors.ECODE_INVAL)
10662 if remote_node is not None and iallocator is not None:
10663 raise errors.OpPrereqError("Give either the iallocator or the new"
10664 " secondary, not both", errors.ECODE_INVAL)
10666 elif remote_node is not None or iallocator is not None:
10667 # Not replacing the secondary
10668 raise errors.OpPrereqError("The iallocator and new node options can"
10669 " only be used when changing the"
10670 " secondary node", errors.ECODE_INVAL)
  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
10678 mode=constants.IALLOCATOR_MODE_RELOC,
10679 name=instance_name,
10680 relocate_from=list(relocate_from))
10682 ial.Run(iallocator_name)
10684 if not ial.success:
10685 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10686 " %s" % (iallocator_name, ial.info),
10687 errors.ECODE_NORES)
10689 if len(ial.result) != ial.required_nodes:
10690 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10691 " of nodes (%s), required %s" %
10693 len(ial.result), ial.required_nodes),
10694 errors.ECODE_FAULT)
10696 remote_node_name = ial.result[0]
10698 lu.LogInfo("Selected new secondary for instance '%s': %s",
10699 instance_name, remote_node_name)
10701 return remote_node_name
  def _FindFaultyDisks(self, node_name):
    """Wrapper for L{_FindFaultyInstanceDisks}.

    """
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)
  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes
    for idx, dev in enumerate(instance.disks):
      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)
        result = _BlockdevFind(self, node, dev, instance)
        if result.offline:
          continue
        elif result.fail_msg or not result.payload:
          return False

    return True
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10740 assert instance is not None, \
10741 "Cannot retrieve locked instance %s" % self.instance_name
10743 if instance.disk_template != constants.DT_DRBD8:
10744 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10745 " instances", errors.ECODE_INVAL)
10747 if len(instance.secondary_nodes) != 1:
10748 raise errors.OpPrereqError("The instance has a strange layout,"
10749 " expected one secondary but found %d" %
10750 len(instance.secondary_nodes),
10751 errors.ECODE_FAULT)
10753 if not self.delay_iallocator:
10754 self._CheckPrereq2()
10756 def _CheckPrereq2(self):
10757 """Check prerequisites, second part.
10759 This function should always be part of CheckPrereq. It was separated and is
10760 now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
10766 secondary_node = instance.secondary_nodes[0]
10768 if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name,
                                       instance.secondary_nodes)
10774 if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10778 "Remote node '%s' is not locked" % remote_node
10780 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10781 assert self.remote_node_info is not None, \
10782 "Cannot retrieve locked node %s" % remote_node
10784 if remote_node == self.instance.primary_node:
10785 raise errors.OpPrereqError("The specified node is the primary node of"
10786 " the instance", errors.ECODE_INVAL)
10788 if remote_node == secondary_node:
10789 raise errors.OpPrereqError("The specified node is already the"
10790 " secondary node of the instance",
10791 errors.ECODE_INVAL)
10793 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10794 constants.REPLACE_DISK_CHG):
10795 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10796 errors.ECODE_INVAL)
10798 if self.mode == constants.REPLACE_DISK_AUTO:
10799 if not self._CheckDisksActivated(instance):
10800 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10801 " first" % self.instance_name,
10802 errors.ECODE_STATE)
10803 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10804 faulty_secondary = self._FindFaultyDisks(secondary_node)
10806 if faulty_primary and faulty_secondary:
10807 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10808 " one node and can not be repaired"
10809 " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
10814 self.target_node = instance.primary_node
10815 self.other_node = secondary_node
10816 check_nodes = [self.target_node, self.other_node]
10817 elif faulty_secondary:
10818 self.disks = faulty_secondary
10819 self.target_node = secondary_node
10820 self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
10828 if self.mode == constants.REPLACE_DISK_PRI:
10829 self.target_node = instance.primary_node
10830 self.other_node = secondary_node
10831 check_nodes = [self.target_node, self.other_node]
10833 elif self.mode == constants.REPLACE_DISK_SEC:
10834 self.target_node = secondary_node
10835 self.other_node = instance.primary_node
10836 check_nodes = [self.target_node, self.other_node]
10838 elif self.mode == constants.REPLACE_DISK_CHG:
10839 self.new_node = remote_node
10840 self.other_node = instance.primary_node
10841 self.target_node = secondary_node
10842 check_nodes = [self.new_node, self.other_node]
10844 _CheckNodeNotDrained(self.lu, remote_node)
10845 _CheckNodeVmCapable(self.lu, remote_node)
10847 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10848 assert old_node_info is not None
10849 if old_node_info.offline and not self.early_release:
10850 # doesn't make sense to delay the release
10851 self.early_release = True
10852 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10853 " early-release mode", secondary_node)
10856 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10859 # If not specified all disks should be replaced
10861 self.disks = range(len(self.instance.disks))
10863 # TODO: This is ugly, but right now we can't distinguish between internal
10864 # submitted opcode and external one. We should fix that.
10865 if self.remote_node_info:
10866 # We change the node, lets verify it still meets instance policy
10867 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
      ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
                                       new_group_info)
      _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
                              ignore=self.ignore_ipolicy)
10873 for node in check_nodes:
10874 _CheckNodeOnline(self.lu, node)
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)
10881 # Release unneeded node and node resource locks
10882 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10883 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10885 # Release any owned node group
10886 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10887 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10889 # Check whether disks are valid
10890 for disk_idx in self.disks:
10891 instance.FindDisk(disk_idx)
10893 # Get secondary node IP addresses
10894 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10895 in self.cfg.GetMultiNodeInfo(touched_nodes))
10897 def Exec(self, feedback_fn):
10898 """Execute disk replacement.
10900 This dispatches the disk replacement to the appropriate handler.
10903 if self.delay_iallocator:
10904 self._CheckPrereq2()
10907 # Verify owned locks before starting operation
10908 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10909 assert set(owned_nodes) == set(self.node_secondary_ip), \
10910 ("Incorrect node locks, owning %s, expected %s" %
10911 (owned_nodes, self.node_secondary_ip.keys()))
10912 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10913 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10915 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10916 assert list(owned_instances) == [self.instance_name], \
10917 "Instance '%s' not locked" % self.instance_name
10919 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10920 "Should not own any node group lock at this point"
10923 feedback_fn("No disks need replacement")
10926 feedback_fn("Replacing disk(s) %s for %s" %
10927 (utils.CommaJoin(self.disks), self.instance.name))
    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)
    assert not self.lu.owned_locks(locking.LEVEL_NODE)

    if __debug__:
      # Verify owned locks
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
      nodes = frozenset(self.node_secondary_ip)
      assert ((self.early_release and not owned_nodes) or
              (not self.early_release and not (set(owned_nodes) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_nodes, nodes))

    return result
10962 def _CheckVolumeGroup(self, nodes):
10963 self.lu.LogInfo("Checking volume groups")
10965 vgname = self.cfg.GetVGName()
10967 # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))
10979 def _CheckDisksExistence(self, nodes):
10980 # Check disk existence
10981 for idx, dev in enumerate(self.instance.disks):
10982 if idx not in self.disks:
10983 continue
10985 for node in nodes:
10986 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10987 self.cfg.SetDiskID(dev, node)
10989 result = _BlockdevFind(self, node, dev, self.instance)
10991 msg = result.fail_msg
10992 if msg or not result.payload:
10993 if not msg:
10994 msg = "disk not found"
10995 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10996 (idx, node, msg))
10998 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10999 for idx, dev in enumerate(self.instance.disks):
11000 if idx not in self.disks:
11001 continue
11003 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11004 (idx, node_name))
11006 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11007 on_primary, ldisk=ldisk):
11008 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11009 " replace disks for instance %s" %
11010 (node_name, self.instance.name))
11012 def _CreateNewStorage(self, node_name):
11013 """Create new storage on the primary or secondary node.
11015 This is only used for same-node replaces, not for changing the
11016 secondary node, hence we don't want to modify the existing disk.
11018 """
11019 iv_names = {}
11021 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11022 for idx, dev in enumerate(disks):
11023 if idx not in self.disks:
11024 continue
11026 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
11028 self.cfg.SetDiskID(dev, node_name)
11030 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11031 names = _GenerateUniqueNames(self.lu, lv_names)
11033 (data_disk, meta_disk) = dev.children
11034 vg_data = data_disk.logical_id[0]
11035 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11036 logical_id=(vg_data, names[0]),
11037 params=data_disk.params)
11038 vg_meta = meta_disk.logical_id[0]
11039 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
11040 logical_id=(vg_meta, names[1]),
11041 params=meta_disk.params)
11043 new_lvs = [lv_data, lv_meta]
11044 old_lvs = [child.Copy() for child in dev.children]
11045 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11047 # we pass force_create=True to force the LVM creation
11048 for new_lv in new_lvs:
11049 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11050 _GetInstanceInfoText(self.instance), False)
11052 return iv_names
11054 def _CheckDevices(self, node_name, iv_names):
11055 for name, (dev, _, _) in iv_names.iteritems():
11056 self.cfg.SetDiskID(dev, node_name)
11058 result = _BlockdevFind(self, node_name, dev, self.instance)
11060 msg = result.fail_msg
11061 if msg or not result.payload:
11062 if not msg:
11063 msg = "disk not found"
11064 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11065 (name, msg))
11067 if result.payload.is_degraded:
11068 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11070 def _RemoveOldStorage(self, node_name, iv_names):
11071 for name, (_, old_lvs, _) in iv_names.iteritems():
11072 self.lu.LogInfo("Remove logical volumes for %s" % name)
11074 for lv in old_lvs:
11075 self.cfg.SetDiskID(lv, node_name)
11077 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11079 self.lu.LogWarning("Can't remove old LV: %s" % msg,
11080 hint="remove unused LVs manually")
11082 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11083 """Replace a disk on the primary or secondary for DRBD 8.
11085 The algorithm for replace is quite complicated:
11087 1. for each disk to be replaced:
11089 1. create new LVs on the target node with unique names
11090 1. detach old LVs from the drbd device
11091 1. rename old LVs to name_replaced.<time_t>
11092 1. rename new LVs to old LVs
11093 1. attach the new LVs (with the old names now) to the drbd device
11095 1. wait for sync across all devices
11097 1. for each modified disk:
11099 1. remove old LVs (which have the name name_replaces.<time_t>)
11101 Failures are not very well handled.
11103 """
11104 steps_total = 6
11106 # Step: check device activation
11107 self.lu.LogStep(1, steps_total, "Check device existence")
11108 self._CheckDisksExistence([self.other_node, self.target_node])
11109 self._CheckVolumeGroup([self.target_node, self.other_node])
11111 # Step: check other node consistency
11112 self.lu.LogStep(2, steps_total, "Check peer consistency")
11113 self._CheckDisksConsistency(self.other_node,
11114 self.other_node == self.instance.primary_node,
11115 False)
11117 # Step: create new storage
11118 self.lu.LogStep(3, steps_total, "Allocate new storage")
11119 iv_names = self._CreateNewStorage(self.target_node)
11121 # Step: for each lv, detach+rename*2+attach
11122 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11123 for dev, old_lvs, new_lvs in iv_names.itervalues():
11124 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11126 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11127 old_lvs)
11128 result.Raise("Can't detach drbd from local storage on node"
11129 " %s for device %s" % (self.target_node, dev.iv_name))
11131 #cfg.Update(instance)
11133 # ok, we created the new LVs, so now we know we have the needed
11134 # storage; as such, we proceed on the target node to rename
11135 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11136 # using the assumption that logical_id == physical_id (which in
11137 # turn is the unique_id on that node)
11139 # FIXME(iustin): use a better name for the replaced LVs
11140 temp_suffix = int(time.time())
11141 ren_fn = lambda d, suff: (d.physical_id[0],
11142 d.physical_id[1] + "_replaced-%s" % suff)
11144 # Build the rename list based on what LVs exist on the node
11145 rename_old_to_new = []
11146 for to_ren in old_lvs:
11147 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11148 if not result.fail_msg and result.payload:
11150 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11152 self.lu.LogInfo("Renaming the old LVs on the target node")
11153 result = self.rpc.call_blockdev_rename(self.target_node,
11154 rename_old_to_new)
11155 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11157 # Now we rename the new LVs to the old LVs
11158 self.lu.LogInfo("Renaming the new LVs on the target node")
11159 rename_new_to_old = [(new, old.physical_id)
11160 for old, new in zip(old_lvs, new_lvs)]
11161 result = self.rpc.call_blockdev_rename(self.target_node,
11162 rename_new_to_old)
11163 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11165 # Intermediate steps of in memory modifications
11166 for old, new in zip(old_lvs, new_lvs):
11167 new.logical_id = old.logical_id
11168 self.cfg.SetDiskID(new, self.target_node)
11170 # We need to modify old_lvs so that removal later removes the
11171 # right LVs, not the newly added ones; note that old_lvs is a
11172 # copy here
11173 for disk in old_lvs:
11174 disk.logical_id = ren_fn(disk, temp_suffix)
11175 self.cfg.SetDiskID(disk, self.target_node)
11177 # Now that the new lvs have the old name, we can add them to the device
11178 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11179 result = self.rpc.call_blockdev_addchildren(self.target_node,
11180 (dev, self.instance), new_lvs)
11181 msg = result.fail_msg
11182 if msg:
11183 for new_lv in new_lvs:
11184 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11185 new_lv).fail_msg
11186 if msg2:
11187 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11188 hint=("cleanup manually the unused logical"
11189 " volumes"))
11190 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11192 cstep = itertools.count(5)
11194 if self.early_release:
11195 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11196 self._RemoveOldStorage(self.target_node, iv_names)
11197 # TODO: Check if releasing locks early still makes sense
11198 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11200 # Release all resource locks except those used by the instance
11201 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11202 keep=self.node_secondary_ip.keys())
11204 # Release all node locks while waiting for sync
11205 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11207 # TODO: Can the instance lock be downgraded here? Take the optional disk
11208 # shutdown in the caller into consideration.
11211 # This can fail as the old devices are degraded and _WaitForSync
11212 # does a combined result over all disks, so we don't check its return value
11213 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11214 _WaitForSync(self.lu, self.instance)
11216 # Check all devices manually
11217 self._CheckDevices(self.instance.primary_node, iv_names)
11219 # Step: remove old storage
11220 if not self.early_release:
11221 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11222 self._RemoveOldStorage(self.target_node, iv_names)
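# Rough illustration of the detach/rename/attach dance above (names are
# hypothetical; real ones come from _GenerateUniqueNames and ren_fn):
#   1. old LV  xenvg/<uuid>.disk0_data     -> xenvg/<uuid>.disk0_data_replaced-1400000000
#   2. new LV  xenvg/<new-uuid>.disk0_data -> xenvg/<uuid>.disk0_data
# so the DRBD device re-attaches children under the original names, while
# the degraded LVs keep the "_replaced-<time_t>" suffix until
# _RemoveOldStorage deletes them.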
11224 def _ExecDrbd8Secondary(self, feedback_fn):
11225 """Replace the secondary node for DRBD 8.
11227 The algorithm for replace is quite complicated:
11228 - for all disks of the instance:
11229 - create new LVs on the new node with same names
11230 - shutdown the drbd device on the old secondary
11231 - disconnect the drbd network on the primary
11232 - create the drbd device on the new secondary
11233 - network attach the drbd on the primary, using an artifice:
11234 the drbd code for Attach() will connect to the network if it
11235 finds a device which is connected to the good local disks but
11236 not network enabled
11237 - wait for sync across all devices
11238 - remove all disks from the old secondary
11240 Failures are not very well handled.
11242 """
11243 steps_total = 6
11245 pnode = self.instance.primary_node
11247 # Step: check device activation
11248 self.lu.LogStep(1, steps_total, "Check device existence")
11249 self._CheckDisksExistence([self.instance.primary_node])
11250 self._CheckVolumeGroup([self.instance.primary_node])
11252 # Step: check other node consistency
11253 self.lu.LogStep(2, steps_total, "Check peer consistency")
11254 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11256 # Step: create new storage
11257 self.lu.LogStep(3, steps_total, "Allocate new storage")
11258 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11259 for idx, dev in enumerate(disks):
11260 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11261 (self.new_node, idx))
11262 # we pass force_create=True to force LVM creation
11263 for new_lv in dev.children:
11264 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11265 True, _GetInstanceInfoText(self.instance), False)
11267 # Step 4: drbd minors and drbd setup changes
11268 # after this, we must manually remove the drbd minors on both the
11269 # error and the success paths
11270 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11271 minors = self.cfg.AllocateDRBDMinor([self.new_node
11272 for dev in self.instance.disks],
11273 self.instance.name)
11274 logging.debug("Allocated minors %r", minors)
11277 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11278 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11279 (self.new_node, idx))
11280 # create new devices on new_node; note that we create two IDs:
11281 # one without port, so the drbd will be activated without
11282 # networking information on the new node at this stage, and one
11283 # with network, for the latter activation in step 4
11284 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11285 if self.instance.primary_node == o_node1:
11286 p_minor = o_minor1
11287 else:
11288 assert self.instance.primary_node == o_node2, "Three-node instance?"
11289 p_minor = o_minor2
11291 new_alone_id = (self.instance.primary_node, self.new_node, None,
11292 p_minor, new_minor, o_secret)
11293 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11294 p_minor, new_minor, o_secret)
11296 iv_names[idx] = (dev, dev.children, new_net_id)
11297 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11299 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11300 logical_id=new_alone_id,
11301 children=dev.children,
11302 size=dev.size,
11303 params={})
11304 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11305 self.cfg)
11306 try:
11307 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11308 anno_new_drbd,
11309 _GetInstanceInfoText(self.instance), False)
11310 except errors.GenericError:
11311 self.cfg.ReleaseDRBDMinors(self.instance.name)
11312 raise
11314 # We have new devices, shutdown the drbd on the old secondary
11315 for idx, dev in enumerate(self.instance.disks):
11316 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11317 self.cfg.SetDiskID(dev, self.target_node)
11318 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11319 (dev, self.instance)).fail_msg
11320 if msg:
11321 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11322 " node: %s" % (idx, msg),
11323 hint=("Please cleanup this device manually as"
11324 " soon as possible"))
11326 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11327 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11328 self.instance.disks)[pnode]
11330 msg = result.fail_msg
11331 if msg:
11332 # detaches didn't succeed (unlikely)
11333 self.cfg.ReleaseDRBDMinors(self.instance.name)
11334 raise errors.OpExecError("Can't detach the disks from the network on"
11335 " old node: %s" % (msg,))
11337 # if we managed to detach at least one, we update all the disks of
11338 # the instance to point to the new secondary
11339 self.lu.LogInfo("Updating instance configuration")
11340 for dev, _, new_logical_id in iv_names.itervalues():
11341 dev.logical_id = new_logical_id
11342 self.cfg.SetDiskID(dev, self.instance.primary_node)
11344 self.cfg.Update(self.instance, feedback_fn)
11346 # Release all node locks (the configuration has been updated)
11347 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11349 # and now perform the drbd attach
11350 self.lu.LogInfo("Attaching primary drbds to new secondary"
11351 " (standalone => connected)")
11352 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11353 self.new_node],
11354 self.node_secondary_ip,
11355 (self.instance.disks, self.instance),
11356 self.instance.name,
11357 False)
11358 for to_node, to_result in result.items():
11359 msg = to_result.fail_msg
11360 if msg:
11361 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11362 to_node, msg,
11363 hint=("please do a gnt-instance info to see the"
11364 " status of disks"))
11366 cstep = itertools.count(5)
11368 if self.early_release:
11369 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11370 self._RemoveOldStorage(self.target_node, iv_names)
11371 # TODO: Check if releasing locks early still makes sense
11372 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11374 # Release all resource locks except those used by the instance
11375 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11376 keep=self.node_secondary_ip.keys())
11378 # TODO: Can the instance lock be downgraded here? Take the optional disk
11379 # shutdown in the caller into consideration.
11382 # This can fail as the old devices are degraded and _WaitForSync
11383 # does a combined result over all disks, so we don't check its return value
11384 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11385 _WaitForSync(self.lu, self.instance)
11387 # Check all devices manually
11388 self._CheckDevices(self.instance.primary_node, iv_names)
11390 # Step: remove old storage
11391 if not self.early_release:
11392 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11393 self._RemoveOldStorage(self.target_node, iv_names)
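# For reference, the two DRBD logical IDs built in step 4 above differ only
# in the port field (values are hypothetical):
#   new_alone_id = ("pnode", "newnode", None,  0, 3, "secret")  # no network
#   new_net_id   = ("pnode", "newnode", 11000, 0, 3, "secret")  # with port
# Activating with new_alone_id first is what makes the later
# call_drbd_attach_net "standalone => connected" transition possible.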
11396 class LURepairNodeStorage(NoHooksLU):
11397 """Repairs the volume group on a node.
11402 def CheckArguments(self):
11403 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11405 storage_type = self.op.storage_type
11407 if (constants.SO_FIX_CONSISTENCY not in
11408 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11409 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11410 " repaired" % storage_type,
11411 errors.ECODE_INVAL)
11413 def ExpandNames(self):
11414 self.needed_locks = {
11415 locking.LEVEL_NODE: [self.op.node_name],
11416 }
11418 def _CheckFaultyDisks(self, instance, node_name):
11419 """Ensure faulty disks abort the opcode or at least warn."""
11420 try:
11421 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11422 node_name, True):
11423 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11424 " node '%s'" % (instance.name, node_name),
11425 errors.ECODE_STATE)
11426 except errors.OpPrereqError, err:
11427 if self.op.ignore_consistency:
11428 self.proc.LogWarning(str(err.args[0]))
11429 else:
11430 raise
11432 def CheckPrereq(self):
11433 """Check prerequisites.
11436 # Check whether any instance on this node has faulty disks
11437 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11438 if inst.admin_state != constants.ADMINST_UP:
11439 continue
11440 check_nodes = set(inst.all_nodes)
11441 check_nodes.discard(self.op.node_name)
11442 for inst_node_name in check_nodes:
11443 self._CheckFaultyDisks(inst, inst_node_name)
11445 def Exec(self, feedback_fn):
11446 feedback_fn("Repairing storage unit '%s' on %s ..." %
11447 (self.op.name, self.op.node_name))
11449 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11450 result = self.rpc.call_storage_execute(self.op.node_name,
11451 self.op.storage_type, st_args,
11452 self.op.name,
11453 constants.SO_FIX_CONSISTENCY)
11454 result.Raise("Failed to repair storage unit '%s' on %s" %
11455 (self.op.name, self.op.node_name))
11458 class LUNodeEvacuate(NoHooksLU):
11459 """Evacuates instances off a list of nodes.
11464 _MODE2IALLOCATOR = {
11465 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11466 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11467 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11468 }
11469 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11470 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11471 constants.IALLOCATOR_NEVAC_MODES)
11473 def CheckArguments(self):
11474 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11476 def ExpandNames(self):
11477 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11479 if self.op.remote_node is not None:
11480 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11481 assert self.op.remote_node
11483 if self.op.remote_node == self.op.node_name:
11484 raise errors.OpPrereqError("Can not use evacuated node as a new"
11485 " secondary node", errors.ECODE_INVAL)
11487 if self.op.mode != constants.NODE_EVAC_SEC:
11488 raise errors.OpPrereqError("Without the use of an iallocator only"
11489 " secondary instances can be evacuated",
11490 errors.ECODE_INVAL)
11493 self.share_locks = _ShareAll()
11494 self.needed_locks = {
11495 locking.LEVEL_INSTANCE: [],
11496 locking.LEVEL_NODEGROUP: [],
11497 locking.LEVEL_NODE: [],
11498 }
11500 # Determine nodes (via group) optimistically, needs verification once locks
11501 # have been acquired
11502 self.lock_nodes = self._DetermineNodes()
11504 def _DetermineNodes(self):
11505 """Gets the list of nodes to operate on.
11508 if self.op.remote_node is None:
11509 # Iallocator will choose any node(s) in the same group
11510 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11511 else:
11512 group_nodes = frozenset([self.op.remote_node])
11514 # Determine nodes to be locked
11515 return set([self.op.node_name]) | group_nodes
11517 def _DetermineInstances(self):
11518 """Builds list of instances to operate on.
11521 assert self.op.mode in constants.NODE_EVAC_MODES
11523 if self.op.mode == constants.NODE_EVAC_PRI:
11524 # Primary instances only
11525 inst_fn = _GetNodePrimaryInstances
11526 assert self.op.remote_node is None, \
11527 "Evacuating primary instances requires iallocator"
11528 elif self.op.mode == constants.NODE_EVAC_SEC:
11529 # Secondary instances only
11530 inst_fn = _GetNodeSecondaryInstances
11531 else:
11532 # All instances
11533 assert self.op.mode == constants.NODE_EVAC_ALL
11534 inst_fn = _GetNodeInstances
11535 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11536 if self.op.remote_node is not None:
11537 raise errors.OpPrereqError("Due to an issue with the iallocator"
11538 " interface it is not possible to evacuate"
11539 " all instances at once; specify explicitly"
11540 " whether to evacuate primary or secondary"
11541 " instances",
11542 errors.ECODE_INVAL)
11544 return inst_fn(self.cfg, self.op.node_name)
11546 def DeclareLocks(self, level):
11547 if level == locking.LEVEL_INSTANCE:
11548 # Lock instances optimistically, needs verification once node and group
11549 # locks have been acquired
11550 self.needed_locks[locking.LEVEL_INSTANCE] = \
11551 set(i.name for i in self._DetermineInstances())
11553 elif level == locking.LEVEL_NODEGROUP:
11554 # Lock node groups for all potential target nodes optimistically, needs
11555 # verification once nodes have been acquired
11556 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11557 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11559 elif level == locking.LEVEL_NODE:
11560 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11562 def CheckPrereq(self):
11564 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11565 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11566 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11568 need_nodes = self._DetermineNodes()
11570 if not owned_nodes.issuperset(need_nodes):
11571 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11572 " locks were acquired, current nodes are"
11573 " are '%s', used to be '%s'; retry the"
11575 (self.op.node_name,
11576 utils.CommaJoin(need_nodes),
11577 utils.CommaJoin(owned_nodes)),
11578 errors.ECODE_STATE)
11580 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11581 if owned_groups != wanted_groups:
11582 raise errors.OpExecError("Node groups changed since locks were acquired,"
11583 " current groups are '%s', used to be '%s';"
11584 " retry the operation" %
11585 (utils.CommaJoin(wanted_groups),
11586 utils.CommaJoin(owned_groups)))
11588 # Determine affected instances
11589 self.instances = self._DetermineInstances()
11590 self.instance_names = [i.name for i in self.instances]
11592 if set(self.instance_names) != owned_instances:
11593 raise errors.OpExecError("Instances on node '%s' changed since locks"
11594 " were acquired, current instances are '%s',"
11595 " used to be '%s'; retry the operation" %
11596 (self.op.node_name,
11597 utils.CommaJoin(self.instance_names),
11598 utils.CommaJoin(owned_instances)))
11600 if self.instance_names:
11601 self.LogInfo("Evacuating instances from node '%s': %s",
11603 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11605 self.LogInfo("No instances to evacuate from node '%s'",
11608 if self.op.remote_node is not None:
11609 for i in self.instances:
11610 if i.primary_node == self.op.remote_node:
11611 raise errors.OpPrereqError("Node %s is the primary node of"
11612 " instance %s, cannot use it as"
11614 (self.op.remote_node, i.name),
11615 errors.ECODE_INVAL)
11617 def Exec(self, feedback_fn):
11618 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11620 if not self.instance_names:
11621 # No instances to evacuate
11622 jobs = []
11624 elif self.op.iallocator is not None:
11625 # TODO: Implement relocation to other group
11626 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11627 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11628 instances=list(self.instance_names))
11630 ial.Run(self.op.iallocator)
11632 if not ial.success:
11633 raise errors.OpPrereqError("Can't compute node evacuation using"
11634 " iallocator '%s': %s" %
11635 (self.op.iallocator, ial.info),
11636 errors.ECODE_NORES)
11638 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11640 elif self.op.remote_node is not None:
11641 assert self.op.mode == constants.NODE_EVAC_SEC
11642 jobs = [
11643 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11644 remote_node=self.op.remote_node,
11645 disks=[],
11646 mode=constants.REPLACE_DISK_CHG,
11647 early_release=self.op.early_release)]
11648 for instance_name in self.instance_names
11649 ]
11651 else:
11652 raise errors.ProgrammerError("No iallocator or remote node")
11654 return ResultWithJobs(jobs)
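# Shape of the value returned above (sketch with hypothetical instance
# names): for NODE_EVAC_SEC with a remote node, "jobs" is a list of
# single-opcode jobs, e.g.
#   jobs = [
#     [opcodes.OpInstanceReplaceDisks(instance_name="inst1", ...)],
#     [opcodes.OpInstanceReplaceDisks(instance_name="inst2", ...)],
#     ]
# ResultWithJobs then carries them back so the processor can submit each
# inner list as a separate job.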
11657 def _SetOpEarlyRelease(early_release, op):
11658 """Sets C{early_release} flag on opcodes if available.
11662 op.early_release = early_release
11663 except AttributeError:
11664 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11669 def _NodeEvacDest(use_nodes, group, nodes):
11670 """Returns group or nodes depending on caller's choice.
11674 return utils.CommaJoin(nodes)
11679 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11680 """Unpacks the result of change-group and node-evacuate iallocator requests.
11682 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11683 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11685 @type lu: L{LogicalUnit}
11686 @param lu: Logical unit instance
11687 @type alloc_result: tuple/list
11688 @param alloc_result: Result from iallocator
11689 @type early_release: bool
11690 @param early_release: Whether to release locks early if possible
11691 @type use_nodes: bool
11692 @param use_nodes: Whether to display node names instead of groups
11694 """
11695 (moved, failed, jobs) = alloc_result
11697 if failed:
11698 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11699 for (name, reason) in failed)
11700 lu.LogWarning("Unable to evacuate instances %s", failreason)
11701 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11704 lu.LogInfo("Instances to be moved: %s",
11705 utils.CommaJoin("%s (to %s)" %
11706 (name, _NodeEvacDest(use_nodes, group, nodes))
11707 for (name, group, nodes) in moved))
11709 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11710 map(opcodes.OpCode.LoadOpCode, ops))
11711 for ops in jobs]
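# Sketch of an iallocator node-evacuate result as unpacked above
# (hypothetical values): alloc_result is a (moved, failed, jobs) triple,
# roughly
#   moved  = [("inst1", "group1", ["node3"])]
#   failed = [("inst2", "not enough memory")]
#   jobs   = [[{"OP_ID": "OP_INSTANCE_MIGRATE", ...}]]
# where each entry in "jobs" is a list of serialized opcodes that
# opcodes.OpCode.LoadOpCode turns back into opcode objects.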
11714 class LUInstanceGrowDisk(LogicalUnit):
11715 """Grow a disk of an instance.
11718 HPATH = "disk-grow"
11719 HTYPE = constants.HTYPE_INSTANCE
11720 REQ_BGL = False
11722 def ExpandNames(self):
11723 self._ExpandAndLockInstance()
11724 self.needed_locks[locking.LEVEL_NODE] = []
11725 self.needed_locks[locking.LEVEL_NODE_RES] = []
11726 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11727 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11729 def DeclareLocks(self, level):
11730 if level == locking.LEVEL_NODE:
11731 self._LockInstancesNodes()
11732 elif level == locking.LEVEL_NODE_RES:
11734 self.needed_locks[locking.LEVEL_NODE_RES] = \
11735 self.needed_locks[locking.LEVEL_NODE][:]
11737 def BuildHooksEnv(self):
11738 """Build hooks env.
11740 This runs on the master, the primary and all the secondaries.
11744 "DISK": self.op.disk,
11745 "AMOUNT": self.op.amount,
11746 "ABSOLUTE": self.op.absolute,
11748 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11751 def BuildHooksNodes(self):
11752 """Build hooks nodes.
11755 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11758 def CheckPrereq(self):
11759 """Check prerequisites.
11761 This checks that the instance is in the cluster.
11763 """
11764 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11765 assert instance is not None, \
11766 "Cannot retrieve locked instance %s" % self.op.instance_name
11767 nodenames = list(instance.all_nodes)
11768 for node in nodenames:
11769 _CheckNodeOnline(self, node)
11771 self.instance = instance
11773 if instance.disk_template not in constants.DTS_GROWABLE:
11774 raise errors.OpPrereqError("Instance's disk layout does not support"
11775 " growing", errors.ECODE_INVAL)
11777 self.disk = instance.FindDisk(self.op.disk)
11779 if self.op.absolute:
11780 self.target = self.op.amount
11781 self.delta = self.target - self.disk.size
11782 if self.delta < 0:
11783 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11784 "current disk size (%s)" %
11785 (utils.FormatUnit(self.target, "h"),
11786 utils.FormatUnit(self.disk.size, "h")),
11787 errors.ECODE_STATE)
11788 else:
11789 self.delta = self.op.amount
11790 self.target = self.disk.size + self.delta
11791 if self.delta < 0:
11792 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11793 utils.FormatUnit(self.delta, "h"),
11794 errors.ECODE_INVAL)
11796 if instance.disk_template not in (constants.DT_FILE,
11797 constants.DT_SHARED_FILE,
11798 constants.DT_RBD):
11799 # TODO: check the free disk space for file, when that feature will be
11800 # supported
11801 _CheckNodesFreeDiskPerVG(self, nodenames,
11802 self.disk.ComputeGrowth(self.delta))
11804 def Exec(self, feedback_fn):
11805 """Execute disk grow.
11808 instance = self.instance
11811 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11812 assert (self.owned_locks(locking.LEVEL_NODE) ==
11813 self.owned_locks(locking.LEVEL_NODE_RES))
11815 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11816 if not disks_ok:
11817 raise errors.OpExecError("Cannot activate block device to grow")
11819 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11820 (self.op.disk, instance.name,
11821 utils.FormatUnit(self.delta, "h"),
11822 utils.FormatUnit(self.target, "h")))
11824 # First run all grow ops in dry-run mode
11825 for node in instance.all_nodes:
11826 self.cfg.SetDiskID(disk, node)
11827 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11828 True, True)
11829 result.Raise("Grow request failed to node %s" % node)
11831 # We know that (as far as we can test) operations across different
11832 # nodes will succeed, time to run it for real on the backing storage
11833 for node in instance.all_nodes:
11834 self.cfg.SetDiskID(disk, node)
11835 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11836 False, True)
11837 result.Raise("Grow request failed to node %s" % node)
11839 # And now execute it for logical storage, on the primary node
11840 node = instance.primary_node
11841 self.cfg.SetDiskID(disk, node)
11842 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11843 False, False)
11844 result.Raise("Grow request failed to node %s" % node)
11846 disk.RecordGrow(self.delta)
11847 self.cfg.Update(instance, feedback_fn)
11849 # Changes have been recorded, release node lock
11850 _ReleaseLocks(self, locking.LEVEL_NODE)
11852 # Downgrade lock while waiting for sync
11853 self.glm.downgrade(locking.LEVEL_INSTANCE)
11855 if self.op.wait_for_sync:
11856 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11857 if disk_abort:
11858 self.proc.LogWarning("Disk sync-ing has not returned a good"
11859 " status; please check the instance")
11860 if instance.admin_state != constants.ADMINST_UP:
11861 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11862 elif instance.admin_state != constants.ADMINST_UP:
11863 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11864 " not supposed to be running because no wait for"
11865 " sync mode was requested")
11867 assert self.owned_locks(locking.LEVEL_NODE_RES)
11868 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
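# Worked example for the size math in CheckPrereq above (arbitrary
# numbers): with disk.size == 10240 MiB,
#   amount=2048,  absolute=False -> delta=2048, target=12288
#   amount=12288, absolute=True  -> delta=2048, target=12288
# while amount=8192 with absolute=True yields delta < 0 and is rejected
# with ECODE_STATE.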
11871 class LUInstanceQueryData(NoHooksLU):
11872 """Query runtime instance data.
11877 def ExpandNames(self):
11878 self.needed_locks = {}
11880 # Use locking if requested or when non-static information is wanted
11881 if not (self.op.static or self.op.use_locking):
11882 self.LogWarning("Non-static data requested, locks need to be acquired")
11883 self.op.use_locking = True
11885 if self.op.instances or not self.op.use_locking:
11886 # Expand instance names right here
11887 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11888 else:
11889 # Will use acquired locks
11890 self.wanted_names = None
11892 if self.op.use_locking:
11893 self.share_locks = _ShareAll()
11895 if self.wanted_names is None:
11896 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11897 else:
11898 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11900 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11901 self.needed_locks[locking.LEVEL_NODE] = []
11902 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11904 def DeclareLocks(self, level):
11905 if self.op.use_locking:
11906 if level == locking.LEVEL_NODEGROUP:
11907 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11909 # Lock all groups used by instances optimistically; this requires going
11910 # via the node before it's locked, requiring verification later on
11911 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11912 frozenset(group_uuid
11913 for instance_name in owned_instances
11914 for group_uuid in
11915 self.cfg.GetInstanceNodeGroups(instance_name))
11917 elif level == locking.LEVEL_NODE:
11918 self._LockInstancesNodes()
11920 def CheckPrereq(self):
11921 """Check prerequisites.
11923 This only checks the optional instance list against the existing names.
11925 """
11926 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11927 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11928 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11930 if self.wanted_names is None:
11931 assert self.op.use_locking, "Locking was not used"
11932 self.wanted_names = owned_instances
11934 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11936 if self.op.use_locking:
11937 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11938 None)
11939 else:
11940 assert not (owned_instances or owned_groups or owned_nodes)
11942 self.wanted_instances = instances.values()
11944 def _ComputeBlockdevStatus(self, node, instance, dev):
11945 """Returns the status of a block device
11948 if self.op.static or not node:
11951 self.cfg.SetDiskID(dev, node)
11953 result = self.rpc.call_blockdev_find(node, dev)
11957 result.Raise("Can't compute disk status for %s" % instance.name)
11959 status = result.payload
11963 return (status.dev_path, status.major, status.minor,
11964 status.sync_percent, status.estimated_time,
11965 status.is_degraded, status.ldisk_status)
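# The tuple returned above is (dev_path, major, minor, sync_percent,
# estimated_time, is_degraded, ldisk_status); e.g. a healthy, fully synced
# DRBD device might look like (hypothetical values):
#   ("/dev/drbd0", 147, 0, None, None, False, constants.LDS_OKAY)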
11967 def _ComputeDiskStatus(self, instance, snode, dev):
11968 """Compute block device status.
11971 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11973 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11975 def _ComputeDiskStatusInner(self, instance, snode, dev):
11976 """Compute block device status.
11978 @attention: The device has to be annotated already.
11980 """
11981 if dev.dev_type in constants.LDS_DRBD:
11982 # we change the snode then (otherwise we use the one passed in)
11983 if dev.logical_id[0] == instance.primary_node:
11984 snode = dev.logical_id[1]
11985 else:
11986 snode = dev.logical_id[0]
11988 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11989 instance, dev)
11990 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11993 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11994 instance, snode),
11995 dev.children)
11997 return {
12000 "iv_name": dev.iv_name,
12001 "dev_type": dev.dev_type,
12002 "logical_id": dev.logical_id,
12003 "physical_id": dev.physical_id,
12004 "pstatus": dev_pstatus,
12005 "sstatus": dev_sstatus,
12006 "children": dev_children,
12011 def Exec(self, feedback_fn):
12012 """Gather and return data"""
12015 cluster = self.cfg.GetClusterInfo()
12017 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12018 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12020 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12021 for node in nodes.values()))
12023 group2name_fn = lambda uuid: groups[uuid].name
12025 for instance in self.wanted_instances:
12026 pnode = nodes[instance.primary_node]
12028 if self.op.static or pnode.offline:
12029 remote_state = None
12030 if pnode.offline:
12031 self.LogWarning("Primary node %s is marked offline, returning static"
12032 " information only for instance %s" %
12033 (pnode.name, instance.name))
12034 else:
12035 remote_info = self.rpc.call_instance_info(instance.primary_node,
12036 instance.name,
12037 instance.hypervisor)
12038 remote_info.Raise("Error checking node %s" % instance.primary_node)
12039 remote_info = remote_info.payload
12040 if remote_info and "state" in remote_info:
12041 remote_state = "up"
12043 if instance.admin_state == constants.ADMINST_UP:
12044 remote_state = "down"
12046 remote_state = instance.admin_state
12048 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12051 snodes_group_uuids = [nodes[snode_name].group
12052 for snode_name in instance.secondary_nodes]
12054 result[instance.name] = {
12055 "name": instance.name,
12056 "config_state": instance.admin_state,
12057 "run_state": remote_state,
12058 "pnode": instance.primary_node,
12059 "pnode_group_uuid": pnode.group,
12060 "pnode_group_name": group2name_fn(pnode.group),
12061 "snodes": instance.secondary_nodes,
12062 "snodes_group_uuids": snodes_group_uuids,
12063 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12065 # this happens to be the same format used for hooks
12066 "nics": _NICListToTuple(self, instance.nics),
12067 "disk_template": instance.disk_template,
12069 "hypervisor": instance.hypervisor,
12070 "network_port": instance.network_port,
12071 "hv_instance": instance.hvparams,
12072 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12073 "be_instance": instance.beparams,
12074 "be_actual": cluster.FillBE(instance),
12075 "os_instance": instance.osparams,
12076 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12077 "serial_no": instance.serial_no,
12078 "mtime": instance.mtime,
12079 "ctime": instance.ctime,
12080 "uuid": instance.uuid,
12086 def PrepareContainerMods(mods, private_fn):
12087 """Prepares a list of container modifications by adding a private data field.
12089 @type mods: list of tuples; (operation, index, parameters)
12090 @param mods: List of modifications
12091 @type private_fn: callable or None
12092 @param private_fn: Callable for constructing a private data field for a
12093 modification
12095 """
12097 if private_fn is None:
12098 fn = lambda: None
12099 else:
12100 fn = private_fn
12102 return [(op, idx, params, fn()) for (op, idx, params) in mods]
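# Example of the transformation performed above (hypothetical input): with
# private_fn=None each modification simply gains a None private field,
#   [(constants.DDM_ADD, -1, {"size": 1024})]
#     -> [(constants.DDM_ADD, -1, {"size": 1024}, None)]
# while e.g. private_fn=_InstNicModPrivate attaches a fresh private object
# to each modification.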
12105 #: Type description for changes as returned by L{ApplyContainerMods}'s
12106 #: callbacks
12107 _TApplyContModsCbChanges = \
12108 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12109 ht.TNonEmptyString,
12110 ht.TAny,
12111 ])))
12114 def ApplyContainerMods(kind, container, chgdesc, mods,
12115 create_fn, modify_fn, remove_fn):
12116 """Applies descriptions in C{mods} to C{container}.
12118 @type kind: string
12119 @param kind: One-word item description
12120 @type container: list
12121 @param container: Container to modify
12122 @type chgdesc: None or list
12123 @param chgdesc: List of applied changes
12124 @type mods: list
12125 @param mods: Modifications as returned by L{PrepareContainerMods}
12126 @type create_fn: callable
12127 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12128 receives absolute item index, parameters and private data object as added
12129 by L{PrepareContainerMods}, returns tuple containing new item and changes
12130 applied to it
12131 @type modify_fn: callable
12132 @param modify_fn: Callback for modifying an existing item
12133 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12134 and private data object as added by L{PrepareContainerMods}, returns
12135 changes applied to it
12136 @type remove_fn: callable
12137 @param remove_fn: Callback on removing item; receives absolute item index,
12138 item and private data object as added by L{PrepareContainerMods}
12140 """
12141 for (op, idx, params, private) in mods:
12142 if idx == -1:
12143 # Append
12144 absidx = len(container) - 1
12145 elif idx < 0:
12146 raise IndexError("Not accepting negative indices other than -1")
12147 elif idx > len(container):
12148 raise IndexError("Got %s index %s, but there are only %s" %
12149 (kind, idx, len(container)))
12150 else:
12151 absidx = idx
12153 changes = None
12155 if op == constants.DDM_ADD:
12156 # Calculate where item will be added
12157 if idx == -1:
12158 addidx = len(container)
12159 else:
12160 addidx = idx
12162 if create_fn is None:
12163 item = params
12164 else:
12165 (item, changes) = create_fn(addidx, params, private)
12167 if idx == -1:
12168 container.append(item)
12169 else:
12171 assert idx <= len(container)
12172 # list.insert does so before the specified index
12173 container.insert(idx, item)
12174 else:
12175 # Retrieve existing item
12176 try:
12177 item = container[absidx]
12178 except IndexError:
12179 raise IndexError("Invalid %s index %s" % (kind, idx))
12181 if op == constants.DDM_REMOVE:
12182 assert not params
12184 if remove_fn is not None:
12185 remove_fn(absidx, item, private)
12187 changes = [("%s/%s" % (kind, absidx), "remove")]
12189 assert container[absidx] == item
12190 del container[absidx]
12191 elif op == constants.DDM_MODIFY:
12192 if modify_fn is not None:
12193 changes = modify_fn(absidx, item, params, private)
12194 else:
12195 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12197 assert _TApplyContModsCbChanges(changes)
12199 if not (chgdesc is None or changes is None):
12200 chgdesc.extend(changes)
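# Worked example (hypothetical, using a plain list as the container):
#   container = ["a", "b"]
#   chgdesc = []
#   mods = PrepareContainerMods([(constants.DDM_REMOVE, -1, None)], None)
#   ApplyContainerMods("test", container, chgdesc, mods, None, None, None)
# leaves container == ["a"] and chgdesc == [("test/1", "remove")].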
12203 def _UpdateIvNames(base_index, disks):
12204 """Updates the C{iv_name} attribute of disks.
12206 @type disks: list of L{objects.Disk}
12207 @param disks: Disk objects to be updated
12208 """
12209 for (idx, disk) in enumerate(disks):
12210 disk.iv_name = "disk/%s" % (base_index + idx, )
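# Example: after inserting a disk at index 1, _UpdateIvNames(1, disks[1:])
# renames the shifted disks to "disk/1", "disk/2", ... so iv_name keeps
# matching each disk's position in the container.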
12213 class _InstNicModPrivate:
12214 """Data structure for network interface modifications.
12216 Used by L{LUInstanceSetParams}.
12218 """
12219 def __init__(self):
12220 self.params = None
12221 self.filled = None
12224 class LUInstanceSetParams(LogicalUnit):
12225 """Modifies an instances's parameters.
12228 HPATH = "instance-modify"
12229 HTYPE = constants.HTYPE_INSTANCE
12230 REQ_BGL = False
12232 @staticmethod
12233 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12234 assert ht.TList(mods)
12235 assert not mods or len(mods[0]) in (2, 3)
12237 if mods and len(mods[0]) == 2:
12238 result = []
12240 addremove = 0
12241 for op, params in mods:
12242 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12243 result.append((op, -1, params))
12244 addremove += 1
12246 if addremove > 1:
12247 raise errors.OpPrereqError("Only one %s add or remove operation is"
12248 " supported at a time" % kind,
12249 errors.ECODE_INVAL)
12250 else:
12251 result.append((constants.DDM_MODIFY, op, params))
12253 assert verify_fn(result)
12254 else:
12255 result = mods
12257 return result
12259 @staticmethod
12260 def _CheckMods(kind, mods, key_types, item_fn):
12261 """Ensures requested disk/NIC modifications are valid.
12264 for (op, _, params) in mods:
12265 assert ht.TDict(params)
12267 utils.ForceDictType(params, key_types)
12269 if op == constants.DDM_REMOVE:
12270 if params:
12271 raise errors.OpPrereqError("No settings should be passed when"
12272 " removing a %s" % kind,
12273 errors.ECODE_INVAL)
12274 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12275 item_fn(op, params)
12277 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12279 @staticmethod
12280 def _VerifyDiskModification(op, params):
12281 """Verifies a disk modification.
12283 """
12284 if op == constants.DDM_ADD:
12285 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12286 if mode not in constants.DISK_ACCESS_SET:
12287 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12288 errors.ECODE_INVAL)
12290 size = params.get(constants.IDISK_SIZE, None)
12291 if size is None:
12292 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12293 constants.IDISK_SIZE, errors.ECODE_INVAL)
12295 try:
12296 size = int(size)
12297 except (TypeError, ValueError), err:
12298 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12299 errors.ECODE_INVAL)
12301 params[constants.IDISK_SIZE] = size
12303 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12304 raise errors.OpPrereqError("Disk size change not possible, use"
12305 " grow-disk", errors.ECODE_INVAL)
12307 @staticmethod
12308 def _VerifyNicModification(op, params):
12309 """Verifies a network interface modification.
12311 """
12312 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12313 ip = params.get(constants.INIC_IP, None)
12314 if ip is None:
12315 pass
12316 elif ip.lower() == constants.VALUE_NONE:
12317 params[constants.INIC_IP] = None
12318 elif not netutils.IPAddress.IsValid(ip):
12319 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12320 errors.ECODE_INVAL)
12322 bridge = params.get("bridge", None)
12323 link = params.get(constants.INIC_LINK, None)
12324 if bridge and link:
12325 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12326 " at the same time", errors.ECODE_INVAL)
12327 elif bridge and bridge.lower() == constants.VALUE_NONE:
12328 params["bridge"] = None
12329 elif link and link.lower() == constants.VALUE_NONE:
12330 params[constants.INIC_LINK] = None
12332 if op == constants.DDM_ADD:
12333 macaddr = params.get(constants.INIC_MAC, None)
12334 if macaddr is None:
12335 params[constants.INIC_MAC] = constants.VALUE_AUTO
12337 if constants.INIC_MAC in params:
12338 macaddr = params[constants.INIC_MAC]
12339 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12340 macaddr = utils.NormalizeAndValidateMac(macaddr)
12342 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12343 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12344 " modifying an existing NIC",
12345 errors.ECODE_INVAL)
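# Example of the NIC checks above (hypothetical values): adding a NIC with
# {constants.INIC_IP: "none"} stores None for the IP, a DDM_ADD without a
# MAC gets constants.INIC_MAC set to constants.VALUE_AUTO, and passing both
# "bridge" and constants.INIC_LINK at once is rejected.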
12347 def CheckArguments(self):
12348 if not (self.op.nics or self.op.disks or self.op.disk_template or
12349 self.op.hvparams or self.op.beparams or self.op.os_name or
12350 self.op.offline is not None or self.op.runtime_mem):
12351 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12353 if self.op.hvparams:
12354 _CheckGlobalHvParams(self.op.hvparams)
12356 self.op.disks = self._UpgradeDiskNicMods(
12357 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12358 self.op.nics = self._UpgradeDiskNicMods(
12359 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12361 # Check disk modifications
12362 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12363 self._VerifyDiskModification)
12365 if self.op.disks and self.op.disk_template is not None:
12366 raise errors.OpPrereqError("Disk template conversion and other disk"
12367 " changes not supported at the same time",
12368 errors.ECODE_INVAL)
12370 if (self.op.disk_template and
12371 self.op.disk_template in constants.DTS_INT_MIRROR and
12372 self.op.remote_node is None):
12373 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12374 " one requires specifying a secondary node",
12375 errors.ECODE_INVAL)
12377 # Check NIC modifications
12378 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12379 self._VerifyNicModification)
12381 def ExpandNames(self):
12382 self._ExpandAndLockInstance()
12383 # Can't even acquire node locks in shared mode as upcoming changes in
12384 # Ganeti 2.6 will start to modify the node object on disk conversion
12385 self.needed_locks[locking.LEVEL_NODE] = []
12386 self.needed_locks[locking.LEVEL_NODE_RES] = []
12387 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12389 def DeclareLocks(self, level):
12390 # TODO: Acquire group lock in shared mode (disk parameters)
12391 if level == locking.LEVEL_NODE:
12392 self._LockInstancesNodes()
12393 if self.op.disk_template and self.op.remote_node:
12394 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12395 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12396 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12398 self.needed_locks[locking.LEVEL_NODE_RES] = \
12399 self.needed_locks[locking.LEVEL_NODE][:]
12401 def BuildHooksEnv(self):
12402 """Build hooks env.
12404 This runs on the master, primary and secondaries.
12406 """
12407 args = {}
12408 if constants.BE_MINMEM in self.be_new:
12409 args["minmem"] = self.be_new[constants.BE_MINMEM]
12410 if constants.BE_MAXMEM in self.be_new:
12411 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12412 if constants.BE_VCPUS in self.be_new:
12413 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12414 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12415 # information at all.
12417 if self._new_nics is not None:
12418 nics = []
12420 for nic in self._new_nics:
12421 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12422 mode = nicparams[constants.NIC_MODE]
12423 link = nicparams[constants.NIC_LINK]
12424 nics.append((nic.ip, nic.mac, mode, link))
12426 args["nics"] = nics
12428 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12429 if self.op.disk_template:
12430 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12431 if self.op.runtime_mem:
12432 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12436 def BuildHooksNodes(self):
12437 """Build hooks nodes.
12440 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12443 def _PrepareNicModification(self, params, private, old_ip, old_params,
12445 update_params_dict = dict([(key, params[key])
12446 for key in constants.NICS_PARAMETERS
12449 if "bridge" in params:
12450 update_params_dict[constants.NIC_LINK] = params["bridge"]
12452 new_params = _GetUpdatedParams(old_params, update_params_dict)
12453 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12455 new_filled_params = cluster.SimpleFillNIC(new_params)
12456 objects.NIC.CheckParameterSyntax(new_filled_params)
12458 new_mode = new_filled_params[constants.NIC_MODE]
12459 if new_mode == constants.NIC_MODE_BRIDGED:
12460 bridge = new_filled_params[constants.NIC_LINK]
12461 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12462 if msg:
12463 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12464 if self.op.force:
12465 self.warn.append(msg)
12466 else:
12467 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12469 elif new_mode == constants.NIC_MODE_ROUTED:
12470 ip = params.get(constants.INIC_IP, old_ip)
12471 if ip is None:
12472 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12473 " on a routed NIC", errors.ECODE_INVAL)
12475 if constants.INIC_MAC in params:
12476 mac = params[constants.INIC_MAC]
12477 if mac is None:
12478 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12479 errors.ECODE_INVAL)
12480 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12481 # otherwise generate the MAC address
12482 params[constants.INIC_MAC] = \
12483 self.cfg.GenerateMAC(self.proc.GetECId())
12484 else:
12485 # or validate/reserve the current one
12486 try:
12487 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12488 except errors.ReservationError:
12489 raise errors.OpPrereqError("MAC address '%s' already in use"
12490 " in cluster" % mac,
12491 errors.ECODE_NOTUNIQUE)
12493 private.params = new_params
12494 private.filled = new_filled_params
12496 def CheckPrereq(self):
12497 """Check prerequisites.
12499 This only checks the instance list against the existing names.
12501 """
12502 # checking the new params on the primary/secondary nodes
12504 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12505 cluster = self.cluster = self.cfg.GetClusterInfo()
12506 assert self.instance is not None, \
12507 "Cannot retrieve locked instance %s" % self.op.instance_name
12508 pnode = instance.primary_node
12509 nodelist = list(instance.all_nodes)
12510 pnode_info = self.cfg.GetNodeInfo(pnode)
12511 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12513 # Prepare disk/NIC modifications
12514 self.diskmod = PrepareContainerMods(self.op.disks, None)
12515 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12518 if self.op.os_name and not self.op.force:
12519 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12520 self.op.force_variant)
12521 instance_os = self.op.os_name
12522 else:
12523 instance_os = instance.os
12525 assert not (self.op.disk_template and self.op.disks), \
12526 "Can't modify disk template and apply disk changes at the same time"
12528 if self.op.disk_template:
12529 if instance.disk_template == self.op.disk_template:
12530 raise errors.OpPrereqError("Instance already has disk template %s" %
12531 instance.disk_template, errors.ECODE_INVAL)
12533 if (instance.disk_template,
12534 self.op.disk_template) not in self._DISK_CONVERSIONS:
12535 raise errors.OpPrereqError("Unsupported disk template conversion from"
12536 " %s to %s" % (instance.disk_template,
12537 self.op.disk_template),
12538 errors.ECODE_INVAL)
12539 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12540 msg="cannot change disk template")
12541 if self.op.disk_template in constants.DTS_INT_MIRROR:
12542 if self.op.remote_node == pnode:
12543 raise errors.OpPrereqError("Given new secondary node %s is the same"
12544 " as the primary node of the instance" %
12545 self.op.remote_node, errors.ECODE_STATE)
12546 _CheckNodeOnline(self, self.op.remote_node)
12547 _CheckNodeNotDrained(self, self.op.remote_node)
12548 # FIXME: here we assume that the old instance type is DT_PLAIN
12549 assert instance.disk_template == constants.DT_PLAIN
12550 disks = [{constants.IDISK_SIZE: d.size,
12551 constants.IDISK_VG: d.logical_id[0]}
12552 for d in instance.disks]
12553 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12554 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12556 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12557 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12558 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12559 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12560 ignore=self.op.ignore_ipolicy)
12561 if pnode_info.group != snode_info.group:
12562 self.LogWarning("The primary and secondary nodes are in two"
12563 " different node groups; the disk parameters"
12564 " from the first disk's node group will be"
12567 # hvparams processing
12568 if self.op.hvparams:
12569 hv_type = instance.hypervisor
12570 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12571 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12572 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12575 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12576 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12577 self.hv_proposed = self.hv_new = hv_new # the new actual values
12578 self.hv_inst = i_hvdict # the new dict (without defaults)
12579 else:
12580 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12581 instance.hvparams)
12582 self.hv_new = self.hv_inst = {}
12584 # beparams processing
12585 if self.op.beparams:
12586 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12587 use_none=True)
12588 objects.UpgradeBeParams(i_bedict)
12589 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12590 be_new = cluster.SimpleFillBE(i_bedict)
12591 self.be_proposed = self.be_new = be_new # the new actual values
12592 self.be_inst = i_bedict # the new dict (without defaults)
12593 else:
12594 self.be_new = self.be_inst = {}
12595 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12596 be_old = cluster.FillBE(instance)
12598 # CPU param validation -- checking every time a parameter is
12599 # changed to cover all cases where either CPU mask or vcpus have
12600 # been changed
12601 if (constants.BE_VCPUS in self.be_proposed and
12602 constants.HV_CPU_MASK in self.hv_proposed):
12603 cpu_list = \
12604 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12605 # Verify mask is consistent with number of vCPUs. Can skip this
12606 # test if only 1 entry in the CPU mask, which means same mask
12607 # is applied to all vCPUs.
12608 if (len(cpu_list) > 1 and
12609 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12610 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12612 (self.be_proposed[constants.BE_VCPUS],
12613 self.hv_proposed[constants.HV_CPU_MASK]),
12614 errors.ECODE_INVAL)
12616 # Only perform this test if a new CPU mask is given
12617 if constants.HV_CPU_MASK in self.hv_new:
12618 # Calculate the largest CPU number requested
12619 max_requested_cpu = max(map(max, cpu_list))
12620 # Check that all of the instance's nodes have enough physical CPUs to
12621 # satisfy the requested CPU mask
12622 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12623 max_requested_cpu + 1, instance.hypervisor)
12625 # osparams processing
12626 if self.op.osparams:
12627 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12628 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12629 self.os_inst = i_osdict # the new dict (without defaults)
12630 else:
12631 self.os_inst = {}
12633 self.warn = []
12635 #TODO(dynmem): do the appropriate check involving MINMEM
12636 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12637 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12638 mem_check_list = [pnode]
12639 if be_new[constants.BE_AUTO_BALANCE]:
12640 # either we changed auto_balance to yes or it was from before
12641 mem_check_list.extend(instance.secondary_nodes)
12642 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12643 instance.hypervisor)
12644 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12645 [instance.hypervisor])
12646 pninfo = nodeinfo[pnode]
12647 msg = pninfo.fail_msg
12648 if msg:
12649 # Assume the primary node is unreachable and go ahead
12650 self.warn.append("Can't get info from primary node %s: %s" %
12651 (pnode, msg))
12652 else:
12653 (_, _, (pnhvinfo, )) = pninfo.payload
12654 if not isinstance(pnhvinfo.get("memory_free", None), int):
12655 self.warn.append("Node data from primary node %s doesn't contain"
12656 " free memory information" % pnode)
12657 elif instance_info.fail_msg:
12658 self.warn.append("Can't get instance runtime information: %s" %
12659 instance_info.fail_msg)
12660 else:
12661 if instance_info.payload:
12662 current_mem = int(instance_info.payload["memory"])
12663 else:
12664 # Assume instance not running
12665 # (there is a slight race condition here, but it's not very
12666 # probable, and we have no other way to check)
12667 # TODO: Describe race condition
12668 current_mem = 0
12669 #TODO(dynmem): do the appropriate check involving MINMEM
12670 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12671 pnhvinfo["memory_free"])
12673 raise errors.OpPrereqError("This change will prevent the instance"
12674 " from starting, due to %d MB of memory"
12675 " missing on its primary node" %
12676 miss_mem, errors.ECODE_NORES)
12678 if be_new[constants.BE_AUTO_BALANCE]:
12679 for node, nres in nodeinfo.items():
12680 if node not in instance.secondary_nodes:
12681 continue
12682 nres.Raise("Can't get info from secondary node %s" % node,
12683 prereq=True, ecode=errors.ECODE_STATE)
12684 (_, _, (nhvinfo, )) = nres.payload
12685 if not isinstance(nhvinfo.get("memory_free", None), int):
12686 raise errors.OpPrereqError("Secondary node %s didn't return free"
12687 " memory information" % node,
12688 errors.ECODE_STATE)
12689 #TODO(dynmem): do the appropriate check involving MINMEM
12690 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12691 raise errors.OpPrereqError("This change will prevent the instance"
12692 " from failover to its secondary node"
12693 " %s, due to not enough memory" % node,
12694 errors.ECODE_STATE)
12696 if self.op.runtime_mem:
12697 remote_info = self.rpc.call_instance_info(instance.primary_node,
12698 instance.name,
12699 instance.hypervisor)
12700 remote_info.Raise("Error checking node %s" % instance.primary_node)
12701 if not remote_info.payload: # not running already
12702 raise errors.OpPrereqError("Instance %s is not running" %
12703 instance.name, errors.ECODE_STATE)
12705 current_memory = remote_info.payload["memory"]
12706 if (not self.op.force and
12707 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12708 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12709 raise errors.OpPrereqError("Instance %s must have memory between %d"
12710 " and %d MB of memory unless --force is"
12713 self.be_proposed[constants.BE_MINMEM],
12714 self.be_proposed[constants.BE_MAXMEM]),
12715 errors.ECODE_INVAL)
12717 if self.op.runtime_mem > current_memory:
12718 _CheckNodeFreeMemory(self, instance.primary_node,
12719 "ballooning memory for instance %s" %
12721 self.op.memory - current_memory,
12722 instance.hypervisor)
12724 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12725 raise errors.OpPrereqError("Disk operations not supported for"
12726 " diskless instances", errors.ECODE_INVAL)
12728 def _PrepareNicCreate(_, params, private):
12729 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12730 return (None, None)
12732 def _PrepareNicMod(_, nic, params, private):
12733 self._PrepareNicModification(params, private, nic.ip,
12734 nic.nicparams, cluster, pnode)
12735 return None
12737 # Verify NIC changes (operating on copy)
12738 nics = instance.nics[:]
12739 ApplyContainerMods("NIC", nics, None, self.nicmod,
12740 _PrepareNicCreate, _PrepareNicMod, None)
12741 if len(nics) > constants.MAX_NICS:
12742 raise errors.OpPrereqError("Instance has too many network interfaces"
12743 " (%d), cannot add more" % constants.MAX_NICS,
12744 errors.ECODE_STATE)
12746 # Verify disk changes (operating on a copy)
12747 disks = instance.disks[:]
12748 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12749 if len(disks) > constants.MAX_DISKS:
12750 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12751 " more" % constants.MAX_DISKS,
12752 errors.ECODE_STATE)
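# --- illustrative sketch of the ApplyContainerMods contract relied on
# above (the real helper, defined elsewhere in this module, also tracks
# change descriptions and per-item private data; the names and the mod
# format here are simplified assumptions) ---
def apply_container_mods(container, mods, create_fn, modify_fn, remove_fn):
  for (op, idx, params) in mods:
    if op == "add":
      container.insert(len(container) if idx == -1 else idx,
                       create_fn(params))
    elif op == "modify":
      modify_fn(container[idx], params)
    elif op == "remove":
      remove_fn(container[idx])
      del container[idx]

disks = ["disk0"]
apply_container_mods(disks, [("add", -1, "disk1"), ("remove", 0, None)],
                     lambda params: params,
                     lambda item, params: None,
                     lambda item: None)
assert disks == ["disk1"]
# --- end of sketch ---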
12754 if self.op.offline is not None:
12755 if self.op.offline:
12756 msg = "can't change to offline"
12758 msg = "can't change to online"
12759 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12761 # Pre-compute NIC changes (necessary to use result in hooks)
12762 self._nic_chgdesc = []
12763 if self.nicmod:
12764 # Operate on copies as this is still in prereq
12765 nics = [nic.Copy() for nic in instance.nics]
12766 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12767 self._CreateNewNic, self._ApplyNicMods, None)
12768 self._new_nics = nics
12769 else:
12770 self._new_nics = None
12772 def _ConvertPlainToDrbd(self, feedback_fn):
12773 """Converts an instance from plain to drbd.
12776 feedback_fn("Converting template to drbd")
12777 instance = self.instance
12778 pnode = instance.primary_node
12779 snode = self.op.remote_node
12781 assert instance.disk_template == constants.DT_PLAIN
12783 # create a fake disk info for _GenerateDiskTemplate
12784 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12785 constants.IDISK_VG: d.logical_id[0]}
12786 for d in instance.disks]
12787 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12788 instance.name, pnode, [snode],
12789 disk_info, None, None, 0, feedback_fn,
12790 self.diskparams)
12791 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12792 self.diskparams)
12793 info = _GetInstanceInfoText(instance)
12794 feedback_fn("Creating additional volumes...")
12795 # first, create the missing data and meta devices
12796 for disk in anno_disks:
12797 # unfortunately this is... not too nice
12798 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12799 info, True)
12800 for child in disk.children:
12801 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12802 # at this stage, all new LVs have been created, we can rename the
12803 # old ones
12804 feedback_fn("Renaming original volumes...")
12805 rename_list = [(o, n.children[0].logical_id)
12806 for (o, n) in zip(instance.disks, new_disks)]
12807 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12808 result.Raise("Failed to rename original LVs")
12810 feedback_fn("Initializing DRBD devices...")
12811 # all child devices are in place, we can now create the DRBD devices
12812 for disk in anno_disks:
12813 for node in [pnode, snode]:
12814 f_create = node == pnode
12815 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12817 # at this point, the instance has been modified
12818 instance.disk_template = constants.DT_DRBD8
12819 instance.disks = new_disks
12820 self.cfg.Update(instance, feedback_fn)
12822 # Release node locks while waiting for sync
12823 _ReleaseLocks(self, locking.LEVEL_NODE)
12825 # disks are created, waiting for sync
12826 disk_abort = not _WaitForSync(self, instance,
12827 oneshot=not self.op.wait_for_sync)
12828 if disk_abort:
12829 raise errors.OpExecError("There are some degraded disks for"
12830 " this instance, please cleanup manually")
12832 # Node resource locks will be released by caller
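# --- sketch of the rename mapping used above (structures assumed,
# simplified): a DRBD8 disk has children [data_lv, meta_lv], and the
# conversion reuses each existing plain LV as the new data child by
# renaming it to the new data LV's logical_id ---
class FakeDisk(object):
  def __init__(self, logical_id, children=None):
    self.logical_id = logical_id
    self.children = children or []

old_disks = [FakeDisk(("xenvg", "old-data"))]
new_disks = [FakeDisk(("drbd", 0),
                      children=[FakeDisk(("xenvg", "new-data")),
                                FakeDisk(("xenvg", "new-meta"))])]
rename_list = [(o, n.children[0].logical_id)
               for (o, n) in zip(old_disks, new_disks)]
assert rename_list == [(old_disks[0], ("xenvg", "new-data"))]
# --- end of sketch ---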
12834 def _ConvertDrbdToPlain(self, feedback_fn):
12835 """Converts an instance from drbd to plain.
12838 instance = self.instance
12840 assert len(instance.secondary_nodes) == 1
12841 assert instance.disk_template == constants.DT_DRBD8
12843 pnode = instance.primary_node
12844 snode = instance.secondary_nodes[0]
12845 feedback_fn("Converting template to plain")
12847 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12848 new_disks = [d.children[0] for d in instance.disks]
12850 # copy over size and mode
12851 for parent, child in zip(old_disks, new_disks):
12852 child.size = parent.size
12853 child.mode = parent.mode
12855 # this is a DRBD disk, return its port to the pool
12856 # NOTE: this must be done right before the call to cfg.Update!
12857 for disk in old_disks:
12858 tcp_port = disk.logical_id[2]
12859 self.cfg.AddTcpUdpPort(tcp_port)
12861 # update instance structure
12862 instance.disks = new_disks
12863 instance.disk_template = constants.DT_PLAIN
12864 self.cfg.Update(instance, feedback_fn)
12866 # Release locks in case removing disks takes a while
12867 _ReleaseLocks(self, locking.LEVEL_NODE)
12869 feedback_fn("Removing volumes on the secondary node...")
12870 for disk in old_disks:
12871 self.cfg.SetDiskID(disk, snode)
12872 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12873 if msg:
12874 self.LogWarning("Could not remove block device %s on node %s,"
12875 " continuing anyway: %s", disk.iv_name, snode, msg)
12877 feedback_fn("Removing unneeded volumes on the primary node...")
12878 for idx, disk in enumerate(old_disks):
12879 meta = disk.children[1]
12880 self.cfg.SetDiskID(meta, pnode)
12881 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12882 if msg:
12883 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12884 " continuing anyway: %s", idx, pnode, msg)
12886 def _CreateNewDisk(self, idx, params, _):
12887 """Creates a new disk.
12890 instance = self.instance
12893 if instance.disk_template in constants.DTS_FILEBASED:
12894 (file_driver, file_path) = instance.disks[0].logical_id
12895 file_path = os.path.dirname(file_path)
12896 else:
12897 file_driver = file_path = None
12899 disk = \
12900 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12901 instance.primary_node, instance.secondary_nodes,
12902 [params], file_path, file_driver, idx,
12903 self.Log, self.diskparams)[0]
12905 info = _GetInstanceInfoText(instance)
12907 logging.info("Creating volume %s for instance %s",
12908 disk.iv_name, instance.name)
12909 # Note: this needs to be kept in sync with _CreateDisks
12911 for node in instance.all_nodes:
12912 f_create = (node == instance.primary_node)
12913 try:
12914 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12915 except errors.OpExecError, err:
12916 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12917 disk.iv_name, disk, node, err)
12920 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12924 def _ModifyDisk(idx, disk, params, _):
12925 """Modifies a disk.
12928 disk.mode = params[constants.IDISK_MODE]
12931 ("disk.mode/%d" % idx, disk.mode),
12934 def _RemoveDisk(self, idx, root, _):
12938 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
12939 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
12940 self.cfg.SetDiskID(disk, node)
12941 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12942 if msg:
12943 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12944 " continuing anyway", idx, node, msg)
12946 # if this is a DRBD disk, return its port to the pool
12947 if root.dev_type in constants.LDS_DRBD:
12948 self.cfg.AddTcpUdpPort(root.logical_id[2])
12950 @staticmethod
12951 def _CreateNewNic(idx, params, private):
12952 """Creates data structure for a new network interface.
12955 mac = params[constants.INIC_MAC]
12956 ip = params.get(constants.INIC_IP, None)
12957 nicparams = private.params
12959 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12960 ("nic.%d" % idx,
12961 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12962 (mac, ip, private.filled[constants.NIC_MODE],
12963 private.filled[constants.NIC_LINK])),
12964 ])
12966 @staticmethod
12967 def _ApplyNicMods(idx, nic, params, private):
12968 """Modifies a network interface.
12971 changes = []
12973 for key in [constants.INIC_MAC, constants.INIC_IP]:
12974 if key in params:
12975 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12976 setattr(nic, key, params[key])
12978 if private.params:
12979 nic.nicparams = private.params
12981 for (key, val) in params.items():
12982 changes.append(("nic.%s/%d" % (key, idx), val))
12984 return changes
12986 def Exec(self, feedback_fn):
12987 """Modifies an instance.
12989 All parameters take effect only at the next restart of the instance.
12992 # Process here the warnings from CheckPrereq, as we don't have a
12993 # feedback_fn there.
12994 # TODO: Replace with self.LogWarning
12995 for warn in self.warn:
12996 feedback_fn("WARNING: %s" % warn)
12998 assert ((self.op.disk_template is None) ^
12999 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13000 "Not owning any node resource locks"
13002 result = []
13003 instance = self.instance
13006 if self.op.runtime_mem:
13007 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13008 instance,
13009 self.op.runtime_mem)
13010 rpcres.Raise("Cannot modify instance runtime memory")
13011 result.append(("runtime_memory", self.op.runtime_mem))
13013 # Apply disk changes
13014 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13015 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13016 _UpdateIvNames(0, instance.disks)
13018 if self.op.disk_template:
13020 check_nodes = set(instance.all_nodes)
13021 if self.op.remote_node:
13022 check_nodes.add(self.op.remote_node)
13023 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13024 owned = self.owned_locks(level)
13025 assert not (check_nodes - owned), \
13026 ("Not owning the correct locks, owning %r, expected at least %r" %
13027 (owned, check_nodes))
13029 r_shut = _ShutdownInstanceDisks(self, instance)
13030 if not r_shut:
13031 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13032 " proceed with disk template conversion")
13033 mode = (instance.disk_template, self.op.disk_template)
13034 try:
13035 self._DISK_CONVERSIONS[mode](self, feedback_fn)
13036 except:
13037 self.cfg.ReleaseDRBDMinors(instance.name)
13038 raise
13039 result.append(("disk_template", self.op.disk_template))
13041 assert instance.disk_template == self.op.disk_template, \
13042 ("Expected disk template '%s', found '%s'" %
13043 (self.op.disk_template, instance.disk_template))
13045 # Release node and resource locks if there are any (they might already have
13046 # been released during disk conversion)
13047 _ReleaseLocks(self, locking.LEVEL_NODE)
13048 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13050 # Apply NIC changes
13051 if self._new_nics is not None:
13052 instance.nics = self._new_nics
13053 result.extend(self._nic_chgdesc)
13056 if self.op.hvparams:
13057 instance.hvparams = self.hv_inst
13058 for key, val in self.op.hvparams.iteritems():
13059 result.append(("hv/%s" % key, val))
13062 if self.op.beparams:
13063 instance.beparams = self.be_inst
13064 for key, val in self.op.beparams.iteritems():
13065 result.append(("be/%s" % key, val))
13068 if self.op.os_name:
13069 instance.os = self.op.os_name
13072 if self.op.osparams:
13073 instance.osparams = self.os_inst
13074 for key, val in self.op.osparams.iteritems():
13075 result.append(("os/%s" % key, val))
13077 if self.op.offline is None:
13078 # Ignore
13079 pass
13080 elif self.op.offline:
13081 # Mark instance as offline
13082 self.cfg.MarkInstanceOffline(instance.name)
13083 result.append(("admin_state", constants.ADMINST_OFFLINE))
13085 # Mark instance as online, but stopped
13086 self.cfg.MarkInstanceDown(instance.name)
13087 result.append(("admin_state", constants.ADMINST_DOWN))
13089 self.cfg.Update(instance, feedback_fn)
13091 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13092 self.owned_locks(locking.LEVEL_NODE)), \
13093 "All node locks should have been released by now"
13097 _DISK_CONVERSIONS = {
13098 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13099 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13100 }
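# --- for illustration, the shape of the list Exec() returns (sample
# values assumed): each entry is a (parameter, new value) pair that the
# master daemon presents to the user as feedback on what changed ---
result = []
result.append(("runtime_memory", 1024))
result.append(("disk/1", "add:size=2048,mode=rw"))
result.append(("be/maxmem", 2048))
assert dict(result)["be/maxmem"] == 2048
# --- end of sketch ---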
13103 class LUInstanceChangeGroup(LogicalUnit):
13104 HPATH = "instance-change-group"
13105 HTYPE = constants.HTYPE_INSTANCE
13108 def ExpandNames(self):
13109 self.share_locks = _ShareAll()
13110 self.needed_locks = {
13111 locking.LEVEL_NODEGROUP: [],
13112 locking.LEVEL_NODE: [],
13113 }
13115 self._ExpandAndLockInstance()
13117 if self.op.target_groups:
13118 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13119 self.op.target_groups)
13120 else:
13121 self.req_target_uuids = None
13123 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13125 def DeclareLocks(self, level):
13126 if level == locking.LEVEL_NODEGROUP:
13127 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13129 if self.req_target_uuids:
13130 lock_groups = set(self.req_target_uuids)
13132 # Lock all groups used by instance optimistically; this requires going
13133 # via the node before it's locked, requiring verification later on
13134 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13135 lock_groups.update(instance_groups)
13136 else:
13137 # No target groups, need to lock all of them
13138 lock_groups = locking.ALL_SET
13140 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13142 elif level == locking.LEVEL_NODE:
13143 if self.req_target_uuids:
13144 # Lock all nodes used by instances
13145 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13146 self._LockInstancesNodes()
13148 # Lock all nodes in all potential target groups
13149 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13150 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13151 member_nodes = [node_name
13152 for group in lock_groups
13153 for node_name in self.cfg.GetNodeGroup(group).members]
13154 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13155 else:
13156 # Lock all nodes as all groups are potential targets
13157 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
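# --- the optimistic-locking pattern used above, sketched standalone
# (FakeConfig is an assumption for illustration): read the node-group
# mapping before holding the locks, lock those groups, then re-read
# under the lock; CheckPrereq verifies nothing moved in between ---
class FakeConfig(object):
  def __init__(self):
    self._groups = {"inst1": frozenset(["uuid-a"])}
  def GetInstanceNodeGroups(self, name):
    return self._groups[name]

cfg = FakeConfig()
guessed = cfg.GetInstanceNodeGroups("inst1")    # unlocked, optimistic read
# ... group locks for `guessed` acquired here ...
assert cfg.GetInstanceNodeGroups("inst1") == guessed  # verify under lock
# --- end of sketch ---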
13159 def CheckPrereq(self):
13160 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13161 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13162 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13164 assert (self.req_target_uuids is None or
13165 owned_groups.issuperset(self.req_target_uuids))
13166 assert owned_instances == set([self.op.instance_name])
13168 # Get instance information
13169 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13171 # Check if node groups for locked instance are still correct
13172 assert owned_nodes.issuperset(self.instance.all_nodes), \
13173 ("Instance %s's nodes changed while we kept the lock" %
13174 self.op.instance_name)
13176 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13177 owned_groups)
13179 if self.req_target_uuids:
13180 # User requested specific target groups
13181 self.target_uuids = frozenset(self.req_target_uuids)
13182 else:
13183 # All groups except those used by the instance are potential targets
13184 self.target_uuids = owned_groups - inst_groups
13186 conflicting_groups = self.target_uuids & inst_groups
13187 if conflicting_groups:
13188 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13189 " used by the instance '%s'" %
13190 (utils.CommaJoin(conflicting_groups),
13191 self.op.instance_name),
13192 errors.ECODE_INVAL)
13194 if not self.target_uuids:
13195 raise errors.OpPrereqError("There are no possible target groups",
13196 errors.ECODE_INVAL)
13198 def BuildHooksEnv(self):
13199 """Build hooks env.
13202 assert self.target_uuids
13205 "TARGET_GROUPS": " ".join(self.target_uuids),
13208 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13212 def BuildHooksNodes(self):
13213 """Build hooks nodes.
13216 mn = self.cfg.GetMasterNode()
13217 return ([mn], [mn])
13219 def Exec(self, feedback_fn):
13220 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13222 assert instances == [self.op.instance_name], "Instance not locked"
13224 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13225 instances=instances, target_groups=list(self.target_uuids))
13227 ial.Run(self.op.iallocator)
13229 if not ial.success:
13230 raise errors.OpPrereqError("Can't compute solution for changing group of"
13231 " instance '%s' using iallocator '%s': %s" %
13232 (self.op.instance_name, self.op.iallocator,
13233 ial.info), errors.ECODE_NORES)
13235 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13237 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13238 " instance '%s'", len(jobs), self.op.instance_name)
13240 return ResultWithJobs(jobs)
13243 class LUBackupQuery(NoHooksLU):
13244 """Query the exports list
13249 def CheckArguments(self):
13250 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13251 ["node", "export"], self.op.use_locking)
13253 def ExpandNames(self):
13254 self.expq.ExpandNames(self)
13256 def DeclareLocks(self, level):
13257 self.expq.DeclareLocks(self, level)
13259 def Exec(self, feedback_fn):
13260 result = {}
13262 for (node, expname) in self.expq.OldStyleQuery(self):
13263 if expname is None:
13264 result[node] = False
13265 else:
13266 result.setdefault(node, []).append(expname)
13268 return result
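# --- the old-style result shape built above, on assumed sample data: a
# node maps to its list of export names, or to False if querying it
# failed ---
sample = [("node1", "inst1.export"), ("node1", "inst2.export"),
          ("node2", None)]
result = {}
for (node, expname) in sample:
  if expname is None:
    result[node] = False
  else:
    result.setdefault(node, []).append(expname)
assert result == {"node1": ["inst1.export", "inst2.export"],
                  "node2": False}
# --- end of sketch ---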
13271 class _ExportQuery(_QueryBase):
13272 FIELDS = query.EXPORT_FIELDS
13274 #: The node name is not a unique key for this query
13275 SORT_FIELD = "node"
13277 def ExpandNames(self, lu):
13278 lu.needed_locks = {}
13280 # The following variables interact with _QueryBase._GetNames
13281 if self.names:
13282 self.wanted = _GetWantedNodes(lu, self.names)
13283 else:
13284 self.wanted = locking.ALL_SET
13286 self.do_locking = self.use_locking
13288 if self.do_locking:
13289 lu.share_locks = _ShareAll()
13290 lu.needed_locks = {
13291 locking.LEVEL_NODE: self.wanted,
13292 }
13294 def DeclareLocks(self, lu, level):
13295 pass
13297 def _GetQueryData(self, lu):
13298 """Computes the list of nodes and their attributes.
13301 # Locking is not used
13303 assert not (compat.any(lu.glm.is_owned(level)
13304 for level in locking.LEVELS
13305 if level != locking.LEVEL_CLUSTER) or
13306 self.do_locking or self.use_locking)
13308 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13310 result = []
13312 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13313 if nres.fail_msg:
13314 result.append((node, None))
13315 else:
13316 result.extend((node, expname) for expname in nres.payload)
13318 return result
13321 class LUBackupPrepare(NoHooksLU):
13322 """Prepares an instance for an export and returns useful information.
13327 def ExpandNames(self):
13328 self._ExpandAndLockInstance()
13330 def CheckPrereq(self):
13331 """Check prerequisites.
13334 instance_name = self.op.instance_name
13336 self.instance = self.cfg.GetInstanceInfo(instance_name)
13337 assert self.instance is not None, \
13338 "Cannot retrieve locked instance %s" % self.op.instance_name
13339 _CheckNodeOnline(self, self.instance.primary_node)
13341 self._cds = _GetClusterDomainSecret()
13343 def Exec(self, feedback_fn):
13344 """Prepares an instance for an export.
13347 instance = self.instance
13349 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13350 salt = utils.GenerateSecret(8)
13352 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13353 result = self.rpc.call_x509_cert_create(instance.primary_node,
13354 constants.RIE_CERT_VALIDITY)
13355 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13357 (name, cert_pem) = result.payload
13359 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13360 cert_pem)
13362 return {
13363 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13364 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13365 salt),
13366 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13367 }
13369 return None
13372 class LUBackupExport(LogicalUnit):
13373 """Export an instance to an image in the cluster.
13376 HPATH = "instance-export"
13377 HTYPE = constants.HTYPE_INSTANCE
13380 def CheckArguments(self):
13381 """Check the arguments.
13384 self.x509_key_name = self.op.x509_key_name
13385 self.dest_x509_ca_pem = self.op.destination_x509_ca
13387 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13388 if not self.x509_key_name:
13389 raise errors.OpPrereqError("Missing X509 key name for encryption",
13390 errors.ECODE_INVAL)
13392 if not self.dest_x509_ca_pem:
13393 raise errors.OpPrereqError("Missing destination X509 CA",
13394 errors.ECODE_INVAL)
13396 def ExpandNames(self):
13397 self._ExpandAndLockInstance()
13399 # Lock all nodes for local exports
13400 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13401 # FIXME: lock only instance primary and destination node
13403 # Sad but true, for now we have to lock all nodes, as we don't know where
13404 # the previous export might be, and in this LU we search for it and
13405 # remove it from its current node. In the future we could fix this by:
13406 # - making a tasklet to search (share-lock all), then create the
13407 # new one, then one to remove, after
13408 # - removing the removal operation altogether
13409 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13411 def DeclareLocks(self, level):
13412 """Last minute lock declaration."""
13413 # All nodes are locked anyway, so nothing to do here.
13415 def BuildHooksEnv(self):
13416 """Build hooks env.
13418 This will run on the master, primary node and target node.
13422 "EXPORT_MODE": self.op.mode,
13423 "EXPORT_NODE": self.op.target_node,
13424 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13425 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13426 # TODO: Generic function for boolean env variables
13427 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13430 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13434 def BuildHooksNodes(self):
13435 """Build hooks nodes.
13438 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13440 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13441 nl.append(self.op.target_node)
13443 return (nl, nl)
13445 def CheckPrereq(self):
13446 """Check prerequisites.
13448 This checks that the instance and node names are valid.
13451 instance_name = self.op.instance_name
13453 self.instance = self.cfg.GetInstanceInfo(instance_name)
13454 assert self.instance is not None, \
13455 "Cannot retrieve locked instance %s" % self.op.instance_name
13456 _CheckNodeOnline(self, self.instance.primary_node)
13458 if (self.op.remove_instance and
13459 self.instance.admin_state == constants.ADMINST_UP and
13460 not self.op.shutdown):
13461 raise errors.OpPrereqError("Can not remove instance without shutting it"
13462 " down before", errors.ECODE_STATE)
13464 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13465 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13466 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13467 assert self.dst_node is not None
13469 _CheckNodeOnline(self, self.dst_node.name)
13470 _CheckNodeNotDrained(self, self.dst_node.name)
13472 self._cds = None
13473 self.dest_disk_info = None
13474 self.dest_x509_ca = None
13476 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13477 self.dst_node = None
13479 if len(self.op.target_node) != len(self.instance.disks):
13480 raise errors.OpPrereqError(("Received destination information for %s"
13481 " disks, but instance %s has %s disks") %
13482 (len(self.op.target_node), instance_name,
13483 len(self.instance.disks)),
13484 errors.ECODE_INVAL)
13486 cds = _GetClusterDomainSecret()
13488 # Check X509 key name
13489 try:
13490 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13491 except (TypeError, ValueError), err:
13492 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
13493 errors.ECODE_INVAL)
13495 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13496 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13497 errors.ECODE_INVAL)
13499 # Load and verify CA
13500 try:
13501 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13502 except OpenSSL.crypto.Error, err:
13503 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13504 (err, ), errors.ECODE_INVAL)
13506 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13507 if errcode is not None:
13508 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13509 (msg, ), errors.ECODE_INVAL)
13511 self.dest_x509_ca = cert
13513 # Verify target information
13514 disk_info = []
13515 for idx, disk_data in enumerate(self.op.target_node):
13516 try:
13517 (host, port, magic) = \
13518 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13519 except errors.GenericError, err:
13520 raise errors.OpPrereqError("Target info for disk %s: %s" %
13521 (idx, err), errors.ECODE_INVAL)
13523 disk_info.append((host, port, magic))
13525 assert len(disk_info) == len(self.op.target_node)
13526 self.dest_disk_info = disk_info
13529 raise errors.ProgrammerError("Unhandled export mode %r" %
13532 # instance disk type verification
13533 # TODO: Implement export support for file-based disks
13534 for disk in self.instance.disks:
13535 if disk.dev_type == constants.LD_FILE:
13536 raise errors.OpPrereqError("Export not supported for instances with"
13537 " file-based disks", errors.ECODE_INVAL)
13539 def _CleanupExports(self, feedback_fn):
13540 """Removes exports of current instance from all other nodes.
13542 If an instance in a cluster with nodes A..D was exported to node C, its
13543 exports will be removed from the nodes A, B and D.
13546 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13548 nodelist = self.cfg.GetNodeList()
13549 nodelist.remove(self.dst_node.name)
13551 # on one-node clusters nodelist will be empty after the removal
13552 # if we proceed the backup would be removed because OpBackupQuery
13553 # substitutes an empty list with the full cluster node list.
13554 iname = self.instance.name
13556 feedback_fn("Removing old exports for instance %s" % iname)
13557 exportlist = self.rpc.call_export_list(nodelist)
13558 for node in exportlist:
13559 if exportlist[node].fail_msg:
13560 continue
13561 if iname in exportlist[node].payload:
13562 msg = self.rpc.call_export_remove(node, iname).fail_msg
13563 if msg:
13564 self.LogWarning("Could not remove older export for instance %s"
13565 " on node %s: %s", iname, node, msg)
13567 def Exec(self, feedback_fn):
13568 """Export an instance to an image in the cluster.
13571 assert self.op.mode in constants.EXPORT_MODES
13573 instance = self.instance
13574 src_node = instance.primary_node
13576 if self.op.shutdown:
13577 # shutdown the instance, but not the disks
13578 feedback_fn("Shutting down instance %s" % instance.name)
13579 result = self.rpc.call_instance_shutdown(src_node, instance,
13580 self.op.shutdown_timeout)
13581 # TODO: Maybe ignore failures if ignore_remove_failures is set
13582 result.Raise("Could not shutdown instance %s on"
13583 " node %s" % (instance.name, src_node))
13585 # set the disks ID correctly since call_instance_start needs the
13586 # correct drbd minor to create the symlinks
13587 for disk in instance.disks:
13588 self.cfg.SetDiskID(disk, src_node)
13590 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13592 if activate_disks:
13593 # Activate the instance disks if we're exporting a stopped instance
13594 feedback_fn("Activating disks for %s" % instance.name)
13595 _StartInstanceDisks(self, instance, None)
13597 try:
13598 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13599 instance)
13601 helper.CreateSnapshots()
13602 try:
13603 if (self.op.shutdown and
13604 instance.admin_state == constants.ADMINST_UP and
13605 not self.op.remove_instance):
13606 assert not activate_disks
13607 feedback_fn("Starting instance %s" % instance.name)
13608 result = self.rpc.call_instance_start(src_node,
13609 (instance, None, None), False)
13610 msg = result.fail_msg
13612 feedback_fn("Failed to start instance: %s" % msg)
13613 _ShutdownInstanceDisks(self, instance)
13614 raise errors.OpExecError("Could not start instance: %s" % msg)
13616 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13617 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13618 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13619 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13620 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13622 (key_name, _, _) = self.x509_key_name
13624 dest_ca_pem = \
13625 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13626 self.dest_x509_ca)
13628 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13629 key_name, dest_ca_pem,
13630 timeouts)
13632 finally:
13633 helper.Cleanup()
13634 # Check for backwards compatibility
13635 assert len(dresults) == len(instance.disks)
13636 assert compat.all(isinstance(i, bool) for i in dresults), \
13637 "Not all results are boolean: %r" % dresults
13641 feedback_fn("Deactivating disks for %s" % instance.name)
13642 _ShutdownInstanceDisks(self, instance)
13644 if not (compat.all(dresults) and fin_resu):
13645 failures = []
13646 if not fin_resu:
13647 failures.append("export finalization")
13648 if not compat.all(dresults):
13649 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13650 if not dsk)
13651 failures.append("disk export: disk(s) %s" % fdsk)
13653 raise errors.OpExecError("Export failed, errors in %s" %
13654 utils.CommaJoin(failures))
13656 # At this point, the export was successful, we can cleanup/finish
13658 # Remove instance if requested
13659 if self.op.remove_instance:
13660 feedback_fn("Removing instance %s" % instance.name)
13661 _RemoveInstance(self, feedback_fn, instance,
13662 self.op.ignore_remove_failures)
13664 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13665 self._CleanupExports(feedback_fn)
13667 return fin_resu, dresults
13670 class LUBackupRemove(NoHooksLU):
13671 """Remove exports related to the named instance.
13676 def ExpandNames(self):
13677 self.needed_locks = {}
13678 # We need all nodes to be locked in order for RemoveExport to work, but we
13679 # don't need to lock the instance itself, as nothing will happen to it (and
13680 # we can remove exports also for a removed instance)
13681 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13683 def Exec(self, feedback_fn):
13684 """Remove any export.
13687 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13688 # If the instance was not found we'll try with the name that was passed in.
13689 # This will only work if it was an FQDN, though.
13690 fqdn_warn = False
13691 if not instance_name:
13692 fqdn_warn = True
13693 instance_name = self.op.instance_name
13695 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13696 exportlist = self.rpc.call_export_list(locked_nodes)
13697 found = False
13698 for node in exportlist:
13699 msg = exportlist[node].fail_msg
13700 if msg:
13701 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13702 continue
13703 if instance_name in exportlist[node].payload:
13704 found = True
13705 result = self.rpc.call_export_remove(node, instance_name)
13706 msg = result.fail_msg
13707 if msg:
13708 logging.error("Could not remove export for instance %s"
13709 " on node %s: %s", instance_name, node, msg)
13711 if fqdn_warn and not found:
13712 feedback_fn("Export not found. If trying to remove an export belonging"
13713 " to a deleted instance please use its Fully Qualified"
13717 class LUGroupAdd(LogicalUnit):
13718 """Logical unit for creating node groups.
13721 HPATH = "group-add"
13722 HTYPE = constants.HTYPE_GROUP
13725 def ExpandNames(self):
13726 # We need the new group's UUID here so that we can create and acquire the
13727 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13728 # that it should not check whether the UUID exists in the configuration.
13729 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13730 self.needed_locks = {}
13731 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13733 def CheckPrereq(self):
13734 """Check prerequisites.
13736 This checks that the given group name is not an existing node group
13737 already.
13740 try:
13741 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13742 except errors.OpPrereqError:
13743 pass
13744 else:
13745 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13746 " node group (UUID: %s)" %
13747 (self.op.group_name, existing_uuid),
13748 errors.ECODE_EXISTS)
13750 if self.op.ndparams:
13751 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13753 if self.op.hv_state:
13754 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13756 self.new_hv_state = None
13758 if self.op.disk_state:
13759 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13761 self.new_disk_state = None
13763 if self.op.diskparams:
13764 for templ in constants.DISK_TEMPLATES:
13765 if templ in self.op.diskparams:
13766 utils.ForceDictType(self.op.diskparams[templ],
13767 constants.DISK_DT_TYPES)
13768 self.new_diskparams = self.op.diskparams
13769 try:
13770 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13771 except errors.OpPrereqError, err:
13772 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
13773 errors.ECODE_INVAL)
13774 else:
13775 self.new_diskparams = {}
13777 if self.op.ipolicy:
13778 cluster = self.cfg.GetClusterInfo()
13779 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13780 try:
13781 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13782 except errors.ConfigurationError, err:
13783 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13784 errors.ECODE_INVAL)
13786 def BuildHooksEnv(self):
13787 """Build hooks env.
13791 "GROUP_NAME": self.op.group_name,
13794 def BuildHooksNodes(self):
13795 """Build hooks nodes.
13798 mn = self.cfg.GetMasterNode()
13799 return ([mn], [mn])
13801 def Exec(self, feedback_fn):
13802 """Add the node group to the cluster.
13805 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13806 uuid=self.group_uuid,
13807 alloc_policy=self.op.alloc_policy,
13808 ndparams=self.op.ndparams,
13809 diskparams=self.new_diskparams,
13810 ipolicy=self.op.ipolicy,
13811 hv_state_static=self.new_hv_state,
13812 disk_state_static=self.new_disk_state)
13814 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13815 del self.remove_locks[locking.LEVEL_NODEGROUP]
13818 class LUGroupAssignNodes(NoHooksLU):
13819 """Logical unit for assigning nodes to groups.
13824 def ExpandNames(self):
13825 # These raise errors.OpPrereqError on their own:
13826 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13827 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13829 # We want to lock all the affected nodes and groups. We have readily
13830 # available the list of nodes, and the *destination* group. To gather the
13831 # list of "source" groups, we need to fetch node information later on.
13832 self.needed_locks = {
13833 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13834 locking.LEVEL_NODE: self.op.nodes,
13835 }
13837 def DeclareLocks(self, level):
13838 if level == locking.LEVEL_NODEGROUP:
13839 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13841 # Try to get all affected nodes' groups without having the group or node
13842 # lock yet. Needs verification later in the code flow.
13843 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13845 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13847 def CheckPrereq(self):
13848 """Check prerequisites.
13851 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13852 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13853 frozenset(self.op.nodes))
13855 expected_locks = (set([self.group_uuid]) |
13856 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13857 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13858 if actual_locks != expected_locks:
13859 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13860 " current groups are '%s', used to be '%s'" %
13861 (utils.CommaJoin(expected_locks),
13862 utils.CommaJoin(actual_locks)))
13864 self.node_data = self.cfg.GetAllNodesInfo()
13865 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13866 instance_data = self.cfg.GetAllInstancesInfo()
13868 if self.group is None:
13869 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13870 (self.op.group_name, self.group_uuid))
13872 (new_splits, previous_splits) = \
13873 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13874 for node in self.op.nodes],
13875 self.node_data, instance_data)
13877 if new_splits:
13878 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13880 if not self.op.force:
13881 raise errors.OpExecError("The following instances get split by this"
13882 " change and --force was not given: %s" %
13883 fmt_new_splits)
13884 else:
13885 self.LogWarning("This operation will split the following instances: %s",
13886 fmt_new_splits)
13888 if previous_splits:
13889 self.LogWarning("In addition, these already-split instances continue"
13890 " to be split across groups: %s",
13891 utils.CommaJoin(utils.NiceSort(previous_splits)))
13893 def Exec(self, feedback_fn):
13894 """Assign nodes to a new group.
13897 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13899 self.cfg.AssignGroupNodes(mods)
13901 @staticmethod
13902 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13903 """Check for split instances after a node assignment.
13905 This method considers a series of node assignments as an atomic operation,
13906 and returns information about split instances after applying the set of
13907 changes.
13909 In particular, it returns information about newly split instances, and
13910 instances that were already split, and remain so after the change.
13912 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13913 considered.
13915 @type changes: list of (node_name, new_group_uuid) pairs.
13916 @param changes: list of node assignments to consider.
13917 @param node_data: a dict with data for all nodes
13918 @param instance_data: a dict with all instances to consider
13919 @rtype: a two-tuple
13920 @return: a list of instances that were previously okay and end up split as a
13921 consequence of this change, and a list of instances that were previously
13922 split and which this change does not fix.
13925 changed_nodes = dict((node, group) for node, group in changes
13926 if node_data[node].group != group)
13928 all_split_instances = set()
13929 previously_split_instances = set()
13931 def InstanceNodes(instance):
13932 return [instance.primary_node] + list(instance.secondary_nodes)
13934 for inst in instance_data.values():
13935 if inst.disk_template not in constants.DTS_INT_MIRROR:
13936 continue
13938 instance_nodes = InstanceNodes(inst)
13940 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13941 previously_split_instances.add(inst.name)
13943 if len(set(changed_nodes.get(node, node_data[node].group)
13944 for node in instance_nodes)) > 1:
13945 all_split_instances.add(inst.name)
13947 return (list(all_split_instances - previously_split_instances),
13948 list(previously_split_instances & all_split_instances))
13951 class _GroupQuery(_QueryBase):
13952 FIELDS = query.GROUP_FIELDS
13954 def ExpandNames(self, lu):
13955 lu.needed_locks = {}
13957 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13958 self._cluster = lu.cfg.GetClusterInfo()
13959 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13961 if not self.names:
13962 self.wanted = [name_to_uuid[name]
13963 for name in utils.NiceSort(name_to_uuid.keys())]
13964 else:
13965 # Accept names to be either names or UUIDs.
13966 missing = []
13967 self.wanted = []
13968 all_uuid = frozenset(self._all_groups.keys())
13970 for name in self.names:
13971 if name in all_uuid:
13972 self.wanted.append(name)
13973 elif name in name_to_uuid:
13974 self.wanted.append(name_to_uuid[name])
13975 else:
13976 missing.append(name)
13979 raise errors.OpPrereqError("Some groups do not exist: %s" %
13980 utils.CommaJoin(missing),
13981 errors.ECODE_NOENT)
13983 def DeclareLocks(self, lu, level):
13984 pass
13986 def _GetQueryData(self, lu):
13987 """Computes the list of node groups and their attributes.
13990 do_nodes = query.GQ_NODE in self.requested_data
13991 do_instances = query.GQ_INST in self.requested_data
13993 group_to_nodes = None
13994 group_to_instances = None
13996 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13997 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13998 # latter GetAllInstancesInfo() is not enough, for we have to go through
13999 # instance->node. Hence, we will need to process nodes even if we only need
14000 # instance information.
14001 if do_nodes or do_instances:
14002 all_nodes = lu.cfg.GetAllNodesInfo()
14003 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
14004 node_to_group = {}
14006 for node in all_nodes.values():
14007 if node.group in group_to_nodes:
14008 group_to_nodes[node.group].append(node.name)
14009 node_to_group[node.name] = node.group
14011 if do_instances:
14012 all_instances = lu.cfg.GetAllInstancesInfo()
14013 group_to_instances = dict((uuid, []) for uuid in self.wanted)
14015 for instance in all_instances.values():
14016 node = instance.primary_node
14017 if node in node_to_group:
14018 group_to_instances[node_to_group[node]].append(instance.name)
14020 if not do_nodes:
14021 # Do not pass on node information if it was not requested.
14022 group_to_nodes = None
14024 return query.GroupQueryData(self._cluster,
14025 [self._all_groups[uuid]
14026 for uuid in self.wanted],
14027 group_to_nodes, group_to_instances,
14028 query.GQ_DISKPARAMS in self.requested_data)
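# --- minimal sketch of the two mappings built above, on assumed sample
# data: nodes map to groups directly, instances via their primary node ---
all_nodes = {"n1": "g1", "n2": "g2"}               # node -> group uuid
instances = {"i1": "n1", "i2": "n1", "i3": "n2"}   # instance -> primary node
group_to_nodes = {}
node_to_group = {}
for (node, group) in all_nodes.items():
  group_to_nodes.setdefault(group, []).append(node)
  node_to_group[node] = group
group_to_instances = dict((g, []) for g in group_to_nodes)
for (inst, node) in instances.items():
  group_to_instances[node_to_group[node]].append(inst)
assert sorted(group_to_instances["g1"]) == ["i1", "i2"]
assert group_to_instances["g2"] == ["i3"]
# --- end of sketch ---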
14031 class LUGroupQuery(NoHooksLU):
14032 """Logical unit for querying node groups.
14037 def CheckArguments(self):
14038 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14039 self.op.output_fields, False)
14041 def ExpandNames(self):
14042 self.gq.ExpandNames(self)
14044 def DeclareLocks(self, level):
14045 self.gq.DeclareLocks(self, level)
14047 def Exec(self, feedback_fn):
14048 return self.gq.OldStyleQuery(self)
14051 class LUGroupSetParams(LogicalUnit):
14052 """Modifies the parameters of a node group.
14055 HPATH = "group-modify"
14056 HTYPE = constants.HTYPE_GROUP
14059 def CheckArguments(self):
14060 all_changes = [
14061 self.op.ndparams,
14062 self.op.diskparams,
14063 self.op.alloc_policy,
14064 self.op.hv_state,
14065 self.op.disk_state,
14066 self.op.ipolicy,
14067 ]
14069 if all_changes.count(None) == len(all_changes):
14070 raise errors.OpPrereqError("Please pass at least one modification",
14071 errors.ECODE_INVAL)
14073 def ExpandNames(self):
14074 # This raises errors.OpPrereqError on its own:
14075 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14077 self.needed_locks = {
14078 locking.LEVEL_INSTANCE: [],
14079 locking.LEVEL_NODEGROUP: [self.group_uuid],
14080 }
14082 self.share_locks[locking.LEVEL_INSTANCE] = 1
14084 def DeclareLocks(self, level):
14085 if level == locking.LEVEL_INSTANCE:
14086 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14088 # Lock instances optimistically, needs verification once group lock has
14089 # been acquired
14090 self.needed_locks[locking.LEVEL_INSTANCE] = \
14091 self.cfg.GetNodeGroupInstances(self.group_uuid)
14093 @staticmethod
14094 def _UpdateAndVerifyDiskParams(old, new):
14095 """Updates and verifies disk parameters.
14098 new_params = _GetUpdatedParams(old, new)
14099 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14100 return new_params
14102 def CheckPrereq(self):
14103 """Check prerequisites.
14106 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14108 # Check if locked instances are still correct
14109 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14111 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14112 cluster = self.cfg.GetClusterInfo()
14114 if self.group is None:
14115 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14116 (self.op.group_name, self.group_uuid))
14118 if self.op.ndparams:
14119 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14120 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14121 self.new_ndparams = new_ndparams
14123 if self.op.diskparams:
14124 diskparams = self.group.diskparams
14125 uavdp = self._UpdateAndVerifyDiskParams
14126 # For each disktemplate subdict update and verify the values
14127 new_diskparams = dict((dt,
14128 uavdp(diskparams.get(dt, {}),
14129 self.op.diskparams[dt]))
14130 for dt in constants.DISK_TEMPLATES
14131 if dt in self.op.diskparams)
14132 # As we have all subdicts of diskparams ready, let's merge the actual
14133 # dict with all updated subdicts
14134 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14135 try:
14136 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14137 except errors.OpPrereqError, err:
14138 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14139 errors.ECODE_INVAL)
14141 if self.op.hv_state:
14142 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14143 self.group.hv_state_static)
14145 if self.op.disk_state:
14146 self.new_disk_state = \
14147 _MergeAndVerifyDiskState(self.op.disk_state,
14148 self.group.disk_state_static)
14150 if self.op.ipolicy:
14151 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14152 self.op.ipolicy,
14153 group_policy=True)
14155 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14156 inst_filter = lambda inst: inst.name in owned_instances
14157 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14158 violations = \
14159 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
14160 self.group),
14161 new_ipolicy, instances)
14163 if violations:
14164 self.LogWarning("After the ipolicy change the following instances"
14165 " violate them: %s",
14166 utils.CommaJoin(violations))
14168 def BuildHooksEnv(self):
14169 """Build hooks env.
14173 "GROUP_NAME": self.op.group_name,
14174 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14177 def BuildHooksNodes(self):
14178 """Build hooks nodes.
14181 mn = self.cfg.GetMasterNode()
14182 return ([mn], [mn])
14184 def Exec(self, feedback_fn):
14185 """Modifies the node group.
14189 result = []
14190 if self.op.ndparams:
14191 self.group.ndparams = self.new_ndparams
14192 result.append(("ndparams", str(self.group.ndparams)))
14194 if self.op.diskparams:
14195 self.group.diskparams = self.new_diskparams
14196 result.append(("diskparams", str(self.group.diskparams)))
14198 if self.op.alloc_policy:
14199 self.group.alloc_policy = self.op.alloc_policy
14201 if self.op.hv_state:
14202 self.group.hv_state_static = self.new_hv_state
14204 if self.op.disk_state:
14205 self.group.disk_state_static = self.new_disk_state
14207 if self.op.ipolicy:
14208 self.group.ipolicy = self.new_ipolicy
14210 self.cfg.Update(self.group, feedback_fn)
14212 return result
14214 class LUGroupRemove(LogicalUnit):
14215 HPATH = "group-remove"
14216 HTYPE = constants.HTYPE_GROUP
14219 def ExpandNames(self):
14220 # This raises errors.OpPrereqError on its own:
14221 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14222 self.needed_locks = {
14223 locking.LEVEL_NODEGROUP: [self.group_uuid],
14224 }
14226 def CheckPrereq(self):
14227 """Check prerequisites.
14229 This checks that the given group name exists as a node group, that it is
14230 empty (i.e., contains no nodes), and that it is not the last group of the
14231 cluster.
14234 # Verify that the group is empty.
14235 group_nodes = [node.name
14236 for node in self.cfg.GetAllNodesInfo().values()
14237 if node.group == self.group_uuid]
14240 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14242 (self.op.group_name,
14243 utils.CommaJoin(utils.NiceSort(group_nodes))),
14244 errors.ECODE_STATE)
14246 # Verify the cluster would not be left group-less.
14247 if len(self.cfg.GetNodeGroupList()) == 1:
14248 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
14249 " removed" % self.op.group_name,
14250 errors.ECODE_STATE)
14252 def BuildHooksEnv(self):
14253 """Build hooks env.
14257 "GROUP_NAME": self.op.group_name,
14260 def BuildHooksNodes(self):
14261 """Build hooks nodes.
14264 mn = self.cfg.GetMasterNode()
14265 return ([mn], [mn])
14267 def Exec(self, feedback_fn):
14268 """Remove the node group.
14271 try:
14272 self.cfg.RemoveNodeGroup(self.group_uuid)
14273 except errors.ConfigurationError:
14274 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14275 (self.op.group_name, self.group_uuid))
14277 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14280 class LUGroupRename(LogicalUnit):
14281 HPATH = "group-rename"
14282 HTYPE = constants.HTYPE_GROUP
14285 def ExpandNames(self):
14286 # This raises errors.OpPrereqError on its own:
14287 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14289 self.needed_locks = {
14290 locking.LEVEL_NODEGROUP: [self.group_uuid],
14291 }
14293 def CheckPrereq(self):
14294 """Check prerequisites.
14296 Ensures requested new name is not yet used.
14299 try:
14300 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14301 except errors.OpPrereqError:
14302 pass
14303 else:
14304 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14305 " node group (UUID: %s)" %
14306 (self.op.new_name, new_name_uuid),
14307 errors.ECODE_EXISTS)
14309 def BuildHooksEnv(self):
14310 """Build hooks env.
14314 "OLD_NAME": self.op.group_name,
14315 "NEW_NAME": self.op.new_name,
14318 def BuildHooksNodes(self):
14319 """Build hooks nodes.
14322 mn = self.cfg.GetMasterNode()
14324 all_nodes = self.cfg.GetAllNodesInfo()
14325 all_nodes.pop(mn, None)
14327 run_nodes = [mn]
14328 run_nodes.extend(node.name for node in all_nodes.values()
14329 if node.group == self.group_uuid)
14331 return (run_nodes, run_nodes)
14333 def Exec(self, feedback_fn):
14334 """Rename the node group.
14337 group = self.cfg.GetNodeGroup(self.group_uuid)
14340 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14341 (self.op.group_name, self.group_uuid))
14343 group.name = self.op.new_name
14344 self.cfg.Update(group, feedback_fn)
14346 return self.op.new_name
14349 class LUGroupEvacuate(LogicalUnit):
14350 HPATH = "group-evacuate"
14351 HTYPE = constants.HTYPE_GROUP
14354 def ExpandNames(self):
14355 # This raises errors.OpPrereqError on its own:
14356 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14358 if self.op.target_groups:
14359 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14360 self.op.target_groups)
14361 else:
14362 self.req_target_uuids = []
14364 if self.group_uuid in self.req_target_uuids:
14365 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14366 " as a target group (targets are %s)" %
14367 (self.group_uuid,
14368 utils.CommaJoin(self.req_target_uuids)),
14369 errors.ECODE_INVAL)
14371 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14373 self.share_locks = _ShareAll()
14374 self.needed_locks = {
14375 locking.LEVEL_INSTANCE: [],
14376 locking.LEVEL_NODEGROUP: [],
14377 locking.LEVEL_NODE: [],
14378 }
14380 def DeclareLocks(self, level):
14381 if level == locking.LEVEL_INSTANCE:
14382 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14384 # Lock instances optimistically, needs verification once node and group
14385 # locks have been acquired
14386 self.needed_locks[locking.LEVEL_INSTANCE] = \
14387 self.cfg.GetNodeGroupInstances(self.group_uuid)
14389 elif level == locking.LEVEL_NODEGROUP:
14390 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14392 if self.req_target_uuids:
14393 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14395 # Lock all groups used by instances optimistically; this requires going
14396 # via the node before it's locked, requiring verification later on
14397 lock_groups.update(group_uuid
14398 for instance_name in
14399 self.owned_locks(locking.LEVEL_INSTANCE)
14400 for group_uuid in
14401 self.cfg.GetInstanceNodeGroups(instance_name))
14402 else:
14403 # No target groups, need to lock all of them
14404 lock_groups = locking.ALL_SET
14406 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14408 elif level == locking.LEVEL_NODE:
14409 # This will only lock the nodes in the group to be evacuated which
14410 # contain actual instances
14411 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14412 self._LockInstancesNodes()
14414 # Lock all nodes in group to be evacuated and target groups
14415 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14416 assert self.group_uuid in owned_groups
14417 member_nodes = [node_name
14418 for group in owned_groups
14419 for node_name in self.cfg.GetNodeGroup(group).members]
14420 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14422 def CheckPrereq(self):
14423 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14424 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14425 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14427 assert owned_groups.issuperset(self.req_target_uuids)
14428 assert self.group_uuid in owned_groups
14430 # Check if locked instances are still correct
14431 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14433 # Get instance information
14434 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14436 # Check if node groups for locked instances are still correct
14437 _CheckInstancesNodeGroups(self.cfg, self.instances,
14438 owned_groups, owned_nodes, self.group_uuid)
14440 if self.req_target_uuids:
14441 # User requested specific target groups
14442 self.target_uuids = self.req_target_uuids
14444 # All groups except the one to be evacuated are potential targets
14445 self.target_uuids = [group_uuid for group_uuid in owned_groups
14446 if group_uuid != self.group_uuid]
14448 if not self.target_uuids:
14449 raise errors.OpPrereqError("There are no possible target groups",
14450 errors.ECODE_INVAL)
14452 def BuildHooksEnv(self):
14453 """Build hooks env.
14457 "GROUP_NAME": self.op.group_name,
14458 "TARGET_GROUPS": " ".join(self.target_uuids),
14461 def BuildHooksNodes(self):
14462 """Build hooks nodes.
14465 mn = self.cfg.GetMasterNode()
14467 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14469 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14471 return (run_nodes, run_nodes)
14473 def Exec(self, feedback_fn):
14474 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14476 assert self.group_uuid not in self.target_uuids
14478 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14479 instances=instances, target_groups=self.target_uuids)
14481 ial.Run(self.op.iallocator)
14483 if not ial.success:
14484 raise errors.OpPrereqError("Can't compute group evacuation using"
14485 " iallocator '%s': %s" %
14486 (self.op.iallocator, ial.info),
14487 errors.ECODE_NORES)
14489 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14491 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14492 len(jobs), self.op.group_name)
14494 return ResultWithJobs(jobs)
14497 class TagsLU(NoHooksLU): # pylint: disable=W0223
14498 """Generic tags LU.
14500 This is an abstract class which is the parent of all the other tags LUs.
14503 def ExpandNames(self):
14504 self.group_uuid = None
14505 self.needed_locks = {}
14507 if self.op.kind == constants.TAG_NODE:
14508 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14509 lock_level = locking.LEVEL_NODE
14510 lock_name = self.op.name
14511 elif self.op.kind == constants.TAG_INSTANCE:
14512 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14513 lock_level = locking.LEVEL_INSTANCE
14514 lock_name = self.op.name
14515 elif self.op.kind == constants.TAG_NODEGROUP:
14516 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14517 lock_level = locking.LEVEL_NODEGROUP
14518 lock_name = self.group_uuid
14523 if lock_level and getattr(self.op, "use_locking", True):
14524 self.needed_locks[lock_level] = lock_name
14526 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14527 # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
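

# Search results are (path, tag) pairs. For instance, the hypothetical
# pattern "^prod" might yield [("/cluster", "prod-main"),
# ("/instances/web1.example.com", "prod-web")]; the names here are made up
# for illustration.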


class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
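

# As a usage sketch, a five-second delay on the master and two (hypothetical)
# nodes, run once, corresponds to an opcode along the lines of:
#
#   opcodes.OpTestDelay(duration=5.0, on_master=True,
#                       on_nodes=["node1.example.com", "node2.example.com"],
#                       repeat=0)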


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has three sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc_runner, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc_runner
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = self.spindle_use = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      "ipolicy": cluster_info.ipolicy,
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.primary_hypervisor

    node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
                                        [hypervisor_name])
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    cluster = cfg.GetClusterInfo()
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng

  @staticmethod
  def _ComputeBasicNodeData(cfg, node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict of name: (node dict, node config)

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      "ndparams": cfg.GetNdParams(ninfo),
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    #TODO(dynmem): compute the right data on MAX and MIN memory
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = _MakeLegacyNodeInfo(nresult.payload)

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MAXMEM]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_state == constants.ADMINST_UP:
              i_p_up_mem += beinfo[constants.BE_MAXMEM]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_state": iinfo.admin_state,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MAXMEM],
        "spindle_use": beinfo[constants.BE_SPINDLE_USE],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data
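
  # A sketch of one (hypothetical) entry in the mapping built above; the
  # instance name, OS and sizes are illustrative only:
  #
  #   instance_data["web1.example.com"] = {
  #     "tags": [], "admin_state": "up", "vcpus": 2, "memory": 1024,
  #     "spindle_use": 1, "os": "debootstrap+default",
  #     "nodes": ["node1.example.com", "node2.example.com"],
  #     "nics": [...], "disks": [{"size": 10240, "mode": "rw"}],
  #     "disk_template": "drbd", "hypervisor": "xen-pvm",
  #     "disk_space_total": ...,
  #     }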

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _AllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "spindle_use": self.spindle_use,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _IAllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }

    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for group-change requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
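
  # As a rough illustration, for an allocation request the "request" dict
  # assembled above might look like (values hypothetical):
  #
  #   {"type": "allocate", "name": "inst1.example.com", "memory": 1024,
  #    "spindle_use": 1, "disks": [{"size": 10240, "mode": "rw"}],
  #    "disk_space_total": ..., "disk_template": "drbd",
  #    "os": "debootstrap+default", "tags": [], "nics": [...], "vcpus": 1,
  #    "required_nodes": 2, "hypervisor": "xen-pvm"}
  #
  # It is embedded under in_data["request"] and serialized into in_text for
  # the external script.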

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                  ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                  ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("spindle_use", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict
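
  # A minimal reply that passes the checks above, with hypothetical node
  # names (for an allocation request, "result" is the chosen node list):
  #
  #   {"success": true, "info": "allocation successful",
  #    "result": ["node1.example.com", "node2.example.com"]}
  #
  # For node-evacuation and group-change modes, "result" must instead
  # satisfy _NEVAC_RESULT, i.e. be a (moved, failed, jobs) triple.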

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
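
  # Usage sketch with made-up data:
  #
  #   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
  #   groups = {"uuid-a": {"name": "group1"}}
  #   IAllocator._NodesToGroups(node2group, groups, ["node1", "node2", "nodeX"])
  #   => ["group1", "uuid-b"]
  #
  # "nodeX" is unknown and ignored; "uuid-b" has no group entry, so the UUID
  # itself is used as the name.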


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       spindle_use=self.op.spindle_use)
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text

    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
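

# Usage sketch: query LUs resolve their backend through this helper, e.g.
# _GetQueryImplementation(constants.QR_NODE) returns _NodeQuery, while an
# unknown resource name such as "bogus" raises OpPrereqError.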