4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
61 from ganeti import rpc
62 from ganeti import runtime
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcode.OpCode}
96 @param jobs: A list of lists of opcode objects
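
# Illustrative sketch (not part of the original module): an LU's Exec method
# can hand follow-up work to the job queue by returning a ResultWithJobs
# instance; mcpu._ProcessResult then submits the contained opcode lists as
# jobs.  The opcodes used here are only an example:
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpClusterVerifyConfig()],
#             [opcodes.OpClusterVerifyGroup(group_name="default")]]
#     return ResultWithJobs(jobs)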
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
141 # Dicts used to declare locking needs to mcpu
142 self.needed_locks = None
143 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.remove_locks = {}
146 # Used to force good behavior when calling helper functions
147 self.recalculate_locks = {}
149 self.Log = processor.Log # pylint: disable=C0103
150 self.LogWarning = processor.LogWarning # pylint: disable=C0103
151 self.LogInfo = processor.LogInfo # pylint: disable=C0103
152 self.LogStep = processor.LogStep # pylint: disable=C0103
153 # support for dry-run
154 self.dry_run_result = None
155 # support for generic debug attribute
156 if (not hasattr(self.op, "debug_level") or
157 not isinstance(self.op.debug_level, int)):
158 self.op.debug_level = 0
163 # Validate opcode parameters and set defaults
164 self.op.Validate(True)
166 self.CheckArguments()
168 def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments.
171 This method is for doing a simple syntactic check and ensuring
172 validity of opcode parameters, without any cluster-related
173 checks. While the same can be accomplished in ExpandNames and/or
174 CheckPrereq, doing these separately is better because:
176 - ExpandNames is left as purely a lock-related function
177 - CheckPrereq is run after we have acquired locks (and possible
180 The function is allowed to change the self.op attribute so that
181 later methods can no longer worry about missing parameters.
186 def ExpandNames(self):
187 """Expand names for this LU.
189 This method is called before starting to execute the opcode, and it should
190 update all the parameters of the opcode to their canonical form (e.g. a
191 short node name must be fully expanded after this method has successfully
192 completed). This way locking, hooks, logging, etc. can work correctly.
194 LUs which implement this method must also populate the self.needed_locks
195 member, as a dict with lock levels as keys, and a list of needed lock names
198 - use an empty dict if you don't need any lock
199 - if you don't need any lock at a particular level omit that
200 level (note that in this case C{DeclareLocks} won't be called
201 at all for that level)
202 - if you need locks at a level, but you can't calculate it in
203 this function, initialise that level with an empty list and do
204 further processing in L{LogicalUnit.DeclareLocks} (see that
205 function's docstring)
206 - don't put anything for the BGL level
207 - if you want all locks at a level use L{locking.ALL_SET} as a value
209 If you need to share locks (rather than acquire them exclusively) at one
210 level you can modify self.share_locks, setting a true value (usually 1) for
211 that level. By default locks are not shared.
213 This function can also define a list of tasklets, which then will be
214 executed in order instead of the usual LU-level CheckPrereq and Exec
215 functions, if those are not defined by the LU.
219 # Acquire all nodes and one instance
220 self.needed_locks = {
221 locking.LEVEL_NODE: locking.ALL_SET,
222 locking.LEVEL_INSTANCE: ['instance1.example.com'],
224 # Acquire just two nodes
225 self.needed_locks = {
226 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
229 self.needed_locks = {} # No, you can't leave it to the default value None
232 # The implementation of this method is mandatory only if the new LU is
233 # concurrent, so that old LUs don't need to be changed all at the same
236 self.needed_locks = {} # Exclusive LUs don't need locks.
238 raise NotImplementedError
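
  # Illustrative sketch (not part of the original module): a typical
  # ExpandNames implementation for a concurrent LU that wants to read, but
  # not modify, all nodes, as described in the docstring above.  share_locks
  # defaults to 0 (exclusive) per level and can be overridden selectively:
  #
  #   def ExpandNames(self):
  #     self.needed_locks = {
  #       locking.LEVEL_NODE: locking.ALL_SET,
  #       }
  #     self.share_locks[locking.LEVEL_NODE] = 1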
240 def DeclareLocks(self, level):
241 """Declare LU locking needs for a level
243 While most LUs can just declare their locking needs at ExpandNames time,
244 sometimes there's the need to calculate some locks after having acquired
245 the ones before. This function is called just before acquiring locks at a
246 particular level, but after acquiring the ones at lower levels, and permits
247 such calculations. It can be used to modify self.needed_locks, and by
248 default it does nothing.
250 This function is only called if you have something already set in
251 self.needed_locks for the level.
253 @param level: Locking level which is going to be locked
254 @type level: member of L{ganeti.locking.LEVELS}
258 def CheckPrereq(self):
259 """Check prerequisites for this LU.
261 This method should check that the prerequisites for the execution
262 of this LU are fulfilled. It can do internode communication, but
263 it should be idempotent - no cluster or system changes are
266 The method should raise errors.OpPrereqError in case something is
267 not fulfilled. Its return value is ignored.
269 This method should also update all the parameters of the opcode to
270 their canonical form if it hasn't been done by ExpandNames before.
273 if self.tasklets is not None:
274 for (idx, tl) in enumerate(self.tasklets):
275 logging.debug("Checking prerequisites for tasklet %s/%s",
276 idx + 1, len(self.tasklets))
281 def Exec(self, feedback_fn):
284 This method should implement the actual work. It should raise
285 errors.OpExecError for failures that are somewhat dealt with in
289 if self.tasklets is not None:
290 for (idx, tl) in enumerate(self.tasklets):
291 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
294 raise NotImplementedError
296 def BuildHooksEnv(self):
297 """Build hooks environment for this LU.
300 @return: Dictionary containing the environment that will be used for
301 running the hooks for this LU. The keys of the dict must not be prefixed
302 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
303 will extend the environment with additional variables. If no environment
304 should be defined, an empty dictionary should be returned (not C{None}).
305 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
309 raise NotImplementedError
311 def BuildHooksNodes(self):
312 """Build list of nodes to run LU's hooks.
314 @rtype: tuple; (list, list)
315 @return: Tuple containing a list of node names on which the hook
316 should run before the execution and a list of node names on which the
317 hook should run after the execution. If there are no nodes, an empty
318 list should be returned (not C{None}).
319 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
323 raise NotImplementedError
325 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
326 """Notify the LU about the results of its hooks.
328 This method is called every time a hooks phase is executed, and notifies
329 the Logical Unit about the hooks' result. The LU can then use it to alter
330 its result based on the hooks. By default the method does nothing and the
331 previous result is passed back unchanged but any LU can define it if it
332 wants to use the local cluster hook-scripts somehow.
334 @param phase: one of L{constants.HOOKS_PHASE_POST} or
335 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
336 @param hook_results: the results of the multi-node hooks rpc call
337 @param feedback_fn: function used to send feedback back to the caller
338 @param lu_result: the previous Exec result this LU had, or None
340 @return: the new Exec result, based on the previous result
344 # API must be kept, thus we ignore the unused-argument and
345 # "could be a function" warnings
346 # pylint: disable=W0613,R0201
349 def _ExpandAndLockInstance(self):
350 """Helper function to expand and lock an instance.
352 Many LUs that work on an instance take its name in self.op.instance_name
353 and need to expand it and then declare the expanded name for locking. This
354 function does it, and then updates self.op.instance_name to the expanded
355 name. It also initializes needed_locks as a dict, if this hasn't been done
359 if self.needed_locks is None:
360 self.needed_locks = {}
362 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
363 "_ExpandAndLockInstance called with instance-level locks set"
364 self.op.instance_name = _ExpandInstanceName(self.cfg,
365 self.op.instance_name)
366 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
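
  # Illustrative sketch (not part of the original module): how an LU working
  # on a single instance would typically use the helper above from its own
  # ExpandNames:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #
  # Afterwards self.op.instance_name holds the fully expanded name and
  # self.needed_locks[locking.LEVEL_INSTANCE] requests the instance lock.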
368 def _LockInstancesNodes(self, primary_only=False,
369 level=locking.LEVEL_NODE):
370 """Helper function to declare instances' nodes for locking.
372 This function should be called after locking one or more instances to lock
373 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
374 with all primary or secondary nodes for instances already locked and
375 present in self.needed_locks[locking.LEVEL_INSTANCE].
377 It should be called from DeclareLocks, and for safety only works if
378 self.recalculate_locks[locking.LEVEL_NODE] is set.
380 In the future it may grow parameters to just lock some instance's nodes, or
381 to just lock primaries or secondary nodes, if needed.
383 It should be called in DeclareLocks in a way similar to::
385 if level == locking.LEVEL_NODE:
386 self._LockInstancesNodes()
388 @type primary_only: boolean
389 @param primary_only: only lock primary nodes of locked instances
390 @param level: Which lock level to use for locking nodes
393 assert level in self.recalculate_locks, \
394 "_LockInstancesNodes helper function called with no nodes to recalculate"
396 # TODO: check if we've really been called with the instance locks held
398 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
399 # future we might want to have different behaviors depending on the value
400 # of self.recalculate_locks[locking.LEVEL_NODE]
402 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
403 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
404 wanted_nodes.append(instance.primary_node)
406 wanted_nodes.extend(instance.secondary_nodes)
408 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
409 self.needed_locks[level] = wanted_nodes
410 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
411 self.needed_locks[level].extend(wanted_nodes)
413 raise errors.ProgrammerError("Unknown recalculation mode")
415 del self.recalculate_locks[level]
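
  # Illustrative sketch (not part of the original module): the usual pattern
  # combining recalculate_locks with DeclareLocks, as hinted at in the
  # docstring above.  LOCKS_REPLACE is one of the recalculation modes checked
  # by _LockInstancesNodes:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()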
418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
419 """Simple LU which runs no hooks.
421 This LU is intended as a parent for other LogicalUnits which will
422 run no hooks, in order to reduce duplicate code.
428 def BuildHooksEnv(self):
429 """Empty BuildHooksEnv for NoHooksLu.
431 This just raises an error.
434 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
436 def BuildHooksNodes(self):
437 """Empty BuildHooksNodes for NoHooksLU.
440 raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU,
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
463 """Check prerequisites for this tasklets.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or
486 raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
499 def __init__(self, qfilter, fields, use_locking):
500 """Initializes this class.
503 self.use_locking = use_locking
505 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
506 namefield=self.SORT_FIELD)
507 self.requested_data = self.query.RequestedData()
508 self.names = self.query.RequestedNames()
510 # Sort only if no names were requested
511 self.sort_by_name = not self.names
513 self.do_locking = None
516 def _GetNames(self, lu, all_names, lock_level):
517 """Helper function to determine names asked for in the query.
521 names = lu.owned_locks(lock_level)
525 if self.wanted == locking.ALL_SET:
526 assert not self.names
527 # caller didn't specify names, so ordering is not important
528 return utils.NiceSort(names)
530 # caller specified names and we must keep the same order
532 assert not self.do_locking or lu.glm.is_owned(lock_level)
534 missing = set(self.wanted).difference(names)
536 raise errors.OpExecError("Some items were removed before retrieving"
537 " their data: %s" % missing)
539 # Return expanded names
542 def ExpandNames(self, lu):
543 """Expand names for this query.
545 See L{LogicalUnit.ExpandNames}.
548 raise NotImplementedError()
550 def DeclareLocks(self, lu, level):
551 """Declare locks for this query.
553 See L{LogicalUnit.DeclareLocks}.
556 raise NotImplementedError()
558 def _GetQueryData(self, lu):
559 """Collects all data for this query.
561 @return: Query data object
564 raise NotImplementedError()
566 def NewStyleQuery(self, lu):
567 """Collect data and execute query.
570 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
571 sort_by_name=self.sort_by_name)
573 def OldStyleQuery(self, lu):
574 """Collect data and execute query.
577 return self.query.OldStyleQuery(self._GetQueryData(lu),
578 sort_by_name=self.sort_by_name)
582 """Returns a dict declaring all lock levels shared.
585 return dict.fromkeys(locking.LEVELS, 1)
588 def _MakeLegacyNodeInfo(data):
589 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
591 Converts the data into a single dictionary. This is fine for most use cases,
592 but some require information from more than one volume group or hypervisor.
595 (bootid, (vg_info, ), (hv_info, )) = data
597 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
602 def _AnnotateDiskParams(instance, devs, cfg):
603 """Little helper wrapper to the rpc annotation method.
605 @param instance: The instance object
606 @type devs: List of L{objects.Disk}
607 @param devs: The root devices (not any of their children!)
608 @param cfg: The config object
609 @return: The annotated disk copies
610 @see L{rpc.AnnotateDiskParams}
613 return rpc.AnnotateDiskParams(instance.disk_template, devs,
614 cfg.GetInstanceDiskParams(instance))
617 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
619 """Checks if node groups for locked instances are still correct.
621 @type cfg: L{config.ConfigWriter}
622 @param cfg: Cluster configuration
623 @type instances: dict; string as key, L{objects.Instance} as value
624 @param instances: Dictionary, instance name as key, instance object as value
625 @type owned_groups: iterable of string
626 @param owned_groups: List of owned groups
627 @type owned_nodes: iterable of string
628 @param owned_nodes: List of owned nodes
629 @type cur_group_uuid: string or None
630 @param cur_group_uuid: Optional group UUID to check against instance's groups
633 for (name, inst) in instances.items():
634 assert owned_nodes.issuperset(inst.all_nodes), \
635 "Instance %s's nodes changed while we kept the lock" % name
637 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
639 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
640 "Instance %s has no node in group %s" % (name, cur_group_uuid)
643 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
645 """Checks if the owned node groups are still correct for an instance.
647 @type cfg: L{config.ConfigWriter}
648 @param cfg: The cluster configuration
649 @type instance_name: string
650 @param instance_name: Instance name
651 @type owned_groups: set or frozenset
652 @param owned_groups: List of currently owned node groups
653 @type primary_only: boolean
654 @param primary_only: Whether to check node groups for only the primary node
657 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
659 if not owned_groups.issuperset(inst_groups):
660 raise errors.OpPrereqError("Instance %s's node groups changed since"
661 " locks were acquired, current groups are"
662 " are '%s', owning groups '%s'; retry the"
665 utils.CommaJoin(inst_groups),
666 utils.CommaJoin(owned_groups)),
672 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
673 """Checks if the instances in a node group are still correct.
675 @type cfg: L{config.ConfigWriter}
676 @param cfg: The cluster configuration
677 @type group_uuid: string
678 @param group_uuid: Node group UUID
679 @type owned_instances: set or frozenset
680 @param owned_instances: List of currently owned instances
683 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
684 if owned_instances != wanted_instances:
685 raise errors.OpPrereqError("Instances in node group '%s' changed since"
686 " locks were acquired, wanted '%s', have '%s';"
687 " retry the operation" %
689 utils.CommaJoin(wanted_instances),
690 utils.CommaJoin(owned_instances)),
693 return wanted_instances
696 def _SupportsOob(cfg, node):
697 """Tells if node supports OOB.
699 @type cfg: L{config.ConfigWriter}
700 @param cfg: The cluster configuration
701 @type node: L{objects.Node}
702 @param node: The node
703 @return: The OOB script if supported or an empty string otherwise
706 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
709 def _GetWantedNodes(lu, nodes):
710 """Returns list of checked and expanded node names.
712 @type lu: L{LogicalUnit}
713 @param lu: the logical unit on whose behalf we execute
715 @param nodes: list of node names or None for all nodes
717 @return: the list of nodes, sorted
718 @raise errors.ProgrammerError: if the nodes parameter is wrong type
722 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
724 return utils.NiceSort(lu.cfg.GetNodeList())
727 def _GetWantedInstances(lu, instances):
728 """Returns list of checked and expanded instance names.
730 @type lu: L{LogicalUnit}
731 @param lu: the logical unit on whose behalf we execute
732 @type instances: list
733 @param instances: list of instance names or None for all instances
735 @return: the list of instances, sorted
736 @raise errors.OpPrereqError: if the instances parameter is wrong type
737 @raise errors.OpPrereqError: if any of the passed instances is not found
741 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
743 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
747 def _GetUpdatedParams(old_params, update_dict,
748 use_default=True, use_none=False):
749 """Return the new version of a parameter dictionary.
751 @type old_params: dict
752 @param old_params: old parameters
753 @type update_dict: dict
754 @param update_dict: dict containing new parameter values, or
755 constants.VALUE_DEFAULT to reset the parameter to its default
757 @type use_default: boolean
758 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
759 values as 'to be deleted' values
760 @type use_none: boolean
761 @param use_none: whether to recognise C{None} values as 'to be
764 @return: the new parameter dictionary
767 params_copy = copy.deepcopy(old_params)
768 for key, val in update_dict.iteritems():
769 if ((use_default and val == constants.VALUE_DEFAULT) or
770 (use_none and val is None)):
776 params_copy[key] = val
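
def _ExampleGetUpdatedParamsUsage():
  """Illustrative sketch only; hypothetical helper, not used anywhere.

  Demonstrates the merge semantics documented for L{_GetUpdatedParams}:
  entries set to L{constants.VALUE_DEFAULT} are removed from the result (so
  cluster defaults apply again), all other entries overwrite the old values.

  """
  old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/vda1"}
  update = {"root_path": constants.VALUE_DEFAULT, "serial_console": True}
  # Expected result: {"kernel_path": "/boot/vmlinuz", "serial_console": True}
  return _GetUpdatedParams(old, update)
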
780 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
781 """Return the new version of a instance policy.
783 @param group_policy: whether this policy applies to a group and thus
784 we should support removal of policy entries
787 use_none = use_default = group_policy
788 ipolicy = copy.deepcopy(old_ipolicy)
789 for key, value in new_ipolicy.items():
790 if key not in constants.IPOLICY_ALL_KEYS:
791 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
793 if key in constants.IPOLICY_ISPECS:
794 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
795 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
797 use_default=use_default)
799 if (not value or value == [constants.VALUE_DEFAULT] or
800 value == constants.VALUE_DEFAULT):
804 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
805 " on the cluster'" % key,
808 if key in constants.IPOLICY_PARAMETERS:
809 # FIXME: we assume all such values are float
811 ipolicy[key] = float(value)
812 except (TypeError, ValueError), err:
813 raise errors.OpPrereqError("Invalid value for attribute"
814 " '%s': '%s', error: %s" %
815 (key, value, err), errors.ECODE_INVAL)
817 # FIXME: we assume all others are lists; this should be redone
819 ipolicy[key] = list(value)
821 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
822 except errors.ConfigurationError, err:
823 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
828 def _UpdateAndVerifySubDict(base, updates, type_check):
829 """Updates and verifies a dict with sub dicts of the same type.
831 @param base: The dict with the old data
832 @param updates: The dict with the new data
833 @param type_check: Dict suitable to ForceDictType to verify correct types
834 @returns: A new dict with updated and verified values
838 new = _GetUpdatedParams(old, value)
839 utils.ForceDictType(new, type_check)
842 ret = copy.deepcopy(base)
843 ret.update(dict((key, fn(base.get(key, {}), value))
844 for key, value in updates.items()))
848 def _MergeAndVerifyHvState(op_input, obj_input):
849 """Combines the hv state from an opcode with the one of the object
851 @param op_input: The input dict from the opcode
852 @param obj_input: The input dict from the objects
853 @return: The verified and updated dict
857 invalid_hvs = set(op_input) - constants.HYPER_TYPES
859 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
860 " %s" % utils.CommaJoin(invalid_hvs),
862 if obj_input is None:
864 type_check = constants.HVSTS_PARAMETER_TYPES
865 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
870 def _MergeAndVerifyDiskState(op_input, obj_input):
871 """Combines the disk state from an opcode with the one of the object
873 @param op_input: The input dict from the opcode
874 @param obj_input: The input dict from the objects
875 @return: The verified and updated dict
878 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
880 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
881 utils.CommaJoin(invalid_dst),
883 type_check = constants.DSS_PARAMETER_TYPES
884 if obj_input is None:
886 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
888 for key, value in op_input.items())
893 def _ReleaseLocks(lu, level, names=None, keep=None):
894 """Releases locks owned by an LU.
896 @type lu: L{LogicalUnit}
897 @param level: Lock level
898 @type names: list or None
899 @param names: Names of locks to release
900 @type keep: list or None
901 @param keep: Names of locks to retain
904 assert not (keep is not None and names is not None), \
905 "Only one of the 'names' and the 'keep' parameters can be given"
907 if names is not None:
908 should_release = names.__contains__
910 should_release = lambda name: name not in keep
912 should_release = None
914 owned = lu.owned_locks(level)
916 # Not owning any lock at this level, do nothing
923 # Determine which locks to release
925 if should_release(name):
930 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
932 # Release just some locks
933 lu.glm.release(level, names=release)
935 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
938 lu.glm.release(level)
940 assert not lu.glm.is_owned(level), "No locks should be owned"
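
# Illustrative sketch (not part of the original module): releasing node locks
# that are no longer needed once the target node is known, keeping only the
# locks named in "keep" (the attribute names below are examples only):
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=[self.op.target_node, instance.primary_node])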
943 def _MapInstanceDisksToNodes(instances):
944 """Creates a map from (node, volume) to instance name.
946 @type instances: list of L{objects.Instance}
947 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
950 return dict(((node, vol), inst.name)
951 for inst in instances
952 for (node, vols) in inst.MapLVsByNode().items()
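
# Illustrative sketch (not part of the original module): shape of the mapping
# built above, for a hypothetical instance "inst1" with one LV per node
# (node and volume names are examples only):
#
#   {("node1.example.com", "xenvg/lv_disk0"): "inst1",
#    ("node2.example.com", "xenvg/lv_disk0"): "inst1"}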
956 def _RunPostHook(lu, node_name):
957 """Runs the post-hook for an opcode on a single node.
960 hm = lu.proc.BuildHooksManager(lu)
962 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
963 except Exception, err: # pylint: disable=W0703
964 lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
967 def _CheckOutputFields(static, dynamic, selected):
968 """Checks whether all selected fields are valid.
970 @type static: L{utils.FieldSet}
971 @param static: static fields set
972 @type dynamic: L{utils.FieldSet}
973 @param dynamic: dynamic fields set
980 delta = f.NonMatching(selected)
982 raise errors.OpPrereqError("Unknown output fields selected: %s"
983 % ",".join(delta), errors.ECODE_INVAL)
986 def _CheckGlobalHvParams(params):
987 """Validates that given hypervisor params are not global ones.
989 This will ensure that instances don't get customised versions of
993 used_globals = constants.HVC_GLOBALS.intersection(params)
995 msg = ("The following hypervisor parameters are global and cannot"
996 " be customized at instance level, please modify them at"
997 " cluster level: %s" % utils.CommaJoin(used_globals))
998 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1001 def _CheckNodeOnline(lu, node, msg=None):
1002 """Ensure that a given node is online.
1004 @param lu: the LU on behalf of which we make the check
1005 @param node: the node to check
1006 @param msg: if passed, should be a message to replace the default one
1007 @raise errors.OpPrereqError: if the node is offline
1011 msg = "Can't use offline node"
1012 if lu.cfg.GetNodeInfo(node).offline:
1013 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1016 def _CheckNodeNotDrained(lu, node):
1017 """Ensure that a given node is not drained.
1019 @param lu: the LU on behalf of which we make the check
1020 @param node: the node to check
1021 @raise errors.OpPrereqError: if the node is drained
1024 if lu.cfg.GetNodeInfo(node).drained:
1025 raise errors.OpPrereqError("Can't use drained node %s" % node,
1029 def _CheckNodeVmCapable(lu, node):
1030 """Ensure that a given node is vm capable.
1032 @param lu: the LU on behalf of which we make the check
1033 @param node: the node to check
1034 @raise errors.OpPrereqError: if the node is not vm capable
1037 if not lu.cfg.GetNodeInfo(node).vm_capable:
1038 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1042 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1043 """Ensure that a node supports a given OS.
1045 @param lu: the LU on behalf of which we make the check
1046 @param node: the node to check
1047 @param os_name: the OS to query about
1048 @param force_variant: whether to ignore variant errors
1049 @raise errors.OpPrereqError: if the node is not supporting the OS
1052 result = lu.rpc.call_os_get(node, os_name)
1053 result.Raise("OS '%s' not in supported OS list for node %s" %
1055 prereq=True, ecode=errors.ECODE_INVAL)
1056 if not force_variant:
1057 _CheckOSVariant(result.payload, os_name)
1060 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1061 """Ensure that a node has the given secondary ip.
1063 @type lu: L{LogicalUnit}
1064 @param lu: the LU on behalf of which we make the check
1066 @param node: the node to check
1067 @type secondary_ip: string
1068 @param secondary_ip: the ip to check
1069 @type prereq: boolean
1070 @param prereq: whether to throw a prerequisite or an execute error
1071 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1072 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1075 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1076 result.Raise("Failure checking secondary ip on node %s" % node,
1077 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1078 if not result.payload:
1079 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1080 " please fix and re-run this command" % secondary_ip)
1082 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1084 raise errors.OpExecError(msg)
1087 def _GetClusterDomainSecret():
1088 """Reads the cluster domain secret.
1091 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1095 def _CheckInstanceState(lu, instance, req_states, msg=None):
1096 """Ensure that an instance is in one of the required states.
1098 @param lu: the LU on behalf of which we make the check
1099 @param instance: the instance to check
1100 @param msg: if passed, should be a message to replace the default one
1101 @raise errors.OpPrereqError: if the instance is not in the required state
1105 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1106 if instance.admin_state not in req_states:
1107 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1108 (instance.name, instance.admin_state, msg),
1111 if constants.ADMINST_UP not in req_states:
1112 pnode = instance.primary_node
1113 if not lu.cfg.GetNodeInfo(pnode).offline:
1114 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1115 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1116 prereq=True, ecode=errors.ECODE_ENVIRON)
1117 if instance.name in ins_l.payload:
1118 raise errors.OpPrereqError("Instance %s is running, %s" %
1119 (instance.name, msg), errors.ECODE_STATE)
1121 lu.LogWarning("Primary node offline, ignoring check that instance"
1125 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1126 """Computes if value is in the desired range.
1128 @param name: name of the parameter for which we perform the check
1129 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1131 @param ipolicy: dictionary containing min, max and std values
1132 @param value: actual value that we want to use
1133 @return: None or element not meeting the criteria
1137 if value in [None, constants.VALUE_AUTO]:
1139 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1140 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1141 if value > max_v or min_v > value:
1143 fqn = "%s/%s" % (name, qualifier)
1146 return ("%s value %s is not in range [%s, %s]" %
1147 (fqn, value, min_v, max_v))
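
def _ExampleComputeMinMaxSpecUsage():
  """Illustrative sketch only; hypothetical helper, not used anywhere.

  Shows how L{_ComputeMinMaxSpec} reports a violation: values inside the
  [min, max] interval of the policy yield C{None}, values outside it yield
  a human-readable message.

  """
  ipolicy = {
    constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
    constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 4096},
    }
  # 512 MB is within [128, 4096]; expected result: None
  in_range = _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 512)
  # 8192 MB exceeds the maximum; expected result: an error string
  too_big = _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 8192)
  return (in_range, too_big)
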
1151 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1152 nic_count, disk_sizes, spindle_use,
1153 _compute_fn=_ComputeMinMaxSpec):
1154 """Verifies ipolicy against provided specs.
1157 @param ipolicy: The ipolicy
1159 @param mem_size: The memory size
1160 @type cpu_count: int
1161 @param cpu_count: Used cpu cores
1162 @type disk_count: int
1163 @param disk_count: Number of disks used
1164 @type nic_count: int
1165 @param nic_count: Number of nics used
1166 @type disk_sizes: list of ints
1167 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1168 @type spindle_use: int
1169 @param spindle_use: The number of spindles this instance uses
1170 @param _compute_fn: The compute function (unittest only)
1171 @return: A list of violations, or an empty list if no violations are found
1174 assert disk_count == len(disk_sizes)
1177 (constants.ISPEC_MEM_SIZE, "", mem_size),
1178 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1179 (constants.ISPEC_DISK_COUNT, "", disk_count),
1180 (constants.ISPEC_NIC_COUNT, "", nic_count),
1181 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1182 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1183 for idx, d in enumerate(disk_sizes)]
1186 (_compute_fn(name, qualifier, ipolicy, value)
1187 for (name, qualifier, value) in test_settings))
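
# Illustrative sketch (not part of the original module): how the function
# above is typically invoked for a hypothetical instance with two disks.
# Every spec is checked individually through _ComputeMinMaxSpec and all
# resulting messages are collected:
#
#   _ComputeIPolicySpecViolation(ipolicy, mem_size=512, cpu_count=2,
#                                disk_count=2, nic_count=1,
#                                disk_sizes=[1024, 2048], spindle_use=1)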
1190 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1191 _compute_fn=_ComputeIPolicySpecViolation):
1192 """Compute if instance meets the specs of ipolicy.
1195 @param ipolicy: The ipolicy to verify against
1196 @type instance: L{objects.Instance}
1197 @param instance: The instance to verify
1198 @param _compute_fn: The function to verify ipolicy (unittest only)
1199 @see: L{_ComputeIPolicySpecViolation}
1202 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1203 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1204 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1205 disk_count = len(instance.disks)
1206 disk_sizes = [disk.size for disk in instance.disks]
1207 nic_count = len(instance.nics)
1209 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1210 disk_sizes, spindle_use)
1213 def _ComputeIPolicyInstanceSpecViolation(
1214 ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
1215 """Compute if instance specs meets the specs of ipolicy.
1218 @param ipolicy: The ipolicy to verify against
1219 @type instance_spec: dict
1220 @param instance_spec: The instance spec to verify
1221 @param _compute_fn: The function to verify ipolicy (unittest only)
1222 @see: L{_ComputeIPolicySpecViolation}
1225 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1226 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1227 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1228 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1229 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1230 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1232 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1233 disk_sizes, spindle_use)
1236 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1238 _compute_fn=_ComputeIPolicyInstanceViolation):
1239 """Compute if instance meets the specs of the new target group.
1241 @param ipolicy: The ipolicy to verify
1242 @param instance: The instance object to verify
1243 @param current_group: The current group of the instance
1244 @param target_group: The new group of the instance
1245 @param _compute_fn: The function to verify ipolicy (unittest only)
1246 @see: L{_ComputeIPolicySpecViolation}
1249 if current_group == target_group:
1252 return _compute_fn(ipolicy, instance)
1255 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1256 _compute_fn=_ComputeIPolicyNodeViolation):
1257 """Checks that the target node is correct in terms of instance policy.
1259 @param ipolicy: The ipolicy to verify
1260 @param instance: The instance object to verify
1261 @param node: The new node to relocate the instance to
1262 @param ignore: Ignore violations of the ipolicy
1263 @param _compute_fn: The function to verify ipolicy (unittest only)
1264 @see: L{_ComputeIPolicySpecViolation}
1267 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1268 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1271 msg = ("Instance does not meet target node group's (%s) instance"
1272 " policy: %s") % (node.group, utils.CommaJoin(res))
1276 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1279 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1280 """Computes a set of any instances that would violate the new ipolicy.
1282 @param old_ipolicy: The current (still in-place) ipolicy
1283 @param new_ipolicy: The new (to become) ipolicy
1284 @param instances: List of instances to verify
1285 @return: A list of instances which violate the new ipolicy but
1289 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1290 _ComputeViolatingInstances(old_ipolicy, instances))
1293 def _ExpandItemName(fn, name, kind):
1294 """Expand an item name.
1296 @param fn: the function to use for expansion
1297 @param name: requested item name
1298 @param kind: text description ('Node' or 'Instance')
1299 @return: the resolved (full) name
1300 @raise errors.OpPrereqError: if the item is not found
1303 full_name = fn(name)
1304 if full_name is None:
1305 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1310 def _ExpandNodeName(cfg, name):
1311 """Wrapper over L{_ExpandItemName} for nodes."""
1312 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1315 def _ExpandInstanceName(cfg, name):
1316 """Wrapper over L{_ExpandItemName} for instance."""
1317 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
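
# Illustrative sketch (not part of the original module): expansion turns a
# short name into the fully qualified one stored in the configuration, and
# raises errors.OpPrereqError if the item is unknown (names are examples):
#
#   _ExpandInstanceName(self.cfg, "instance1")
#   => "instance1.example.com"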
1320 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1321 minmem, maxmem, vcpus, nics, disk_template, disks,
1322 bep, hvp, hypervisor_name, tags):
1323 """Builds instance related env variables for hooks
1325 This builds the hook environment from individual variables.
1328 @param name: the name of the instance
1329 @type primary_node: string
1330 @param primary_node: the name of the instance's primary node
1331 @type secondary_nodes: list
1332 @param secondary_nodes: list of secondary nodes as strings
1333 @type os_type: string
1334 @param os_type: the name of the instance's OS
1335 @type status: string
1336 @param status: the desired status of the instance
1337 @type minmem: string
1338 @param minmem: the minimum memory size of the instance
1339 @type maxmem: string
1340 @param maxmem: the maximum memory size of the instance
1342 @param vcpus: the count of VCPUs the instance has
1344 @param nics: list of tuples (ip, mac, mode, link) representing
1345 the NICs the instance has
1346 @type disk_template: string
1347 @param disk_template: the disk template of the instance
1349 @param disks: the list of (size, mode) pairs
1351 @param bep: the backend parameters for the instance
1353 @param hvp: the hypervisor parameters for the instance
1354 @type hypervisor_name: string
1355 @param hypervisor_name: the hypervisor for the instance
1357 @param tags: list of instance tags as strings
1359 @return: the hook environment for this instance
1364 "INSTANCE_NAME": name,
1365 "INSTANCE_PRIMARY": primary_node,
1366 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1367 "INSTANCE_OS_TYPE": os_type,
1368 "INSTANCE_STATUS": status,
1369 "INSTANCE_MINMEM": minmem,
1370 "INSTANCE_MAXMEM": maxmem,
1371 # TODO(2.7) remove deprecated "memory" value
1372 "INSTANCE_MEMORY": maxmem,
1373 "INSTANCE_VCPUS": vcpus,
1374 "INSTANCE_DISK_TEMPLATE": disk_template,
1375 "INSTANCE_HYPERVISOR": hypervisor_name,
1378 nic_count = len(nics)
1379 for idx, (ip, mac, mode, link) in enumerate(nics):
1382 env["INSTANCE_NIC%d_IP" % idx] = ip
1383 env["INSTANCE_NIC%d_MAC" % idx] = mac
1384 env["INSTANCE_NIC%d_MODE" % idx] = mode
1385 env["INSTANCE_NIC%d_LINK" % idx] = link
1386 if mode == constants.NIC_MODE_BRIDGED:
1387 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1391 env["INSTANCE_NIC_COUNT"] = nic_count
1394 disk_count = len(disks)
1395 for idx, (size, mode) in enumerate(disks):
1396 env["INSTANCE_DISK%d_SIZE" % idx] = size
1397 env["INSTANCE_DISK%d_MODE" % idx] = mode
1401 env["INSTANCE_DISK_COUNT"] = disk_count
1406 env["INSTANCE_TAGS"] = " ".join(tags)
1408 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1409 for key, value in source.items():
1410 env["INSTANCE_%s_%s" % (kind, key)] = value
1415 def _NICListToTuple(lu, nics):
1416 """Build a list of nic information tuples.
1418 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1419 value in LUInstanceQueryData.
1421 @type lu: L{LogicalUnit}
1422 @param lu: the logical unit on whose behalf we execute
1423 @type nics: list of L{objects.NIC}
1424 @param nics: list of nics to convert to hooks tuples
1428 cluster = lu.cfg.GetClusterInfo()
1432 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1433 mode = filled_params[constants.NIC_MODE]
1434 link = filled_params[constants.NIC_LINK]
1435 hooks_nics.append((ip, mac, mode, link))
1439 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1440 """Builds instance related env variables for hooks from an object.
1442 @type lu: L{LogicalUnit}
1443 @param lu: the logical unit on whose behalf we execute
1444 @type instance: L{objects.Instance}
1445 @param instance: the instance for which we should build the
1447 @type override: dict
1448 @param override: dictionary with key/values that will override
1451 @return: the hook environment dictionary
1454 cluster = lu.cfg.GetClusterInfo()
1455 bep = cluster.FillBE(instance)
1456 hvp = cluster.FillHV(instance)
1458 "name": instance.name,
1459 "primary_node": instance.primary_node,
1460 "secondary_nodes": instance.secondary_nodes,
1461 "os_type": instance.os,
1462 "status": instance.admin_state,
1463 "maxmem": bep[constants.BE_MAXMEM],
1464 "minmem": bep[constants.BE_MINMEM],
1465 "vcpus": bep[constants.BE_VCPUS],
1466 "nics": _NICListToTuple(lu, instance.nics),
1467 "disk_template": instance.disk_template,
1468 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1471 "hypervisor_name": instance.hypervisor,
1472 "tags": instance.tags,
1475 args.update(override)
1476 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1479 def _AdjustCandidatePool(lu, exceptions):
1480 """Adjust the candidate pool after node operations.
1483 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1485 lu.LogInfo("Promoted nodes to master candidate role: %s",
1486 utils.CommaJoin(node.name for node in mod_list))
1487 for name in mod_list:
1488 lu.context.ReaddNode(name)
1489 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1491 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1495 def _DecideSelfPromotion(lu, exceptions=None):
1496 """Decide whether I should promote myself as a master candidate.
1499 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1500 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1501 # the new node will increase mc_max by one, so:
1502 mc_should = min(mc_should + 1, cp_size)
1503 return mc_now < mc_should
1506 def _CalculateGroupIPolicy(cluster, group):
1507 """Calculate instance policy for group.
1510 return cluster.SimpleFillIPolicy(group.ipolicy)
1513 def _ComputeViolatingInstances(ipolicy, instances):
1514 """Computes a set of instances who violates given ipolicy.
1516 @param ipolicy: The ipolicy to verify
1517 @type instances: list of L{objects.Instance}
1518 @param instances: List of instances to verify
1519 @return: A frozenset of instance names violating the ipolicy
1522 return frozenset([inst.name for inst in instances
1523 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1526 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1527 """Check that the brigdes needed by a list of nics exist.
1530 cluster = lu.cfg.GetClusterInfo()
1531 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1532 brlist = [params[constants.NIC_LINK] for params in paramslist
1533 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1535 result = lu.rpc.call_bridges_exist(target_node, brlist)
1536 result.Raise("Error checking bridges on destination node '%s'" %
1537 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1540 def _CheckInstanceBridgesExist(lu, instance, node=None):
1541 """Check that the brigdes needed by an instance exist.
1545 node = instance.primary_node
1546 _CheckNicsBridgesExist(lu, instance.nics, node)
1549 def _CheckOSVariant(os_obj, name):
1550 """Check whether an OS name conforms to the os variants specification.
1552 @type os_obj: L{objects.OS}
1553 @param os_obj: OS object to check
1555 @param name: OS name passed by the user, to check for validity
1558 variant = objects.OS.GetVariant(name)
1559 if not os_obj.supported_variants:
1561 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1562 " passed)" % (os_obj.name, variant),
1566 raise errors.OpPrereqError("OS name must include a variant",
1569 if variant not in os_obj.supported_variants:
1570 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1573 def _GetNodeInstancesInner(cfg, fn):
1574 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1577 def _GetNodeInstances(cfg, node_name):
1578 """Returns a list of all primary and secondary instances on a node.
1582 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1585 def _GetNodePrimaryInstances(cfg, node_name):
1586 """Returns primary instances on a node.
1589 return _GetNodeInstancesInner(cfg,
1590 lambda inst: node_name == inst.primary_node)
1593 def _GetNodeSecondaryInstances(cfg, node_name):
1594 """Returns secondary instances on a node.
1597 return _GetNodeInstancesInner(cfg,
1598 lambda inst: node_name in inst.secondary_nodes)
1601 def _GetStorageTypeArgs(cfg, storage_type):
1602 """Returns the arguments for a storage type.
1605 # Special case for file storage
1606 if storage_type == constants.ST_FILE:
1607 # storage.FileStorage wants a list of storage directories
1608 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1613 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1616 for dev in instance.disks:
1617 cfg.SetDiskID(dev, node_name)
1619 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1621 result.Raise("Failed to get disk status from node %s" % node_name,
1622 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1624 for idx, bdev_status in enumerate(result.payload):
1625 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1631 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1632 """Check the sanity of iallocator and node arguments and use the
1633 cluster-wide iallocator if appropriate.
1635 Check that at most one of (iallocator, node) is specified. If none is
1636 specified, then the LU's opcode's iallocator slot is filled with the
1637 cluster-wide default iallocator.
1639 @type iallocator_slot: string
1640 @param iallocator_slot: the name of the opcode iallocator slot
1641 @type node_slot: string
1642 @param node_slot: the name of the opcode target node slot
1645 node = getattr(lu.op, node_slot, None)
1646 iallocator = getattr(lu.op, iallocator_slot, None)
1648 if node is not None and iallocator is not None:
1649 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1651 elif node is None and iallocator is None:
1652 default_iallocator = lu.cfg.GetDefaultIAllocator()
1653 if default_iallocator:
1654 setattr(lu.op, iallocator_slot, default_iallocator)
1656 raise errors.OpPrereqError("No iallocator or node given and no"
1657 " cluster-wide default iallocator found;"
1658 " please specify either an iallocator or a"
1659 " node, or set a cluster-wide default"
1660 " iallocator", errors.ECODE_INVAL)
1663 def _GetDefaultIAllocator(cfg, iallocator):
1664 """Decides on which iallocator to use.
1666 @type cfg: L{config.ConfigWriter}
1667 @param cfg: Cluster configuration object
1668 @type iallocator: string or None
1669 @param iallocator: Iallocator specified in opcode
1671 @return: Iallocator name
1675 # Use default iallocator
1676 iallocator = cfg.GetDefaultIAllocator()
1679 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1680 " opcode nor as a cluster-wide default",
1686 class LUClusterPostInit(LogicalUnit):
1687 """Logical unit for running hooks after cluster initialization.
1690 HPATH = "cluster-init"
1691 HTYPE = constants.HTYPE_CLUSTER
1693 def BuildHooksEnv(self):
1698 "OP_TARGET": self.cfg.GetClusterName(),
1701 def BuildHooksNodes(self):
1702 """Build hooks nodes.
1705 return ([], [self.cfg.GetMasterNode()])
1707 def Exec(self, feedback_fn):
1714 class LUClusterDestroy(LogicalUnit):
1715 """Logical unit for destroying the cluster.
1718 HPATH = "cluster-destroy"
1719 HTYPE = constants.HTYPE_CLUSTER
1721 def BuildHooksEnv(self):
1726 "OP_TARGET": self.cfg.GetClusterName(),
1729 def BuildHooksNodes(self):
1730 """Build hooks nodes.
1735 def CheckPrereq(self):
1736 """Check prerequisites.
1738 This checks whether the cluster is empty.
1740 Any errors are signaled by raising errors.OpPrereqError.
1743 master = self.cfg.GetMasterNode()
1745 nodelist = self.cfg.GetNodeList()
1746 if len(nodelist) != 1 or nodelist[0] != master:
1747 raise errors.OpPrereqError("There are still %d node(s) in"
1748 " this cluster." % (len(nodelist) - 1),
1750 instancelist = self.cfg.GetInstanceList()
1752 raise errors.OpPrereqError("There are still %d instance(s) in"
1753 " this cluster." % len(instancelist),
1756 def Exec(self, feedback_fn):
1757 """Destroys the cluster.
1760 master_params = self.cfg.GetMasterNetworkParameters()
1762 # Run post hooks on master node before it's removed
1763 _RunPostHook(self, master_params.name)
1765 ems = self.cfg.GetUseExternalMipScript()
1766 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1769 self.LogWarning("Error disabling the master IP address: %s",
1772 return master_params.name
1775 def _VerifyCertificate(filename):
1776 """Verifies a certificate for L{LUClusterVerifyConfig}.
1778 @type filename: string
1779 @param filename: Path to PEM file
1783 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1784 utils.ReadFile(filename))
1785 except Exception, err: # pylint: disable=W0703
1786 return (LUClusterVerifyConfig.ETYPE_ERROR,
1787 "Failed to load X509 certificate %s: %s" % (filename, err))
1790 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1791 constants.SSL_CERT_EXPIRATION_ERROR)
1794 fnamemsg = "While verifying %s: %s" % (filename, msg)
1799 return (None, fnamemsg)
1800 elif errcode == utils.CERT_WARNING:
1801 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1802 elif errcode == utils.CERT_ERROR:
1803 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1805 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1808 def _GetAllHypervisorParameters(cluster, instances):
1809 """Compute the set of all hypervisor parameters.
1811 @type cluster: L{objects.Cluster}
1812 @param cluster: the cluster object
1813 @type instances: list of L{objects.Instance}
1814 @param instances: additional instances from which to obtain parameters
1815 @rtype: list of (origin, hypervisor, parameters)
1816 @return: a list with all parameters found, indicating the hypervisor they
1817 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1822 for hv_name in cluster.enabled_hypervisors:
1823 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1825 for os_name, os_hvp in cluster.os_hvp.items():
1826 for hv_name, hv_params in os_hvp.items():
1828 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1829 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1831 # TODO: collapse identical parameter values in a single one
1832 for instance in instances:
1833 if instance.hvparams:
1834 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1835 cluster.FillHV(instance)))
1840 class _VerifyErrors(object):
1841 """Mix-in for cluster/group verify LUs.
1843 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1844 self.op and self._feedback_fn to be available.)
1848 ETYPE_FIELD = "code"
1849 ETYPE_ERROR = "ERROR"
1850 ETYPE_WARNING = "WARNING"
1852 def _Error(self, ecode, item, msg, *args, **kwargs):
1853 """Format an error message.
1855 Based on the opcode's error_codes parameter, either format a
1856 parseable error code, or a simpler error string.
1858 This must be called only from Exec and functions called from Exec.
1861 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1862 itype, etxt, _ = ecode
1863 # first complete the msg
1866 # then format the whole message
1867 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1868 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1874 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1875 # and finally report it via the feedback_fn
1876 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
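
  # Illustrative sketch (not part of the original module): depending on the
  # opcode's error_codes flag, the same problem is reported either in the
  # machine-parseable or in the human-readable form (values are examples
  # only):
  #
  #   ERROR:ECLUSTERCFG:cluster::configuration inconsistency
  #   ERROR: cluster: configuration inconsistency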
1878 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1879 """Log an error message if the passed condition is True.
1883 or self.op.debug_simulate_errors) # pylint: disable=E1101
1885 # If the error code is in the list of ignored errors, demote the error to a
1887 (_, etxt, _) = ecode
1888 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1889 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1892 self._Error(ecode, *args, **kwargs)
1894 # do not mark the operation as failed for WARN cases only
1895 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1896 self.bad = self.bad or cond
1899 class LUClusterVerify(NoHooksLU):
1900 """Submits all jobs necessary to verify the cluster.
1905 def ExpandNames(self):
1906 self.needed_locks = {}
1908 def Exec(self, feedback_fn):
1911 if self.op.group_name:
1912 groups = [self.op.group_name]
1913 depends_fn = lambda: None
1915 groups = self.cfg.GetNodeGroupList()
1917 # Verify global configuration
1919 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1922 # Always depend on global verification
1923 depends_fn = lambda: [(-len(jobs), [])]
1926 [opcodes.OpClusterVerifyGroup(group_name=group,
1927 ignore_errors=self.op.ignore_errors,
1928 depends=depends_fn())]
1929 for group in groups)
1931 # Fix up all parameters
1932 for op in itertools.chain(*jobs): # pylint: disable=W0142
1933 op.debug_simulate_errors = self.op.debug_simulate_errors
1934 op.verbose = self.op.verbose
1935 op.error_codes = self.op.error_codes
1937 op.skip_checks = self.op.skip_checks
1938 except AttributeError:
1939 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1941 return ResultWithJobs(jobs)
1944 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1945 """Verifies the cluster config.
1950 def _VerifyHVP(self, hvp_data):
1951 """Verifies locally the syntax of the hypervisor parameters.
1954 for item, hv_name, hv_params in hvp_data:
1955 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1958 hv_class = hypervisor.GetHypervisor(hv_name)
1959 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1960 hv_class.CheckParameterSyntax(hv_params)
1961 except errors.GenericError, err:
1962 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
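# Illustrative only: each hvp_data entry checked above is a
# (source, hypervisor, parameters) tuple such as
#   ("cluster", "xen-pvm", {...}),
#   ("os debian-image", "xen-pvm", {...}) or
#   ("instance inst1.example.com", "kvm", {...})
# (the OS, instance and hypervisor names here are made up).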
1964 def ExpandNames(self):
1965 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1966 self.share_locks = _ShareAll()
1968 def CheckPrereq(self):
1969 """Check prerequisites.
1972 # Retrieve all information
1973 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1974 self.all_node_info = self.cfg.GetAllNodesInfo()
1975 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1977 def Exec(self, feedback_fn):
1978 """Verify integrity of cluster, performing various test on nodes.
1982 self._feedback_fn = feedback_fn
1984 feedback_fn("* Verifying cluster config")
1986 for msg in self.cfg.VerifyConfig():
1987 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1989 feedback_fn("* Verifying cluster certificate files")
1991 for cert_filename in constants.ALL_CERT_FILES:
1992 (errcode, msg) = _VerifyCertificate(cert_filename)
1993 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1995 feedback_fn("* Verifying hypervisor parameters")
1997 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1998 self.all_inst_info.values()))
2000 feedback_fn("* Verifying all nodes belong to an existing group")
2002 # We do this verification here because, should this bogus circumstance
2003 # occur, it would never be caught by VerifyGroup, which only acts on
2004 # nodes/instances reachable from existing node groups.
2006 dangling_nodes = set(node.name for node in self.all_node_info.values()
2007 if node.group not in self.all_group_info)
2009 dangling_instances = {}
2010 no_node_instances = []
2012 for inst in self.all_inst_info.values():
2013 if inst.primary_node in dangling_nodes:
2014 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2015 elif inst.primary_node not in self.all_node_info:
2016 no_node_instances.append(inst.name)
2021 utils.CommaJoin(dangling_instances.get(node.name,
2023 for node in dangling_nodes]
2025 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2027 "the following nodes (and their instances) belong to a non"
2028 " existing group: %s", utils.CommaJoin(pretty_dangling))
2030 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2032 "the following instances have a non-existing primary-node:"
2033 " %s", utils.CommaJoin(no_node_instances))
2038 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2039 """Verifies the status of a node group.
2042 HPATH = "cluster-verify"
2043 HTYPE = constants.HTYPE_CLUSTER
2046 _HOOKS_INDENT_RE = re.compile("^", re.M)
2048 class NodeImage(object):
2049 """A class representing the logical and physical status of a node.
2052 @ivar name: the node name to which this object refers
2053 @ivar volumes: a structure as returned from
2054 L{ganeti.backend.GetVolumeList} (runtime)
2055 @ivar instances: a list of running instances (runtime)
2056 @ivar pinst: list of configured primary instances (config)
2057 @ivar sinst: list of configured secondary instances (config)
2058 @ivar sbp: dictionary of {primary-node: list of instances} for all
2059 instances for which this node is secondary (config)
2060 @ivar mfree: free memory, as reported by hypervisor (runtime)
2061 @ivar dfree: free disk, as reported by the node (runtime)
2062 @ivar offline: the offline status (config)
2063 @type rpc_fail: boolean
2064 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2065 not whether the individual keys were correct) (runtime)
2066 @type lvm_fail: boolean
2067 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2068 @type hyp_fail: boolean
2069 @ivar hyp_fail: whether the RPC call didn't return the instance list
2070 @type ghost: boolean
2071 @ivar ghost: whether this is a known node or not (config)
2072 @type os_fail: boolean
2073 @ivar os_fail: whether the RPC call didn't return valid OS data
2075 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2076 @type vm_capable: boolean
2077 @ivar vm_capable: whether the node can host instances
2080 def __init__(self, offline=False, name=None, vm_capable=True):
2089 self.offline = offline
2090 self.vm_capable = vm_capable
2091 self.rpc_fail = False
2092 self.lvm_fail = False
2093 self.hyp_fail = False
2095 self.os_fail = False
2098 def ExpandNames(self):
2099 # This raises errors.OpPrereqError on its own:
2100 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2102 # Get instances in node group; this is unsafe and needs verification later
2104 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2106 self.needed_locks = {
2107 locking.LEVEL_INSTANCE: inst_names,
2108 locking.LEVEL_NODEGROUP: [self.group_uuid],
2109 locking.LEVEL_NODE: [],
2112 self.share_locks = _ShareAll()
2114 def DeclareLocks(self, level):
2115 if level == locking.LEVEL_NODE:
2116 # Get members of node group; this is unsafe and needs verification later
2117 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2119 all_inst_info = self.cfg.GetAllInstancesInfo()
2121 # In Exec(), we warn about mirrored instances that have primary and
2122 # secondary living in separate node groups. To fully verify that
2123 # volumes for these instances are healthy, we will need to do an
2124 # extra call to their secondaries. We ensure here those nodes will
2126 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2127 # Important: access only the instances whose lock is owned
2128 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2129 nodes.update(all_inst_info[inst].secondary_nodes)
2131 self.needed_locks[locking.LEVEL_NODE] = nodes
2133 def CheckPrereq(self):
2134 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2135 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2137 group_nodes = set(self.group_info.members)
2139 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2142 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2144 unlocked_instances = \
2145 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2148 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2149 utils.CommaJoin(unlocked_nodes),
2152 if unlocked_instances:
2153 raise errors.OpPrereqError("Missing lock for instances: %s" %
2154 utils.CommaJoin(unlocked_instances),
2157 self.all_node_info = self.cfg.GetAllNodesInfo()
2158 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2160 self.my_node_names = utils.NiceSort(group_nodes)
2161 self.my_inst_names = utils.NiceSort(group_instances)
2163 self.my_node_info = dict((name, self.all_node_info[name])
2164 for name in self.my_node_names)
2166 self.my_inst_info = dict((name, self.all_inst_info[name])
2167 for name in self.my_inst_names)
2169 # We detect here the nodes that will need the extra RPC calls for verifying
2170 # split LV volumes; they should be locked.
2171 extra_lv_nodes = set()
2173 for inst in self.my_inst_info.values():
2174 if inst.disk_template in constants.DTS_INT_MIRROR:
2175 for nname in inst.all_nodes:
2176 if self.all_node_info[nname].group != self.group_uuid:
2177 extra_lv_nodes.add(nname)
2179 unlocked_lv_nodes = \
2180 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2182 if unlocked_lv_nodes:
2183 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2184 utils.CommaJoin(unlocked_lv_nodes),
2186 self.extra_lv_nodes = list(extra_lv_nodes)
2188 def _VerifyNode(self, ninfo, nresult):
2189 """Perform some basic validation on data returned from a node.
2191 - check the result data structure is well formed and has all the required fields
2193 - check ganeti version
2195 @type ninfo: L{objects.Node}
2196 @param ninfo: the node to check
2197 @param nresult: the results from the node
2199 @return: whether overall this call was successful (and we can expect
2200 reasonable values in the response)
2204 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2206 # main result, nresult should be a non-empty dict
2207 test = not nresult or not isinstance(nresult, dict)
2208 _ErrorIf(test, constants.CV_ENODERPC, node,
2209 "unable to verify node: no data returned")
2213 # compares ganeti version
2214 local_version = constants.PROTOCOL_VERSION
2215 remote_version = nresult.get("version", None)
2216 test = not (remote_version and
2217 isinstance(remote_version, (list, tuple)) and
2218 len(remote_version) == 2)
2219 _ErrorIf(test, constants.CV_ENODERPC, node,
2220 "connection to node returned invalid data")
2224 test = local_version != remote_version[0]
2225 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2226 "incompatible protocol versions: master %s,"
2227 " node %s", local_version, remote_version[0])
2231 # node seems compatible, we can actually try to look into its results
2233 # full package version
2234 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2235 constants.CV_ENODEVERSION, node,
2236 "software version mismatch: master %s, node %s",
2237 constants.RELEASE_VERSION, remote_version[1],
2238 code=self.ETYPE_WARNING)
2240 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2241 if ninfo.vm_capable and isinstance(hyp_result, dict):
2242 for hv_name, hv_result in hyp_result.iteritems():
2243 test = hv_result is not None
2244 _ErrorIf(test, constants.CV_ENODEHV, node,
2245 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2247 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2248 if ninfo.vm_capable and isinstance(hvp_result, list):
2249 for item, hv_name, hv_result in hvp_result:
2250 _ErrorIf(True, constants.CV_ENODEHV, node,
2251 "hypervisor %s parameter verify failure (source %s): %s",
2252 hv_name, item, hv_result)
2254 test = nresult.get(constants.NV_NODESETUP,
2255 ["Missing NODESETUP results"])
2256 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2261 def _VerifyNodeTime(self, ninfo, nresult,
2262 nvinfo_starttime, nvinfo_endtime):
2263 """Check the node time.
2265 @type ninfo: L{objects.Node}
2266 @param ninfo: the node to check
2267 @param nresult: the remote results for the node
2268 @param nvinfo_starttime: the start time of the RPC call
2269 @param nvinfo_endtime: the end time of the RPC call
2273 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2275 ntime = nresult.get(constants.NV_TIME, None)
2277 ntime_merged = utils.MergeTime(ntime)
2278 except (ValueError, TypeError):
2279 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2282 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2283 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2284 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2285 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2289 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2290 "Node time diverges by at least %s from master node time",
2293 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2294 """Check the node LVM results.
2296 @type ninfo: L{objects.Node}
2297 @param ninfo: the node to check
2298 @param nresult: the remote results for the node
2299 @param vg_name: the configured VG name
2306 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2308 # checks vg existence and size > 20G
2309 vglist = nresult.get(constants.NV_VGLIST, None)
2311 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2313 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2314 constants.MIN_VG_SIZE)
2315 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2318 pvlist = nresult.get(constants.NV_PVLIST, None)
2319 test = pvlist is None
2320 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2322 # check that ':' is not present in PV names, since it's a
2323 # special character for lvcreate (denotes the range of PEs to use on the PV)
2325 for _, pvname, owner_vg in pvlist:
2326 test = ":" in pvname
2327 _ErrorIf(test, constants.CV_ENODELVM, node,
2328 "Invalid character ':' in PV '%s' of VG '%s'",
2331 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2332 """Check the node bridges.
2334 @type ninfo: L{objects.Node}
2335 @param ninfo: the node to check
2336 @param nresult: the remote results for the node
2337 @param bridges: the expected list of bridges
2344 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2346 missing = nresult.get(constants.NV_BRIDGES, None)
2347 test = not isinstance(missing, list)
2348 _ErrorIf(test, constants.CV_ENODENET, node,
2349 "did not return valid bridge information")
2351 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2352 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2354 def _VerifyNodeUserScripts(self, ninfo, nresult):
2355 """Check the results of user scripts presence and executability on the node
2357 @type ninfo: L{objects.Node}
2358 @param ninfo: the node to check
2359 @param nresult: the remote results for the node
2364 test = not constants.NV_USERSCRIPTS in nresult
2365 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2366 "did not return user scripts information")
2368 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2370 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2371 "user scripts not present or not executable: %s" %
2372 utils.CommaJoin(sorted(broken_scripts)))
2374 def _VerifyNodeNetwork(self, ninfo, nresult):
2375 """Check the node network connectivity results.
2377 @type ninfo: L{objects.Node}
2378 @param ninfo: the node to check
2379 @param nresult: the remote results for the node
2383 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2385 test = constants.NV_NODELIST not in nresult
2386 _ErrorIf(test, constants.CV_ENODESSH, node,
2387 "node hasn't returned node ssh connectivity data")
2389 if nresult[constants.NV_NODELIST]:
2390 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2391 _ErrorIf(True, constants.CV_ENODESSH, node,
2392 "ssh communication with node '%s': %s", a_node, a_msg)
2394 test = constants.NV_NODENETTEST not in nresult
2395 _ErrorIf(test, constants.CV_ENODENET, node,
2396 "node hasn't returned node tcp connectivity data")
2398 if nresult[constants.NV_NODENETTEST]:
2399 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2401 _ErrorIf(True, constants.CV_ENODENET, node,
2402 "tcp communication with node '%s': %s",
2403 anode, nresult[constants.NV_NODENETTEST][anode])
2405 test = constants.NV_MASTERIP not in nresult
2406 _ErrorIf(test, constants.CV_ENODENET, node,
2407 "node hasn't returned node master IP reachability data")
2409 if not nresult[constants.NV_MASTERIP]:
2410 if node == self.master_node:
2411 msg = "the master node cannot reach the master IP (not configured?)"
2413 msg = "cannot reach the master IP"
2414 _ErrorIf(True, constants.CV_ENODENET, node, msg)
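# Illustrative only: nresult[constants.NV_NODELIST] and
# nresult[constants.NV_NODENETTEST] are dictionaries mapping the names of
# unreachable nodes to error messages; an empty dictionary therefore means all
# SSH (respectively TCP) checks performed from this node succeeded.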
2416 def _VerifyInstance(self, instance, instanceconfig, node_image,
2418 """Verify an instance.
2420 This function checks to see if the required block devices are
2421 available on the instance's node.
2424 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2425 node_current = instanceconfig.primary_node
2427 node_vol_should = {}
2428 instanceconfig.MapLVsByNode(node_vol_should)
2430 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2431 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2432 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2434 for node in node_vol_should:
2435 n_img = node_image[node]
2436 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2437 # ignore missing volumes on offline or broken nodes
2439 for volume in node_vol_should[node]:
2440 test = volume not in n_img.volumes
2441 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2442 "volume %s missing on node %s", volume, node)
2444 if instanceconfig.admin_state == constants.ADMINST_UP:
2445 pri_img = node_image[node_current]
2446 test = instance not in pri_img.instances and not pri_img.offline
2447 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2448 "instance not running on its primary node %s",
2451 diskdata = [(nname, success, status, idx)
2452 for (nname, disks) in diskstatus.items()
2453 for idx, (success, status) in enumerate(disks)]
2455 for nname, success, bdev_status, idx in diskdata:
2456 # the 'ghost node' construction in Exec() ensures that we have a node image entry here
2458 snode = node_image[nname]
2459 bad_snode = snode.ghost or snode.offline
2460 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2461 not success and not bad_snode,
2462 constants.CV_EINSTANCEFAULTYDISK, instance,
2463 "couldn't retrieve status for disk/%s on %s: %s",
2464 idx, nname, bdev_status)
2465 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2466 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2467 constants.CV_EINSTANCEFAULTYDISK, instance,
2468 "disk/%s on %s is faulty", idx, nname)
2470 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2471 """Verify if there are any unknown volumes in the cluster.
2473 The .os, .swap and backup volumes are ignored. All other volumes are
2474 reported as unknown.
2476 @type reserved: L{ganeti.utils.FieldSet}
2477 @param reserved: a FieldSet of reserved volume names
2480 for node, n_img in node_image.items():
2481 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2482 self.all_node_info[node].group != self.group_uuid):
2483 # skip non-healthy nodes
2485 for volume in n_img.volumes:
2486 test = ((node not in node_vol_should or
2487 volume not in node_vol_should[node]) and
2488 not reserved.Matches(volume))
2489 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2490 "volume %s is unknown", volume)
2492 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2493 """Verify N+1 Memory Resilience.
2495 Check that if one single node dies we can still start all the
2496 instances it was primary for.
2499 cluster_info = self.cfg.GetClusterInfo()
2500 for node, n_img in node_image.items():
2501 # This code checks that every node which is now listed as
2502 # secondary has enough memory to host all instances it is
2503 # supposed to, should a single other node in the cluster fail.
2504 # FIXME: not ready for failover to an arbitrary node
2505 # FIXME: does not support file-backed instances
2506 # WARNING: we currently take into account down instances as well
2507 # as up ones, considering that even if they're down someone
2508 # might want to start them even in the event of a node failure.
2509 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2510 # we're skipping nodes marked offline and nodes in other groups from
2511 # the N+1 warning, since most likely we don't have good memory
2512 # information from them; we already list instances living on such
2513 # nodes, and that's enough warning
2515 #TODO(dynmem): also consider ballooning out other instances
2516 for prinode, instances in n_img.sbp.items():
2518 for instance in instances:
2519 bep = cluster_info.FillBE(instance_cfg[instance])
2520 if bep[constants.BE_AUTO_BALANCE]:
2521 needed_mem += bep[constants.BE_MINMEM]
2522 test = n_img.mfree < needed_mem
2523 self._ErrorIf(test, constants.CV_ENODEN1, node,
2524 "not enough memory to accomodate instance failovers"
2525 " should node %s fail (%dMiB needed, %dMiB available)",
2526 prinode, needed_mem, n_img.mfree)
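# Worked example (numbers are illustrative): if this node is secondary for two
# auto-balanced instances of prinode with BE_MINMEM values of 1024 and
# 2048 MiB, needed_mem is 3072 MiB and an N+1 error is reported whenever the
# node's reported free memory (mfree) is below that.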
2529 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2530 (files_all, files_opt, files_mc, files_vm)):
2531 """Verifies file checksums collected from all nodes.
2533 @param errorif: Callback for reporting errors
2534 @param nodeinfo: List of L{objects.Node} objects
2535 @param master_node: Name of master node
2536 @param all_nvinfo: RPC results
2539 # Define functions determining which nodes to consider for a file
2542 (files_mc, lambda node: (node.master_candidate or
2543 node.name == master_node)),
2544 (files_vm, lambda node: node.vm_capable),
2547 # Build mapping from filename to list of nodes which should have the file
2549 for (files, fn) in files2nodefn:
2551 filenodes = nodeinfo
2553 filenodes = filter(fn, nodeinfo)
2554 nodefiles.update((filename,
2555 frozenset(map(operator.attrgetter("name"), filenodes)))
2556 for filename in files)
2558 assert set(nodefiles) == (files_all | files_mc | files_vm)
2560 fileinfo = dict((filename, {}) for filename in nodefiles)
2561 ignore_nodes = set()
2563 for node in nodeinfo:
2565 ignore_nodes.add(node.name)
2568 nresult = all_nvinfo[node.name]
2570 if nresult.fail_msg or not nresult.payload:
2573 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2575 test = not (node_files and isinstance(node_files, dict))
2576 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2577 "Node did not return file checksum data")
2579 ignore_nodes.add(node.name)
2582 # Build per-checksum mapping from filename to nodes having it
2583 for (filename, checksum) in node_files.items():
2584 assert filename in nodefiles
2585 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2587 for (filename, checksums) in fileinfo.items():
2588 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2590 # Nodes having the file
2591 with_file = frozenset(node_name
2592 for nodes in fileinfo[filename].values()
2593 for node_name in nodes) - ignore_nodes
2595 expected_nodes = nodefiles[filename] - ignore_nodes
2597 # Nodes missing file
2598 missing_file = expected_nodes - with_file
2600 if filename in files_opt:
2602 errorif(missing_file and missing_file != expected_nodes,
2603 constants.CV_ECLUSTERFILECHECK, None,
2604 "File %s is optional, but it must exist on all or no"
2605 " nodes (not found on %s)",
2606 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2608 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2609 "File %s is missing from node(s) %s", filename,
2610 utils.CommaJoin(utils.NiceSort(missing_file)))
2612 # Warn if a node has a file it shouldn't
2613 unexpected = with_file - expected_nodes
2615 constants.CV_ECLUSTERFILECHECK, None,
2616 "File %s should not exist on node(s) %s",
2617 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2619 # See if there are multiple versions of the file
2620 test = len(checksums) > 1
2622 variants = ["variant %s on %s" %
2623 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2624 for (idx, (checksum, nodes)) in
2625 enumerate(sorted(checksums.items()))]
2629 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2630 "File %s found with %s different checksums (%s)",
2631 filename, len(checksums), "; ".join(variants))
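# Illustrative summary of the categories checked above: files_all must exist
# on every node, files_mc on master candidates (plus the master itself),
# files_vm on vm_capable nodes, and files_opt entries are optional but, when
# present, must exist either on all expected nodes or on none.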
2633 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2635 """Verifies and the node DRBD status.
2637 @type ninfo: L{objects.Node}
2638 @param ninfo: the node to check
2639 @param nresult: the remote results for the node
2640 @param instanceinfo: the dict of instances
2641 @param drbd_helper: the configured DRBD usermode helper
2642 @param drbd_map: the DRBD map as returned by
2643 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2647 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2650 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2651 test = (helper_result is None)
2652 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2653 "no drbd usermode helper returned")
2655 status, payload = helper_result
2657 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2658 "drbd usermode helper check unsuccessful: %s", payload)
2659 test = status and (payload != drbd_helper)
2660 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2661 "wrong drbd usermode helper: %s", payload)
2663 # compute the DRBD minors
2665 for minor, instance in drbd_map[node].items():
2666 test = instance not in instanceinfo
2667 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2668 "ghost instance '%s' in temporary DRBD map", instance)
2669 # ghost instance should not be running, but otherwise we
2670 # don't give double warnings (both ghost instance and
2671 # unallocated minor in use)
2673 node_drbd[minor] = (instance, False)
2675 instance = instanceinfo[instance]
2676 node_drbd[minor] = (instance.name,
2677 instance.admin_state == constants.ADMINST_UP)
2679 # and now check them
2680 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2681 test = not isinstance(used_minors, (tuple, list))
2682 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2683 "cannot parse drbd status file: %s", str(used_minors))
2685 # we cannot check drbd status
2688 for minor, (iname, must_exist) in node_drbd.items():
2689 test = minor not in used_minors and must_exist
2690 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2691 "drbd minor %d of instance %s is not active", minor, iname)
2692 for minor in used_minors:
2693 test = minor not in node_drbd
2694 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2695 "unallocated drbd minor %d is in use", minor)
2697 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2698 """Builds the node OS structures.
2700 @type ninfo: L{objects.Node}
2701 @param ninfo: the node to check
2702 @param nresult: the remote results for the node
2703 @param nimg: the node image object
2707 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2709 remote_os = nresult.get(constants.NV_OSLIST, None)
2710 test = (not isinstance(remote_os, list) or
2711 not compat.all(isinstance(v, list) and len(v) == 7
2712 for v in remote_os))
2714 _ErrorIf(test, constants.CV_ENODEOS, node,
2715 "node hasn't returned valid OS data")
2724 for (name, os_path, status, diagnose,
2725 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2727 if name not in os_dict:
2730 # parameters is a list of lists instead of a list of tuples due to
2731 # JSON lacking a real tuple type, fix it:
2732 parameters = [tuple(v) for v in parameters]
2733 os_dict[name].append((os_path, status, diagnose,
2734 set(variants), set(parameters), set(api_ver)))
2736 nimg.oslist = os_dict
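# Illustrative only: after this point nimg.oslist maps each OS name to a list
# of (path, status, diagnose, variants, parameters, api_versions) tuples,
# roughly
#   {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
#                     set(["default"]), set(), set([20]))]}
# (the path, variant and API version shown here are made up).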
2738 def _VerifyNodeOS(self, ninfo, nimg, base):
2739 """Verifies the node OS list.
2741 @type ninfo: L{objects.Node}
2742 @param ninfo: the node to check
2743 @param nimg: the node image object
2744 @param base: the 'template' node we match against (e.g. from the master)
2748 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2750 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2752 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2753 for os_name, os_data in nimg.oslist.items():
2754 assert os_data, "Empty OS status for OS %s?!" % os_name
2755 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2756 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2757 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2758 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2759 "OS '%s' has multiple entries (first one shadows the rest): %s",
2760 os_name, utils.CommaJoin([v[0] for v in os_data]))
2761 # comparisons with the 'base' image
2762 test = os_name not in base.oslist
2763 _ErrorIf(test, constants.CV_ENODEOS, node,
2764 "Extra OS %s not present on reference node (%s)",
2768 assert base.oslist[os_name], "Base node has empty OS status?"
2769 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2771 # base OS is invalid, skipping
2773 for kind, a, b in [("API version", f_api, b_api),
2774 ("variants list", f_var, b_var),
2775 ("parameters", beautify_params(f_param),
2776 beautify_params(b_param))]:
2777 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2778 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2779 kind, os_name, base.name,
2780 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2782 # check any missing OSes
2783 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2784 _ErrorIf(missing, constants.CV_ENODEOS, node,
2785 "OSes present on reference node %s but missing on this node: %s",
2786 base.name, utils.CommaJoin(missing))
2788 def _VerifyOob(self, ninfo, nresult):
2789 """Verifies out of band functionality of a node.
2791 @type ninfo: L{objects.Node}
2792 @param ninfo: the node to check
2793 @param nresult: the remote results for the node
2797 # We just have to verify the paths on master and/or master candidates
2798 # as the oob helper is invoked on the master
2799 if ((ninfo.master_candidate or ninfo.master_capable) and
2800 constants.NV_OOB_PATHS in nresult):
2801 for path_result in nresult[constants.NV_OOB_PATHS]:
2802 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2804 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2805 """Verifies and updates the node volume data.
2807 This function will update a L{NodeImage}'s internal structures
2808 with data from the remote call.
2810 @type ninfo: L{objects.Node}
2811 @param ninfo: the node to check
2812 @param nresult: the remote results for the node
2813 @param nimg: the node image object
2814 @param vg_name: the configured VG name
2818 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2820 nimg.lvm_fail = True
2821 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2824 elif isinstance(lvdata, basestring):
2825 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2826 utils.SafeEncode(lvdata))
2827 elif not isinstance(lvdata, dict):
2828 _ErrorIf(True, constants.CV_ENODELVM, node,
2829 "rpc call to node failed (lvlist)")
2831 nimg.volumes = lvdata
2832 nimg.lvm_fail = False
2834 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2835 """Verifies and updates the node instance list.
2837 If the listing was successful, then updates this node's instance
2838 list. Otherwise, it marks the RPC call as failed for the instance list.
2841 @type ninfo: L{objects.Node}
2842 @param ninfo: the node to check
2843 @param nresult: the remote results for the node
2844 @param nimg: the node image object
2847 idata = nresult.get(constants.NV_INSTANCELIST, None)
2848 test = not isinstance(idata, list)
2849 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2850 "rpc call to node failed (instancelist): %s",
2851 utils.SafeEncode(str(idata)))
2853 nimg.hyp_fail = True
2855 nimg.instances = idata
2857 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2858 """Verifies and computes a node information map
2860 @type ninfo: L{objects.Node}
2861 @param ninfo: the node to check
2862 @param nresult: the remote results for the node
2863 @param nimg: the node image object
2864 @param vg_name: the configured VG name
2868 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2870 # try to read free memory (from the hypervisor)
2871 hv_info = nresult.get(constants.NV_HVINFO, None)
2872 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2873 _ErrorIf(test, constants.CV_ENODEHV, node,
2874 "rpc call to node failed (hvinfo)")
2877 nimg.mfree = int(hv_info["memory_free"])
2878 except (ValueError, TypeError):
2879 _ErrorIf(True, constants.CV_ENODERPC, node,
2880 "node returned invalid nodeinfo, check hypervisor")
2882 # FIXME: devise a free space model for file based instances as well
2883 if vg_name is not None:
2884 test = (constants.NV_VGLIST not in nresult or
2885 vg_name not in nresult[constants.NV_VGLIST])
2886 _ErrorIf(test, constants.CV_ENODELVM, node,
2887 "node didn't return data for the volume group '%s'"
2888 " - it is either missing or broken", vg_name)
2891 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2892 except (ValueError, TypeError):
2893 _ErrorIf(True, constants.CV_ENODERPC, node,
2894 "node returned invalid LVM info, check LVM status")
2896 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2897 """Gets per-disk status information for all instances.
2899 @type nodelist: list of strings
2900 @param nodelist: Node names
2901 @type node_image: dict of (name, L{objects.Node})
2902 @param node_image: Node objects
2903 @type instanceinfo: dict of (name, L{objects.Instance})
2904 @param instanceinfo: Instance objects
2905 @rtype: {instance: {node: [(success, payload)]}}
2906 @return: a dictionary of per-instance dictionaries with nodes as
2907 keys and disk information as values; the disk information is a
2908 list of tuples (success, payload)
2911 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2914 node_disks_devonly = {}
2915 diskless_instances = set()
2916 diskless = constants.DT_DISKLESS
2918 for nname in nodelist:
2919 node_instances = list(itertools.chain(node_image[nname].pinst,
2920 node_image[nname].sinst))
2921 diskless_instances.update(inst for inst in node_instances
2922 if instanceinfo[inst].disk_template == diskless)
2923 disks = [(inst, disk)
2924 for inst in node_instances
2925 for disk in instanceinfo[inst].disks]
2928 # No need to collect data
2931 node_disks[nname] = disks
2933 # _AnnotateDiskParams makes already copies of the disks
2935 for (inst, dev) in disks:
2936 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2937 self.cfg.SetDiskID(anno_disk, nname)
2938 devonly.append(anno_disk)
2940 node_disks_devonly[nname] = devonly
2942 assert len(node_disks) == len(node_disks_devonly)
2944 # Collect data from all nodes with disks
2945 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2948 assert len(result) == len(node_disks)
2952 for (nname, nres) in result.items():
2953 disks = node_disks[nname]
2956 # No data from this node
2957 data = len(disks) * [(False, "node offline")]
2960 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2961 "while getting disk information: %s", msg)
2963 # No data from this node
2964 data = len(disks) * [(False, msg)]
2967 for idx, i in enumerate(nres.payload):
2968 if isinstance(i, (tuple, list)) and len(i) == 2:
2971 logging.warning("Invalid result from node %s, entry %d: %s",
2973 data.append((False, "Invalid result from the remote node"))
2975 for ((inst, _), status) in zip(disks, data):
2976 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2978 # Add empty entries for diskless instances.
2979 for inst in diskless_instances:
2980 assert inst not in instdisk
2983 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2984 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2985 compat.all(isinstance(s, (tuple, list)) and
2986 len(s) == 2 for s in statuses)
2987 for inst, nnames in instdisk.items()
2988 for nname, statuses in nnames.items())
2989 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
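# Illustrative only: the instdisk structure assembled above looks roughly like
#   {"inst1.example.com": {"node1.example.com": [(True, <status of disk/0>),
#                                                (False, "node offline")]}}
# with one (success, payload) pair per disk and per node (names made up).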
2994 def _SshNodeSelector(group_uuid, all_nodes):
2995 """Create endless iterators for all potential SSH check hosts.
2998 nodes = [node for node in all_nodes
2999 if (node.group != group_uuid and
3001 keyfunc = operator.attrgetter("group")
3003 return map(itertools.cycle,
3004 [sorted(map(operator.attrgetter("name"), names))
3005 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3009 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3010 """Choose which nodes should talk to which other nodes.
3012 We will make nodes contact all nodes in their group, and one node from
3015 @warning: This algorithm has a known issue if one node group is much
3016 smaller than others (e.g. just one node). In such a case all other
3017 nodes will talk to the single node.
3020 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3021 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3023 return (online_nodes,
3024 dict((name, sorted([i.next() for i in sel]))
3025 for name in online_nodes))
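# Illustrative only: the value returned above pairs this group's online node
# names with, for each of them, one node picked from every other group, e.g.
#   (["node1", "node2"],
#    {"node1": ["othergroup-nodeA"], "node2": ["othergroup-nodeB"]})
# (made-up names); cycling through each foreign group spreads the SSH checks
# across its members.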
3027 def BuildHooksEnv(self):
3030 Cluster-Verify hooks run only in the post phase; if they fail, their
3031 output is logged in the verify output and the verification fails.
3035 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3038 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3039 for node in self.my_node_info.values())
3043 def BuildHooksNodes(self):
3044 """Build hooks nodes.
3047 return ([], self.my_node_names)
3049 def Exec(self, feedback_fn):
3050 """Verify integrity of the node group, performing various test on nodes.
3053 # This method has too many local variables. pylint: disable=R0914
3054 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3056 if not self.my_node_names:
3058 feedback_fn("* Empty node group, skipping verification")
3062 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3063 verbose = self.op.verbose
3064 self._feedback_fn = feedback_fn
3066 vg_name = self.cfg.GetVGName()
3067 drbd_helper = self.cfg.GetDRBDHelper()
3068 cluster = self.cfg.GetClusterInfo()
3069 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3070 hypervisors = cluster.enabled_hypervisors
3071 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3073 i_non_redundant = [] # Non redundant instances
3074 i_non_a_balanced = [] # Non auto-balanced instances
3075 i_offline = 0 # Count of offline instances
3076 n_offline = 0 # Count of offline nodes
3077 n_drained = 0 # Count of nodes being drained
3078 node_vol_should = {}
3080 # FIXME: verify OS list
3083 filemap = _ComputeAncillaryFiles(cluster, False)
3085 # do local checksums
3086 master_node = self.master_node = self.cfg.GetMasterNode()
3087 master_ip = self.cfg.GetMasterIP()
3089 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3092 if self.cfg.GetUseExternalMipScript():
3093 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3095 node_verify_param = {
3096 constants.NV_FILELIST:
3097 utils.UniqueSequence(filename
3098 for files in filemap
3099 for filename in files),
3100 constants.NV_NODELIST:
3101 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3102 self.all_node_info.values()),
3103 constants.NV_HYPERVISOR: hypervisors,
3104 constants.NV_HVPARAMS:
3105 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3106 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3107 for node in node_data_list
3108 if not node.offline],
3109 constants.NV_INSTANCELIST: hypervisors,
3110 constants.NV_VERSION: None,
3111 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3112 constants.NV_NODESETUP: None,
3113 constants.NV_TIME: None,
3114 constants.NV_MASTERIP: (master_node, master_ip),
3115 constants.NV_OSLIST: None,
3116 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3117 constants.NV_USERSCRIPTS: user_scripts,
3120 if vg_name is not None:
3121 node_verify_param[constants.NV_VGLIST] = None
3122 node_verify_param[constants.NV_LVLIST] = vg_name
3123 node_verify_param[constants.NV_PVLIST] = [vg_name]
3124 node_verify_param[constants.NV_DRBDLIST] = None
3127 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3130 # FIXME: this needs to be changed per node-group, not cluster-wide
3132 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3133 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3134 bridges.add(default_nicpp[constants.NIC_LINK])
3135 for instance in self.my_inst_info.values():
3136 for nic in instance.nics:
3137 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3138 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3139 bridges.add(full_nic[constants.NIC_LINK])
3142 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3144 # Build our expected cluster state
3145 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3147 vm_capable=node.vm_capable))
3148 for node in node_data_list)
3152 for node in self.all_node_info.values():
3153 path = _SupportsOob(self.cfg, node)
3154 if path and path not in oob_paths:
3155 oob_paths.append(path)
3158 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3160 for instance in self.my_inst_names:
3161 inst_config = self.my_inst_info[instance]
3162 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3165 for nname in inst_config.all_nodes:
3166 if nname not in node_image:
3167 gnode = self.NodeImage(name=nname)
3168 gnode.ghost = (nname not in self.all_node_info)
3169 node_image[nname] = gnode
3171 inst_config.MapLVsByNode(node_vol_should)
3173 pnode = inst_config.primary_node
3174 node_image[pnode].pinst.append(instance)
3176 for snode in inst_config.secondary_nodes:
3177 nimg = node_image[snode]
3178 nimg.sinst.append(instance)
3179 if pnode not in nimg.sbp:
3180 nimg.sbp[pnode] = []
3181 nimg.sbp[pnode].append(instance)
3183 # At this point, we have the in-memory data structures complete,
3184 # except for the runtime information, which we'll gather next
3186 # Due to the way our RPC system works, exact response times cannot be
3187 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3188 # time before and after executing the request, we can at least have a time window.
3190 nvinfo_starttime = time.time()
3191 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3193 self.cfg.GetClusterName())
3194 nvinfo_endtime = time.time()
3196 if self.extra_lv_nodes and vg_name is not None:
3198 self.rpc.call_node_verify(self.extra_lv_nodes,
3199 {constants.NV_LVLIST: vg_name},
3200 self.cfg.GetClusterName())
3202 extra_lv_nvinfo = {}
3204 all_drbd_map = self.cfg.ComputeDRBDMap()
3206 feedback_fn("* Gathering disk information (%s nodes)" %
3207 len(self.my_node_names))
3208 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3211 feedback_fn("* Verifying configuration file consistency")
3213 # If not all nodes are being checked, we need to make sure the master node
3214 # and a non-checked vm_capable node are in the list.
3215 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3217 vf_nvinfo = all_nvinfo.copy()
3218 vf_node_info = list(self.my_node_info.values())
3219 additional_nodes = []
3220 if master_node not in self.my_node_info:
3221 additional_nodes.append(master_node)
3222 vf_node_info.append(self.all_node_info[master_node])
3223 # Add the first vm_capable node we find which is not included,
3224 # excluding the master node (which we already have)
3225 for node in absent_nodes:
3226 nodeinfo = self.all_node_info[node]
3227 if (nodeinfo.vm_capable and not nodeinfo.offline and
3228 node != master_node):
3229 additional_nodes.append(node)
3230 vf_node_info.append(self.all_node_info[node])
3232 key = constants.NV_FILELIST
3233 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3234 {key: node_verify_param[key]},
3235 self.cfg.GetClusterName()))
3237 vf_nvinfo = all_nvinfo
3238 vf_node_info = self.my_node_info.values()
3240 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3242 feedback_fn("* Verifying node status")
3246 for node_i in node_data_list:
3248 nimg = node_image[node]
3252 feedback_fn("* Skipping offline node %s" % (node,))
3256 if node == master_node:
3258 elif node_i.master_candidate:
3259 ntype = "master candidate"
3260 elif node_i.drained:
3266 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3268 msg = all_nvinfo[node].fail_msg
3269 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3272 nimg.rpc_fail = True
3275 nresult = all_nvinfo[node].payload
3277 nimg.call_ok = self._VerifyNode(node_i, nresult)
3278 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3279 self._VerifyNodeNetwork(node_i, nresult)
3280 self._VerifyNodeUserScripts(node_i, nresult)
3281 self._VerifyOob(node_i, nresult)
3284 self._VerifyNodeLVM(node_i, nresult, vg_name)
3285 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3288 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3289 self._UpdateNodeInstances(node_i, nresult, nimg)
3290 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3291 self._UpdateNodeOS(node_i, nresult, nimg)
3293 if not nimg.os_fail:
3294 if refos_img is None:
3296 self._VerifyNodeOS(node_i, nimg, refos_img)
3297 self._VerifyNodeBridges(node_i, nresult, bridges)
3299 # Check whether all running instances are primary for the node. (This
3300 # can no longer be done from _VerifyInstance below, since some of the
3301 # wrong instances could be from other node groups.)
3302 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3304 for inst in non_primary_inst:
3305 test = inst in self.all_inst_info
3306 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3307 "instance should not run on node %s", node_i.name)
3308 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3309 "node is running unknown instance %s", inst)
3311 for node, result in extra_lv_nvinfo.items():
3312 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3313 node_image[node], vg_name)
3315 feedback_fn("* Verifying instance status")
3316 for instance in self.my_inst_names:
3318 feedback_fn("* Verifying instance %s" % instance)
3319 inst_config = self.my_inst_info[instance]
3320 self._VerifyInstance(instance, inst_config, node_image,
3322 inst_nodes_offline = []
3324 pnode = inst_config.primary_node
3325 pnode_img = node_image[pnode]
3326 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3327 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3328 " primary node failed", instance)
3330 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3332 constants.CV_EINSTANCEBADNODE, instance,
3333 "instance is marked as running and lives on offline node %s",
3334 inst_config.primary_node)
3336 # If the instance is non-redundant we cannot survive losing its primary
3337 # node, so we are not N+1 compliant. On the other hand we have no disk
3338 # templates with more than one secondary, so that situation is not well handled either.
3340 # FIXME: does not support file-backed instances
3341 if not inst_config.secondary_nodes:
3342 i_non_redundant.append(instance)
3344 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3345 constants.CV_EINSTANCELAYOUT,
3346 instance, "instance has multiple secondary nodes: %s",
3347 utils.CommaJoin(inst_config.secondary_nodes),
3348 code=self.ETYPE_WARNING)
3350 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3351 pnode = inst_config.primary_node
3352 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3353 instance_groups = {}
3355 for node in instance_nodes:
3356 instance_groups.setdefault(self.all_node_info[node].group,
3360 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3361 # Sort so that we always list the primary node first.
3362 for group, nodes in sorted(instance_groups.items(),
3363 key=lambda (_, nodes): pnode in nodes,
3366 self._ErrorIf(len(instance_groups) > 1,
3367 constants.CV_EINSTANCESPLITGROUPS,
3368 instance, "instance has primary and secondary nodes in"
3369 " different groups: %s", utils.CommaJoin(pretty_list),
3370 code=self.ETYPE_WARNING)
3372 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3373 i_non_a_balanced.append(instance)
3375 for snode in inst_config.secondary_nodes:
3376 s_img = node_image[snode]
3377 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3378 snode, "instance %s, connection to secondary node failed",
3382 inst_nodes_offline.append(snode)
3384 # warn that the instance lives on offline nodes
3385 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3386 "instance has offline secondary node(s) %s",
3387 utils.CommaJoin(inst_nodes_offline))
3388 # ... or ghost/non-vm_capable nodes
3389 for node in inst_config.all_nodes:
3390 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3391 instance, "instance lives on ghost node %s", node)
3392 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3393 instance, "instance lives on non-vm_capable node %s", node)
3395 feedback_fn("* Verifying orphan volumes")
3396 reserved = utils.FieldSet(*cluster.reserved_lvs)
3398 # We will get spurious "unknown volume" warnings if any node of this group
3399 # is secondary for an instance whose primary is in another group. To avoid
3400 # them, we find these instances and add their volumes to node_vol_should.
3401 for inst in self.all_inst_info.values():
3402 for secondary in inst.secondary_nodes:
3403 if (secondary in self.my_node_info
3404 and inst.name not in self.my_inst_info):
3405 inst.MapLVsByNode(node_vol_should)
3408 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3410 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3411 feedback_fn("* Verifying N+1 Memory redundancy")
3412 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3414 feedback_fn("* Other Notes")
3416 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3417 % len(i_non_redundant))
3419 if i_non_a_balanced:
3420 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3421 % len(i_non_a_balanced))
3424 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3427 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3430 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3434 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3435 """Analyze the post-hooks' result
3437 This method analyses the hook result, handles it, and sends some
3438 nicely-formatted feedback back to the user.
3440 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3441 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3442 @param hooks_results: the results of the multi-node hooks rpc call
3443 @param feedback_fn: function used to send feedback back to the caller
3444 @param lu_result: previous Exec result
3445 @return: the new Exec result, based on the previous result
3449 # We only really run POST phase hooks, only for non-empty groups,
3450 # and are only interested in their results
3451 if not self.my_node_names:
3454 elif phase == constants.HOOKS_PHASE_POST:
3455 # Used to change hooks' output to proper indentation
3456 feedback_fn("* Hooks Results")
3457 assert hooks_results, "invalid result from hooks"
3459 for node_name in hooks_results:
3460 res = hooks_results[node_name]
3462 test = msg and not res.offline
3463 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3464 "Communication failure in hooks execution: %s", msg)
3465 if res.offline or msg:
3466 # No need to investigate the payload if the node is offline or gave an error
3469 for script, hkr, output in res.payload:
3470 test = hkr == constants.HKR_FAIL
3471 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3472 "Script %s failed, output:", script)
3474 output = self._HOOKS_INDENT_RE.sub(" ", output)
3475 feedback_fn("%s" % output)
3481 class LUClusterVerifyDisks(NoHooksLU):
3482 """Verifies the cluster disks status.
3487 def ExpandNames(self):
3488 self.share_locks = _ShareAll()
3489 self.needed_locks = {
3490 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3493 def Exec(self, feedback_fn):
3494 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3496 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3497 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3498 for group in group_names])
3501 class LUGroupVerifyDisks(NoHooksLU):
3502 """Verifies the status of all disks in a node group.
3507 def ExpandNames(self):
3508 # Raises errors.OpPrereqError on its own if group can't be found
3509 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3511 self.share_locks = _ShareAll()
3512 self.needed_locks = {
3513 locking.LEVEL_INSTANCE: [],
3514 locking.LEVEL_NODEGROUP: [],
3515 locking.LEVEL_NODE: [],
3518 def DeclareLocks(self, level):
3519 if level == locking.LEVEL_INSTANCE:
3520 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3522 # Lock instances optimistically, needs verification once node and group
3523 # locks have been acquired
3524 self.needed_locks[locking.LEVEL_INSTANCE] = \
3525 self.cfg.GetNodeGroupInstances(self.group_uuid)
3527 elif level == locking.LEVEL_NODEGROUP:
3528 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3530 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3531 set([self.group_uuid] +
3532 # Lock all groups used by instances optimistically; this requires
3533 # going via the node before it's locked, requiring verification
3536 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3537 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3539 elif level == locking.LEVEL_NODE:
3540 # This will only lock the nodes in the group to be verified which contain actual instances
3542 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3543 self._LockInstancesNodes()
3545 # Lock all nodes in group to be verified
3546 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3547 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3548 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3550 def CheckPrereq(self):
3551 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3552 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3553 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3555 assert self.group_uuid in owned_groups
3557 # Check if locked instances are still correct
3558 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3560 # Get instance information
3561 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3563 # Check if node groups for locked instances are still correct
3564 _CheckInstancesNodeGroups(self.cfg, self.instances,
3565 owned_groups, owned_nodes, self.group_uuid)
3567 def Exec(self, feedback_fn):
3568 """Verify integrity of cluster disks.
3570 @rtype: tuple of three items
3571 @return: a tuple of (dict of node-to-node_error, list of instances
3572 which need activate-disks, dict of instance: (node, volume) for
3577 res_instances = set()
3580 nv_dict = _MapInstanceDisksToNodes(
3581 [inst for inst in self.instances.values()
3582 if inst.admin_state == constants.ADMINST_UP])
3585 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3586 set(self.cfg.GetVmCapableNodeList()))
3588 node_lvs = self.rpc.call_lv_list(nodes, [])
3590 for (node, node_res) in node_lvs.items():
3591 if node_res.offline:
3594 msg = node_res.fail_msg
3596 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3597 res_nodes[node] = msg
3600 for lv_name, (_, _, lv_online) in node_res.payload.items():
3601 inst = nv_dict.pop((node, lv_name), None)
3602 if not (lv_online or inst is None):
3603 res_instances.add(inst)
3605 # any leftover items in nv_dict are missing LVs, let's arrange the data better
3607 for key, inst in nv_dict.iteritems():
3608 res_missing.setdefault(inst, []).append(list(key))
3610 return (res_nodes, list(res_instances), res_missing)
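# Illustrative only: the tuple returned above might look like
#   ({"node1.example.com": "Error enumerating LVs: ..."},
#    ["inst1.example.com"],
#    {"inst2.example.com": [["node2.example.com", "xenvg/disk0"]]})
# i.e. per-node errors, instances needing activate-disks, and per-instance
# lists of (node, volume) pairs for missing LVs (all names made up).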
3613 class LUClusterRepairDiskSizes(NoHooksLU):
3614 """Verifies the cluster disks sizes.
3619 def ExpandNames(self):
3620 if self.op.instances:
3621 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3622 self.needed_locks = {
3623 locking.LEVEL_NODE_RES: [],
3624 locking.LEVEL_INSTANCE: self.wanted_names,
3626 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3628 self.wanted_names = None
3629 self.needed_locks = {
3630 locking.LEVEL_NODE_RES: locking.ALL_SET,
3631 locking.LEVEL_INSTANCE: locking.ALL_SET,
3633 self.share_locks = {
3634 locking.LEVEL_NODE_RES: 1,
3635 locking.LEVEL_INSTANCE: 0,
3638 def DeclareLocks(self, level):
3639 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3640 self._LockInstancesNodes(primary_only=True, level=level)
3642 def CheckPrereq(self):
3643 """Check prerequisites.
3645 This only checks the optional instance list against the existing names.
3648 if self.wanted_names is None:
3649 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3651 self.wanted_instances = \
3652 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3654 def _EnsureChildSizes(self, disk):
3655 """Ensure children of the disk have the needed disk size.
3657 This is valid mainly for DRBD8 and fixes an issue where the
3658 children have a smaller disk size.
3660 @param disk: an L{ganeti.objects.Disk} object
3663 if disk.dev_type == constants.LD_DRBD8:
3664 assert disk.children, "Empty children for DRBD8?"
3665 fchild = disk.children[0]
3666 mismatch = fchild.size < disk.size
3668 self.LogInfo("Child disk has size %d, parent %d, fixing",
3669 fchild.size, disk.size)
3670 fchild.size = disk.size
3672 # and we recurse on this child only, not on the metadev
3673 return self._EnsureChildSizes(fchild) or mismatch
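# Worked example for the method above (sizes are illustrative): given a DRBD8
# disk of 10240 MiB whose data child is recorded at 10236 MiB, the child is
# bumped to 10240 MiB and True is returned so the caller knows the
# configuration needs to be written back.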
3677 def Exec(self, feedback_fn):
3678 """Verify the size of cluster disks.
3681 # TODO: check child disks too
3682 # TODO: check differences in size between primary/secondary nodes
3684 for instance in self.wanted_instances:
3685 pnode = instance.primary_node
3686 if pnode not in per_node_disks:
3687 per_node_disks[pnode] = []
3688 for idx, disk in enumerate(instance.disks):
3689 per_node_disks[pnode].append((instance, idx, disk))
3691 assert not (frozenset(per_node_disks.keys()) -
3692 self.owned_locks(locking.LEVEL_NODE_RES)), \
3693 "Not owning correct locks"
3694 assert not self.owned_locks(locking.LEVEL_NODE)
3697 for node, dskl in per_node_disks.items():
3698 newl = [v[2].Copy() for v in dskl]
3700 self.cfg.SetDiskID(dsk, node)
3701 result = self.rpc.call_blockdev_getsize(node, newl)
3703 self.LogWarning("Failure in blockdev_getsize call to node"
3704 " %s, ignoring", node)
3706 if len(result.payload) != len(dskl):
3707 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3708 " result.payload=%s", node, len(dskl), result.payload)
3709 self.LogWarning("Invalid result from node %s, ignoring node results",
3712 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3714 self.LogWarning("Disk %d of instance %s did not return size"
3715 " information, ignoring", idx, instance.name)
3717 if not isinstance(size, (int, long)):
3718 self.LogWarning("Disk %d of instance %s did not return valid"
3719 " size information, ignoring", idx, instance.name)
3722 if size != disk.size:
3723 self.LogInfo("Disk %d of instance %s has mismatched size,"
3724 " correcting: recorded %d, actual %d", idx,
3725 instance.name, disk.size, size)
3727 self.cfg.Update(instance, feedback_fn)
3728 changed.append((instance.name, idx, size))
3729 if self._EnsureChildSizes(disk):
3730 self.cfg.Update(instance, feedback_fn)
3731 changed.append((instance.name, idx, disk.size))
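# Each entry collected in "changed" above is an (instance_name, disk_index,
# new_size) tuple, e.g. ("instance1.example.com", 0, 10240) in a hypothetical
# run, so the caller can report exactly which disk records were corrected.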
3735 class LUClusterRename(LogicalUnit):
3736 """Rename the cluster.
3739 HPATH = "cluster-rename"
3740 HTYPE = constants.HTYPE_CLUSTER
3742 def BuildHooksEnv(self):
3747 "OP_TARGET": self.cfg.GetClusterName(),
3748 "NEW_NAME": self.op.name,
3751 def BuildHooksNodes(self):
3752 """Build hooks nodes.
3755 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3757 def CheckPrereq(self):
3758 """Verify that the passed name is a valid one.
3761 hostname = netutils.GetHostname(name=self.op.name,
3762 family=self.cfg.GetPrimaryIPFamily())
3764 new_name = hostname.name
3765 self.ip = new_ip = hostname.ip
3766 old_name = self.cfg.GetClusterName()
3767 old_ip = self.cfg.GetMasterIP()
3768 if new_name == old_name and new_ip == old_ip:
3769 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3770 " cluster has changed",
3772 if new_ip != old_ip:
3773 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3774 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3775 " reachable on the network" %
3776 new_ip, errors.ECODE_NOTUNIQUE)
3778 self.op.name = new_name
3780 def Exec(self, feedback_fn):
3781 """Rename the cluster.
3784 clustername = self.op.name
3787 # shutdown the master IP
3788 master_params = self.cfg.GetMasterNetworkParameters()
3789 ems = self.cfg.GetUseExternalMipScript()
3790 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3792 result.Raise("Could not disable the master role")
3795 cluster = self.cfg.GetClusterInfo()
3796 cluster.cluster_name = clustername
3797 cluster.master_ip = new_ip
3798 self.cfg.Update(cluster, feedback_fn)
3800 # update the known hosts file
3801 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3802 node_list = self.cfg.GetOnlineNodeList()
3804 node_list.remove(master_params.name)
3807 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3809 master_params.ip = new_ip
3810 result = self.rpc.call_node_activate_master_ip(master_params.name,
3812 msg = result.fail_msg
3814 self.LogWarning("Could not re-enable the master role on"
3815 " the master, please restart manually: %s", msg)
3820 def _ValidateNetmask(cfg, netmask):
3821 """Checks if a netmask is valid.
3823 @type cfg: L{config.ConfigWriter}
3824 @param cfg: The cluster configuration
3826 @param netmask: the netmask to be verified
3827 @raise errors.OpPrereqError: if the validation fails
3830 ip_family = cfg.GetPrimaryIPFamily()
3832 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3833 except errors.ProgrammerError:
3834 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3835 ip_family, errors.ECODE_INVAL)
3836 if not ipcls.ValidateNetmask(netmask):
3837 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3838 (netmask), errors.ECODE_INVAL)
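# Usage sketch (assuming, as the call site below does, that the netmask is an
# integer CIDR prefix length): on an IPv4 cluster _ValidateNetmask(cfg, 24)
# returns silently, while a value such as 33 is rejected by
# netutils.IP4Address.ValidateNetmask and makes this helper raise
# errors.OpPrereqError.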
3841 class LUClusterSetParams(LogicalUnit):
3842 """Change the parameters of the cluster.
3845 HPATH = "cluster-modify"
3846 HTYPE = constants.HTYPE_CLUSTER
3849 def CheckArguments(self):
3853 if self.op.uid_pool:
3854 uidpool.CheckUidPool(self.op.uid_pool)
3856 if self.op.add_uids:
3857 uidpool.CheckUidPool(self.op.add_uids)
3859 if self.op.remove_uids:
3860 uidpool.CheckUidPool(self.op.remove_uids)
3862 if self.op.master_netmask is not None:
3863 _ValidateNetmask(self.cfg, self.op.master_netmask)
3865 if self.op.diskparams:
3866 for dt_params in self.op.diskparams.values():
3867 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3869 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3870 except errors.OpPrereqError, err:
3871 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
3874 def ExpandNames(self):
3875 # FIXME: in the future maybe other cluster params won't require checking on
3876 # all nodes to be modified.
3877 self.needed_locks = {
3878 locking.LEVEL_NODE: locking.ALL_SET,
3879 locking.LEVEL_INSTANCE: locking.ALL_SET,
3880 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3882 self.share_locks = {
3883 locking.LEVEL_NODE: 1,
3884 locking.LEVEL_INSTANCE: 1,
3885 locking.LEVEL_NODEGROUP: 1,
3888 def BuildHooksEnv(self):
3893 "OP_TARGET": self.cfg.GetClusterName(),
3894 "NEW_VG_NAME": self.op.vg_name,
3897 def BuildHooksNodes(self):
3898 """Build hooks nodes.
3901 mn = self.cfg.GetMasterNode()
3904 def CheckPrereq(self):
3905 """Check prerequisites.
3907 This checks whether the given params don't conflict and
3908 if the given volume group is valid.
3911 if self.op.vg_name is not None and not self.op.vg_name:
3912 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3913 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3914 " instances exist", errors.ECODE_INVAL)
3916 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3917 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3918 raise errors.OpPrereqError("Cannot disable drbd helper while"
3919 " drbd-based instances exist",
3922 node_list = self.owned_locks(locking.LEVEL_NODE)
3924 # if vg_name not None, checks given volume group on all nodes
3926 vglist = self.rpc.call_vg_list(node_list)
3927 for node in node_list:
3928 msg = vglist[node].fail_msg
3930 # ignoring down node
3931 self.LogWarning("Error while gathering data on node %s"
3932 " (ignoring node): %s", node, msg)
3934 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3936 constants.MIN_VG_SIZE)
3938 raise errors.OpPrereqError("Error on node '%s': %s" %
3939 (node, vgstatus), errors.ECODE_ENVIRON)
3941 if self.op.drbd_helper:
3942 # checks given drbd helper on all nodes
3943 helpers = self.rpc.call_drbd_helper(node_list)
3944 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3946 self.LogInfo("Not checking drbd helper on offline node %s", node)
3948 msg = helpers[node].fail_msg
3950 raise errors.OpPrereqError("Error checking drbd helper on node"
3951 " '%s': %s" % (node, msg),
3952 errors.ECODE_ENVIRON)
3953 node_helper = helpers[node].payload
3954 if node_helper != self.op.drbd_helper:
3955 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3956 (node, node_helper), errors.ECODE_ENVIRON)
3958 self.cluster = cluster = self.cfg.GetClusterInfo()
3959 # validate params changes
3960 if self.op.beparams:
3961 objects.UpgradeBeParams(self.op.beparams)
3962 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3963 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3965 if self.op.ndparams:
3966 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3967 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3969 # TODO: we need a more general way to handle resetting
3970 # cluster-level parameters to default values
3971 if self.new_ndparams["oob_program"] == "":
3972 self.new_ndparams["oob_program"] = \
3973 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3975 if self.op.hv_state:
3976 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3977 self.cluster.hv_state_static)
3978 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3979 for hv, values in new_hv_state.items())
3981 if self.op.disk_state:
3982 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3983 self.cluster.disk_state_static)
3984 self.new_disk_state = \
3985 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3986 for name, values in svalues.items()))
3987 for storage, svalues in new_disk_state.items())
3990 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3993 all_instances = self.cfg.GetAllInstancesInfo().values()
3995 for group in self.cfg.GetAllNodeGroupsInfo().values():
3996 instances = frozenset([inst for inst in all_instances
3997 if compat.any(node in group.members
3998 for node in inst.all_nodes)])
3999 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4000 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
4002 new_ipolicy, instances)
4004 violations.update(new)
4007 self.LogWarning("After the ipolicy change the following instances"
4008 " violate them: %s",
4009 utils.CommaJoin(utils.NiceSort(violations)))
4011 if self.op.nicparams:
4012 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4013 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4014 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4017 # check all instances for consistency
4018 for instance in self.cfg.GetAllInstancesInfo().values():
4019 for nic_idx, nic in enumerate(instance.nics):
4020 params_copy = copy.deepcopy(nic.nicparams)
4021 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4023 # check parameter syntax
4025 objects.NIC.CheckParameterSyntax(params_filled)
4026 except errors.ConfigurationError, err:
4027 nic_errors.append("Instance %s, nic/%d: %s" %
4028 (instance.name, nic_idx, err))
4030 # if we're moving instances to routed, check that they have an ip
4031 target_mode = params_filled[constants.NIC_MODE]
4032 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4033 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4034 " address" % (instance.name, nic_idx))
4036 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4037 "\n".join(nic_errors), errors.ECODE_INVAL)
4039 # hypervisor list/parameters
4040 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4041 if self.op.hvparams:
4042 for hv_name, hv_dict in self.op.hvparams.items():
4043 if hv_name not in self.new_hvparams:
4044 self.new_hvparams[hv_name] = hv_dict
4046 self.new_hvparams[hv_name].update(hv_dict)
4048 # disk template parameters
4049 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4050 if self.op.diskparams:
4051 for dt_name, dt_params in self.op.diskparams.items():
4052 if dt_name not in self.new_diskparams:
4053 self.new_diskparams[dt_name] = dt_params
4055 self.new_diskparams[dt_name].update(dt_params)
4057 # os hypervisor parameters
4058 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4060 for os_name, hvs in self.op.os_hvp.items():
4061 if os_name not in self.new_os_hvp:
4062 self.new_os_hvp[os_name] = hvs
4064 for hv_name, hv_dict in hvs.items():
4065 if hv_name not in self.new_os_hvp[os_name]:
4066 self.new_os_hvp[os_name][hv_name] = hv_dict
4068 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4071 self.new_osp = objects.FillDict(cluster.osparams, {})
4072 if self.op.osparams:
4073 for os_name, osp in self.op.osparams.items():
4074 if os_name not in self.new_osp:
4075 self.new_osp[os_name] = {}
4077 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4080 if not self.new_osp[os_name]:
4081 # we removed all parameters
4082 del self.new_osp[os_name]
4084 # check the parameter validity (remote check)
4085 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4086 os_name, self.new_osp[os_name])
4088 # changes to the hypervisor list
4089 if self.op.enabled_hypervisors is not None:
4090 self.hv_list = self.op.enabled_hypervisors
4091 for hv in self.hv_list:
4092 # if the hypervisor doesn't already exist in the cluster
4093 # hvparams, we initialize it to empty, and then (in both
4094 # cases) we make sure to fill the defaults, as we might not
4095 # have a complete defaults list if the hypervisor wasn't enabled before
4097 if hv not in new_hvp:
4099 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4100 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4102 self.hv_list = cluster.enabled_hypervisors
4104 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4105 # either the enabled list has changed, or the parameters have, validate
4106 for hv_name, hv_params in self.new_hvparams.items():
4107 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4108 (self.op.enabled_hypervisors and
4109 hv_name in self.op.enabled_hypervisors)):
4110 # either this is a new hypervisor, or its parameters have changed
4111 hv_class = hypervisor.GetHypervisor(hv_name)
4112 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4113 hv_class.CheckParameterSyntax(hv_params)
4114 _CheckHVParams(self, node_list, hv_name, hv_params)
4117 # no need to check any newly-enabled hypervisors, since the
4118 # defaults have already been checked in the above code-block
4119 for os_name, os_hvp in self.new_os_hvp.items():
4120 for hv_name, hv_params in os_hvp.items():
4121 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4122 # we need to fill in the new os_hvp on top of the actual hv_p
4123 cluster_defaults = self.new_hvparams.get(hv_name, {})
4124 new_osp = objects.FillDict(cluster_defaults, hv_params)
4125 hv_class = hypervisor.GetHypervisor(hv_name)
4126 hv_class.CheckParameterSyntax(new_osp)
4127 _CheckHVParams(self, node_list, hv_name, new_osp)
4129 if self.op.default_iallocator:
4130 alloc_script = utils.FindFile(self.op.default_iallocator,
4131 constants.IALLOCATOR_SEARCH_PATH,
4133 if alloc_script is None:
4134 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4135 " specified" % self.op.default_iallocator,
4138 def Exec(self, feedback_fn):
4139 """Change the parameters of the cluster.
4142 if self.op.vg_name is not None:
4143 new_volume = self.op.vg_name
4146 if new_volume != self.cfg.GetVGName():
4147 self.cfg.SetVGName(new_volume)
4149 feedback_fn("Cluster LVM configuration already in desired"
4150 " state, not changing")
4151 if self.op.drbd_helper is not None:
4152 new_helper = self.op.drbd_helper
4155 if new_helper != self.cfg.GetDRBDHelper():
4156 self.cfg.SetDRBDHelper(new_helper)
4158 feedback_fn("Cluster DRBD helper already in desired state,"
4160 if self.op.hvparams:
4161 self.cluster.hvparams = self.new_hvparams
4163 self.cluster.os_hvp = self.new_os_hvp
4164 if self.op.enabled_hypervisors is not None:
4165 self.cluster.hvparams = self.new_hvparams
4166 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4167 if self.op.beparams:
4168 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4169 if self.op.nicparams:
4170 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4172 self.cluster.ipolicy = self.new_ipolicy
4173 if self.op.osparams:
4174 self.cluster.osparams = self.new_osp
4175 if self.op.ndparams:
4176 self.cluster.ndparams = self.new_ndparams
4177 if self.op.diskparams:
4178 self.cluster.diskparams = self.new_diskparams
4179 if self.op.hv_state:
4180 self.cluster.hv_state_static = self.new_hv_state
4181 if self.op.disk_state:
4182 self.cluster.disk_state_static = self.new_disk_state
4184 if self.op.candidate_pool_size is not None:
4185 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4186 # we need to update the pool size here, otherwise the save will fail
4187 _AdjustCandidatePool(self, [])
4189 if self.op.maintain_node_health is not None:
4190 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4191 feedback_fn("Note: CONFD was disabled at build time, node health"
4192 " maintenance is not useful (still enabling it)")
4193 self.cluster.maintain_node_health = self.op.maintain_node_health
4195 if self.op.prealloc_wipe_disks is not None:
4196 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4198 if self.op.add_uids is not None:
4199 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4201 if self.op.remove_uids is not None:
4202 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4204 if self.op.uid_pool is not None:
4205 self.cluster.uid_pool = self.op.uid_pool
4207 if self.op.default_iallocator is not None:
4208 self.cluster.default_iallocator = self.op.default_iallocator
4210 if self.op.reserved_lvs is not None:
4211 self.cluster.reserved_lvs = self.op.reserved_lvs
4213 if self.op.use_external_mip_script is not None:
4214 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4216 def helper_os(aname, mods, desc):
4218 lst = getattr(self.cluster, aname)
4219 for key, val in mods:
4220 if key == constants.DDM_ADD:
4222 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4225 elif key == constants.DDM_REMOVE:
4229 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4231 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4233 if self.op.hidden_os:
4234 helper_os("hidden_os", self.op.hidden_os, "hidden")
4236 if self.op.blacklisted_os:
4237 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4239 if self.op.master_netdev:
4240 master_params = self.cfg.GetMasterNetworkParameters()
4241 ems = self.cfg.GetUseExternalMipScript()
4242 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4243 self.cluster.master_netdev)
4244 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4246 result.Raise("Could not disable the master ip")
4247 feedback_fn("Changing master_netdev from %s to %s" %
4248 (master_params.netdev, self.op.master_netdev))
4249 self.cluster.master_netdev = self.op.master_netdev
4251 if self.op.master_netmask:
4252 master_params = self.cfg.GetMasterNetworkParameters()
4253 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4254 result = self.rpc.call_node_change_master_netmask(master_params.name,
4255 master_params.netmask,
4256 self.op.master_netmask,
4258 master_params.netdev)
4260 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4263 self.cluster.master_netmask = self.op.master_netmask
4265 self.cfg.Update(self.cluster, feedback_fn)
4267 if self.op.master_netdev:
4268 master_params = self.cfg.GetMasterNetworkParameters()
4269 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4270 self.op.master_netdev)
4271 ems = self.cfg.GetUseExternalMipScript()
4272 result = self.rpc.call_node_activate_master_ip(master_params.name,
4275 self.LogWarning("Could not re-enable the master ip on"
4276 " the master, please restart manually: %s",
4280 def _UploadHelper(lu, nodes, fname):
4281 """Helper for uploading a file and showing warnings.
4284 if os.path.exists(fname):
4285 result = lu.rpc.call_upload_file(nodes, fname)
4286 for to_node, to_result in result.items():
4287 msg = to_result.fail_msg
4289 msg = ("Copy of file %s to node %s failed: %s" %
4290 (fname, to_node, msg))
4291 lu.proc.LogWarning(msg)
4294 def _ComputeAncillaryFiles(cluster, redist):
4295 """Compute files external to Ganeti which need to be consistent.
4297 @type redist: boolean
4298 @param redist: Whether to include files which need to be redistributed
4301 # Compute files for all nodes
4303 constants.SSH_KNOWN_HOSTS_FILE,
4304 constants.CONFD_HMAC_KEY,
4305 constants.CLUSTER_DOMAIN_SECRET_FILE,
4306 constants.SPICE_CERT_FILE,
4307 constants.SPICE_CACERT_FILE,
4308 constants.RAPI_USERS_FILE,
4312 files_all.update(constants.ALL_CERT_FILES)
4313 files_all.update(ssconf.SimpleStore().GetFileList())
4315 # we need to ship at least the RAPI certificate
4316 files_all.add(constants.RAPI_CERT_FILE)
4318 if cluster.modify_etc_hosts:
4319 files_all.add(constants.ETC_HOSTS)
4321 if cluster.use_external_mip_script:
4322 files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
4324 # Files which are optional; these must:
4325 # - be present in one other category as well
4326 # - either exist or not exist on all nodes of that category (mc, vm all)
4328 constants.RAPI_USERS_FILE,
4331 # Files which should only be on master candidates
4335 files_mc.add(constants.CLUSTER_CONF_FILE)
4337 # Files which should only be on VM-capable nodes
4340 for hv_name in cluster.enabled_hypervisors
4341 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4345 for hv_name in cluster.enabled_hypervisors
4346 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4348 # Filenames in each category must be unique
4349 all_files_set = files_all | files_mc | files_vm
4350 assert (len(all_files_set) ==
4351 sum(map(len, [files_all, files_mc, files_vm]))), \
4352 "Found file listed in more than one file list"
4354 # Optional files must be present in one other category
4355 assert all_files_set.issuperset(files_opt), \
4356 "Optional file not in a different required list"
4358 return (files_all, files_opt, files_mc, files_vm)
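# Note for callers: _RedistributeAncillaryFiles below unpacks this result as
# (files_all, files_opt, files_mc, files_vm). The asserts above guarantee that
# the three distribution categories are pairwise disjoint and that every
# optional file (e.g. constants.RAPI_USERS_FILE) also appears in one of them,
# so no file can be listed twice for the same node.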
4361 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4362 """Distribute additional files which are part of the cluster configuration.
4364 ConfigWriter takes care of distributing the config and ssconf files, but
4365 there are more files which should be distributed to all nodes. This function
4366 makes sure those are copied.
4368 @param lu: calling logical unit
4369 @param additional_nodes: list of nodes not in the config to distribute to
4370 @type additional_vm: boolean
4371 @param additional_vm: whether the additional nodes are vm-capable or not
4374 # Gather target nodes
4375 cluster = lu.cfg.GetClusterInfo()
4376 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4378 online_nodes = lu.cfg.GetOnlineNodeList()
4379 online_set = frozenset(online_nodes)
4380 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4382 if additional_nodes is not None:
4383 online_nodes.extend(additional_nodes)
4385 vm_nodes.extend(additional_nodes)
4387 # Never distribute to master node
4388 for nodelist in [online_nodes, vm_nodes]:
4389 if master_info.name in nodelist:
4390 nodelist.remove(master_info.name)
4393 (files_all, _, files_mc, files_vm) = \
4394 _ComputeAncillaryFiles(cluster, True)
4396 # Never re-distribute configuration file from here
4397 assert not (constants.CLUSTER_CONF_FILE in files_all or
4398 constants.CLUSTER_CONF_FILE in files_vm)
4399 assert not files_mc, "Master candidates not handled in this function"
4402 (online_nodes, files_all),
4403 (vm_nodes, files_vm),
4407 for (node_list, files) in filemap:
4409 _UploadHelper(lu, node_list, fname)
4412 class LUClusterRedistConf(NoHooksLU):
4413 """Force the redistribution of cluster configuration.
4415 This is a very simple LU.
4420 def ExpandNames(self):
4421 self.needed_locks = {
4422 locking.LEVEL_NODE: locking.ALL_SET,
4424 self.share_locks[locking.LEVEL_NODE] = 1
4426 def Exec(self, feedback_fn):
4427 """Redistribute the configuration.
4430 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4431 _RedistributeAncillaryFiles(self)
4434 class LUClusterActivateMasterIp(NoHooksLU):
4435 """Activate the master IP on the master node.
4438 def Exec(self, feedback_fn):
4439 """Activate the master IP.
4442 master_params = self.cfg.GetMasterNetworkParameters()
4443 ems = self.cfg.GetUseExternalMipScript()
4444 result = self.rpc.call_node_activate_master_ip(master_params.name,
4446 result.Raise("Could not activate the master IP")
4449 class LUClusterDeactivateMasterIp(NoHooksLU):
4450 """Deactivate the master IP on the master node.
4453 def Exec(self, feedback_fn):
4454 """Deactivate the master IP.
4457 master_params = self.cfg.GetMasterNetworkParameters()
4458 ems = self.cfg.GetUseExternalMipScript()
4459 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4461 result.Raise("Could not deactivate the master IP")
4464 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4465 """Sleep and poll for an instance's disk to sync.
4468 if not instance.disks or disks is not None and not disks:
4471 disks = _ExpandCheckDisks(instance, disks)
4474 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4476 node = instance.primary_node
4479 lu.cfg.SetDiskID(dev, node)
4481 # TODO: Convert to utils.Retry
4484 degr_retries = 10 # in seconds, as we sleep 1 second each time
4488 cumul_degraded = False
4489 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4490 msg = rstats.fail_msg
4492 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4495 raise errors.RemoteError("Can't contact node %s for mirror data,"
4496 " aborting." % node)
4499 rstats = rstats.payload
4501 for i, mstat in enumerate(rstats):
4503 lu.LogWarning("Can't compute data for node %s/%s",
4504 node, disks[i].iv_name)
4507 cumul_degraded = (cumul_degraded or
4508 (mstat.is_degraded and mstat.sync_percent is None))
4509 if mstat.sync_percent is not None:
4511 if mstat.estimated_time is not None:
4512 rem_time = ("%s remaining (estimated)" %
4513 utils.FormatSeconds(mstat.estimated_time))
4514 max_time = mstat.estimated_time
4516 rem_time = "no time estimate"
4517 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4518 (disks[i].iv_name, mstat.sync_percent, rem_time))
4520 # if we're done but degraded, let's do a few small retries, to
4521 # make sure we see a stable and not transient situation; therefore
4522 # we force restart of the loop
4523 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4524 logging.info("Degraded disks found, %d retries left", degr_retries)
4532 time.sleep(min(60, max_time))
4535 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4536 return not cumul_degraded
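# Behaviour sketch: the loop above polls blockdev_getmirrorstatus on the
# primary node, logs per-device progress (sync percentage plus an estimated
# remaining time when available), sleeps at most 60 seconds between polls and,
# once the sync looks finished but a device is still degraded, uses up to
# degr_retries one-second retries before giving up; the return value is True
# only if no disk was left degraded.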
4539 def _BlockdevFind(lu, node, dev, instance):
4540 """Wrapper around call_blockdev_find to annotate diskparams.
4542 @param lu: A reference to the lu object
4543 @param node: The node to call out
4544 @param dev: The device to find
4545 @param instance: The instance object the device belongs to
4546 @returns The result of the rpc call
4549 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4550 return lu.rpc.call_blockdev_find(node, disk)
4553 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4554 """Wrapper around L{_CheckDiskConsistencyInner}.
4557 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4558 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4562 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4564 """Check that mirrors are not degraded.
4566 @attention: The device has to be annotated already.
4568 The ldisk parameter, if True, will change the test from the
4569 is_degraded attribute (which represents overall non-ok status for
4570 the device(s)) to the ldisk (representing the local storage status).
4573 lu.cfg.SetDiskID(dev, node)
4577 if on_primary or dev.AssembleOnSecondary():
4578 rstats = lu.rpc.call_blockdev_find(node, dev)
4579 msg = rstats.fail_msg
4581 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4583 elif not rstats.payload:
4584 lu.LogWarning("Can't find disk on node %s", node)
4588 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4590 result = result and not rstats.payload.is_degraded
4593 for child in dev.children:
4594 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4600 class LUOobCommand(NoHooksLU):
4601 """Logical unit for OOB handling.
4605 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4607 def ExpandNames(self):
4608 """Gather locks we need.
4611 if self.op.node_names:
4612 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4613 lock_names = self.op.node_names
4615 lock_names = locking.ALL_SET
4617 self.needed_locks = {
4618 locking.LEVEL_NODE: lock_names,
4621 def CheckPrereq(self):
4622 """Check prerequisites.
4625 - the node exists in the configuration
4628 Any errors are signaled by raising errors.OpPrereqError.
4632 self.master_node = self.cfg.GetMasterNode()
4634 assert self.op.power_delay >= 0.0
4636 if self.op.node_names:
4637 if (self.op.command in self._SKIP_MASTER and
4638 self.master_node in self.op.node_names):
4639 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4640 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4642 if master_oob_handler:
4643 additional_text = ("run '%s %s %s' if you want to operate on the"
4644 " master regardless") % (master_oob_handler,
4648 additional_text = "it does not support out-of-band operations"
4650 raise errors.OpPrereqError(("Operating on the master node %s is not"
4651 " allowed for %s; %s") %
4652 (self.master_node, self.op.command,
4653 additional_text), errors.ECODE_INVAL)
4655 self.op.node_names = self.cfg.GetNodeList()
4656 if self.op.command in self._SKIP_MASTER:
4657 self.op.node_names.remove(self.master_node)
4659 if self.op.command in self._SKIP_MASTER:
4660 assert self.master_node not in self.op.node_names
4662 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4664 raise errors.OpPrereqError("Node %s not found" % node_name,
4667 self.nodes.append(node)
4669 if (not self.op.ignore_status and
4670 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4671 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4672 " not marked offline") % node_name,
4675 def Exec(self, feedback_fn):
4676 """Execute OOB and return result if we expect any.
4679 master_node = self.master_node
4682 for idx, node in enumerate(utils.NiceSort(self.nodes,
4683 key=lambda node: node.name)):
4684 node_entry = [(constants.RS_NORMAL, node.name)]
4685 ret.append(node_entry)
4687 oob_program = _SupportsOob(self.cfg, node)
4690 node_entry.append((constants.RS_UNAVAIL, None))
4693 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4694 self.op.command, oob_program, node.name)
4695 result = self.rpc.call_run_oob(master_node, oob_program,
4696 self.op.command, node.name,
4700 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4701 node.name, result.fail_msg)
4702 node_entry.append((constants.RS_NODATA, None))
4705 self._CheckPayload(result)
4706 except errors.OpExecError, err:
4707 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4709 node_entry.append((constants.RS_NODATA, None))
4711 if self.op.command == constants.OOB_HEALTH:
4712 # For health we should log important events
4713 for item, status in result.payload:
4714 if status in [constants.OOB_STATUS_WARNING,
4715 constants.OOB_STATUS_CRITICAL]:
4716 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4717 item, node.name, status)
4719 if self.op.command == constants.OOB_POWER_ON:
4721 elif self.op.command == constants.OOB_POWER_OFF:
4722 node.powered = False
4723 elif self.op.command == constants.OOB_POWER_STATUS:
4724 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4725 if powered != node.powered:
4726 logging.warning(("Recorded power state (%s) of node '%s' does not"
4727 " match actual power state (%s)"), node.powered,
4730 # For configuration changing commands we should update the node
4731 if self.op.command in (constants.OOB_POWER_ON,
4732 constants.OOB_POWER_OFF):
4733 self.cfg.Update(node, feedback_fn)
4735 node_entry.append((constants.RS_NORMAL, result.payload))
4737 if (self.op.command == constants.OOB_POWER_ON and
4738 idx < len(self.nodes) - 1):
4739 time.sleep(self.op.power_delay)
4743 def _CheckPayload(self, result):
4744 """Checks if the payload is valid.
4746 @param result: RPC result
4747 @raises errors.OpExecError: If payload is not valid
4751 if self.op.command == constants.OOB_HEALTH:
4752 if not isinstance(result.payload, list):
4753 errs.append("command 'health' is expected to return a list but got %s" %
4754 type(result.payload))
4756 for item, status in result.payload:
4757 if status not in constants.OOB_STATUSES:
4758 errs.append("health item '%s' has invalid status '%s'" %
4761 if self.op.command == constants.OOB_POWER_STATUS:
4762 if not isinstance(result.payload, dict):
4763 errs.append("power-status is expected to return a dict but got %s" %
4764 type(result.payload))
4766 if self.op.command in [
4767 constants.OOB_POWER_ON,
4768 constants.OOB_POWER_OFF,
4769 constants.OOB_POWER_CYCLE,
4771 if result.payload is not None:
4772 errs.append("%s is expected to not return payload but got '%s'" %
4773 (self.op.command, result.payload))
4776 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4777 utils.CommaJoin(errs))
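# Payload shapes enforced above (values are illustrative):
#   constants.OOB_HEALTH:       a list of (item, status) pairs, e.g.
#                               [("fan0", constants.OOB_STATUS_WARNING)]
#   constants.OOB_POWER_STATUS: a dict, e.g.
#                               {constants.OOB_POWER_STATUS_POWERED: True}
#   power on/off/cycle:         no payload at all (None)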
4780 class _OsQuery(_QueryBase):
4781 FIELDS = query.OS_FIELDS
4783 def ExpandNames(self, lu):
4784 # Lock all nodes in shared mode
4785 # Temporary removal of locks, should be reverted later
4786 # TODO: reintroduce locks when they are lighter-weight
4787 lu.needed_locks = {}
4788 #self.share_locks[locking.LEVEL_NODE] = 1
4789 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4791 # The following variables interact with _QueryBase._GetNames
4793 self.wanted = self.names
4795 self.wanted = locking.ALL_SET
4797 self.do_locking = self.use_locking
4799 def DeclareLocks(self, lu, level):
4803 def _DiagnoseByOS(rlist):
4804 """Remaps a per-node return list into a per-os per-node dictionary
4806 @param rlist: a map with node names as keys and OS objects as values
4809 @return: a dictionary with osnames as keys and as value another
4810 map, with nodes as keys and tuples of (path, status, diagnose,
4811 variants, parameters, api_versions) as values, eg::
4813 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], [], []),
4814 (/srv/..., False, "invalid api", [], [], [])],
4815 "node2": [(/srv/..., True, "", [], [], [])]}
4820 # we build here the list of nodes that didn't fail the RPC (at RPC
4821 # level), so that nodes with a non-responding node daemon don't
4822 # make all OSes invalid
4823 good_nodes = [node_name for node_name in rlist
4824 if not rlist[node_name].fail_msg]
4825 for node_name, nr in rlist.items():
4826 if nr.fail_msg or not nr.payload:
4828 for (name, path, status, diagnose, variants,
4829 params, api_versions) in nr.payload:
4830 if name not in all_os:
4831 # build a list of nodes for this os containing empty lists
4832 # for each node in node_list
4834 for nname in good_nodes:
4835 all_os[name][nname] = []
4836 # convert params from [name, help] to (name, help)
4837 params = [tuple(v) for v in params]
4838 all_os[name][node_name].append((path, status, diagnose,
4839 variants, params, api_versions))
4842 def _GetQueryData(self, lu):
4843 """Computes the list of nodes and their attributes.
4846 # Locking is not used
4847 assert not (compat.any(lu.glm.is_owned(level)
4848 for level in locking.LEVELS
4849 if level != locking.LEVEL_CLUSTER) or
4850 self.do_locking or self.use_locking)
4852 valid_nodes = [node.name
4853 for node in lu.cfg.GetAllNodesInfo().values()
4854 if not node.offline and node.vm_capable]
4855 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4856 cluster = lu.cfg.GetClusterInfo()
4860 for (os_name, os_data) in pol.items():
4861 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4862 hidden=(os_name in cluster.hidden_os),
4863 blacklisted=(os_name in cluster.blacklisted_os))
4867 api_versions = set()
4869 for idx, osl in enumerate(os_data.values()):
4870 info.valid = bool(info.valid and osl and osl[0][1])
4874 (node_variants, node_params, node_api) = osl[0][3:6]
4877 variants.update(node_variants)
4878 parameters.update(node_params)
4879 api_versions.update(node_api)
4881 # Filter out inconsistent values
4882 variants.intersection_update(node_variants)
4883 parameters.intersection_update(node_params)
4884 api_versions.intersection_update(node_api)
4886 info.variants = list(variants)
4887 info.parameters = list(parameters)
4888 info.api_versions = list(api_versions)
4890 data[os_name] = info
4892 # Prepare data in requested order
4893 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4897 class LUOsDiagnose(NoHooksLU):
4898 """Logical unit for OS diagnose/query.
4904 def _BuildFilter(fields, names):
4905 """Builds a filter for querying OSes.
4908 name_filter = qlang.MakeSimpleFilter("name", names)
4910 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4911 # respective field is not requested
4912 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4913 for fname in ["hidden", "blacklisted"]
4914 if fname not in fields]
4915 if "valid" not in fields:
4916 status_filter.append([qlang.OP_TRUE, "valid"])
4919 status_filter.insert(0, qlang.OP_AND)
4921 status_filter = None
4923 if name_filter and status_filter:
4924 return [qlang.OP_AND, name_filter, status_filter]
4928 return status_filter
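# Illustrative example (hypothetical call): _BuildFilter(["name"], []) builds
# no name filter and, since none of the status fields were requested, is
# expected to return just the status filter:
#
#   [qlang.OP_AND,
#    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#    [qlang.OP_TRUE, "valid"]]
#
# i.e. the legacy behaviour of listing only valid, non-hidden,
# non-blacklisted OSes.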
4930 def CheckArguments(self):
4931 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4932 self.op.output_fields, False)
4934 def ExpandNames(self):
4935 self.oq.ExpandNames(self)
4937 def Exec(self, feedback_fn):
4938 return self.oq.OldStyleQuery(self)
4941 class LUNodeRemove(LogicalUnit):
4942 """Logical unit for removing a node.
4945 HPATH = "node-remove"
4946 HTYPE = constants.HTYPE_NODE
4948 def BuildHooksEnv(self):
4953 "OP_TARGET": self.op.node_name,
4954 "NODE_NAME": self.op.node_name,
4957 def BuildHooksNodes(self):
4958 """Build hooks nodes.
4960 This doesn't run on the target node in the pre phase as a failed
4961 node would then be impossible to remove.
4964 all_nodes = self.cfg.GetNodeList()
4966 all_nodes.remove(self.op.node_name)
4969 return (all_nodes, all_nodes)
4971 def CheckPrereq(self):
4972 """Check prerequisites.
4975 - the node exists in the configuration
4976 - it does not have primary or secondary instances
4977 - it's not the master
4979 Any errors are signaled by raising errors.OpPrereqError.
4982 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4983 node = self.cfg.GetNodeInfo(self.op.node_name)
4984 assert node is not None
4986 masternode = self.cfg.GetMasterNode()
4987 if node.name == masternode:
4988 raise errors.OpPrereqError("Node is the master node, failover to another"
4989 " node is required", errors.ECODE_INVAL)
4991 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4992 if node.name in instance.all_nodes:
4993 raise errors.OpPrereqError("Instance %s is still running on the node,"
4994 " please remove first" % instance_name,
4996 self.op.node_name = node.name
4999 def Exec(self, feedback_fn):
5000 """Removes the node from the cluster.
5004 logging.info("Stopping the node daemon and removing configs from node %s",
5007 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5009 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5012 # Promote nodes to master candidate as needed
5013 _AdjustCandidatePool(self, exceptions=[node.name])
5014 self.context.RemoveNode(node.name)
5016 # Run post hooks on the node before it's removed
5017 _RunPostHook(self, node.name)
5019 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5020 msg = result.fail_msg
5022 self.LogWarning("Errors encountered on the remote node while leaving"
5023 " the cluster: %s", msg)
5025 # Remove node from our /etc/hosts
5026 if self.cfg.GetClusterInfo().modify_etc_hosts:
5027 master_node = self.cfg.GetMasterNode()
5028 result = self.rpc.call_etc_hosts_modify(master_node,
5029 constants.ETC_HOSTS_REMOVE,
5031 result.Raise("Can't update hosts file with new host data")
5032 _RedistributeAncillaryFiles(self)
5035 class _NodeQuery(_QueryBase):
5036 FIELDS = query.NODE_FIELDS
5038 def ExpandNames(self, lu):
5039 lu.needed_locks = {}
5040 lu.share_locks = _ShareAll()
5043 self.wanted = _GetWantedNodes(lu, self.names)
5045 self.wanted = locking.ALL_SET
5047 self.do_locking = (self.use_locking and
5048 query.NQ_LIVE in self.requested_data)
5051 # If any non-static field is requested we need to lock the nodes
5052 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5054 def DeclareLocks(self, lu, level):
5057 def _GetQueryData(self, lu):
5058 """Computes the list of nodes and their attributes.
5061 all_info = lu.cfg.GetAllNodesInfo()
5063 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5065 # Gather data as requested
5066 if query.NQ_LIVE in self.requested_data:
5067 # filter out non-vm_capable nodes
5068 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5070 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5071 [lu.cfg.GetHypervisorType()])
5072 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5073 for (name, nresult) in node_data.items()
5074 if not nresult.fail_msg and nresult.payload)
5078 if query.NQ_INST in self.requested_data:
5079 node_to_primary = dict([(name, set()) for name in nodenames])
5080 node_to_secondary = dict([(name, set()) for name in nodenames])
5082 inst_data = lu.cfg.GetAllInstancesInfo()
5084 for inst in inst_data.values():
5085 if inst.primary_node in node_to_primary:
5086 node_to_primary[inst.primary_node].add(inst.name)
5087 for secnode in inst.secondary_nodes:
5088 if secnode in node_to_secondary:
5089 node_to_secondary[secnode].add(inst.name)
5091 node_to_primary = None
5092 node_to_secondary = None
5094 if query.NQ_OOB in self.requested_data:
5095 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5096 for name, node in all_info.iteritems())
5100 if query.NQ_GROUP in self.requested_data:
5101 groups = lu.cfg.GetAllNodeGroupsInfo()
5105 return query.NodeQueryData([all_info[name] for name in nodenames],
5106 live_data, lu.cfg.GetMasterNode(),
5107 node_to_primary, node_to_secondary, groups,
5108 oob_support, lu.cfg.GetClusterInfo())
5111 class LUNodeQuery(NoHooksLU):
5112 """Logical unit for querying nodes.
5115 # pylint: disable=W0142
5118 def CheckArguments(self):
5119 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5120 self.op.output_fields, self.op.use_locking)
5122 def ExpandNames(self):
5123 self.nq.ExpandNames(self)
5125 def DeclareLocks(self, level):
5126 self.nq.DeclareLocks(self, level)
5128 def Exec(self, feedback_fn):
5129 return self.nq.OldStyleQuery(self)
5132 class LUNodeQueryvols(NoHooksLU):
5133 """Logical unit for getting volumes on node(s).
5137 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5138 _FIELDS_STATIC = utils.FieldSet("node")
5140 def CheckArguments(self):
5141 _CheckOutputFields(static=self._FIELDS_STATIC,
5142 dynamic=self._FIELDS_DYNAMIC,
5143 selected=self.op.output_fields)
5145 def ExpandNames(self):
5146 self.share_locks = _ShareAll()
5147 self.needed_locks = {}
5149 if not self.op.nodes:
5150 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5152 self.needed_locks[locking.LEVEL_NODE] = \
5153 _GetWantedNodes(self, self.op.nodes)
5155 def Exec(self, feedback_fn):
5156 """Computes the list of nodes and their attributes.
5159 nodenames = self.owned_locks(locking.LEVEL_NODE)
5160 volumes = self.rpc.call_node_volumes(nodenames)
5162 ilist = self.cfg.GetAllInstancesInfo()
5163 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5166 for node in nodenames:
5167 nresult = volumes[node]
5170 msg = nresult.fail_msg
5172 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5175 node_vols = sorted(nresult.payload,
5176 key=operator.itemgetter("dev"))
5178 for vol in node_vols:
5180 for field in self.op.output_fields:
5183 elif field == "phys":
5187 elif field == "name":
5189 elif field == "size":
5190 val = int(float(vol["size"]))
5191 elif field == "instance":
5192 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5194 raise errors.ParameterError(field)
5195 node_output.append(str(val))
5197 output.append(node_output)
5202 class LUNodeQueryStorage(NoHooksLU):
5203 """Logical unit for getting information on storage units on node(s).
5206 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5209 def CheckArguments(self):
5210 _CheckOutputFields(static=self._FIELDS_STATIC,
5211 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5212 selected=self.op.output_fields)
5214 def ExpandNames(self):
5215 self.share_locks = _ShareAll()
5216 self.needed_locks = {}
5219 self.needed_locks[locking.LEVEL_NODE] = \
5220 _GetWantedNodes(self, self.op.nodes)
5222 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5224 def Exec(self, feedback_fn):
5225 """Computes the list of nodes and their attributes.
5228 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5230 # Always get name to sort by
5231 if constants.SF_NAME in self.op.output_fields:
5232 fields = self.op.output_fields[:]
5234 fields = [constants.SF_NAME] + self.op.output_fields
5236 # Never ask for node or type as it's only known to the LU
5237 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5238 while extra in fields:
5239 fields.remove(extra)
5241 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5242 name_idx = field_idx[constants.SF_NAME]
5244 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5245 data = self.rpc.call_storage_list(self.nodes,
5246 self.op.storage_type, st_args,
5247 self.op.name, fields)
5251 for node in utils.NiceSort(self.nodes):
5252 nresult = data[node]
5256 msg = nresult.fail_msg
5258 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5261 rows = dict([(row[name_idx], row) for row in nresult.payload])
5263 for name in utils.NiceSort(rows.keys()):
5268 for field in self.op.output_fields:
5269 if field == constants.SF_NODE:
5271 elif field == constants.SF_TYPE:
5272 val = self.op.storage_type
5273 elif field in field_idx:
5274 val = row[field_idx[field]]
5276 raise errors.ParameterError(field)
5285 class _InstanceQuery(_QueryBase):
5286 FIELDS = query.INSTANCE_FIELDS
5288 def ExpandNames(self, lu):
5289 lu.needed_locks = {}
5290 lu.share_locks = _ShareAll()
5293 self.wanted = _GetWantedInstances(lu, self.names)
5295 self.wanted = locking.ALL_SET
5297 self.do_locking = (self.use_locking and
5298 query.IQ_LIVE in self.requested_data)
5300 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5301 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5302 lu.needed_locks[locking.LEVEL_NODE] = []
5303 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5305 self.do_grouplocks = (self.do_locking and
5306 query.IQ_NODES in self.requested_data)
5308 def DeclareLocks(self, lu, level):
5310 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5311 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5313 # Lock all groups used by instances optimistically; this requires going
5314 # via the node before it's locked, requiring verification later on
5315 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5317 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5318 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5319 elif level == locking.LEVEL_NODE:
5320 lu._LockInstancesNodes() # pylint: disable=W0212
5323 def _CheckGroupLocks(lu):
5324 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5325 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5327 # Check if node groups for locked instances are still correct
5328 for instance_name in owned_instances:
5329 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5331 def _GetQueryData(self, lu):
5332 """Computes the list of instances and their attributes.
5335 if self.do_grouplocks:
5336 self._CheckGroupLocks(lu)
5338 cluster = lu.cfg.GetClusterInfo()
5339 all_info = lu.cfg.GetAllInstancesInfo()
5341 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5343 instance_list = [all_info[name] for name in instance_names]
5344 nodes = frozenset(itertools.chain(*(inst.all_nodes
5345 for inst in instance_list)))
5346 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5349 wrongnode_inst = set()
5351 # Gather data as requested
5352 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5354 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5356 result = node_data[name]
5358 # offline nodes will be in both lists
5359 assert result.fail_msg
5360 offline_nodes.append(name)
5362 bad_nodes.append(name)
5363 elif result.payload:
5364 for inst in result.payload:
5365 if inst in all_info:
5366 if all_info[inst].primary_node == name:
5367 live_data.update(result.payload)
5369 wrongnode_inst.add(inst)
5371 # orphan instance; we don't list it here as we don't
5372 # handle this case yet in the output of instance listing
5373 logging.warning("Orphan instance '%s' found on node %s",
5375 # else no instance is alive
5379 if query.IQ_DISKUSAGE in self.requested_data:
5380 disk_usage = dict((inst.name,
5381 _ComputeDiskSize(inst.disk_template,
5382 [{constants.IDISK_SIZE: disk.size}
5383 for disk in inst.disks]))
5384 for inst in instance_list)
5388 if query.IQ_CONSOLE in self.requested_data:
5390 for inst in instance_list:
5391 if inst.name in live_data:
5392 # Instance is running
5393 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5395 consinfo[inst.name] = None
5396 assert set(consinfo.keys()) == set(instance_names)
5400 if query.IQ_NODES in self.requested_data:
5401 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5403 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5404 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5405 for uuid in set(map(operator.attrgetter("group"),
5411 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5412 disk_usage, offline_nodes, bad_nodes,
5413 live_data, wrongnode_inst, consinfo,
5417 class LUQuery(NoHooksLU):
5418 """Query for resources/items of a certain kind.
5421 # pylint: disable=W0142
5424 def CheckArguments(self):
5425 qcls = _GetQueryImplementation(self.op.what)
5427 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5429 def ExpandNames(self):
5430 self.impl.ExpandNames(self)
5432 def DeclareLocks(self, level):
5433 self.impl.DeclareLocks(self, level)
5435 def Exec(self, feedback_fn):
5436 return self.impl.NewStyleQuery(self)
5439 class LUQueryFields(NoHooksLU):
5440 """Query for resources/items of a certain kind.
5443 # pylint: disable=W0142
5446 def CheckArguments(self):
5447 self.qcls = _GetQueryImplementation(self.op.what)
5449 def ExpandNames(self):
5450 self.needed_locks = {}
5452 def Exec(self, feedback_fn):
5453 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5456 class LUNodeModifyStorage(NoHooksLU):
5457 """Logical unit for modifying a storage volume on a node.
5462 def CheckArguments(self):
5463 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5465 storage_type = self.op.storage_type
5468 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5470 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5471 " modified" % storage_type,
5474 diff = set(self.op.changes.keys()) - modifiable
5476 raise errors.OpPrereqError("The following fields can not be modified for"
5477 " storage units of type '%s': %r" %
5478 (storage_type, list(diff)),
5481 def ExpandNames(self):
5482 self.needed_locks = {
5483 locking.LEVEL_NODE: self.op.node_name,
5486 def Exec(self, feedback_fn):
5487 """Computes the list of nodes and their attributes.
5490 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5491 result = self.rpc.call_storage_modify(self.op.node_name,
5492 self.op.storage_type, st_args,
5493 self.op.name, self.op.changes)
5494 result.Raise("Failed to modify storage unit '%s' on %s" %
5495 (self.op.name, self.op.node_name))
5498 class LUNodeAdd(LogicalUnit):
5499 """Logical unit for adding a node to the cluster.
5503 HTYPE = constants.HTYPE_NODE
5504 _NFLAGS = ["master_capable", "vm_capable"]
5506 def CheckArguments(self):
5507 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5508 # validate/normalize the node name
5509 self.hostname = netutils.GetHostname(name=self.op.node_name,
5510 family=self.primary_ip_family)
5511 self.op.node_name = self.hostname.name
5513 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5514 raise errors.OpPrereqError("Cannot readd the master node",
5517 if self.op.readd and self.op.group:
5518 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5519 " being readded", errors.ECODE_INVAL)
5521 def BuildHooksEnv(self):
5524 This will run on all nodes before, and on all nodes + the new node after.
5528 "OP_TARGET": self.op.node_name,
5529 "NODE_NAME": self.op.node_name,
5530 "NODE_PIP": self.op.primary_ip,
5531 "NODE_SIP": self.op.secondary_ip,
5532 "MASTER_CAPABLE": str(self.op.master_capable),
5533 "VM_CAPABLE": str(self.op.vm_capable),
5536 def BuildHooksNodes(self):
5537 """Build hooks nodes.
5540 # Exclude added node
5541 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5542 post_nodes = pre_nodes + [self.op.node_name, ]
5544 return (pre_nodes, post_nodes)
5546 def CheckPrereq(self):
5547 """Check prerequisites.
5550 - the new node is not already in the config
5552 - its parameters (single/dual homed) match the cluster
5554 Any errors are signaled by raising errors.OpPrereqError.
5558 hostname = self.hostname
5559 node = hostname.name
5560 primary_ip = self.op.primary_ip = hostname.ip
5561 if self.op.secondary_ip is None:
5562 if self.primary_ip_family == netutils.IP6Address.family:
5563 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5564 " IPv4 address must be given as secondary",
5566 self.op.secondary_ip = primary_ip
5568 secondary_ip = self.op.secondary_ip
5569 if not netutils.IP4Address.IsValid(secondary_ip):
5570 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5571 " address" % secondary_ip, errors.ECODE_INVAL)
5573 node_list = cfg.GetNodeList()
5574 if not self.op.readd and node in node_list:
5575 raise errors.OpPrereqError("Node %s is already in the configuration" %
5576 node, errors.ECODE_EXISTS)
5577 elif self.op.readd and node not in node_list:
5578 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5581 self.changed_primary_ip = False
5583 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5584 if self.op.readd and node == existing_node_name:
5585 if existing_node.secondary_ip != secondary_ip:
5586 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5587 " address configuration as before",
5589 if existing_node.primary_ip != primary_ip:
5590 self.changed_primary_ip = True
5594 if (existing_node.primary_ip == primary_ip or
5595 existing_node.secondary_ip == primary_ip or
5596 existing_node.primary_ip == secondary_ip or
5597 existing_node.secondary_ip == secondary_ip):
5598 raise errors.OpPrereqError("New node ip address(es) conflict with"
5599 " existing node %s" % existing_node.name,
5600 errors.ECODE_NOTUNIQUE)
5602 # After this 'if' block, None is no longer a valid value for the
5603 # _capable op attributes
5605 old_node = self.cfg.GetNodeInfo(node)
5606 assert old_node is not None, "Can't retrieve locked node %s" % node
5607 for attr in self._NFLAGS:
5608 if getattr(self.op, attr) is None:
5609 setattr(self.op, attr, getattr(old_node, attr))
5611 for attr in self._NFLAGS:
5612 if getattr(self.op, attr) is None:
5613 setattr(self.op, attr, True)
5615 if self.op.readd and not self.op.vm_capable:
5616 pri, sec = cfg.GetNodeInstances(node)
5618 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5619 " flag set to false, but it already holds"
5620 " instances" % node,
5623 # check that the type of the node (single versus dual homed) is the
5624 # same as for the master
5625 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5626 master_singlehomed = myself.secondary_ip == myself.primary_ip
5627 newbie_singlehomed = secondary_ip == primary_ip
5628 if master_singlehomed != newbie_singlehomed:
5629 if master_singlehomed:
5630 raise errors.OpPrereqError("The master has no secondary ip but the"
5631 " new node has one",
5634 raise errors.OpPrereqError("The master has a secondary ip but the"
5635 " new node doesn't have one",
5638 # checks reachability
5639 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5640 raise errors.OpPrereqError("Node not reachable by ping",
5641 errors.ECODE_ENVIRON)
5643 if not newbie_singlehomed:
5644 # check reachability from my secondary ip to newbie's secondary ip
5645 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5646 source=myself.secondary_ip):
5647 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5648 " based ping to node daemon port",
5649 errors.ECODE_ENVIRON)
5656 if self.op.master_capable:
5657 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5659 self.master_candidate = False
5662 self.new_node = old_node
5664 node_group = cfg.LookupNodeGroup(self.op.group)
5665 self.new_node = objects.Node(name=node,
5666 primary_ip=primary_ip,
5667 secondary_ip=secondary_ip,
5668 master_candidate=self.master_candidate,
5669 offline=False, drained=False,
5672 if self.op.ndparams:
5673 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5675 if self.op.hv_state:
5676 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5678 if self.op.disk_state:
5679 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5681 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5682 # it a property on the base class.
5683 result = rpc.DnsOnlyRunner().call_version([node])[node]
5684 result.Raise("Can't get version information from node %s" % node)
5685 if constants.PROTOCOL_VERSION == result.payload:
5686 logging.info("Communication to node %s fine, sw version %s match",
5687 node, result.payload)
5689 raise errors.OpPrereqError("Version mismatch master version %s,"
5690 " node version %s" %
5691 (constants.PROTOCOL_VERSION, result.payload),
5692 errors.ECODE_ENVIRON)
5694 def Exec(self, feedback_fn):
5695 """Adds the new node to the cluster.
5698 new_node = self.new_node
5699 node = new_node.name
5701 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5704 # We are adding a new node, so we assume it's powered
5705 new_node.powered = True
5707 # for re-adds, reset the offline/drained/master-candidate flags;
5708 # we need to reset here, otherwise offline would prevent RPC calls
5709 # later in the procedure; this also means that if the re-add
5710 # fails, we are left with a non-offlined, broken node
5712 new_node.drained = new_node.offline = False # pylint: disable=W0201
5713 self.LogInfo("Readding a node, the offline/drained flags were reset")
5714 # if we demote the node, we do cleanup later in the procedure
5715 new_node.master_candidate = self.master_candidate
5716 if self.changed_primary_ip:
5717 new_node.primary_ip = self.op.primary_ip
5719 # copy the master/vm_capable flags
5720 for attr in self._NFLAGS:
5721 setattr(new_node, attr, getattr(self.op, attr))
5723 # notify the user about any possible mc promotion
5724 if new_node.master_candidate:
5725 self.LogInfo("Node will be a master candidate")
5727 if self.op.ndparams:
5728 new_node.ndparams = self.op.ndparams
5730 new_node.ndparams = {}
5732 if self.op.hv_state:
5733 new_node.hv_state_static = self.new_hv_state
5735 if self.op.disk_state:
5736 new_node.disk_state_static = self.new_disk_state
5738 # Add node to our /etc/hosts, and add key to known_hosts
5739 if self.cfg.GetClusterInfo().modify_etc_hosts:
5740 master_node = self.cfg.GetMasterNode()
5741 result = self.rpc.call_etc_hosts_modify(master_node,
5742 constants.ETC_HOSTS_ADD,
5745 result.Raise("Can't update hosts file with new host data")
5747 if new_node.secondary_ip != new_node.primary_ip:
5748 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5751 node_verify_list = [self.cfg.GetMasterNode()]
5752 node_verify_param = {
5753 constants.NV_NODELIST: ([node], {}),
5754 # TODO: do a node-net-test as well?
5757 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5758 self.cfg.GetClusterName())
5759 for verifier in node_verify_list:
5760 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5761 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5763 for failed in nl_payload:
5764 feedback_fn("ssh/hostname verification failed"
5765 " (checking from %s): %s" %
5766 (verifier, nl_payload[failed]))
5767 raise errors.OpExecError("ssh/hostname verification failed")
5770 _RedistributeAncillaryFiles(self)
5771 self.context.ReaddNode(new_node)
5772 # make sure we redistribute the config
5773 self.cfg.Update(new_node, feedback_fn)
5774 # and make sure the new node will not have old files around
5775 if not new_node.master_candidate:
5776 result = self.rpc.call_node_demote_from_mc(new_node.name)
5777 msg = result.fail_msg
5779 self.LogWarning("Node failed to demote itself from master"
5780 " candidate status: %s" % msg)
5782 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5783 additional_vm=self.op.vm_capable)
5784 self.context.AddNode(new_node, self.proc.GetECId())
5787 class LUNodeSetParams(LogicalUnit):
5788 """Modifies the parameters of a node.
5790 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5791 to the node role (as _ROLE_*)
5792 @cvar _R2F: a dictionary from node role to tuples of flags
5793 @cvar _FLAGS: a list of attribute names corresponding to the flags
5796 HPATH = "node-modify"
5797 HTYPE = constants.HTYPE_NODE
5799 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5801 (True, False, False): _ROLE_CANDIDATE,
5802 (False, True, False): _ROLE_DRAINED,
5803 (False, False, True): _ROLE_OFFLINE,
5804 (False, False, False): _ROLE_REGULAR,
5806 _R2F = dict((v, k) for k, v in _F2R.items())
5807 _FLAGS = ["master_candidate", "drained", "offline"]
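# Editor's sketch (not part of the original module): the tables above form a
# bijection between flag tuples and node roles, for example
#   _F2R[(True, False, False)] == _ROLE_CANDIDATE
#   _R2F[_ROLE_OFFLINE] == (False, False, True)
# _FLAGS names the node attributes in the same order as the tuple elements,
# so zip(flags_tuple, _FLAGS) pairs each flag value with its attribute name.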
5809 def CheckArguments(self):
5810 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5811 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5812 self.op.master_capable, self.op.vm_capable,
5813 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5815 if all_mods.count(None) == len(all_mods):
5816 raise errors.OpPrereqError("Please pass at least one modification",
5818 if all_mods.count(True) > 1:
5819 raise errors.OpPrereqError("Can't set the node into more than one"
5820 " state at the same time",
5823 # Boolean value that tells us whether we might be demoting from MC
5824 self.might_demote = (self.op.master_candidate is False or
5825 self.op.offline is True or
5826 self.op.drained is True or
5827 self.op.master_capable is False)
5829 if self.op.secondary_ip:
5830 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5831 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5832 " address" % self.op.secondary_ip,
5835 self.lock_all = self.op.auto_promote and self.might_demote
5836 self.lock_instances = self.op.secondary_ip is not None
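# Note (editor's addition): instance locks are requested only when the
# secondary IP is being changed; _InstanceFilter below then narrows the lock
# set to internally mirrored instances (constants.DTS_INT_MIRROR) that have
# this node among their nodes, and CheckPrereq re-verifies that set later.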
5838 def _InstanceFilter(self, instance):
5839 """Filter for getting affected instances.
5842 return (instance.disk_template in constants.DTS_INT_MIRROR and
5843 self.op.node_name in instance.all_nodes)
5845 def ExpandNames(self):
5847 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5849 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5851 # Since modifying a node can have severe effects on currently running
5852 # operations the resource lock is at least acquired in shared mode
5853 self.needed_locks[locking.LEVEL_NODE_RES] = \
5854 self.needed_locks[locking.LEVEL_NODE]
5856 # Get node resource and instance locks in shared mode; they are not used
5857 # for anything but read-only access
5858 self.share_locks[locking.LEVEL_NODE_RES] = 1
5859 self.share_locks[locking.LEVEL_INSTANCE] = 1
5861 if self.lock_instances:
5862 self.needed_locks[locking.LEVEL_INSTANCE] = \
5863 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5865 def BuildHooksEnv(self):
5868 This runs on the master node.
5872 "OP_TARGET": self.op.node_name,
5873 "MASTER_CANDIDATE": str(self.op.master_candidate),
5874 "OFFLINE": str(self.op.offline),
5875 "DRAINED": str(self.op.drained),
5876 "MASTER_CAPABLE": str(self.op.master_capable),
5877 "VM_CAPABLE": str(self.op.vm_capable),
5880 def BuildHooksNodes(self):
5881 """Build hooks nodes.
5884 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5887 def CheckPrereq(self):
5888 """Check prerequisites.
5890 This only checks the instance list against the existing names.
5893 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5895 if self.lock_instances:
5896 affected_instances = \
5897 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5899 # Verify instance locks
5900 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5901 wanted_instances = frozenset(affected_instances.keys())
5902 if wanted_instances - owned_instances:
5903 raise errors.OpPrereqError("Instances affected by changing node %s's"
5904 " secondary IP address have changed since"
5905 " locks were acquired, wanted '%s', have"
5906 " '%s'; retry the operation" %
5908 utils.CommaJoin(wanted_instances),
5909 utils.CommaJoin(owned_instances)),
5912 affected_instances = None
5914 if (self.op.master_candidate is not None or
5915 self.op.drained is not None or
5916 self.op.offline is not None):
5917 # we can't change the master's node flags
5918 if self.op.node_name == self.cfg.GetMasterNode():
5919 raise errors.OpPrereqError("The master role can be changed"
5920 " only via master-failover",
5923 if self.op.master_candidate and not node.master_capable:
5924 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5925 " it a master candidate" % node.name,
5928 if self.op.vm_capable is False:
5929 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5931 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5932 " the vm_capable flag" % node.name,
5935 if node.master_candidate and self.might_demote and not self.lock_all:
5936 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5937 # check if after removing the current node, we're missing master candidates
5939 (mc_remaining, mc_should, _) = \
5940 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5941 if mc_remaining < mc_should:
5942 raise errors.OpPrereqError("Not enough master candidates, please"
5943 " pass auto promote option to allow"
5944 " promotion (--auto-promote or RAPI"
5945 " auto_promote=True)", errors.ECODE_STATE)
5947 self.old_flags = old_flags = (node.master_candidate,
5948 node.drained, node.offline)
5949 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5950 self.old_role = old_role = self._F2R[old_flags]
5952 # Check for ineffective changes
5953 for attr in self._FLAGS:
5954 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
5955 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5956 setattr(self.op, attr, None)
5958 # Past this point, any flag change to False means a transition
5959 # away from the respective state, as only real changes are kept
5961 # TODO: We might query the real power state if it supports OOB
5962 if _SupportsOob(self.cfg, node):
5963 if self.op.offline is False and not (node.powered or
5964 self.op.powered is True):
5965 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5966 " offline status can be reset") %
5967 self.op.node_name, errors.ECODE_STATE)
5968 elif self.op.powered is not None:
5969 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5970 " as it does not support out-of-band"
5971 " handling") % self.op.node_name,
5974 # If we're being de-offlined/drained, we'll promote ourselves to MC if needed
5975 if (self.op.drained is False or self.op.offline is False or
5976 (self.op.master_capable and not node.master_capable)):
5977 if _DecideSelfPromotion(self):
5978 self.op.master_candidate = True
5979 self.LogInfo("Auto-promoting node to master candidate")
5981 # If we're no longer master capable, we'll demote ourselves from MC
5982 if self.op.master_capable is False and node.master_candidate:
5983 self.LogInfo("Demoting from master candidate")
5984 self.op.master_candidate = False
5987 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5988 if self.op.master_candidate:
5989 new_role = self._ROLE_CANDIDATE
5990 elif self.op.drained:
5991 new_role = self._ROLE_DRAINED
5992 elif self.op.offline:
5993 new_role = self._ROLE_OFFLINE
5994 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5995 # False is still in new flags, which means we're un-setting (the current) flag
5997 new_role = self._ROLE_REGULAR
5998 else: # no new flags, nothing, keep old role
6001 self.new_role = new_role
6003 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6004 # Trying to transition out of offline status
6005 result = self.rpc.call_version([node.name])[node.name]
6007 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6008 " to report its version: %s" %
6009 (node.name, result.fail_msg),
6012 self.LogWarning("Transitioning node from offline to online state"
6013 " without using re-add. Please make sure the node"
6016 # When changing the secondary ip, verify if this is a single-homed to
6017 # multi-homed transition or vice versa, and apply the relevant restrictions
6019 if self.op.secondary_ip:
6020 # Ok even without locking, because this can't be changed by any LU
6021 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6022 master_singlehomed = master.secondary_ip == master.primary_ip
6023 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6024 if self.op.force and node.name == master.name:
6025 self.LogWarning("Transitioning from single-homed to multi-homed"
6026 " cluster. All nodes will require a secondary ip.")
6028 raise errors.OpPrereqError("Changing the secondary ip on a"
6029 " single-homed cluster requires the"
6030 " --force option to be passed, and the"
6031 " target node to be the master",
6033 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6034 if self.op.force and node.name == master.name:
6035 self.LogWarning("Transitioning from multi-homed to single-homed"
6036 " cluster. Secondary IPs will have to be removed.")
6038 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6039 " same as the primary IP on a multi-homed"
6040 " cluster, unless the --force option is"
6041 " passed, and the target node is the"
6042 " master", errors.ECODE_INVAL)
6044 assert not (frozenset(affected_instances) -
6045 self.owned_locks(locking.LEVEL_INSTANCE))
6048 if affected_instances:
6049 msg = ("Cannot change secondary IP address: offline node has"
6050 " instances (%s) configured to use it" %
6051 utils.CommaJoin(affected_instances.keys()))
6052 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6054 # On online nodes, check that no instances are running, and that
6055 # the node has the new ip and we can reach it.
6056 for instance in affected_instances.values():
6057 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6058 msg="cannot change secondary ip")
6060 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6061 if master.name != node.name:
6062 # check reachability from master secondary ip to new secondary ip
6063 if not netutils.TcpPing(self.op.secondary_ip,
6064 constants.DEFAULT_NODED_PORT,
6065 source=master.secondary_ip):
6066 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6067 " based ping to node daemon port",
6068 errors.ECODE_ENVIRON)
6070 if self.op.ndparams:
6071 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6072 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6073 self.new_ndparams = new_ndparams
6075 if self.op.hv_state:
6076 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6077 self.node.hv_state_static)
6079 if self.op.disk_state:
6080 self.new_disk_state = \
6081 _MergeAndVerifyDiskState(self.op.disk_state,
6082 self.node.disk_state_static)
6084 def Exec(self, feedback_fn):
6089 old_role = self.old_role
6090 new_role = self.new_role
6094 if self.op.ndparams:
6095 node.ndparams = self.new_ndparams
6097 if self.op.powered is not None:
6098 node.powered = self.op.powered
6100 if self.op.hv_state:
6101 node.hv_state_static = self.new_hv_state
6103 if self.op.disk_state:
6104 node.disk_state_static = self.new_disk_state
6106 for attr in ["master_capable", "vm_capable"]:
6107 val = getattr(self.op, attr)
6109 setattr(node, attr, val)
6110 result.append((attr, str(val)))
6112 if new_role != old_role:
6113 # Tell the node to demote itself, if no longer MC and not offline
6114 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6115 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6117 self.LogWarning("Node failed to demote itself: %s", msg)
6119 new_flags = self._R2F[new_role]
6120 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6122 result.append((desc, str(nf)))
6123 (node.master_candidate, node.drained, node.offline) = new_flags
6125 # we locked all nodes, we adjust the CP before updating this node
6127 _AdjustCandidatePool(self, [node.name])
6129 if self.op.secondary_ip:
6130 node.secondary_ip = self.op.secondary_ip
6131 result.append(("secondary_ip", self.op.secondary_ip))
6133 # this will trigger configuration file update, if needed
6134 self.cfg.Update(node, feedback_fn)
6136 # this will trigger job queue propagation or cleanup if the mc flag changed
6138 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6139 self.context.ReaddNode(node)
6144 class LUNodePowercycle(NoHooksLU):
6145 """Powercycles a node.
6150 def CheckArguments(self):
6151 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6152 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6153 raise errors.OpPrereqError("The node is the master and the force"
6154 " parameter was not set",
6157 def ExpandNames(self):
6158 """Locking for PowercycleNode.
6160 This is a last-resort option and shouldn't block on other
6161 jobs. Therefore, we grab no locks.
6164 self.needed_locks = {}
6166 def Exec(self, feedback_fn):
6170 result = self.rpc.call_node_powercycle(self.op.node_name,
6171 self.cfg.GetHypervisorType())
6172 result.Raise("Failed to schedule the reboot")
6173 return result.payload
6176 class LUClusterQuery(NoHooksLU):
6177 """Query cluster configuration.
6182 def ExpandNames(self):
6183 self.needed_locks = {}
6185 def Exec(self, feedback_fn):
6186 """Return cluster config.
6189 cluster = self.cfg.GetClusterInfo()
6192 # Filter just for enabled hypervisors
6193 for os_name, hv_dict in cluster.os_hvp.items():
6194 os_hvp[os_name] = {}
6195 for hv_name, hv_params in hv_dict.items():
6196 if hv_name in cluster.enabled_hypervisors:
6197 os_hvp[os_name][hv_name] = hv_params
6199 # Convert ip_family to ip_version
6200 primary_ip_version = constants.IP4_VERSION
6201 if cluster.primary_ip_family == netutils.IP6Address.family:
6202 primary_ip_version = constants.IP6_VERSION
6205 "software_version": constants.RELEASE_VERSION,
6206 "protocol_version": constants.PROTOCOL_VERSION,
6207 "config_version": constants.CONFIG_VERSION,
6208 "os_api_version": max(constants.OS_API_VERSIONS),
6209 "export_version": constants.EXPORT_VERSION,
6210 "architecture": runtime.GetArchInfo(),
6211 "name": cluster.cluster_name,
6212 "master": cluster.master_node,
6213 "default_hypervisor": cluster.primary_hypervisor,
6214 "enabled_hypervisors": cluster.enabled_hypervisors,
6215 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6216 for hypervisor_name in cluster.enabled_hypervisors]),
6218 "beparams": cluster.beparams,
6219 "osparams": cluster.osparams,
6220 "ipolicy": cluster.ipolicy,
6221 "nicparams": cluster.nicparams,
6222 "ndparams": cluster.ndparams,
6223 "diskparams": cluster.diskparams,
6224 "candidate_pool_size": cluster.candidate_pool_size,
6225 "master_netdev": cluster.master_netdev,
6226 "master_netmask": cluster.master_netmask,
6227 "use_external_mip_script": cluster.use_external_mip_script,
6228 "volume_group_name": cluster.volume_group_name,
6229 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6230 "file_storage_dir": cluster.file_storage_dir,
6231 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6232 "maintain_node_health": cluster.maintain_node_health,
6233 "ctime": cluster.ctime,
6234 "mtime": cluster.mtime,
6235 "uuid": cluster.uuid,
6236 "tags": list(cluster.GetTags()),
6237 "uid_pool": cluster.uid_pool,
6238 "default_iallocator": cluster.default_iallocator,
6239 "reserved_lvs": cluster.reserved_lvs,
6240 "primary_ip_version": primary_ip_version,
6241 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6242 "hidden_os": cluster.hidden_os,
6243 "blacklisted_os": cluster.blacklisted_os,
6249 class LUClusterConfigQuery(NoHooksLU):
6250 """Return configuration values.
6255 def CheckArguments(self):
6256 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6258 def ExpandNames(self):
6259 self.cq.ExpandNames(self)
6261 def DeclareLocks(self, level):
6262 self.cq.DeclareLocks(self, level)
6264 def Exec(self, feedback_fn):
6265 result = self.cq.OldStyleQuery(self)
6267 assert len(result) == 1
6272 class _ClusterQuery(_QueryBase):
6273 FIELDS = query.CLUSTER_FIELDS
6275 #: Do not sort (there is only one item)
6278 def ExpandNames(self, lu):
6279 lu.needed_locks = {}
6281 # The following variables interact with _QueryBase._GetNames
6282 self.wanted = locking.ALL_SET
6283 self.do_locking = self.use_locking
6286 raise errors.OpPrereqError("Can not use locking for cluster queries",
6289 def DeclareLocks(self, lu, level):
6292 def _GetQueryData(self, lu):
6293 """Computes the list of nodes and their attributes.
6296 # Locking is not used
6297 assert not (compat.any(lu.glm.is_owned(level)
6298 for level in locking.LEVELS
6299 if level != locking.LEVEL_CLUSTER) or
6300 self.do_locking or self.use_locking)
6302 if query.CQ_CONFIG in self.requested_data:
6303 cluster = lu.cfg.GetClusterInfo()
6305 cluster = NotImplemented
6307 if query.CQ_QUEUE_DRAINED in self.requested_data:
6308 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6310 drain_flag = NotImplemented
6312 if query.CQ_WATCHER_PAUSE in self.requested_data:
6313 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6315 watcher_pause = NotImplemented
6317 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
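# Note (editor's addition): any field group not listed in requested_data is
# passed as NotImplemented above, so the returned query.ClusterQueryData only
# carries the data the caller actually asked for.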
6320 class LUInstanceActivateDisks(NoHooksLU):
6321 """Bring up an instance's disks.
6326 def ExpandNames(self):
6327 self._ExpandAndLockInstance()
6328 self.needed_locks[locking.LEVEL_NODE] = []
6329 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6331 def DeclareLocks(self, level):
6332 if level == locking.LEVEL_NODE:
6333 self._LockInstancesNodes()
6335 def CheckPrereq(self):
6336 """Check prerequisites.
6338 This checks that the instance is in the cluster.
6341 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6342 assert self.instance is not None, \
6343 "Cannot retrieve locked instance %s" % self.op.instance_name
6344 _CheckNodeOnline(self, self.instance.primary_node)
6346 def Exec(self, feedback_fn):
6347 """Activate the disks.
6350 disks_ok, disks_info = \
6351 _AssembleInstanceDisks(self, self.instance,
6352 ignore_size=self.op.ignore_size)
6354 raise errors.OpExecError("Cannot activate block devices")
6356 if self.op.wait_for_sync:
6357 if not _WaitForSync(self, self.instance):
6358 raise errors.OpExecError("Some disks of the instance are degraded!")
6363 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6365 """Prepare the block devices for an instance.
6367 This sets up the block devices on all nodes.
6369 @type lu: L{LogicalUnit}
6370 @param lu: the logical unit on whose behalf we execute
6371 @type instance: L{objects.Instance}
6372 @param instance: the instance for whose disks we assemble
6373 @type disks: list of L{objects.Disk} or None
6374 @param disks: which disks to assemble (or all, if None)
6375 @type ignore_secondaries: boolean
6376 @param ignore_secondaries: if true, errors on secondary nodes
6377 won't result in an error return from the function
6378 @type ignore_size: boolean
6379 @param ignore_size: if true, the current known size of the disk
6380 will not be used during the disk activation, useful for cases
6381 when the size is wrong
6382 @return: False if the operation failed, otherwise a list of
6383 (host, instance_visible_name, node_visible_name)
6384 with the mapping from node devices to instance devices
6389 iname = instance.name
6390 disks = _ExpandCheckDisks(instance, disks)
6392 # With the two-pass mechanism we try to reduce the window of
6393 # opportunity for the race condition of switching DRBD to primary
6394 # before handshaking occurred, but we do not eliminate it
6396 # The proper fix would be to wait (with some limits) until the
6397 # connection has been made and drbd transitions from WFConnection
6398 # into any other network-connected state (Connected, SyncTarget, SyncSource, etc.)
6401 # 1st pass, assemble on all nodes in secondary mode
6402 for idx, inst_disk in enumerate(disks):
6403 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6405 node_disk = node_disk.Copy()
6406 node_disk.UnsetSize()
6407 lu.cfg.SetDiskID(node_disk, node)
6408 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6410 msg = result.fail_msg
6412 is_offline_secondary = (node in instance.secondary_nodes and
6414 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6415 " (is_primary=False, pass=1): %s",
6416 inst_disk.iv_name, node, msg)
6417 if not (ignore_secondaries or is_offline_secondary):
6420 # FIXME: race condition on drbd migration to primary
6422 # 2nd pass, do only the primary node
6423 for idx, inst_disk in enumerate(disks):
6426 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6427 if node != instance.primary_node:
6430 node_disk = node_disk.Copy()
6431 node_disk.UnsetSize()
6432 lu.cfg.SetDiskID(node_disk, node)
6433 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6435 msg = result.fail_msg
6437 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6438 " (is_primary=True, pass=2): %s",
6439 inst_disk.iv_name, node, msg)
6442 dev_path = result.payload
6444 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6446 # leave the disks configured for the primary node
6447 # this is a workaround that would be fixed better by
6448 # improving the logical/physical id handling
6450 lu.cfg.SetDiskID(disk, instance.primary_node)
6452 return disks_ok, device_info
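# Usage sketch (editor's addition): callers typically unpack the result as
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
# where device_info is a list of (primary_node, iv_name, device_path) tuples,
# as done by LUInstanceActivateDisks.Exec above.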
6455 def _StartInstanceDisks(lu, instance, force):
6456 """Start the disks of an instance.
6459 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6460 ignore_secondaries=force)
6462 _ShutdownInstanceDisks(lu, instance)
6463 if force is not None and not force:
6464 lu.proc.LogWarning("", hint="If the message above refers to a"
6466 " you can retry the operation using '--force'.")
6467 raise errors.OpExecError("Disk consistency error")
6470 class LUInstanceDeactivateDisks(NoHooksLU):
6471 """Shutdown an instance's disks.
6476 def ExpandNames(self):
6477 self._ExpandAndLockInstance()
6478 self.needed_locks[locking.LEVEL_NODE] = []
6479 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6481 def DeclareLocks(self, level):
6482 if level == locking.LEVEL_NODE:
6483 self._LockInstancesNodes()
6485 def CheckPrereq(self):
6486 """Check prerequisites.
6488 This checks that the instance is in the cluster.
6491 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6492 assert self.instance is not None, \
6493 "Cannot retrieve locked instance %s" % self.op.instance_name
6495 def Exec(self, feedback_fn):
6496 """Deactivate the disks
6499 instance = self.instance
6501 _ShutdownInstanceDisks(self, instance)
6503 _SafeShutdownInstanceDisks(self, instance)
6506 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6507 """Shutdown block devices of an instance.
6509 This function checks if an instance is running, before calling
6510 _ShutdownInstanceDisks.
6513 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6514 _ShutdownInstanceDisks(lu, instance, disks=disks)
6517 def _ExpandCheckDisks(instance, disks):
6518 """Return the instance disks selected by the disks list
6520 @type disks: list of L{objects.Disk} or None
6521 @param disks: selected disks
6522 @rtype: list of L{objects.Disk}
6523 @return: selected instance disks to act on
6527 return instance.disks
6529 if not set(disks).issubset(instance.disks):
6530 raise errors.ProgrammerError("Can only act on disks belonging to the target instance")
6535 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6536 """Shutdown block devices of an instance.
6538 This does the shutdown on all nodes of the instance.
6540 If ignore_primary is false, errors on the primary node are ignored.
6545 disks = _ExpandCheckDisks(instance, disks)
6548 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6549 lu.cfg.SetDiskID(top_disk, node)
6550 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6551 msg = result.fail_msg
6553 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6554 disk.iv_name, node, msg)
6555 if ((node == instance.primary_node and not ignore_primary) or
6556 (node != instance.primary_node and not result.offline)):
6561 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6562 """Checks if a node has enough free memory.
6564 This function checks if a given node has the needed amount of free
6565 memory. In case the node has less memory or we cannot get the
6566 information from the node, this function raises an OpPrereqError
6569 @type lu: C{LogicalUnit}
6570 @param lu: a logical unit from which we get configuration data
6572 @param node: the node to check
6573 @type reason: C{str}
6574 @param reason: string to use in the error message
6575 @type requested: C{int}
6576 @param requested: the amount of memory in MiB to check for
6577 @type hypervisor_name: C{str}
6578 @param hypervisor_name: the hypervisor to ask for memory stats
6580 @return: node current free memory
6581 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6582 we cannot check the node
6585 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6586 nodeinfo[node].Raise("Can't get data from node %s" % node,
6587 prereq=True, ecode=errors.ECODE_ENVIRON)
6588 (_, _, (hv_info, )) = nodeinfo[node].payload
6590 free_mem = hv_info.get("memory_free", None)
6591 if not isinstance(free_mem, int):
6592 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6593 " was '%s'" % (node, free_mem),
6594 errors.ECODE_ENVIRON)
6595 if requested > free_mem:
6596 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6597 " needed %s MiB, available %s MiB" %
6598 (node, reason, requested, free_mem),
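# Usage sketch (editor's addition), mirroring the call made later in
# LUInstanceStartup.CheckPrereq:
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MINMEM], instance.hypervisor)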
6603 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6604 """Checks if nodes have enough free disk space in the all VGs.
6606 This function check if all given nodes have the needed amount of
6607 free disk. In case any node has less disk or we cannot get the
6608 information from the node, this function raise an OpPrereqError
6611 @type lu: C{LogicalUnit}
6612 @param lu: a logical unit from which we get configuration data
6613 @type nodenames: C{list}
6614 @param nodenames: the list of node names to check
6615 @type req_sizes: C{dict}
6616 @param req_sizes: the hash of vg and corresponding amount of disk in MiB to check for
6618 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6619 or we cannot check the node
6622 for vg, req_size in req_sizes.items():
6623 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
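# Example (editor's addition): req_sizes maps volume group names to the disk
# space needed in that VG; a hypothetical {"xenvg": 2048} asks every node in
# nodenames for 2048 MiB of free space in VG "xenvg" (the VG name is only an
# illustrative value here).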
6626 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6627 """Checks if nodes have enough free disk space in the specified VG.
6629 This function checks if all given nodes have the needed amount of
6630 free disk. In case any node has less disk or we cannot get the
6631 information from the node, this function raises an OpPrereqError
6634 @type lu: C{LogicalUnit}
6635 @param lu: a logical unit from which we get configuration data
6636 @type nodenames: C{list}
6637 @param nodenames: the list of node names to check
6639 @param vg: the volume group to check
6640 @type requested: C{int}
6641 @param requested: the amount of disk in MiB to check for
6642 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6643 or we cannot check the node
6646 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6647 for node in nodenames:
6648 info = nodeinfo[node]
6649 info.Raise("Cannot get current information from node %s" % node,
6650 prereq=True, ecode=errors.ECODE_ENVIRON)
6651 (_, (vg_info, ), _) = info.payload
6652 vg_free = vg_info.get("vg_free", None)
6653 if not isinstance(vg_free, int):
6654 raise errors.OpPrereqError("Can't compute free disk space on node"
6655 " %s for vg %s, result was '%s'" %
6656 (node, vg, vg_free), errors.ECODE_ENVIRON)
6657 if requested > vg_free:
6658 raise errors.OpPrereqError("Not enough disk space on target node %s"
6659 " vg %s: required %d MiB, available %d MiB" %
6660 (node, vg, requested, vg_free),
6664 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6665 """Checks if nodes have enough physical CPUs
6667 This function checks if all given nodes have the needed number of
6668 physical CPUs. In case any node has fewer CPUs or we cannot get the
6669 information from the node, this function raises an OpPrereqError
6672 @type lu: C{LogicalUnit}
6673 @param lu: a logical unit from which we get configuration data
6674 @type nodenames: C{list}
6675 @param nodenames: the list of node names to check
6676 @type requested: C{int}
6677 @param requested: the minimum acceptable number of physical CPUs
6678 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6679 or we cannot check the node
6682 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6683 for node in nodenames:
6684 info = nodeinfo[node]
6685 info.Raise("Cannot get current information from node %s" % node,
6686 prereq=True, ecode=errors.ECODE_ENVIRON)
6687 (_, _, (hv_info, )) = info.payload
6688 num_cpus = hv_info.get("cpu_total", None)
6689 if not isinstance(num_cpus, int):
6690 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6691 " on node %s, result was '%s'" %
6692 (node, num_cpus), errors.ECODE_ENVIRON)
6693 if requested > num_cpus:
6694 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6695 "required" % (node, num_cpus, requested),
6699 class LUInstanceStartup(LogicalUnit):
6700 """Starts an instance.
6703 HPATH = "instance-start"
6704 HTYPE = constants.HTYPE_INSTANCE
6707 def CheckArguments(self):
6709 if self.op.beparams:
6710 # fill the beparams dict
6711 objects.UpgradeBeParams(self.op.beparams)
6712 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6714 def ExpandNames(self):
6715 self._ExpandAndLockInstance()
6716 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6718 def DeclareLocks(self, level):
6719 if level == locking.LEVEL_NODE_RES:
6720 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6722 def BuildHooksEnv(self):
6725 This runs on master, primary and secondary nodes of the instance.
6729 "FORCE": self.op.force,
6732 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6736 def BuildHooksNodes(self):
6737 """Build hooks nodes.
6740 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6743 def CheckPrereq(self):
6744 """Check prerequisites.
6746 This checks that the instance is in the cluster.
6749 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6750 assert self.instance is not None, \
6751 "Cannot retrieve locked instance %s" % self.op.instance_name
6754 if self.op.hvparams:
6755 # check hypervisor parameter syntax (locally)
6756 cluster = self.cfg.GetClusterInfo()
6757 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6758 filled_hvp = cluster.FillHV(instance)
6759 filled_hvp.update(self.op.hvparams)
6760 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6761 hv_type.CheckParameterSyntax(filled_hvp)
6762 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6764 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6766 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6768 if self.primary_offline and self.op.ignore_offline_nodes:
6769 self.proc.LogWarning("Ignoring offline primary node")
6771 if self.op.hvparams or self.op.beparams:
6772 self.proc.LogWarning("Overridden parameters are ignored")
6774 _CheckNodeOnline(self, instance.primary_node)
6776 bep = self.cfg.GetClusterInfo().FillBE(instance)
6777 bep.update(self.op.beparams)
6779 # check bridges existence
6780 _CheckInstanceBridgesExist(self, instance)
6782 remote_info = self.rpc.call_instance_info(instance.primary_node,
6784 instance.hypervisor)
6785 remote_info.Raise("Error checking node %s" % instance.primary_node,
6786 prereq=True, ecode=errors.ECODE_ENVIRON)
6787 if not remote_info.payload: # not running already
6788 _CheckNodeFreeMemory(self, instance.primary_node,
6789 "starting instance %s" % instance.name,
6790 bep[constants.BE_MINMEM], instance.hypervisor)
6792 def Exec(self, feedback_fn):
6793 """Start the instance.
6796 instance = self.instance
6797 force = self.op.force
6799 if not self.op.no_remember:
6800 self.cfg.MarkInstanceUp(instance.name)
6802 if self.primary_offline:
6803 assert self.op.ignore_offline_nodes
6804 self.proc.LogInfo("Primary node offline, marked instance as started")
6806 node_current = instance.primary_node
6808 _StartInstanceDisks(self, instance, force)
6811 self.rpc.call_instance_start(node_current,
6812 (instance, self.op.hvparams,
6814 self.op.startup_paused)
6815 msg = result.fail_msg
6817 _ShutdownInstanceDisks(self, instance)
6818 raise errors.OpExecError("Could not start instance: %s" % msg)
6821 class LUInstanceReboot(LogicalUnit):
6822 """Reboot an instance.
6825 HPATH = "instance-reboot"
6826 HTYPE = constants.HTYPE_INSTANCE
6829 def ExpandNames(self):
6830 self._ExpandAndLockInstance()
6832 def BuildHooksEnv(self):
6835 This runs on master, primary and secondary nodes of the instance.
6839 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6840 "REBOOT_TYPE": self.op.reboot_type,
6841 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6844 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6848 def BuildHooksNodes(self):
6849 """Build hooks nodes.
6852 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6855 def CheckPrereq(self):
6856 """Check prerequisites.
6858 This checks that the instance is in the cluster.
6861 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6862 assert self.instance is not None, \
6863 "Cannot retrieve locked instance %s" % self.op.instance_name
6864 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6865 _CheckNodeOnline(self, instance.primary_node)
6867 # check bridges existence
6868 _CheckInstanceBridgesExist(self, instance)
6870 def Exec(self, feedback_fn):
6871 """Reboot the instance.
6874 instance = self.instance
6875 ignore_secondaries = self.op.ignore_secondaries
6876 reboot_type = self.op.reboot_type
6878 remote_info = self.rpc.call_instance_info(instance.primary_node,
6880 instance.hypervisor)
6881 remote_info.Raise("Error checking node %s" % instance.primary_node)
6882 instance_running = bool(remote_info.payload)
6884 node_current = instance.primary_node
6886 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6887 constants.INSTANCE_REBOOT_HARD]:
6888 for disk in instance.disks:
6889 self.cfg.SetDiskID(disk, node_current)
6890 result = self.rpc.call_instance_reboot(node_current, instance,
6892 self.op.shutdown_timeout)
6893 result.Raise("Could not reboot instance")
6895 if instance_running:
6896 result = self.rpc.call_instance_shutdown(node_current, instance,
6897 self.op.shutdown_timeout)
6898 result.Raise("Could not shutdown instance for full reboot")
6899 _ShutdownInstanceDisks(self, instance)
6901 self.LogInfo("Instance %s was already stopped, starting now",
6903 _StartInstanceDisks(self, instance, ignore_secondaries)
6904 result = self.rpc.call_instance_start(node_current,
6905 (instance, None, None), False)
6906 msg = result.fail_msg
6908 _ShutdownInstanceDisks(self, instance)
6909 raise errors.OpExecError("Could not start instance for"
6910 " full reboot: %s" % msg)
6912 self.cfg.MarkInstanceUp(instance.name)
6915 class LUInstanceShutdown(LogicalUnit):
6916 """Shutdown an instance.
6919 HPATH = "instance-stop"
6920 HTYPE = constants.HTYPE_INSTANCE
6923 def ExpandNames(self):
6924 self._ExpandAndLockInstance()
6926 def BuildHooksEnv(self):
6929 This runs on master, primary and secondary nodes of the instance.
6932 env = _BuildInstanceHookEnvByObject(self, self.instance)
6933 env["TIMEOUT"] = self.op.timeout
6936 def BuildHooksNodes(self):
6937 """Build hooks nodes.
6940 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6943 def CheckPrereq(self):
6944 """Check prerequisites.
6946 This checks that the instance is in the cluster.
6949 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6950 assert self.instance is not None, \
6951 "Cannot retrieve locked instance %s" % self.op.instance_name
6953 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6955 self.primary_offline = \
6956 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6958 if self.primary_offline and self.op.ignore_offline_nodes:
6959 self.proc.LogWarning("Ignoring offline primary node")
6961 _CheckNodeOnline(self, self.instance.primary_node)
6963 def Exec(self, feedback_fn):
6964 """Shutdown the instance.
6967 instance = self.instance
6968 node_current = instance.primary_node
6969 timeout = self.op.timeout
6971 if not self.op.no_remember:
6972 self.cfg.MarkInstanceDown(instance.name)
6974 if self.primary_offline:
6975 assert self.op.ignore_offline_nodes
6976 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6978 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6979 msg = result.fail_msg
6981 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6983 _ShutdownInstanceDisks(self, instance)
6986 class LUInstanceReinstall(LogicalUnit):
6987 """Reinstall an instance.
6990 HPATH = "instance-reinstall"
6991 HTYPE = constants.HTYPE_INSTANCE
6994 def ExpandNames(self):
6995 self._ExpandAndLockInstance()
6997 def BuildHooksEnv(self):
7000 This runs on master, primary and secondary nodes of the instance.
7003 return _BuildInstanceHookEnvByObject(self, self.instance)
7005 def BuildHooksNodes(self):
7006 """Build hooks nodes.
7009 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7012 def CheckPrereq(self):
7013 """Check prerequisites.
7015 This checks that the instance is in the cluster and is not running.
7018 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7019 assert instance is not None, \
7020 "Cannot retrieve locked instance %s" % self.op.instance_name
7021 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7022 " offline, cannot reinstall")
7024 if instance.disk_template == constants.DT_DISKLESS:
7025 raise errors.OpPrereqError("Instance '%s' has no disks" %
7026 self.op.instance_name,
7028 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7030 if self.op.os_type is not None:
7032 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7033 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7034 instance_os = self.op.os_type
7036 instance_os = instance.os
7038 nodelist = list(instance.all_nodes)
7040 if self.op.osparams:
7041 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7042 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7043 self.os_inst = i_osdict # the new dict (without defaults)
7047 self.instance = instance
7049 def Exec(self, feedback_fn):
7050 """Reinstall the instance.
7053 inst = self.instance
7055 if self.op.os_type is not None:
7056 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7057 inst.os = self.op.os_type
7058 # Write to configuration
7059 self.cfg.Update(inst, feedback_fn)
7061 _StartInstanceDisks(self, inst, None)
7063 feedback_fn("Running the instance OS create scripts...")
7064 # FIXME: pass debug option from opcode to backend
7065 result = self.rpc.call_instance_os_add(inst.primary_node,
7066 (inst, self.os_inst), True,
7067 self.op.debug_level)
7068 result.Raise("Could not install OS for instance %s on node %s" %
7069 (inst.name, inst.primary_node))
7071 _ShutdownInstanceDisks(self, inst)
7074 class LUInstanceRecreateDisks(LogicalUnit):
7075 """Recreate an instance's missing disks.
7078 HPATH = "instance-recreate-disks"
7079 HTYPE = constants.HTYPE_INSTANCE
7082 _MODIFYABLE = frozenset([
7083 constants.IDISK_SIZE,
7084 constants.IDISK_MODE,
7087 # New or changed disk parameters may have different semantics
7088 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7089 constants.IDISK_ADOPT,
7091 # TODO: Implement support for changing the VG while recreating
7093 constants.IDISK_METAVG,
7096 def _RunAllocator(self):
7097 """Run the allocator based on input opcode.
7100 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7103 # The allocator should actually run in "relocate" mode, but current
7104 # allocators don't support relocating all the nodes of an instance at
7105 # the same time. As a workaround we use "allocate" mode, but this is
7106 # suboptimal for two reasons:
7107 # - The instance name passed to the allocator is present in the list of
7108 # existing instances, so there could be a conflict within the
7109 # internal structures of the allocator. This doesn't happen with the
7110 # current allocators, but it's a liability.
7111 # - The allocator counts the resources used by the instance twice: once
7112 # because the instance exists already, and once because it tries to
7113 # allocate a new instance.
7114 # The allocator could choose some of the nodes on which the instance is
7115 # running, but that's not a problem. If the instance nodes are broken,
7116 # they should already be marked as drained or offline, and hence
7117 # skipped by the allocator. If instance disks have been lost for other
7118 # reasons, then recreating the disks on the same nodes should be fine.
7119 ial = IAllocator(self.cfg, self.rpc,
7120 mode=constants.IALLOCATOR_MODE_ALLOC,
7121 name=self.op.instance_name,
7122 disk_template=self.instance.disk_template,
7123 tags=list(self.instance.GetTags()),
7124 os=self.instance.os,
7126 vcpus=be_full[constants.BE_VCPUS],
7127 memory=be_full[constants.BE_MAXMEM],
7128 spindle_use=be_full[constants.BE_SPINDLE_USE],
7129 disks=[{constants.IDISK_SIZE: d.size,
7130 constants.IDISK_MODE: d.mode}
7131 for d in self.instance.disks],
7132 hypervisor=self.instance.hypervisor)
7134 assert ial.required_nodes == len(self.instance.all_nodes)
7136 ial.Run(self.op.iallocator)
7139 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7140 " %s" % (self.op.iallocator, ial.info),
7143 if len(ial.result) != ial.required_nodes:
7144 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7145 " of nodes (%s), required %s" %
7146 (self.op.iallocator, len(ial.result),
7147 ial.required_nodes), errors.ECODE_FAULT)
7149 self.op.nodes = ial.result
7150 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7151 self.op.instance_name, self.op.iallocator,
7152 utils.CommaJoin(ial.result))
7154 def CheckArguments(self):
7155 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7156 # Normalize and convert deprecated list of disk indices
7157 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
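# Example (editor's addition): a deprecated request such as disks=[2, 0] is
# normalized above to [(0, {}), (2, {})], i.e. sorted (index, params) pairs
# with empty parameter overrides.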
7159 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7161 raise errors.OpPrereqError("Some disks have been specified more than"
7162 " once: %s" % utils.CommaJoin(duplicates),
7165 if self.op.iallocator and self.op.nodes:
7166 raise errors.OpPrereqError("Give either the iallocator or the new"
7167 " nodes, not both", errors.ECODE_INVAL)
7169 for (idx, params) in self.op.disks:
7170 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7171 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7173 raise errors.OpPrereqError("Parameters for disk %s try to change"
7174 " unmodifyable parameter(s): %s" %
7175 (idx, utils.CommaJoin(unsupported)),
7178 def ExpandNames(self):
7179 self._ExpandAndLockInstance()
7180 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7182 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7183 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7185 self.needed_locks[locking.LEVEL_NODE] = []
7186 if self.op.iallocator:
7187 # iallocator will select a new node in the same group
7188 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7189 self.needed_locks[locking.LEVEL_NODE_RES] = []
7191 def DeclareLocks(self, level):
7192 if level == locking.LEVEL_NODEGROUP:
7193 assert self.op.iallocator is not None
7194 assert not self.op.nodes
7195 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7196 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7197 # Lock the primary group used by the instance optimistically; this
7198 # requires going via the node before it's locked, requiring
7199 # verification later on
7200 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7201 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7203 elif level == locking.LEVEL_NODE:
7204 # If an allocator is used, then we lock all the nodes in the current
7205 # instance group, as we don't know yet which ones will be selected;
7206 # if we replace the nodes without using an allocator, we only need to
7207 # lock the old primary for doing RPCs (FIXME: we don't lock nodes for
7208 # RPC anymore), otherwise we need to lock all the instance nodes for disk recreation
7210 if self.op.iallocator:
7211 assert not self.op.nodes
7212 assert not self.needed_locks[locking.LEVEL_NODE]
7213 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7215 # Lock member nodes of the group of the primary node
7216 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7217 self.needed_locks[locking.LEVEL_NODE].extend(
7218 self.cfg.GetNodeGroup(group_uuid).members)
7220 primary_only = bool(self.op.nodes)
7221 self._LockInstancesNodes(primary_only=primary_only)
7222 elif level == locking.LEVEL_NODE_RES:
7224 self.needed_locks[locking.LEVEL_NODE_RES] = \
7225 self.needed_locks[locking.LEVEL_NODE][:]
7227 def BuildHooksEnv(self):
7230 This runs on master, primary and secondary nodes of the instance.
7233 return _BuildInstanceHookEnvByObject(self, self.instance)
7235 def BuildHooksNodes(self):
7236 """Build hooks nodes.
7239 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7242 def CheckPrereq(self):
7243 """Check prerequisites.
7245 This checks that the instance is in the cluster and is not running.
7248 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7249 assert instance is not None, \
7250 "Cannot retrieve locked instance %s" % self.op.instance_name
7252 if len(self.op.nodes) != len(instance.all_nodes):
7253 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7254 " %d replacement nodes were specified" %
7255 (instance.name, len(instance.all_nodes),
7256 len(self.op.nodes)),
7258 assert instance.disk_template != constants.DT_DRBD8 or \
7259 len(self.op.nodes) == 2
7260 assert instance.disk_template != constants.DT_PLAIN or \
7261 len(self.op.nodes) == 1
7262 primary_node = self.op.nodes[0]
7264 primary_node = instance.primary_node
7265 if not self.op.iallocator:
7266 _CheckNodeOnline(self, primary_node)
7268 if instance.disk_template == constants.DT_DISKLESS:
7269 raise errors.OpPrereqError("Instance '%s' has no disks" %
7270 self.op.instance_name, errors.ECODE_INVAL)
7272 # Verify if node group locks are still correct
7273 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7275 # Node group locks are acquired only for the primary node (and only
7276 # when the allocator is used)
7277 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7280 # if we replace nodes *and* the old primary is offline, we don't check the instance state
7282 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7283 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7284 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7285 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7286 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7287 msg="cannot recreate disks")
7290 self.disks = dict(self.op.disks)
7292 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7294 maxidx = max(self.disks.keys())
7295 if maxidx >= len(instance.disks):
7296 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7299 if ((self.op.nodes or self.op.iallocator) and
7300 sorted(self.disks.keys()) != range(len(instance.disks))):
7301 raise errors.OpPrereqError("Can't recreate disks partially and"
7302 " change the nodes at the same time",
7305 self.instance = instance
7307 if self.op.iallocator:
7308 self._RunAllocator()
7310 # Release unneeded node and node resource locks
7311 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7312 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7314 def Exec(self, feedback_fn):
7315 """Recreate the disks.
7318 instance = self.instance
7320 assert (self.owned_locks(locking.LEVEL_NODE) ==
7321 self.owned_locks(locking.LEVEL_NODE_RES))
7324 mods = [] # keeps track of needed changes
7326 for idx, disk in enumerate(instance.disks):
7328 changes = self.disks[idx]
7330 # Disk should not be recreated
7334 # update secondaries for disks, if needed
7335 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7336 # need to update the nodes and minors
7337 assert len(self.op.nodes) == 2
7338 assert len(disk.logical_id) == 6 # otherwise disk internals are broken
7340 (_, _, old_port, _, _, old_secret) = disk.logical_id
7341 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7342 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7343 new_minors[0], new_minors[1], old_secret)
7344 assert len(disk.logical_id) == len(new_id)
7348 mods.append((idx, new_id, changes))
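# Note (editor's addition): for DRBD8 disks logical_id is the 6-tuple
# (node_a, node_b, port, minor_a, minor_b, secret); only the node names and
# the freshly allocated minors change here, the port and secret are kept.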
7350 # now that we have passed all asserts above, we can apply the mods
7351 # in a single run (to avoid partial changes)
7352 for idx, new_id, changes in mods:
7353 disk = instance.disks[idx]
7354 if new_id is not None:
7355 assert disk.dev_type == constants.LD_DRBD8
7356 disk.logical_id = new_id
7358 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7359 mode=changes.get(constants.IDISK_MODE, None))
7361 # change primary node, if needed
7363 instance.primary_node = self.op.nodes[0]
7364 self.LogWarning("Changing the instance's nodes, you will have to"
7365 " remove any disks left on the older nodes manually")
7368 self.cfg.Update(instance, feedback_fn)
7370 _CreateDisks(self, instance, to_skip=to_skip)
7373 class LUInstanceRename(LogicalUnit):
7374 """Rename an instance.
7377 HPATH = "instance-rename"
7378 HTYPE = constants.HTYPE_INSTANCE
7380 def CheckArguments(self):
7384 if self.op.ip_check and not self.op.name_check:
7385 # TODO: make the ip check more flexible and not depend on the name check
7386 raise errors.OpPrereqError("IP address check requires a name check",
7389 def BuildHooksEnv(self):
7392 This runs on master, primary and secondary nodes of the instance.
7395 env = _BuildInstanceHookEnvByObject(self, self.instance)
7396 env["INSTANCE_NEW_NAME"] = self.op.new_name
7399 def BuildHooksNodes(self):
7400 """Build hooks nodes.
7403 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7406 def CheckPrereq(self):
7407 """Check prerequisites.
7409 This checks that the instance is in the cluster and is not running.
7412 self.op.instance_name = _ExpandInstanceName(self.cfg,
7413 self.op.instance_name)
7414 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7415 assert instance is not None
7416 _CheckNodeOnline(self, instance.primary_node)
7417 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7418 msg="cannot rename")
7419 self.instance = instance
7421 new_name = self.op.new_name
7422 if self.op.name_check:
7423 hostname = netutils.GetHostname(name=new_name)
7424 if hostname.name != new_name:
7425 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7427 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7428 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7429 " same as given hostname '%s'") %
7430 (hostname.name, self.op.new_name),
7432 new_name = self.op.new_name = hostname.name
7433 if (self.op.ip_check and
7434 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7435 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7436 (hostname.ip, new_name),
7437 errors.ECODE_NOTUNIQUE)
7439 instance_list = self.cfg.GetInstanceList()
7440 if new_name in instance_list and new_name != instance.name:
7441 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7442 new_name, errors.ECODE_EXISTS)
7444 def Exec(self, feedback_fn):
7445 """Rename the instance.
7448 inst = self.instance
7449 old_name = inst.name
7451 rename_file_storage = False
7452 if (inst.disk_template in constants.DTS_FILEBASED and
7453 self.op.new_name != inst.name):
7454 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7455 rename_file_storage = True
7457 self.cfg.RenameInstance(inst.name, self.op.new_name)
7458 # Change the instance lock. This is definitely safe while we hold the BGL.
7459 # Otherwise the new lock would have to be added in acquired mode.
7461 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7462 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7464 # re-read the instance from the configuration after rename
7465 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7467 if rename_file_storage:
7468 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7469 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7470 old_file_storage_dir,
7471 new_file_storage_dir)
7472 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7473 " (but the instance has been renamed in Ganeti)" %
7474 (inst.primary_node, old_file_storage_dir,
7475 new_file_storage_dir))
7477 _StartInstanceDisks(self, inst, None)
7479 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7480 old_name, self.op.debug_level)
7481 msg = result.fail_msg
7483 msg = ("Could not run OS rename script for instance %s on node %s"
7484 " (but the instance has been renamed in Ganeti): %s" %
7485 (inst.name, inst.primary_node, msg))
7486 self.proc.LogWarning(msg)
7488 _ShutdownInstanceDisks(self, inst)
7493 class LUInstanceRemove(LogicalUnit):
7494 """Remove an instance.
7497 HPATH = "instance-remove"
7498 HTYPE = constants.HTYPE_INSTANCE
7501 def ExpandNames(self):
7502 self._ExpandAndLockInstance()
7503 self.needed_locks[locking.LEVEL_NODE] = []
7504 self.needed_locks[locking.LEVEL_NODE_RES] = []
7505 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7507 def DeclareLocks(self, level):
7508 if level == locking.LEVEL_NODE:
7509 self._LockInstancesNodes()
7510 elif level == locking.LEVEL_NODE_RES:
7512 self.needed_locks[locking.LEVEL_NODE_RES] = \
7513 self.needed_locks[locking.LEVEL_NODE][:]
7515 def BuildHooksEnv(self):
7518 This runs on master, primary and secondary nodes of the instance.
7521 env = _BuildInstanceHookEnvByObject(self, self.instance)
7522 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7525 def BuildHooksNodes(self):
7526 """Build hooks nodes.
7529 nl = [self.cfg.GetMasterNode()]
7530 nl_post = list(self.instance.all_nodes) + nl
7531 return (nl, nl_post)
7533 def CheckPrereq(self):
7534 """Check prerequisites.
7536 This checks that the instance is in the cluster.
7539 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7540 assert self.instance is not None, \
7541 "Cannot retrieve locked instance %s" % self.op.instance_name
7543 def Exec(self, feedback_fn):
7544 """Remove the instance.
7547 instance = self.instance
7548 logging.info("Shutting down instance %s on node %s",
7549 instance.name, instance.primary_node)
7551 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7552 self.op.shutdown_timeout)
7553 msg = result.fail_msg
7555 if self.op.ignore_failures:
7556 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7558 raise errors.OpExecError("Could not shutdown instance %s on"
7560 (instance.name, instance.primary_node, msg))
7562 assert (self.owned_locks(locking.LEVEL_NODE) ==
7563 self.owned_locks(locking.LEVEL_NODE_RES))
7564 assert not (set(instance.all_nodes) -
7565 self.owned_locks(locking.LEVEL_NODE)), \
7566 "Not owning correct locks"
7568 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7571 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7572 """Utility function to remove an instance.
7575 logging.info("Removing block devices for instance %s", instance.name)
7577 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7578 if not ignore_failures:
7579 raise errors.OpExecError("Can't remove instance's disks")
7580 feedback_fn("Warning: can't remove instance's disks")
7582 logging.info("Removing instance %s out of cluster config", instance.name)
7584 lu.cfg.RemoveInstance(instance.name)
7586 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7587 "Instance lock removal conflict"
7589 # Remove lock for the instance
7590 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7593 class LUInstanceQuery(NoHooksLU):
7594 """Logical unit for querying instances.
7597 # pylint: disable=W0142
7600 def CheckArguments(self):
7601 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7602 self.op.output_fields, self.op.use_locking)
7604 def ExpandNames(self):
7605 self.iq.ExpandNames(self)
7607 def DeclareLocks(self, level):
7608 self.iq.DeclareLocks(self, level)
7610 def Exec(self, feedback_fn):
7611 return self.iq.OldStyleQuery(self)
7614 class LUInstanceFailover(LogicalUnit):
7615 """Failover an instance.
7618 HPATH = "instance-failover"
7619 HTYPE = constants.HTYPE_INSTANCE
7622 def CheckArguments(self):
7623 """Check the arguments.
7626 self.iallocator = getattr(self.op, "iallocator", None)
7627 self.target_node = getattr(self.op, "target_node", None)
7629 def ExpandNames(self):
7630 self._ExpandAndLockInstance()
7632 if self.op.target_node is not None:
7633 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7635 self.needed_locks[locking.LEVEL_NODE] = []
7636 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7638 self.needed_locks[locking.LEVEL_NODE_RES] = []
7639 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7641 ignore_consistency = self.op.ignore_consistency
7642 shutdown_timeout = self.op.shutdown_timeout
7643 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7646 ignore_consistency=ignore_consistency,
7647 shutdown_timeout=shutdown_timeout,
7648 ignore_ipolicy=self.op.ignore_ipolicy)
7649 self.tasklets = [self._migrater]
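# The prerequisite checks and the actual work are delegated to the
# TLMigrateInstance tasklet (defined further below), which is shared with
# LUInstanceMigrate.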
7651 def DeclareLocks(self, level):
7652 if level == locking.LEVEL_NODE:
7653 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7654 if instance.disk_template in constants.DTS_EXT_MIRROR:
7655 if self.op.target_node is None:
7656 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7658 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7659 self.op.target_node]
7660 del self.recalculate_locks[locking.LEVEL_NODE]
7662 self._LockInstancesNodes()
7663 elif level == locking.LEVEL_NODE_RES:
7665 self.needed_locks[locking.LEVEL_NODE_RES] = \
7666 self.needed_locks[locking.LEVEL_NODE][:]
7668 def BuildHooksEnv(self):
7671 This runs on master, primary and secondary nodes of the instance.
7674 instance = self._migrater.instance
7675 source_node = instance.primary_node
7676 target_node = self.op.target_node
7678 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7679 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7680 "OLD_PRIMARY": source_node,
7681 "NEW_PRIMARY": target_node,
7684 if instance.disk_template in constants.DTS_INT_MIRROR:
7685 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7686 env["NEW_SECONDARY"] = source_node
7688 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7690 env.update(_BuildInstanceHookEnvByObject(self, instance))
7694 def BuildHooksNodes(self):
7695 """Build hooks nodes.
7698 instance = self._migrater.instance
7699 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7700 return (nl, nl + [instance.primary_node])
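# The first list is used for the pre-hooks, the second one (which also
# includes the current primary node) for the post-hooks.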
7703 class LUInstanceMigrate(LogicalUnit):
7704 """Migrate an instance.
7706 This is migration without shutting down, compared to the failover,
7707 which is done with shutdown.
7710 HPATH = "instance-migrate"
7711 HTYPE = constants.HTYPE_INSTANCE
7714 def ExpandNames(self):
7715 self._ExpandAndLockInstance()
7717 if self.op.target_node is not None:
7718 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7720 self.needed_locks[locking.LEVEL_NODE] = []
7721 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7723 self.needed_locks[locking.LEVEL_NODE_RES] = []
7724 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7727 TLMigrateInstance(self, self.op.instance_name,
7728 cleanup=self.op.cleanup,
7730 fallback=self.op.allow_failover,
7731 allow_runtime_changes=self.op.allow_runtime_changes,
7732 ignore_ipolicy=self.op.ignore_ipolicy)
7733 self.tasklets = [self._migrater]
7735 def DeclareLocks(self, level):
7736 if level == locking.LEVEL_NODE:
7737 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7738 if instance.disk_template in constants.DTS_EXT_MIRROR:
7739 if self.op.target_node is None:
7740 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7742 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7743 self.op.target_node]
7744 del self.recalculate_locks[locking.LEVEL_NODE]
7746 self._LockInstancesNodes()
7747 elif level == locking.LEVEL_NODE_RES:
7749 self.needed_locks[locking.LEVEL_NODE_RES] = \
7750 self.needed_locks[locking.LEVEL_NODE][:]
7752 def BuildHooksEnv(self):
7755 This runs on master, primary and secondary nodes of the instance.
7758 instance = self._migrater.instance
7759 source_node = instance.primary_node
7760 target_node = self.op.target_node
7761 env = _BuildInstanceHookEnvByObject(self, instance)
7763 "MIGRATE_LIVE": self._migrater.live,
7764 "MIGRATE_CLEANUP": self.op.cleanup,
7765 "OLD_PRIMARY": source_node,
7766 "NEW_PRIMARY": target_node,
7767 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7770 if instance.disk_template in constants.DTS_INT_MIRROR:
7771 env["OLD_SECONDARY"] = target_node
7772 env["NEW_SECONDARY"] = source_node
7774 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7778 def BuildHooksNodes(self):
7779 """Build hooks nodes.
7782 instance = self._migrater.instance
7783 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7784 return (nl, nl + [instance.primary_node])
7787 class LUInstanceMove(LogicalUnit):
7788 """Move an instance by data-copying.
7791 HPATH = "instance-move"
7792 HTYPE = constants.HTYPE_INSTANCE
7795 def ExpandNames(self):
7796 self._ExpandAndLockInstance()
7797 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7798 self.op.target_node = target_node
7799 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7800 self.needed_locks[locking.LEVEL_NODE_RES] = []
7801 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7803 def DeclareLocks(self, level):
7804 if level == locking.LEVEL_NODE:
7805 self._LockInstancesNodes(primary_only=True)
7806 elif level == locking.LEVEL_NODE_RES:
7808 self.needed_locks[locking.LEVEL_NODE_RES] = \
7809 self.needed_locks[locking.LEVEL_NODE][:]
7811 def BuildHooksEnv(self):
7814 This runs on master, primary and secondary nodes of the instance.
7818 "TARGET_NODE": self.op.target_node,
7819 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7821 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7824 def BuildHooksNodes(self):
7825 """Build hooks nodes.
7829 self.cfg.GetMasterNode(),
7830 self.instance.primary_node,
7831 self.op.target_node,
7835 def CheckPrereq(self):
7836 """Check prerequisites.
7838 This checks that the instance is in the cluster.
7841 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7842 assert self.instance is not None, \
7843 "Cannot retrieve locked instance %s" % self.op.instance_name
7845 node = self.cfg.GetNodeInfo(self.op.target_node)
7846 assert node is not None, \
7847 "Cannot retrieve locked node %s" % self.op.target_node
7849 self.target_node = target_node = node.name
7851 if target_node == instance.primary_node:
7852 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7853 (instance.name, target_node),
7856 bep = self.cfg.GetClusterInfo().FillBE(instance)
7858 for idx, dsk in enumerate(instance.disks):
7859 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7860 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7861 " cannot copy" % idx, errors.ECODE_STATE)
7863 _CheckNodeOnline(self, target_node)
7864 _CheckNodeNotDrained(self, target_node)
7865 _CheckNodeVmCapable(self, target_node)
7866 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7867 self.cfg.GetNodeGroup(node.group))
7868 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7869 ignore=self.op.ignore_ipolicy)
7871 if instance.admin_state == constants.ADMINST_UP:
7872 # check memory requirements on the target node
7873 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7874 instance.name, bep[constants.BE_MAXMEM],
7875 instance.hypervisor)
7877 self.LogInfo("Not checking memory on the target node as"
7878 " instance will not be started")
7880 # check bridge existence
7881 _CheckInstanceBridgesExist(self, instance, node=target_node)
7883 def Exec(self, feedback_fn):
7884 """Move an instance.
7886 The move is done by shutting it down on its present node, copying
7887 the data over (slow) and starting it on the new node.
7890 instance = self.instance
7892 source_node = instance.primary_node
7893 target_node = self.target_node
7895 self.LogInfo("Shutting down instance %s on source node %s",
7896 instance.name, source_node)
7898 assert (self.owned_locks(locking.LEVEL_NODE) ==
7899 self.owned_locks(locking.LEVEL_NODE_RES))
7901 result = self.rpc.call_instance_shutdown(source_node, instance,
7902 self.op.shutdown_timeout)
7903 msg = result.fail_msg
7905 if self.op.ignore_consistency:
7906 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7907 " Proceeding anyway. Please make sure node"
7908 " %s is down. Error details: %s",
7909 instance.name, source_node, source_node, msg)
7911 raise errors.OpExecError("Could not shutdown instance %s on"
7913 (instance.name, source_node, msg))
7915 # create the target disks
7917 _CreateDisks(self, instance, target_node=target_node)
7918 except errors.OpExecError:
7919 self.LogWarning("Device creation failed, reverting...")
7921 _RemoveDisks(self, instance, target_node=target_node)
7923 self.cfg.ReleaseDRBDMinors(instance.name)
7926 cluster_name = self.cfg.GetClusterInfo().cluster_name
7929 # activate, get path, copy the data over
7930 for idx, disk in enumerate(instance.disks):
7931 self.LogInfo("Copying data for disk %d", idx)
7932 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7933 instance.name, True, idx)
7935 self.LogWarning("Can't assemble newly created disk %d: %s",
7936 idx, result.fail_msg)
7937 errs.append(result.fail_msg)
7939 dev_path = result.payload
7940 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7941 target_node, dev_path,
7944 self.LogWarning("Can't copy data over for disk %d: %s",
7945 idx, result.fail_msg)
7946 errs.append(result.fail_msg)
7950 self.LogWarning("Some disks failed to copy, aborting")
7952 _RemoveDisks(self, instance, target_node=target_node)
7954 self.cfg.ReleaseDRBDMinors(instance.name)
7955 raise errors.OpExecError("Errors during disk copy: %s" %
7958 instance.primary_node = target_node
7959 self.cfg.Update(instance, feedback_fn)
7961 self.LogInfo("Removing the disks on the original node")
7962 _RemoveDisks(self, instance, target_node=source_node)
7964 # Only start the instance if it's marked as up
7965 if instance.admin_state == constants.ADMINST_UP:
7966 self.LogInfo("Starting instance %s on node %s",
7967 instance.name, target_node)
7969 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7970 ignore_secondaries=True)
7972 _ShutdownInstanceDisks(self, instance)
7973 raise errors.OpExecError("Can't activate the instance's disks")
7975 result = self.rpc.call_instance_start(target_node,
7976 (instance, None, None), False)
7977 msg = result.fail_msg
7979 _ShutdownInstanceDisks(self, instance)
7980 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7981 (instance.name, target_node, msg))
7984 class LUNodeMigrate(LogicalUnit):
7985 """Migrate all instances from a node.
7988 HPATH = "node-migrate"
7989 HTYPE = constants.HTYPE_NODE
7992 def CheckArguments(self):
7995 def ExpandNames(self):
7996 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7998 self.share_locks = _ShareAll()
7999 self.needed_locks = {
8000 locking.LEVEL_NODE: [self.op.node_name],
8003 def BuildHooksEnv(self):
8006 This runs on the master, the primary and all the secondaries.
8010 "NODE_NAME": self.op.node_name,
8011 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8014 def BuildHooksNodes(self):
8015 """Build hooks nodes.
8018 nl = [self.cfg.GetMasterNode()]
8021 def CheckPrereq(self):
8024 def Exec(self, feedback_fn):
8025 # Prepare jobs for migrating the instances
8026 allow_runtime_changes = self.op.allow_runtime_changes
8028 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8031 iallocator=self.op.iallocator,
8032 target_node=self.op.target_node,
8033 allow_runtime_changes=allow_runtime_changes,
8034 ignore_ipolicy=self.op.ignore_ipolicy)]
8035 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
8038 # TODO: Run iallocator in this opcode and pass correct placement options to
8039 # OpInstanceMigrate. Since other jobs can modify the cluster between
8040 # running the iallocator and the actual migration, a good consistency model
8041 # will have to be found.
8043 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8044 frozenset([self.op.node_name]))
8046 return ResultWithJobs(jobs)
8049 class TLMigrateInstance(Tasklet):
8050 """Tasklet class for instance migration.
8053 @ivar live: whether the migration will be done live or non-live;
8054 this variable is initialized only after CheckPrereq has run
8055 @type cleanup: boolean
8056 @ivar cleanup: Whether we clean up from a failed migration
8057 @type iallocator: string
8058 @ivar iallocator: The iallocator used to determine target_node
8059 @type target_node: string
8060 @ivar target_node: If given, the target_node to reallocate the instance to
8061 @type failover: boolean
8062 @ivar failover: Whether operation results in failover or migration
8063 @type fallback: boolean
8064 @ivar fallback: Whether fallback to failover is allowed if migration not
8066 @type ignore_consistency: boolean
8067 @ivar ignore_consistency: Whether we should ignore consistency between source
8069 @type shutdown_timeout: int
8070 @ivar shutdown_timeout: In case of failover, the timeout of the shutdown
8071 @type ignore_ipolicy: bool
8072 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8077 _MIGRATION_POLL_INTERVAL = 1 # seconds
8078 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
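# These two constants drive the polling loop in _ExecMigration below: the
# migration status is queried roughly once per poll interval, while progress
# feedback is emitted at most once per feedback interval.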
8080 def __init__(self, lu, instance_name, cleanup=False,
8081 failover=False, fallback=False,
8082 ignore_consistency=False,
8083 allow_runtime_changes=True,
8084 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
8085 ignore_ipolicy=False):
8086 """Initializes this class.
8089 Tasklet.__init__(self, lu)
8092 self.instance_name = instance_name
8093 self.cleanup = cleanup
8094 self.live = False # will be overridden later
8095 self.failover = failover
8096 self.fallback = fallback
8097 self.ignore_consistency = ignore_consistency
8098 self.shutdown_timeout = shutdown_timeout
8099 self.ignore_ipolicy = ignore_ipolicy
8100 self.allow_runtime_changes = allow_runtime_changes
8102 def CheckPrereq(self):
8103 """Check prerequisites.
8105 This checks that the instance is in the cluster.
8108 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8109 instance = self.cfg.GetInstanceInfo(instance_name)
8110 assert instance is not None
8111 self.instance = instance
8112 cluster = self.cfg.GetClusterInfo()
8114 if (not self.cleanup and
8115 not instance.admin_state == constants.ADMINST_UP and
8116 not self.failover and self.fallback):
8117 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8118 " switching to failover")
8119 self.failover = True
8121 if instance.disk_template not in constants.DTS_MIRRORED:
8126 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8127 " %s" % (instance.disk_template, text),
8130 if instance.disk_template in constants.DTS_EXT_MIRROR:
8131 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8133 if self.lu.op.iallocator:
8134 self._RunAllocator()
8136 # We set self.target_node as it is required by
8138 self.target_node = self.lu.op.target_node
8140 # Check that the target node is correct in terms of instance policy
8141 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8142 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8143 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8144 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8145 ignore=self.ignore_ipolicy)
8147 # self.target_node is already populated, either directly or by the
8149 target_node = self.target_node
8150 if self.target_node == instance.primary_node:
8151 raise errors.OpPrereqError("Cannot migrate instance %s"
8152 " to its primary (%s)" %
8153 (instance.name, instance.primary_node),
8156 if len(self.lu.tasklets) == 1:
8157 # It is safe to release locks only when we're the only tasklet
8159 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8160 keep=[instance.primary_node, self.target_node])
8163 secondary_nodes = instance.secondary_nodes
8164 if not secondary_nodes:
8165 raise errors.ConfigurationError("No secondary node but using"
8166 " %s disk template" %
8167 instance.disk_template)
8168 target_node = secondary_nodes[0]
8169 if self.lu.op.iallocator or (self.lu.op.target_node and
8170 self.lu.op.target_node != target_node):
8172 text = "failed over"
8175 raise errors.OpPrereqError("Instances with disk template %s cannot"
8176 " be %s to arbitrary nodes"
8177 " (neither an iallocator nor a target"
8178 " node can be passed)" %
8179 (instance.disk_template, text),
8181 nodeinfo = self.cfg.GetNodeInfo(target_node)
8182 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8183 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8184 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8185 ignore=self.ignore_ipolicy)
8187 i_be = cluster.FillBE(instance)
8189 # check memory requirements on the secondary node
8190 if (not self.cleanup and
8191 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8192 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8193 "migrating instance %s" %
8195 i_be[constants.BE_MINMEM],
8196 instance.hypervisor)
8198 self.lu.LogInfo("Not checking memory on the secondary node as"
8199 " instance will not be started")
8201 # check if failover must be forced instead of migration
8202 if (not self.cleanup and not self.failover and
8203 i_be[constants.BE_ALWAYS_FAILOVER]):
8205 self.lu.LogInfo("Instance configured to always failover; fallback"
8207 self.failover = True
8209 raise errors.OpPrereqError("This instance has been configured to"
8210 " always failover, please allow failover",
8213 # check bridge existence
8214 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8216 if not self.cleanup:
8217 _CheckNodeNotDrained(self.lu, target_node)
8218 if not self.failover:
8219 result = self.rpc.call_instance_migratable(instance.primary_node,
8221 if result.fail_msg and self.fallback:
8222 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8224 self.failover = True
8226 result.Raise("Can't migrate, please use failover",
8227 prereq=True, ecode=errors.ECODE_STATE)
8229 assert not (self.failover and self.cleanup)
8231 if not self.failover:
8232 if self.lu.op.live is not None and self.lu.op.mode is not None:
8233 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8234 " parameters are accepted",
8236 if self.lu.op.live is not None:
8238 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8240 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8241 # reset the 'live' parameter to None so that repeated
8242 # invocations of CheckPrereq do not raise an exception
8243 self.lu.op.live = None
8244 elif self.lu.op.mode is None:
8245 # read the default value from the hypervisor
8246 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8247 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8249 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8251 # Failover is never live
8254 if not (self.failover or self.cleanup):
8255 remote_info = self.rpc.call_instance_info(instance.primary_node,
8257 instance.hypervisor)
8258 remote_info.Raise("Error checking instance on node %s" %
8259 instance.primary_node)
8260 instance_running = bool(remote_info.payload)
8261 if instance_running:
8262 self.current_mem = int(remote_info.payload["memory"])
8264 def _RunAllocator(self):
8265 """Run the allocator based on input opcode.
8268 # FIXME: add a self.ignore_ipolicy option
8269 ial = IAllocator(self.cfg, self.rpc,
8270 mode=constants.IALLOCATOR_MODE_RELOC,
8271 name=self.instance_name,
8272 relocate_from=[self.instance.primary_node],
8275 ial.Run(self.lu.op.iallocator)
8278 raise errors.OpPrereqError("Can't compute nodes using"
8279 " iallocator '%s': %s" %
8280 (self.lu.op.iallocator, ial.info),
8282 if len(ial.result) != ial.required_nodes:
8283 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8284 " of nodes (%s), required %s" %
8285 (self.lu.op.iallocator, len(ial.result),
8286 ial.required_nodes), errors.ECODE_FAULT)
8287 self.target_node = ial.result[0]
8288 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8289 self.instance_name, self.lu.op.iallocator,
8290 utils.CommaJoin(ial.result))
8292 def _WaitUntilSync(self):
8293 """Poll with custom rpc for disk sync.
8295 This uses our own step-based rpc call.
8298 self.feedback_fn("* wait until resync is done")
8302 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8304 (self.instance.disks,
8307 for node, nres in result.items():
8308 nres.Raise("Cannot resync disks on node %s" % node)
8309 node_done, node_percent = nres.payload
8310 all_done = all_done and node_done
8311 if node_percent is not None:
8312 min_percent = min(min_percent, node_percent)
8314 if min_percent < 100:
8315 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8318 def _EnsureSecondary(self, node):
8319 """Demote a node to secondary.
8322 self.feedback_fn("* switching node %s to secondary mode" % node)
8324 for dev in self.instance.disks:
8325 self.cfg.SetDiskID(dev, node)
8327 result = self.rpc.call_blockdev_close(node, self.instance.name,
8328 self.instance.disks)
8329 result.Raise("Cannot change disk to secondary on node %s" % node)
8331 def _GoStandalone(self):
8332 """Disconnect from the network.
8335 self.feedback_fn("* changing into standalone mode")
8336 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8337 self.instance.disks)
8338 for node, nres in result.items():
8339 nres.Raise("Cannot disconnect disks on node %s" % node)
8341 def _GoReconnect(self, multimaster):
8342 """Reconnect to the network.
8348 msg = "single-master"
8349 self.feedback_fn("* changing disks into %s mode" % msg)
8350 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8351 (self.instance.disks, self.instance),
8352 self.instance.name, multimaster)
8353 for node, nres in result.items():
8354 nres.Raise("Cannot change disks config on node %s" % node)
8356 def _ExecCleanup(self):
8357 """Try to cleanup after a failed migration.
8359 The cleanup is done by:
8360 - check that the instance is running only on one node
8361 (and update the config if needed)
8362 - change disks on its secondary node to secondary
8363 - wait until disks are fully synchronized
8364 - disconnect from the network
8365 - change disks into single-master mode
8366 - wait again until disks are fully synchronized
8369 instance = self.instance
8370 target_node = self.target_node
8371 source_node = self.source_node
8373 # check running on only one node
8374 self.feedback_fn("* checking where the instance actually runs"
8375 " (if this hangs, the hypervisor might be in"
8377 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8378 for node, result in ins_l.items():
8379 result.Raise("Can't contact node %s" % node)
8381 runningon_source = instance.name in ins_l[source_node].payload
8382 runningon_target = instance.name in ins_l[target_node].payload
8384 if runningon_source and runningon_target:
8385 raise errors.OpExecError("Instance seems to be running on two nodes,"
8386 " or the hypervisor is confused; you will have"
8387 " to ensure manually that it runs only on one"
8388 " and restart this operation")
8390 if not (runningon_source or runningon_target):
8391 raise errors.OpExecError("Instance does not seem to be running at all;"
8392 " in this case it's safer to repair by"
8393 " running 'gnt-instance stop' to ensure disk"
8394 " shutdown, and then restarting it")
8396 if runningon_target:
8397 # the migration has actually succeeded, we need to update the config
8398 self.feedback_fn("* instance running on secondary node (%s),"
8399 " updating config" % target_node)
8400 instance.primary_node = target_node
8401 self.cfg.Update(instance, self.feedback_fn)
8402 demoted_node = source_node
8404 self.feedback_fn("* instance confirmed to be running on its"
8405 " primary node (%s)" % source_node)
8406 demoted_node = target_node
8408 if instance.disk_template in constants.DTS_INT_MIRROR:
8409 self._EnsureSecondary(demoted_node)
8411 self._WaitUntilSync()
8412 except errors.OpExecError:
8413 # we ignore errors here, since if the device is standalone, it
8414 # won't be able to sync
8416 self._GoStandalone()
8417 self._GoReconnect(False)
8418 self._WaitUntilSync()
8420 self.feedback_fn("* done")
8422 def _RevertDiskStatus(self):
8423 """Try to revert the disk status after a failed migration.
8426 target_node = self.target_node
8427 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8431 self._EnsureSecondary(target_node)
8432 self._GoStandalone()
8433 self._GoReconnect(False)
8434 self._WaitUntilSync()
8435 except errors.OpExecError, err:
8436 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8437 " please try to recover the instance manually;"
8438 " error '%s'" % str(err))
8440 def _AbortMigration(self):
8441 """Call the hypervisor code to abort a started migration.
8444 instance = self.instance
8445 target_node = self.target_node
8446 source_node = self.source_node
8447 migration_info = self.migration_info
8449 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8453 abort_msg = abort_result.fail_msg
8455 logging.error("Aborting migration failed on target node %s: %s",
8456 target_node, abort_msg)
8457 # Don't raise an exception here, as we still have to try to revert the
8458 # disk status, even if this step failed.
8460 abort_result = self.rpc.call_instance_finalize_migration_src(
8461 source_node, instance, False, self.live)
8462 abort_msg = abort_result.fail_msg
8464 logging.error("Aborting migration failed on source node %s: %s",
8465 source_node, abort_msg)
8467 def _ExecMigration(self):
8468 """Migrate an instance.
8470 The migrate is done by:
8471 - change the disks into dual-master mode
8472 - wait until disks are fully synchronized again
8473 - migrate the instance
8474 - change disks on the new secondary node (the old primary) to secondary
8475 - wait until disks are fully synchronized
8476 - change disks into single-master mode
8479 instance = self.instance
8480 target_node = self.target_node
8481 source_node = self.source_node
8483 # Check for hypervisor version mismatch and warn the user.
8484 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8485 None, [self.instance.hypervisor])
8486 for ninfo in nodeinfo.values():
8487 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8489 (_, _, (src_info, )) = nodeinfo[source_node].payload
8490 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8492 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8493 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8494 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8495 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8496 if src_version != dst_version:
8497 self.feedback_fn("* warning: hypervisor version mismatch between"
8498 " source (%s) and target (%s) node" %
8499 (src_version, dst_version))
8501 self.feedback_fn("* checking disk consistency between source and target")
8502 for (idx, dev) in enumerate(instance.disks):
8503 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8504 raise errors.OpExecError("Disk %s is degraded or not fully"
8505 " synchronized on target node,"
8506 " aborting migration" % idx)
8508 if self.current_mem > self.tgt_free_mem:
8509 if not self.allow_runtime_changes:
8510 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8511 " free memory to fit instance %s on target"
8512 " node %s (have %dMB, need %dMB)" %
8513 (instance.name, target_node,
8514 self.tgt_free_mem, self.current_mem))
8515 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8516 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8519 rpcres.Raise("Cannot modify instance runtime memory")
8521 # First get the migration information from the remote node
8522 result = self.rpc.call_migration_info(source_node, instance)
8523 msg = result.fail_msg
8525 log_err = ("Failed fetching source migration information from %s: %s" %
8527 logging.error(log_err)
8528 raise errors.OpExecError(log_err)
8530 self.migration_info = migration_info = result.payload
8532 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8533 # Then switch the disks to master/master mode
8534 self._EnsureSecondary(target_node)
8535 self._GoStandalone()
8536 self._GoReconnect(True)
8537 self._WaitUntilSync()
8539 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8540 result = self.rpc.call_accept_instance(target_node,
8543 self.nodes_ip[target_node])
8545 msg = result.fail_msg
8547 logging.error("Instance pre-migration failed, trying to revert"
8548 " disk status: %s", msg)
8549 self.feedback_fn("Pre-migration failed, aborting")
8550 self._AbortMigration()
8551 self._RevertDiskStatus()
8552 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8553 (instance.name, msg))
8555 self.feedback_fn("* migrating instance to %s" % target_node)
8556 result = self.rpc.call_instance_migrate(source_node, instance,
8557 self.nodes_ip[target_node],
8559 msg = result.fail_msg
8561 logging.error("Instance migration failed, trying to revert"
8562 " disk status: %s", msg)
8563 self.feedback_fn("Migration failed, aborting")
8564 self._AbortMigration()
8565 self._RevertDiskStatus()
8566 raise errors.OpExecError("Could not migrate instance %s: %s" %
8567 (instance.name, msg))
8569 self.feedback_fn("* starting memory transfer")
8570 last_feedback = time.time()
8572 result = self.rpc.call_instance_get_migration_status(source_node,
8574 msg = result.fail_msg
8575 ms = result.payload # MigrationStatus instance
8576 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8577 logging.error("Instance migration failed, trying to revert"
8578 " disk status: %s", msg)
8579 self.feedback_fn("Migration failed, aborting")
8580 self._AbortMigration()
8581 self._RevertDiskStatus()
8582 raise errors.OpExecError("Could not migrate instance %s: %s" %
8583 (instance.name, msg))
8585 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8586 self.feedback_fn("* memory transfer complete")
8589 if (utils.TimeoutExpired(last_feedback,
8590 self._MIGRATION_FEEDBACK_INTERVAL) and
8591 ms.transferred_ram is not None):
8592 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8593 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8594 last_feedback = time.time()
8596 time.sleep(self._MIGRATION_POLL_INTERVAL)
8598 result = self.rpc.call_instance_finalize_migration_src(source_node,
8602 msg = result.fail_msg
8604 logging.error("Instance migration succeeded, but finalization failed"
8605 " on the source node: %s", msg)
8606 raise errors.OpExecError("Could not finalize instance migration: %s" %
8609 instance.primary_node = target_node
8611 # distribute new instance config to the other nodes
8612 self.cfg.Update(instance, self.feedback_fn)
8614 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8618 msg = result.fail_msg
8620 logging.error("Instance migration succeeded, but finalization failed"
8621 " on the target node: %s", msg)
8622 raise errors.OpExecError("Could not finalize instance migration: %s" %
8625 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8626 self._EnsureSecondary(source_node)
8627 self._WaitUntilSync()
8628 self._GoStandalone()
8629 self._GoReconnect(False)
8630 self._WaitUntilSync()
8632 # If the instance's disk template is `rbd' and there was a successful
8633 # migration, unmap the device from the source node.
8634 if self.instance.disk_template == constants.DT_RBD:
8635 disks = _ExpandCheckDisks(instance, instance.disks)
8636 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8638 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8639 msg = result.fail_msg
8641 logging.error("Migration was successful, but couldn't unmap the"
8642 " block device %s on source node %s: %s",
8643 disk.iv_name, source_node, msg)
8644 logging.error("You need to unmap the device %s manually on %s",
8645 disk.iv_name, source_node)
8647 self.feedback_fn("* done")
8649 def _ExecFailover(self):
8650 """Failover an instance.
8652 The failover is done by shutting it down on its present node and
8653 starting it on the secondary.
8656 instance = self.instance
8657 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8659 source_node = instance.primary_node
8660 target_node = self.target_node
8662 if instance.admin_state == constants.ADMINST_UP:
8663 self.feedback_fn("* checking disk consistency between source and target")
8664 for (idx, dev) in enumerate(instance.disks):
8665 # for drbd, these are drbd over lvm
8666 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8668 if primary_node.offline:
8669 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8671 (primary_node.name, idx, target_node))
8672 elif not self.ignore_consistency:
8673 raise errors.OpExecError("Disk %s is degraded on target node,"
8674 " aborting failover" % idx)
8676 self.feedback_fn("* not checking disk consistency as instance is not"
8679 self.feedback_fn("* shutting down instance on source node")
8680 logging.info("Shutting down instance %s on node %s",
8681 instance.name, source_node)
8683 result = self.rpc.call_instance_shutdown(source_node, instance,
8684 self.shutdown_timeout)
8685 msg = result.fail_msg
8687 if self.ignore_consistency or primary_node.offline:
8688 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8689 " proceeding anyway; please make sure node"
8690 " %s is down; error details: %s",
8691 instance.name, source_node, source_node, msg)
8693 raise errors.OpExecError("Could not shutdown instance %s on"
8695 (instance.name, source_node, msg))
8697 self.feedback_fn("* deactivating the instance's disks on source node")
8698 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8699 raise errors.OpExecError("Can't shut down the instance's disks")
8701 instance.primary_node = target_node
8702 # distribute new instance config to the other nodes
8703 self.cfg.Update(instance, self.feedback_fn)
8705 # Only start the instance if it's marked as up
8706 if instance.admin_state == constants.ADMINST_UP:
8707 self.feedback_fn("* activating the instance's disks on target node %s" %
8709 logging.info("Starting instance %s on node %s",
8710 instance.name, target_node)
8712 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8713 ignore_secondaries=True)
8715 _ShutdownInstanceDisks(self.lu, instance)
8716 raise errors.OpExecError("Can't activate the instance's disks")
8718 self.feedback_fn("* starting the instance on the target node %s" %
8720 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8722 msg = result.fail_msg
8724 _ShutdownInstanceDisks(self.lu, instance)
8725 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8726 (instance.name, target_node, msg))
8728 def Exec(self, feedback_fn):
8729 """Perform the migration.
8732 self.feedback_fn = feedback_fn
8733 self.source_node = self.instance.primary_node
8735 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8736 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8737 self.target_node = self.instance.secondary_nodes[0]
8738 # Otherwise self.target_node has been populated either
8739 # directly, or through an iallocator.
8741 self.all_nodes = [self.source_node, self.target_node]
8742 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8743 in self.cfg.GetMultiNodeInfo(self.all_nodes))
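# nodes_ip maps each involved node to its secondary IP; this is the address
# used by the DRBD disconnect/attach RPCs and by the migration itself (see
# _GoStandalone, _GoReconnect and _ExecMigration above).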
8746 feedback_fn("Failover instance %s" % self.instance.name)
8747 self._ExecFailover()
8749 feedback_fn("Migrating instance %s" % self.instance.name)
8752 return self._ExecCleanup()
8754 return self._ExecMigration()
8757 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8759 """Wrapper around L{_CreateBlockDevInner}.
8761 This method annotates the root device first.
8764 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8765 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8769 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8771 """Create a tree of block devices on a given node.
8773 If this device type has to be created on secondaries, create it and
8776 If not, just recurse to children keeping the same 'force' value.
8778 @attention: The device has to be annotated already.
8780 @param lu: the lu on whose behalf we execute
8781 @param node: the node on which to create the device
8782 @type instance: L{objects.Instance}
8783 @param instance: the instance which owns the device
8784 @type device: L{objects.Disk}
8785 @param device: the device to create
8786 @type force_create: boolean
8787 @param force_create: whether to force creation of this device; this
8788 will be changed to True whenever we find a device which has
8789 CreateOnSecondary() attribute
8790 @param info: the extra 'metadata' we should attach to the device
8791 (this will be represented as a LVM tag)
8792 @type force_open: boolean
8793 @param force_open: this parameter will be passed to the
8794 L{backend.BlockdevCreate} function where it specifies
8795 whether we run on primary or not, and it affects both
8796 the child assembly and the device's own Open() execution
8799 if device.CreateOnSecondary():
8803 for child in device.children:
8804 _CreateBlockDevInner(lu, node, instance, child, force_create,
8807 if not force_create:
8810 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8813 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8814 """Create a single block device on a given node.
8816 This will not recurse over children of the device, so they must be
8819 @param lu: the lu on whose behalf we execute
8820 @param node: the node on which to create the device
8821 @type instance: L{objects.Instance}
8822 @param instance: the instance which owns the device
8823 @type device: L{objects.Disk}
8824 @param device: the device to create
8825 @param info: the extra 'metadata' we should attach to the device
8826 (this will be represented as a LVM tag)
8827 @type force_open: boolean
8828 @param force_open: this parameter will be passed to the
8829 L{backend.BlockdevCreate} function where it specifies
8830 whether we run on primary or not, and it affects both
8831 the child assembly and the device's own Open() execution
8834 lu.cfg.SetDiskID(device, node)
8835 result = lu.rpc.call_blockdev_create(node, device, device.size,
8836 instance.name, force_open, info)
8837 result.Raise("Can't create block device %s on"
8838 " node %s for instance %s" % (device, node, instance.name))
8839 if device.physical_id is None:
8840 device.physical_id = result.payload
8843 def _GenerateUniqueNames(lu, exts):
8844 """Generate a suitable LV name.
8846 This will generate a logical volume name for the given instance.
8851 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8852 results.append("%s%s" % (new_id, val))
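# Each resulting name is a configuration-generated unique ID with the
# requested extension appended, e.g. "<uuid>.disk0" for exts == [".disk0"]
# (the "<uuid>" placeholder stands for the generated ID).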
8856 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8857 iv_name, p_minor, s_minor):
8858 """Generate a drbd8 device complete with its children.
8861 assert len(vgnames) == len(names) == 2
8862 port = lu.cfg.AllocatePort()
8863 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8865 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8866 logical_id=(vgnames[0], names[0]),
8868 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8869 logical_id=(vgnames[1], names[1]),
8871 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8872 logical_id=(primary, secondary, port,
8875 children=[dev_data, dev_meta],
8876 iv_name=iv_name, params={})
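# The generated device tree is therefore a DRBD8 device backed by two plain
# LVs as children: one of the requested size for data and one of
# DRBD_META_SIZE for the DRBD metadata, with the allocated minors and the
# shared secret recorded in the logical_id.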
8880 _DISK_TEMPLATE_NAME_PREFIX = {
8881 constants.DT_PLAIN: "",
8882 constants.DT_RBD: ".rbd",
8886 _DISK_TEMPLATE_DEVICE_TYPE = {
8887 constants.DT_PLAIN: constants.LD_LV,
8888 constants.DT_FILE: constants.LD_FILE,
8889 constants.DT_SHARED_FILE: constants.LD_FILE,
8890 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8891 constants.DT_RBD: constants.LD_RBD,
8895 def _GenerateDiskTemplate(
8896 lu, template_name, instance_name, primary_node, secondary_nodes,
8897 disk_info, file_storage_dir, file_driver, base_index,
8898 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8899 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8900 """Generate the entire disk layout for a given template type.
8903 #TODO: compute space requirements
8905 vgname = lu.cfg.GetVGName()
8906 disk_count = len(disk_info)
8909 if template_name == constants.DT_DISKLESS:
8911 elif template_name == constants.DT_DRBD8:
8912 if len(secondary_nodes) != 1:
8913 raise errors.ProgrammerError("Wrong template configuration")
8914 remote_node = secondary_nodes[0]
8915 minors = lu.cfg.AllocateDRBDMinor(
8916 [primary_node, remote_node] * len(disk_info), instance_name)
8918 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8920 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8923 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8924 for i in range(disk_count)]):
8925 names.append(lv_prefix + "_data")
8926 names.append(lv_prefix + "_meta")
8927 for idx, disk in enumerate(disk_info):
8928 disk_index = idx + base_index
8929 data_vg = disk.get(constants.IDISK_VG, vgname)
8930 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8931 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8932 disk[constants.IDISK_SIZE],
8934 names[idx * 2:idx * 2 + 2],
8935 "disk/%d" % disk_index,
8936 minors[idx * 2], minors[idx * 2 + 1])
8937 disk_dev.mode = disk[constants.IDISK_MODE]
8938 disks.append(disk_dev)
8941 raise errors.ProgrammerError("Wrong template configuration")
8943 if template_name == constants.DT_FILE:
8945 elif template_name == constants.DT_SHARED_FILE:
8946 _req_shr_file_storage()
8948 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8949 if name_prefix is None:
8952 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8953 (name_prefix, base_index + i)
8954 for i in range(disk_count)])
8956 if template_name == constants.DT_PLAIN:
8957 def logical_id_fn(idx, _, disk):
8958 vg = disk.get(constants.IDISK_VG, vgname)
8959 return (vg, names[idx])
8960 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8962 lambda _, disk_index, disk: (file_driver,
8963 "%s/disk%d" % (file_storage_dir,
8965 elif template_name == constants.DT_BLOCK:
8967 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8968 disk[constants.IDISK_ADOPT])
8969 elif template_name == constants.DT_RBD:
8970 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8972 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8974 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8976 for idx, disk in enumerate(disk_info):
8977 disk_index = idx + base_index
8978 size = disk[constants.IDISK_SIZE]
8979 feedback_fn("* disk %s, size %s" %
8980 (disk_index, utils.FormatUnit(size, "h")))
8981 disks.append(objects.Disk(dev_type=dev_type, size=size,
8982 logical_id=logical_id_fn(idx, disk_index, disk),
8983 iv_name="disk/%d" % disk_index,
8984 mode=disk[constants.IDISK_MODE],
8990 def _GetInstanceInfoText(instance):
8991 """Compute that text that should be added to the disk's metadata.
8994 return "originstname+%s" % instance.name
8997 def _CalcEta(time_taken, written, total_size):
8998 """Calculates the ETA based on size written and total size.
9000 @param time_taken: The time taken so far
9001 @param written: amount written so far
9002 @param total_size: The total size of data to be written
9003 @return: The remaining time in seconds
9006 avg_time = time_taken / float(written)
9007 return (total_size - written) * avg_time
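# Worked example: if 100 MiB out of 500 MiB were written in 10 seconds, the
# average is 0.1 s/MiB and the remaining 400 MiB give an ETA of 40 seconds.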
9010 def _WipeDisks(lu, instance):
9011 """Wipes instance disks.
9013 @type lu: L{LogicalUnit}
9014 @param lu: the logical unit on whose behalf we execute
9015 @type instance: L{objects.Instance}
9016 @param instance: the instance whose disks we should wipe
9017 @return: the success of the wipe
9020 node = instance.primary_node
9022 for device in instance.disks:
9023 lu.cfg.SetDiskID(device, node)
9025 logging.info("Pause sync of instance %s disks", instance.name)
9026 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9027 (instance.disks, instance),
9029 result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)
9031 for idx, success in enumerate(result.payload):
9033 logging.warn("pause-sync of instance %s for disks %d failed",
9037 for idx, device in enumerate(instance.disks):
9038 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk, but at
9039 # most MAX_WIPE_CHUNK
9040 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
9041 constants.MIN_WIPE_CHUNK_PERCENT)
9042 # we _must_ make this an int, otherwise rounding errors will
9044 wipe_chunk_size = int(wipe_chunk_size)
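# As an illustration (assuming MIN_WIPE_CHUNK_PERCENT is 10 and
# MAX_WIPE_CHUNK is 1024 MiB): a 5120 MiB disk is wiped in 512 MiB chunks,
# while disks of 10240 MiB and above are capped at 1024 MiB chunks.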
9046 lu.LogInfo("* Wiping disk %d", idx)
9047 logging.info("Wiping disk %d for instance %s, node %s using"
9048 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9053 start_time = time.time()
9055 while offset < size:
9056 wipe_size = min(wipe_chunk_size, size - offset)
9057 logging.debug("Wiping disk %d, offset %s, chunk %s",
9058 idx, offset, wipe_size)
9059 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9061 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9062 (idx, offset, wipe_size))
9065 if now - last_output >= 60:
9066 eta = _CalcEta(now - start_time, offset, size)
9067 lu.LogInfo(" - done: %.1f%% ETA: %s" %
9068 (offset / float(size) * 100, utils.FormatSeconds(eta)))
9071 logging.info("Resume sync of instance %s disks", instance.name)
9073 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9074 (instance.disks, instance),
9078 lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
9079 " please have a look at the status and troubleshoot"
9080 " the issue: %s", node, result.fail_msg)
9082 for idx, success in enumerate(result.payload):
9084 lu.LogWarning("Resume sync of disk %d failed, please have a"
9085 " look at the status and troubleshoot the issue", idx)
9086 logging.warn("resume-sync of instance %s for disks %d failed",
9090 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9091 """Create all disks for an instance.
9093 This abstracts away some work from AddInstance.
9095 @type lu: L{LogicalUnit}
9096 @param lu: the logical unit on whose behalf we execute
9097 @type instance: L{objects.Instance}
9098 @param instance: the instance whose disks we should create
9100 @param to_skip: list of indices to skip
9101 @type target_node: string
9102 @param target_node: if passed, overrides the target node for creation
9104 @return: the success of the creation
9107 info = _GetInstanceInfoText(instance)
9108 if target_node is None:
9109 pnode = instance.primary_node
9110 all_nodes = instance.all_nodes
9115 if instance.disk_template in constants.DTS_FILEBASED:
9116 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9117 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9119 result.Raise("Failed to create directory '%s' on"
9120 " node %s" % (file_storage_dir, pnode))
9122 # Note: this needs to be kept in sync with adding of disks in
9123 # LUInstanceSetParams
9124 for idx, device in enumerate(instance.disks):
9125 if to_skip and idx in to_skip:
9127 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9129 for node in all_nodes:
9130 f_create = node == pnode
9131 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
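# f_create (also reused as force_open) is True only on the primary node, so
# the other nodes only get the devices that report CreateOnSecondary(), e.g.
# the DRBD layer, as handled by _CreateBlockDevInner above.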
9134 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9135 """Remove all disks for an instance.
9137 This abstracts away some work from `AddInstance()` and
9138 `RemoveInstance()`. Note that in case some of the devices couldn't
9139 be removed, the removal will continue with the other ones (compare
9140 with `_CreateDisks()`).
9142 @type lu: L{LogicalUnit}
9143 @param lu: the logical unit on whose behalf we execute
9144 @type instance: L{objects.Instance}
9145 @param instance: the instance whose disks we should remove
9146 @type target_node: string
9147 @param target_node: used to override the node on which to remove the disks
9149 @return: the success of the removal
9152 logging.info("Removing block devices for instance %s", instance.name)
9155 ports_to_release = set()
9156 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9157 for (idx, device) in enumerate(anno_disks):
9159 edata = [(target_node, device)]
9161 edata = device.ComputeNodeTree(instance.primary_node)
9162 for node, disk in edata:
9163 lu.cfg.SetDiskID(disk, node)
9164 result = lu.rpc.call_blockdev_remove(node, disk)
9166 lu.LogWarning("Could not remove disk %s on node %s,"
9167 " continuing anyway: %s", idx, node, result.fail_msg)
9168 if not (result.offline and node != instance.primary_node):
9171 # if this is a DRBD disk, return its port to the pool
9172 if device.dev_type in constants.LDS_DRBD:
9173 ports_to_release.add(device.logical_id[2])
9175 if all_result or ignore_failures:
9176 for port in ports_to_release:
9177 lu.cfg.AddTcpUdpPort(port)
9179 if instance.disk_template == constants.DT_FILE:
9180 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9184 tgt = instance.primary_node
9185 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9187 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9188 file_storage_dir, instance.primary_node, result.fail_msg)
9194 def _ComputeDiskSizePerVG(disk_template, disks):
9195 """Compute disk size requirements in the volume group
9198 def _compute(disks, payload):
9199 """Universal algorithm.
9204 vgs[disk[constants.IDISK_VG]] = \
9205 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9209 # Required free disk space as a function of disk and swap space
9211 constants.DT_DISKLESS: {},
9212 constants.DT_PLAIN: _compute(disks, 0),
9213 # 128 MB are added for drbd metadata for each disk
9214 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9215 constants.DT_FILE: {},
9216 constants.DT_SHARED_FILE: {},
9219 if disk_template not in req_size_dict:
9220 raise errors.ProgrammerError("Disk template '%s' size requirement"
9221 " is unknown" % disk_template)
9223 return req_size_dict[disk_template]
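# Example (hypothetical request): two 1024 MiB DRBD8 disks on volume group
# "xenvg" would yield {"xenvg": 2 * (1024 + DRBD_META_SIZE)}, i.e. per volume
# group the sum of each disk's size plus the template's per-disk payload.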
9226 def _ComputeDiskSize(disk_template, disks):
9227 """Compute disk size requirements according to disk template
9230 # Required free disk space as a function of disk and swap space
9232 constants.DT_DISKLESS: None,
9233 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9234 # 128 MB are added for drbd metadata for each disk
9236 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9237 constants.DT_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9238 constants.DT_SHARED_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9239 constants.DT_BLOCK: 0,
9240 constants.DT_RBD: sum(d[constants.IDISK_SIZE] for d in disks),
9243 if disk_template not in req_size_dict:
9244 raise errors.ProgrammerError("Disk template '%s' size requirement"
9245 " is unknown" % disk_template)
9247 return req_size_dict[disk_template]
9250 def _FilterVmNodes(lu, nodenames):
9251 """Filters out non-vm_capable nodes from a list.
9253 @type lu: L{LogicalUnit}
9254 @param lu: the logical unit for which we check
9255 @type nodenames: list
9256 @param nodenames: the list of nodes on which we should check
9258 @return: the list of vm-capable nodes
9261 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9262 return [name for name in nodenames if name not in non_vm_nodes]
9265 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9266 """Hypervisor parameter validation.
9268 This function abstracts the hypervisor parameter validation to be
9269 used in both instance create and instance modify.
9271 @type lu: L{LogicalUnit}
9272 @param lu: the logical unit for which we check
9273 @type nodenames: list
9274 @param nodenames: the list of nodes on which we should check
9275 @type hvname: string
9276 @param hvname: the name of the hypervisor we should use
9277 @type hvparams: dict
9278 @param hvparams: the parameters which we need to check
9279 @raise errors.OpPrereqError: if the parameters are not valid
9282 nodenames = _FilterVmNodes(lu, nodenames)
9284 cluster = lu.cfg.GetClusterInfo()
9285 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9287 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9288 for node in nodenames:
9292 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9295 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9296 """OS parameters validation.
9298 @type lu: L{LogicalUnit}
9299 @param lu: the logical unit for which we check
9300 @type required: boolean
9301 @param required: whether the validation should fail if the OS is not found
9303 @type nodenames: list
9304 @param nodenames: the list of nodes on which we should check
9305 @type osname: string
9306 @param osname: the name of the OS we should use
9307 @type osparams: dict
9308 @param osparams: the parameters which we need to check
9309 @raise errors.OpPrereqError: if the parameters are not valid
9312 nodenames = _FilterVmNodes(lu, nodenames)
9313 result = lu.rpc.call_os_validate(nodenames, required, osname,
9314 [constants.OS_VALIDATE_PARAMETERS],
9316 for node, nres in result.items():
9317 # we don't check for offline cases since this should be run only
9318 # against the master node and/or an instance's nodes
9319 nres.Raise("OS Parameters validation failed on node %s" % node)
9320 if not nres.payload:
9321 lu.LogInfo("OS %s not found on node %s, validation skipped",
9325 class LUInstanceCreate(LogicalUnit):
9326 """Create an instance.
9329 HPATH = "instance-add"
9330 HTYPE = constants.HTYPE_INSTANCE
9333 def CheckArguments(self):
9337 # do not require name_check to ease forward/backward compatibility
9339 if self.op.no_install and self.op.start:
9340 self.LogInfo("No-installation mode selected, disabling startup")
9341 self.op.start = False
9342 # validate/normalize the instance name
9343 self.op.instance_name = \
9344 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9346 if self.op.ip_check and not self.op.name_check:
9347 # TODO: make the ip check more flexible and not depend on the name check
9348 raise errors.OpPrereqError("Cannot do IP address check without a name"
9349 " check", errors.ECODE_INVAL)
9351 # check nics' parameter names
9352 for nic in self.op.nics:
9353 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9355 # check disks. parameter names and consistent adopt/no-adopt strategy
9356 has_adopt = has_no_adopt = False
9357 for disk in self.op.disks:
9358 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9359 if constants.IDISK_ADOPT in disk:
9363 if has_adopt and has_no_adopt:
9364 raise errors.OpPrereqError("Either all disks are adopted or none is",
9367 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9368 raise errors.OpPrereqError("Disk adoption is not supported for the"
9369 " '%s' disk template" %
9370 self.op.disk_template,
9372 if self.op.iallocator is not None:
9373 raise errors.OpPrereqError("Disk adoption not allowed with an"
9374 " iallocator script", errors.ECODE_INVAL)
9375 if self.op.mode == constants.INSTANCE_IMPORT:
9376 raise errors.OpPrereqError("Disk adoption not allowed for"
9377 " instance import", errors.ECODE_INVAL)
9379 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9380 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9381 " but no 'adopt' parameter given" %
9382 self.op.disk_template,
9385 self.adopt_disks = has_adopt
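# Illustrative sketch, not part of the original module: with a plain/LVM
# template, an all-adopting disk specification (LV and VG names hypothetical)
# passes the consistency check above, whereas mixing adopting and
# non-adopting disks raises OpPrereqError:
#
#   self.op.disks = [
#     {constants.IDISK_SIZE: 1024, constants.IDISK_VG: "xenvg",
#      constants.IDISK_ADOPT: "existing-lv-0"},
#     {constants.IDISK_SIZE: 2048, constants.IDISK_VG: "xenvg",
#      constants.IDISK_ADOPT: "existing-lv-1"},
#   ]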
9387 # instance name verification
9388 if self.op.name_check:
9389 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9390 self.op.instance_name = self.hostname1.name
9391 # used in CheckPrereq for ip ping check
9392 self.check_ip = self.hostname1.ip
9394 self.check_ip = None
9396 # file storage checks
9397 if (self.op.file_driver and
9398 not self.op.file_driver in constants.FILE_DRIVER):
9399 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9400 self.op.file_driver, errors.ECODE_INVAL)
9402 if self.op.disk_template == constants.DT_FILE:
9403 opcodes.RequireFileStorage()
9404 elif self.op.disk_template == constants.DT_SHARED_FILE:
9405 opcodes.RequireSharedFileStorage()
9407 ### Node/iallocator related checks
9408 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9410 if self.op.pnode is not None:
9411 if self.op.disk_template in constants.DTS_INT_MIRROR:
9412 if self.op.snode is None:
9413 raise errors.OpPrereqError("The networked disk templates need"
9414 " a mirror node", errors.ECODE_INVAL)
9416 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9418 self.op.snode = None
9420 self._cds = _GetClusterDomainSecret()
9422 if self.op.mode == constants.INSTANCE_IMPORT:
9423 # On import force_variant must be True, because if we forced it at
9424 # initial install, our only chance when importing it back is that it
9426 self.op.force_variant = True
9428 if self.op.no_install:
9429 self.LogInfo("No-installation mode has no effect during import")
9431 elif self.op.mode == constants.INSTANCE_CREATE:
9432 if self.op.os_type is None:
9433 raise errors.OpPrereqError("No guest OS specified",
9435 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9436 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9437 " installation" % self.op.os_type,
9439 if self.op.disk_template is None:
9440 raise errors.OpPrereqError("No disk template specified",
9443 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9444 # Check handshake to ensure both clusters have the same domain secret
9445 src_handshake = self.op.source_handshake
9446 if not src_handshake:
9447 raise errors.OpPrereqError("Missing source handshake",
9450 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9453 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9456 # Load and check source CA
9457 self.source_x509_ca_pem = self.op.source_x509_ca
9458 if not self.source_x509_ca_pem:
9459 raise errors.OpPrereqError("Missing source X509 CA",
9463 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9465 except OpenSSL.crypto.Error, err:
9466 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9467 (err, ), errors.ECODE_INVAL)
9469 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9470 if errcode is not None:
9471 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9474 self.source_x509_ca = cert
9476 src_instance_name = self.op.source_instance_name
9477 if not src_instance_name:
9478 raise errors.OpPrereqError("Missing source instance name",
9481 self.source_instance_name = \
9482 netutils.GetHostname(name=src_instance_name).name
9485 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9486 self.op.mode, errors.ECODE_INVAL)
9488 def ExpandNames(self):
9489 """ExpandNames for CreateInstance.
9491 Figure out the right locks for instance creation.
9494 self.needed_locks = {}
9496 instance_name = self.op.instance_name
9497 # this is just a preventive check, but someone might still add this
9498 # instance in the meantime, and creation will fail at lock-add time
9499 if instance_name in self.cfg.GetInstanceList():
9500 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9501 instance_name, errors.ECODE_EXISTS)
9503 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9505 if self.op.iallocator:
9506 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9507 # specifying a group on instance creation and then selecting nodes from that group
9509 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9510 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9512 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9513 nodelist = [self.op.pnode]
9514 if self.op.snode is not None:
9515 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9516 nodelist.append(self.op.snode)
9517 self.needed_locks[locking.LEVEL_NODE] = nodelist
9518 # Lock resources of instance's primary and secondary nodes (copy to
9519 # prevent accidental modification)
9520 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9522 # in case of import lock the source node too
9523 if self.op.mode == constants.INSTANCE_IMPORT:
9524 src_node = self.op.src_node
9525 src_path = self.op.src_path
9527 if src_path is None:
9528 self.op.src_path = src_path = self.op.instance_name
9530 if src_node is None:
9531 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9532 self.op.src_node = None
9533 if os.path.isabs(src_path):
9534 raise errors.OpPrereqError("Importing an instance from a path"
9535 " requires a source node option",
9538 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9539 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9540 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9541 if not os.path.isabs(src_path):
9542 self.op.src_path = src_path = \
9543 utils.PathJoin(constants.EXPORT_DIR, src_path)
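# Illustrative sketch, not part of the original module: for a creation with an
# explicit primary and secondary node (no iallocator), ExpandNames ends up with
# roughly the following locks (node and instance names hypothetical):
#
#   self.add_locks[locking.LEVEL_INSTANCE] == "inst1.example.com"
#   self.needed_locks[locking.LEVEL_NODE] == ["node1.example.com",
#                                             "node2.example.com"]
#   self.needed_locks[locking.LEVEL_NODE_RES] == ["node1.example.com",
#                                                 "node2.example.com"]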
9545 def _RunAllocator(self):
9546 """Run the allocator based on input opcode.
9549 nics = [n.ToDict() for n in self.nics]
9550 ial = IAllocator(self.cfg, self.rpc,
9551 mode=constants.IALLOCATOR_MODE_ALLOC,
9552 name=self.op.instance_name,
9553 disk_template=self.op.disk_template,
9556 vcpus=self.be_full[constants.BE_VCPUS],
9557 memory=self.be_full[constants.BE_MAXMEM],
9558 spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9561 hypervisor=self.op.hypervisor,
9564 ial.Run(self.op.iallocator)
9567 raise errors.OpPrereqError("Can't compute nodes using"
9568 " iallocator '%s': %s" %
9569 (self.op.iallocator, ial.info),
9571 if len(ial.result) != ial.required_nodes:
9572 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9573 " of nodes (%s), required %s" %
9574 (self.op.iallocator, len(ial.result),
9575 ial.required_nodes), errors.ECODE_FAULT)
9576 self.op.pnode = ial.result[0]
9577 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9578 self.op.instance_name, self.op.iallocator,
9579 utils.CommaJoin(ial.result))
9580 if ial.required_nodes == 2:
9581 self.op.snode = ial.result[1]
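# Illustrative sketch, not part of the original module: for a DRBD-based
# request the allocator is expected to return exactly two node names, which
# become the primary and secondary (names hypothetical):
#
#   ial.result == ["node1.example.com", "node2.example.com"]
#   # -> self.op.pnode = "node1.example.com"
#   #    self.op.snode = "node2.example.com"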
9583 def BuildHooksEnv(self):
9586 This runs on master, primary and secondary nodes of the instance.
9590 "ADD_MODE": self.op.mode,
9592 if self.op.mode == constants.INSTANCE_IMPORT:
9593 env["SRC_NODE"] = self.op.src_node
9594 env["SRC_PATH"] = self.op.src_path
9595 env["SRC_IMAGES"] = self.src_images
9597 env.update(_BuildInstanceHookEnv(
9598 name=self.op.instance_name,
9599 primary_node=self.op.pnode,
9600 secondary_nodes=self.secondaries,
9601 status=self.op.start,
9602 os_type=self.op.os_type,
9603 minmem=self.be_full[constants.BE_MINMEM],
9604 maxmem=self.be_full[constants.BE_MAXMEM],
9605 vcpus=self.be_full[constants.BE_VCPUS],
9606 nics=_NICListToTuple(self, self.nics),
9607 disk_template=self.op.disk_template,
9608 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9609 for d in self.disks],
9612 hypervisor_name=self.op.hypervisor,
9618 def BuildHooksNodes(self):
9619 """Build hooks nodes.
9622 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9625 def _ReadExportInfo(self):
9626 """Reads the export information from disk.
9628 It will override the opcode source node and path with the actual
9629 information, if these two were not specified before.
9631 @return: the export information
9634 assert self.op.mode == constants.INSTANCE_IMPORT
9636 src_node = self.op.src_node
9637 src_path = self.op.src_path
9639 if src_node is None:
9640 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9641 exp_list = self.rpc.call_export_list(locked_nodes)
9643 for node in exp_list:
9644 if exp_list[node].fail_msg:
9646 if src_path in exp_list[node].payload:
9648 self.op.src_node = src_node = node
9649 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9653 raise errors.OpPrereqError("No export found for relative path %s" %
9654 src_path, errors.ECODE_INVAL)
9656 _CheckNodeOnline(self, src_node)
9657 result = self.rpc.call_export_info(src_node, src_path)
9658 result.Raise("No export or invalid export found in dir %s" % src_path)
9660 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9661 if not export_info.has_section(constants.INISECT_EXP):
9662 raise errors.ProgrammerError("Corrupted export config",
9663 errors.ECODE_ENVIRON)
9665 ei_version = export_info.get(constants.INISECT_EXP, "version")
9666 if (int(ei_version) != constants.EXPORT_VERSION):
9667 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9668 (ei_version, constants.EXPORT_VERSION),
9669 errors.ECODE_ENVIRON)
9672 def _ReadExportParams(self, einfo):
9673 """Use export parameters as defaults.
9675 If the opcode doesn't specify (i.e. override) some instance
9676 parameters, try to use them from the export information, if
9680 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9682 if self.op.disk_template is None:
9683 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9684 self.op.disk_template = einfo.get(constants.INISECT_INS,
9686 if self.op.disk_template not in constants.DISK_TEMPLATES:
9687 raise errors.OpPrereqError("Disk template specified in configuration"
9688 " file is not one of the allowed values:"
9690 " ".join(constants.DISK_TEMPLATES),
9693 raise errors.OpPrereqError("No disk template specified and the export"
9694 " is missing the disk_template information",
9697 if not self.op.disks:
9699 # TODO: import the disk iv_name too
9700 for idx in range(constants.MAX_DISKS):
9701 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9702 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9703 disks.append({constants.IDISK_SIZE: disk_sz})
9704 self.op.disks = disks
9705 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9706 raise errors.OpPrereqError("No disk info specified and the export"
9707 " is missing the disk information",
9710 if not self.op.nics:
9712 for idx in range(constants.MAX_NICS):
9713 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9715 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9716 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9723 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9724 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9726 if (self.op.hypervisor is None and
9727 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9728 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9730 if einfo.has_section(constants.INISECT_HYP):
9731 # use the export parameters but do not override the ones
9732 # specified by the user
9733 for name, value in einfo.items(constants.INISECT_HYP):
9734 if name not in self.op.hvparams:
9735 self.op.hvparams[name] = value
9737 if einfo.has_section(constants.INISECT_BEP):
9738 # use the parameters, without overriding
9739 for name, value in einfo.items(constants.INISECT_BEP):
9740 if name not in self.op.beparams:
9741 self.op.beparams[name] = value
9742 # Compatibility for the old "memory" be param
9743 if name == constants.BE_MEMORY:
9744 if constants.BE_MAXMEM not in self.op.beparams:
9745 self.op.beparams[constants.BE_MAXMEM] = value
9746 if constants.BE_MINMEM not in self.op.beparams:
9747 self.op.beparams[constants.BE_MINMEM] = value
9749 # try to read the parameters old style, from the main section
9750 for name in constants.BES_PARAMETERS:
9751 if (name not in self.op.beparams and
9752 einfo.has_option(constants.INISECT_INS, name)):
9753 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9755 if einfo.has_section(constants.INISECT_OSP):
9756 # use the parameters, without overriding
9757 for name, value in einfo.items(constants.INISECT_OSP):
9758 if name not in self.op.osparams:
9759 self.op.osparams[name] = value
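# Illustrative sketch, not part of the original module: the export info parsed
# above is an INI-style file; a minimal example containing the sections and
# options this method consults might look like (all values hypothetical,
# section names taken from the INISECT_* constants):
#
#   [export]
#   version = 0
#   os = debian-image
#
#   [instance]
#   name = inst1.example.com
#   disk_template = drbd
#   disk0_size = 1024
#   nic0_mac = aa:00:00:12:34:56
#   hypervisor = xen-pvm
#
#   [backend]
#   memory = 512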
9761 def _RevertToDefaults(self, cluster):
9762 """Revert the instance parameters to the default values.
9766 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9767 for name in self.op.hvparams.keys():
9768 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9769 del self.op.hvparams[name]
9771 be_defs = cluster.SimpleFillBE({})
9772 for name in self.op.beparams.keys():
9773 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9774 del self.op.beparams[name]
9776 nic_defs = cluster.SimpleFillNIC({})
9777 for nic in self.op.nics:
9778 for name in constants.NICS_PARAMETERS:
9779 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9782 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9783 for name in self.op.osparams.keys():
9784 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9785 del self.op.osparams[name]
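# Illustrative sketch, not part of the original module: with identify_defaults,
# a parameter that merely repeats the cluster default is dropped so the new
# instance keeps inheriting it (parameter names and values hypothetical):
#
#   hv_defs = {"kernel_path": "/boot/vmlinuz"}
#   self.op.hvparams = {"kernel_path": "/boot/vmlinuz",
#                       "root_path": "/dev/xvda1"}
#   # after _RevertToDefaults: self.op.hvparams == {"root_path": "/dev/xvda1"}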
9787 def _CalculateFileStorageDir(self):
9788 """Calculate final instance file storage dir.
9791 # file storage dir calculation/check
9792 self.instance_file_storage_dir = None
9793 if self.op.disk_template in constants.DTS_FILEBASED:
9794 # build the full file storage dir path
9797 if self.op.disk_template == constants.DT_SHARED_FILE:
9798 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9800 get_fsd_fn = self.cfg.GetFileStorageDir
9802 cfg_storagedir = get_fsd_fn()
9803 if not cfg_storagedir:
9804 raise errors.OpPrereqError("Cluster file storage dir not defined",
9806 joinargs.append(cfg_storagedir)
9808 if self.op.file_storage_dir is not None:
9809 joinargs.append(self.op.file_storage_dir)
9811 joinargs.append(self.op.instance_name)
9813 # pylint: disable=W0142
9814 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
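# Illustrative sketch, not part of the original module: for a file-based
# instance the final directory is the cluster storage dir, the optional
# per-instance subdirectory and the instance name joined together
# (paths hypothetical):
#
#   utils.PathJoin("/srv/ganeti/file-storage", "mysub", "inst1.example.com")
#   # -> "/srv/ganeti/file-storage/mysub/inst1.example.com"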
9816 def CheckPrereq(self): # pylint: disable=R0914
9817 """Check prerequisites.
9820 self._CalculateFileStorageDir()
9822 if self.op.mode == constants.INSTANCE_IMPORT:
9823 export_info = self._ReadExportInfo()
9824 self._ReadExportParams(export_info)
9825 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9827 self._old_instance_name = None
9829 if (not self.cfg.GetVGName() and
9830 self.op.disk_template not in constants.DTS_NOT_LVM):
9831 raise errors.OpPrereqError("Cluster does not support lvm-based"
9832 " instances", errors.ECODE_STATE)
9834 if (self.op.hypervisor is None or
9835 self.op.hypervisor == constants.VALUE_AUTO):
9836 self.op.hypervisor = self.cfg.GetHypervisorType()
9838 cluster = self.cfg.GetClusterInfo()
9839 enabled_hvs = cluster.enabled_hypervisors
9840 if self.op.hypervisor not in enabled_hvs:
9841 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9843 (self.op.hypervisor, ",".join(enabled_hvs)),
9846 # Check tag validity
9847 for tag in self.op.tags:
9848 objects.TaggableObject.ValidateTag(tag)
9850 # check hypervisor parameter syntax (locally)
9851 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9852 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9854 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9855 hv_type.CheckParameterSyntax(filled_hvp)
9856 self.hv_full = filled_hvp
9857 # check that we don't specify global parameters on an instance
9858 _CheckGlobalHvParams(self.op.hvparams)
9860 # fill and remember the beparams dict
9861 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9862 for param, value in self.op.beparams.iteritems():
9863 if value == constants.VALUE_AUTO:
9864 self.op.beparams[param] = default_beparams[param]
9865 objects.UpgradeBeParams(self.op.beparams)
9866 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9867 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9869 # build os parameters
9870 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9872 # now that hvp/bep are in final format, let's reset to defaults,
9874 if self.op.identify_defaults:
9875 self._RevertToDefaults(cluster)
9879 for idx, nic in enumerate(self.op.nics):
9880 nic_mode_req = nic.get(constants.INIC_MODE, None)
9881 nic_mode = nic_mode_req
9882 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9883 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9885 # in routed mode, for the first nic, the default ip is 'auto'
9886 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9887 default_ip_mode = constants.VALUE_AUTO
9889 default_ip_mode = constants.VALUE_NONE
9891 # ip validity checks
9892 ip = nic.get(constants.INIC_IP, default_ip_mode)
9893 if ip is None or ip.lower() == constants.VALUE_NONE:
9895 elif ip.lower() == constants.VALUE_AUTO:
9896 if not self.op.name_check:
9897 raise errors.OpPrereqError("IP address set to auto but name checks"
9898 " have been skipped",
9900 nic_ip = self.hostname1.ip
9902 if not netutils.IPAddress.IsValid(ip):
9903 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9907 # TODO: check the ip address for uniqueness
9908 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9909 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9912 # MAC address verification
9913 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9914 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9915 mac = utils.NormalizeAndValidateMac(mac)
9918 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9919 except errors.ReservationError:
9920 raise errors.OpPrereqError("MAC address %s already in use"
9921 " in cluster" % mac,
9922 errors.ECODE_NOTUNIQUE)
9924 # Build nic parameters
9925 link = nic.get(constants.INIC_LINK, None)
9926 if link == constants.VALUE_AUTO:
9927 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9930 nicparams[constants.NIC_MODE] = nic_mode
9932 nicparams[constants.NIC_LINK] = link
9934 check_params = cluster.SimpleFillNIC(nicparams)
9935 objects.NIC.CheckParameterSyntax(check_params)
9936 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9938 # disk checks/pre-build
9939 default_vg = self.cfg.GetVGName()
9941 for disk in self.op.disks:
9942 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9943 if mode not in constants.DISK_ACCESS_SET:
9944 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9945 mode, errors.ECODE_INVAL)
9946 size = disk.get(constants.IDISK_SIZE, None)
9948 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9951 except (TypeError, ValueError):
9952 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9955 data_vg = disk.get(constants.IDISK_VG, default_vg)
9957 constants.IDISK_SIZE: size,
9958 constants.IDISK_MODE: mode,
9959 constants.IDISK_VG: data_vg,
9961 if constants.IDISK_METAVG in disk:
9962 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9963 if constants.IDISK_ADOPT in disk:
9964 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9965 self.disks.append(new_disk)
9967 if self.op.mode == constants.INSTANCE_IMPORT:
9969 for idx in range(len(self.disks)):
9970 option = "disk%d_dump" % idx
9971 if export_info.has_option(constants.INISECT_INS, option):
9972 # FIXME: are the old os-es, disk sizes, etc. useful?
9973 export_name = export_info.get(constants.INISECT_INS, option)
9974 image = utils.PathJoin(self.op.src_path, export_name)
9975 disk_images.append(image)
9977 disk_images.append(False)
9979 self.src_images = disk_images
9981 if self.op.instance_name == self._old_instance_name:
9982 for idx, nic in enumerate(self.nics):
9983 if nic.mac == constants.VALUE_AUTO:
9984 nic_mac_ini = "nic%d_mac" % idx
9985 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9987 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9989 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9990 if self.op.ip_check:
9991 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9992 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9993 (self.check_ip, self.op.instance_name),
9994 errors.ECODE_NOTUNIQUE)
9996 #### mac address generation
9997 # By generating the MAC address here, both the allocator and the hooks get
9998 # the real final mac address rather than the 'auto' or 'generate' value.
9999 # There is a race condition between the generation and the instance object
10000 # creation, which means that we know the mac is valid now, but we're not
10001 # sure it will be when we actually add the instance. If things go bad
10002 # adding the instance will abort because of a duplicate mac, and the
10003 # creation job will fail.
10004 for nic in self.nics:
10005 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10006 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
10010 if self.op.iallocator is not None:
10011 self._RunAllocator()
10013 # Release all unneeded node locks
10014 _ReleaseLocks(self, locking.LEVEL_NODE,
10015 keep=filter(None, [self.op.pnode, self.op.snode,
10016 self.op.src_node]))
10017 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
10018 keep=filter(None, [self.op.pnode, self.op.snode,
10019 self.op.src_node]))
10021 #### node related checks
10023 # check primary node
10024 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10025 assert self.pnode is not None, \
10026 "Cannot retrieve locked node %s" % self.op.pnode
10028 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10029 pnode.name, errors.ECODE_STATE)
10031 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10032 pnode.name, errors.ECODE_STATE)
10033 if not pnode.vm_capable:
10034 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10035 " '%s'" % pnode.name, errors.ECODE_STATE)
10037 self.secondaries = []
10039 # mirror node verification
10040 if self.op.disk_template in constants.DTS_INT_MIRROR:
10041 if self.op.snode == pnode.name:
10042 raise errors.OpPrereqError("The secondary node cannot be the"
10043 " primary node", errors.ECODE_INVAL)
10044 _CheckNodeOnline(self, self.op.snode)
10045 _CheckNodeNotDrained(self, self.op.snode)
10046 _CheckNodeVmCapable(self, self.op.snode)
10047 self.secondaries.append(self.op.snode)
10049 snode = self.cfg.GetNodeInfo(self.op.snode)
10050 if pnode.group != snode.group:
10051 self.LogWarning("The primary and secondary nodes are in two"
10052 " different node groups; the disk parameters"
10053 " from the first disk's node group will be"
10056 nodenames = [pnode.name] + self.secondaries
10058 # Verify instance specs
10059 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10061 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10062 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10063 constants.ISPEC_DISK_COUNT: len(self.disks),
10064 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10065 constants.ISPEC_NIC_COUNT: len(self.nics),
10066 constants.ISPEC_SPINDLE_USE: spindle_use,
10069 group_info = self.cfg.GetNodeGroup(pnode.group)
10070 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
10071 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10072 if not self.op.ignore_ipolicy and res:
10073 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10074 " policy: %s") % (pnode.group,
10075 utils.CommaJoin(res)),
10076 errors.ECODE_INVAL)
10078 if not self.adopt_disks:
10079 if self.op.disk_template == constants.DT_RBD:
10080 # _CheckRADOSFreeSpace() is just a placeholder.
10081 # Any function that checks prerequisites can be placed here.
10082 # Check if there is enough space on the RADOS cluster.
10083 _CheckRADOSFreeSpace()
10085 # Check lv size requirements, if not adopting
10086 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10087 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10089 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10090 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10091 disk[constants.IDISK_ADOPT])
10092 for disk in self.disks])
10093 if len(all_lvs) != len(self.disks):
10094 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10095 errors.ECODE_INVAL)
10096 for lv_name in all_lvs:
10098 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
10099 # to ReserveLV use the same syntax
10100 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10101 except errors.ReservationError:
10102 raise errors.OpPrereqError("LV named %s used by another instance" %
10103 lv_name, errors.ECODE_NOTUNIQUE)
10105 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10106 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10108 node_lvs = self.rpc.call_lv_list([pnode.name],
10109 vg_names.payload.keys())[pnode.name]
10110 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10111 node_lvs = node_lvs.payload
10113 delta = all_lvs.difference(node_lvs.keys())
10115 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10116 utils.CommaJoin(delta),
10117 errors.ECODE_INVAL)
10118 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10120 raise errors.OpPrereqError("Online logical volumes found, cannot"
10121 " adopt: %s" % utils.CommaJoin(online_lvs),
10122 errors.ECODE_STATE)
10123 # update the size of disk based on what is found
10124 for dsk in self.disks:
10125 dsk[constants.IDISK_SIZE] = \
10126 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10127 dsk[constants.IDISK_ADOPT])][0]))
10129 elif self.op.disk_template == constants.DT_BLOCK:
10130 # Normalize and de-duplicate device paths
10131 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10132 for disk in self.disks])
10133 if len(all_disks) != len(self.disks):
10134 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10135 errors.ECODE_INVAL)
10136 baddisks = [d for d in all_disks
10137 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10139 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10140 " cannot be adopted" %
10141 (", ".join(baddisks),
10142 constants.ADOPTABLE_BLOCKDEV_ROOT),
10143 errors.ECODE_INVAL)
10145 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10146 list(all_disks))[pnode.name]
10147 node_disks.Raise("Cannot get block device information from node %s" %
10149 node_disks = node_disks.payload
10150 delta = all_disks.difference(node_disks.keys())
10152 raise errors.OpPrereqError("Missing block device(s): %s" %
10153 utils.CommaJoin(delta),
10154 errors.ECODE_INVAL)
10155 for dsk in self.disks:
10156 dsk[constants.IDISK_SIZE] = \
10157 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10159 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10161 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10162 # check OS parameters (remotely)
10163 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10165 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10167 # memory check on primary node
10168 #TODO(dynmem): use MINMEM for checking
10170 _CheckNodeFreeMemory(self, self.pnode.name,
10171 "creating instance %s" % self.op.instance_name,
10172 self.be_full[constants.BE_MAXMEM],
10173 self.op.hypervisor)
10175 self.dry_run_result = list(nodenames)
10177 def Exec(self, feedback_fn):
10178 """Create and add the instance to the cluster.
10181 instance = self.op.instance_name
10182 pnode_name = self.pnode.name
10184 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10185 self.owned_locks(locking.LEVEL_NODE)), \
10186 "Node locks differ from node resource locks"
10188 ht_kind = self.op.hypervisor
10189 if ht_kind in constants.HTS_REQ_PORT:
10190 network_port = self.cfg.AllocatePort()
10192 network_port = None
10194 # This is ugly, but we have a chicken-and-egg problem here:
10195 # We can only take the group disk parameters, as the instance
10196 # has no disks yet (we are generating them right here).
10197 node = self.cfg.GetNodeInfo(pnode_name)
10198 nodegroup = self.cfg.GetNodeGroup(node.group)
10199 disks = _GenerateDiskTemplate(self,
10200 self.op.disk_template,
10201 instance, pnode_name,
10204 self.instance_file_storage_dir,
10205 self.op.file_driver,
10208 self.cfg.GetGroupDiskParams(nodegroup))
10210 iobj = objects.Instance(name=instance, os=self.op.os_type,
10211 primary_node=pnode_name,
10212 nics=self.nics, disks=disks,
10213 disk_template=self.op.disk_template,
10214 admin_state=constants.ADMINST_DOWN,
10215 network_port=network_port,
10216 beparams=self.op.beparams,
10217 hvparams=self.op.hvparams,
10218 hypervisor=self.op.hypervisor,
10219 osparams=self.op.osparams,
10223 for tag in self.op.tags:
10226 if self.adopt_disks:
10227 if self.op.disk_template == constants.DT_PLAIN:
10228 # rename LVs to the newly-generated names; we need to construct
10229 # 'fake' LV disks with the old data, plus the new unique_id
10230 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10232 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10233 rename_to.append(t_dsk.logical_id)
10234 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10235 self.cfg.SetDiskID(t_dsk, pnode_name)
10236 result = self.rpc.call_blockdev_rename(pnode_name,
10237 zip(tmp_disks, rename_to))
10238 result.Raise("Failed to rename adopted LVs")
10240 feedback_fn("* creating instance disks...")
10242 _CreateDisks(self, iobj)
10243 except errors.OpExecError:
10244 self.LogWarning("Device creation failed, reverting...")
10246 _RemoveDisks(self, iobj)
10248 self.cfg.ReleaseDRBDMinors(instance)
10251 feedback_fn("adding instance %s to cluster config" % instance)
10253 self.cfg.AddInstance(iobj, self.proc.GetECId())
10255 # Declare that we don't want to remove the instance lock anymore, as we've
10256 # added the instance to the config
10257 del self.remove_locks[locking.LEVEL_INSTANCE]
10259 if self.op.mode == constants.INSTANCE_IMPORT:
10260 # Release unused nodes
10261 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10263 # Release all nodes
10264 _ReleaseLocks(self, locking.LEVEL_NODE)
10267 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10268 feedback_fn("* wiping instance disks...")
10270 _WipeDisks(self, iobj)
10271 except errors.OpExecError, err:
10272 logging.exception("Wiping disks failed")
10273 self.LogWarning("Wiping instance disks failed (%s)", err)
10277 # Something is already wrong with the disks, don't do anything else
10279 elif self.op.wait_for_sync:
10280 disk_abort = not _WaitForSync(self, iobj)
10281 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10282 # make sure the disks are not degraded (still sync-ing is ok)
10283 feedback_fn("* checking mirrors status")
10284 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10289 _RemoveDisks(self, iobj)
10290 self.cfg.RemoveInstance(iobj.name)
10291 # Make sure the instance lock gets removed
10292 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10293 raise errors.OpExecError("There are some degraded disks for"
10296 # Release all node resource locks
10297 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10299 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10300 # we need to set the disks ID to the primary node, since the
10301 # preceding code might or might not have done it, depending on
10302 # disk template and other options
10303 for disk in iobj.disks:
10304 self.cfg.SetDiskID(disk, pnode_name)
10305 if self.op.mode == constants.INSTANCE_CREATE:
10306 if not self.op.no_install:
10307 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10308 not self.op.wait_for_sync)
10310 feedback_fn("* pausing disk sync to install instance OS")
10311 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10314 for idx, success in enumerate(result.payload):
10316 logging.warn("pause-sync of instance %s for disk %d failed",
10319 feedback_fn("* running the instance OS create scripts...")
10320 # FIXME: pass debug option from opcode to backend
10322 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10323 self.op.debug_level)
10325 feedback_fn("* resuming disk sync")
10326 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10329 for idx, success in enumerate(result.payload):
10331 logging.warn("resume-sync of instance %s for disk %d failed",
10334 os_add_result.Raise("Could not add os for instance %s"
10335 " on node %s" % (instance, pnode_name))
10338 if self.op.mode == constants.INSTANCE_IMPORT:
10339 feedback_fn("* running the instance OS import scripts...")
10343 for idx, image in enumerate(self.src_images):
10347 # FIXME: pass debug option from opcode to backend
10348 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10349 constants.IEIO_FILE, (image, ),
10350 constants.IEIO_SCRIPT,
10351 (iobj.disks[idx], idx),
10353 transfers.append(dt)
10356 masterd.instance.TransferInstanceData(self, feedback_fn,
10357 self.op.src_node, pnode_name,
10358 self.pnode.secondary_ip,
10360 if not compat.all(import_result):
10361 self.LogWarning("Some disks for instance %s on node %s were not"
10362 " imported successfully" % (instance, pnode_name))
10364 rename_from = self._old_instance_name
10366 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10367 feedback_fn("* preparing remote import...")
10368 # The source cluster will stop the instance before attempting to make
10369 # a connection. In some cases stopping an instance can take a long
10370 # time, hence the shutdown timeout is added to the connection timeout.
10372 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10373 self.op.source_shutdown_timeout)
10374 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10376 assert iobj.primary_node == self.pnode.name
10378 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10379 self.source_x509_ca,
10380 self._cds, timeouts)
10381 if not compat.all(disk_results):
10382 # TODO: Should the instance still be started, even if some disks
10383 # failed to import (valid for local imports, too)?
10384 self.LogWarning("Some disks for instance %s on node %s were not"
10385 " imported successfully" % (instance, pnode_name))
10387 rename_from = self.source_instance_name
10390 # also checked in the prereq part
10391 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10394 # Run rename script on newly imported instance
10395 assert iobj.name == instance
10396 feedback_fn("Running rename script for %s" % instance)
10397 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10399 self.op.debug_level)
10400 if result.fail_msg:
10401 self.LogWarning("Failed to run rename script for %s on node"
10402 " %s: %s" % (instance, pnode_name, result.fail_msg))
10404 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10407 iobj.admin_state = constants.ADMINST_UP
10408 self.cfg.Update(iobj, feedback_fn)
10409 logging.info("Starting instance %s on node %s", instance, pnode_name)
10410 feedback_fn("* starting instance...")
10411 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10413 result.Raise("Could not start instance")
10415 return list(iobj.all_nodes)
10418 def _CheckRADOSFreeSpace():
10419 """Compute disk size requirements inside the RADOS cluster.
10422 # For the RADOS cluster we assume there is always enough space.
10426 class LUInstanceConsole(NoHooksLU):
10427 """Connect to an instance's console.
10429 This is somewhat special in that it returns the command line that
10430 you need to run on the master node in order to connect to the console.
10436 def ExpandNames(self):
10437 self.share_locks = _ShareAll()
10438 self._ExpandAndLockInstance()
10440 def CheckPrereq(self):
10441 """Check prerequisites.
10443 This checks that the instance is in the cluster.
10446 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10447 assert self.instance is not None, \
10448 "Cannot retrieve locked instance %s" % self.op.instance_name
10449 _CheckNodeOnline(self, self.instance.primary_node)
10451 def Exec(self, feedback_fn):
10452 """Connect to the console of an instance
10455 instance = self.instance
10456 node = instance.primary_node
10458 node_insts = self.rpc.call_instance_list([node],
10459 [instance.hypervisor])[node]
10460 node_insts.Raise("Can't get node information from %s" % node)
10462 if instance.name not in node_insts.payload:
10463 if instance.admin_state == constants.ADMINST_UP:
10464 state = constants.INSTST_ERRORDOWN
10465 elif instance.admin_state == constants.ADMINST_DOWN:
10466 state = constants.INSTST_ADMINDOWN
10468 state = constants.INSTST_ADMINOFFLINE
10469 raise errors.OpExecError("Instance %s is not running (state %s)" %
10470 (instance.name, state))
10472 logging.debug("Connecting to console of %s on %s", instance.name, node)
10474 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10477 def _GetInstanceConsole(cluster, instance):
10478 """Returns console information for an instance.
10480 @type cluster: L{objects.Cluster}
10481 @type instance: L{objects.Instance}
10485 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10486 # beparams and hvparams are passed separately, to avoid editing the
10487 # instance and then saving the defaults in the instance itself.
10488 hvparams = cluster.FillHV(instance)
10489 beparams = cluster.FillBE(instance)
10490 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10492 assert console.instance == instance.name
10493 assert console.Validate()
10495 return console.ToDict()
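# Illustrative sketch, not part of the original module: the returned dict is
# the serialized console object; the exact fields depend on the hypervisor,
# but for an SSH-based console it could look roughly like (values
# hypothetical):
#
#   {"instance": "inst1.example.com", "kind": "ssh",
#    "host": "node1.example.com", "user": "root",
#    "command": ["xm", "console", "inst1.example.com"]}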
10498 class LUInstanceReplaceDisks(LogicalUnit):
10499 """Replace the disks of an instance.
10502 HPATH = "mirrors-replace"
10503 HTYPE = constants.HTYPE_INSTANCE
10506 def CheckArguments(self):
10507 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10508 self.op.iallocator)
10510 def ExpandNames(self):
10511 self._ExpandAndLockInstance()
10513 assert locking.LEVEL_NODE not in self.needed_locks
10514 assert locking.LEVEL_NODE_RES not in self.needed_locks
10515 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10517 assert self.op.iallocator is None or self.op.remote_node is None, \
10518 "Conflicting options"
10520 if self.op.remote_node is not None:
10521 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10523 # Warning: do not remove the locking of the new secondary here
10524 # unless DRBD8.AddChildren is changed to work in parallel;
10525 # currently it doesn't since parallel invocations of
10526 # FindUnusedMinor will conflict
10527 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10528 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10530 self.needed_locks[locking.LEVEL_NODE] = []
10531 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10533 if self.op.iallocator is not None:
10534 # iallocator will select a new node in the same group
10535 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10537 self.needed_locks[locking.LEVEL_NODE_RES] = []
10539 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10540 self.op.iallocator, self.op.remote_node,
10541 self.op.disks, False, self.op.early_release,
10542 self.op.ignore_ipolicy)
10544 self.tasklets = [self.replacer]
10546 def DeclareLocks(self, level):
10547 if level == locking.LEVEL_NODEGROUP:
10548 assert self.op.remote_node is None
10549 assert self.op.iallocator is not None
10550 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10552 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10553 # Lock all groups used by instance optimistically; this requires going
10554 # via the node before it's locked, requiring verification later on
10555 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10556 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10558 elif level == locking.LEVEL_NODE:
10559 if self.op.iallocator is not None:
10560 assert self.op.remote_node is None
10561 assert not self.needed_locks[locking.LEVEL_NODE]
10563 # Lock member nodes of all locked groups
10564 self.needed_locks[locking.LEVEL_NODE] = \
10566 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10567 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10569 self._LockInstancesNodes()
10570 elif level == locking.LEVEL_NODE_RES:
10572 self.needed_locks[locking.LEVEL_NODE_RES] = \
10573 self.needed_locks[locking.LEVEL_NODE]
10575 def BuildHooksEnv(self):
10576 """Build hooks env.
10578 This runs on the master, the primary and all the secondaries.
10581 instance = self.replacer.instance
10583 "MODE": self.op.mode,
10584 "NEW_SECONDARY": self.op.remote_node,
10585 "OLD_SECONDARY": instance.secondary_nodes[0],
10587 env.update(_BuildInstanceHookEnvByObject(self, instance))
10590 def BuildHooksNodes(self):
10591 """Build hooks nodes.
10594 instance = self.replacer.instance
10596 self.cfg.GetMasterNode(),
10597 instance.primary_node,
10599 if self.op.remote_node is not None:
10600 nl.append(self.op.remote_node)
10603 def CheckPrereq(self):
10604 """Check prerequisites.
10607 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10608 self.op.iallocator is None)
10610 # Verify if node group locks are still correct
10611 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10613 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10615 return LogicalUnit.CheckPrereq(self)
10618 class TLReplaceDisks(Tasklet):
10619 """Replaces disks for an instance.
10621 Note: Locking is not within the scope of this class.
10624 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10625 disks, delay_iallocator, early_release, ignore_ipolicy):
10626 """Initializes this class.
10629 Tasklet.__init__(self, lu)
10632 self.instance_name = instance_name
10634 self.iallocator_name = iallocator_name
10635 self.remote_node = remote_node
10637 self.delay_iallocator = delay_iallocator
10638 self.early_release = early_release
10639 self.ignore_ipolicy = ignore_ipolicy
10642 self.instance = None
10643 self.new_node = None
10644 self.target_node = None
10645 self.other_node = None
10646 self.remote_node_info = None
10647 self.node_secondary_ip = None
10650 def CheckArguments(mode, remote_node, iallocator):
10651 """Helper function for users of this class.
10654 # check for valid parameter combination
10655 if mode == constants.REPLACE_DISK_CHG:
10656 if remote_node is None and iallocator is None:
10657 raise errors.OpPrereqError("When changing the secondary either an"
10658 " iallocator script must be used or the"
10659 " new node given", errors.ECODE_INVAL)
10661 if remote_node is not None and iallocator is not None:
10662 raise errors.OpPrereqError("Give either the iallocator or the new"
10663 " secondary, not both", errors.ECODE_INVAL)
10665 elif remote_node is not None or iallocator is not None:
10666 # Not replacing the secondary
10667 raise errors.OpPrereqError("The iallocator and new node options can"
10668 " only be used when changing the"
10669 " secondary node", errors.ECODE_INVAL)
10672 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10673 """Compute a new secondary node using an IAllocator.
10676 ial = IAllocator(lu.cfg, lu.rpc,
10677 mode=constants.IALLOCATOR_MODE_RELOC,
10678 name=instance_name,
10679 relocate_from=list(relocate_from))
10681 ial.Run(iallocator_name)
10683 if not ial.success:
10684 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10685 " %s" % (iallocator_name, ial.info),
10686 errors.ECODE_NORES)
10688 if len(ial.result) != ial.required_nodes:
10689 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10690 " of nodes (%s), required %s" %
10692 len(ial.result), ial.required_nodes),
10693 errors.ECODE_FAULT)
10695 remote_node_name = ial.result[0]
10697 lu.LogInfo("Selected new secondary for instance '%s': %s",
10698 instance_name, remote_node_name)
10700 return remote_node_name
10702 def _FindFaultyDisks(self, node_name):
10703 """Wrapper for L{_FindFaultyInstanceDisks}.
10706 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10709 def _CheckDisksActivated(self, instance):
10710 """Checks if the instance disks are activated.
10712 @param instance: The instance to check disks
10713 @return: True if they are activated, False otherwise
10716 nodes = instance.all_nodes
10718 for idx, dev in enumerate(instance.disks):
10720 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10721 self.cfg.SetDiskID(dev, node)
10723 result = _BlockdevFind(self, node, dev, instance)
10727 elif result.fail_msg or not result.payload:
10732 def CheckPrereq(self):
10733 """Check prerequisites.
10735 This checks that the instance is in the cluster.
10738 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10739 assert instance is not None, \
10740 "Cannot retrieve locked instance %s" % self.instance_name
10742 if instance.disk_template != constants.DT_DRBD8:
10743 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10744 " instances", errors.ECODE_INVAL)
10746 if len(instance.secondary_nodes) != 1:
10747 raise errors.OpPrereqError("The instance has a strange layout,"
10748 " expected one secondary but found %d" %
10749 len(instance.secondary_nodes),
10750 errors.ECODE_FAULT)
10752 if not self.delay_iallocator:
10753 self._CheckPrereq2()
10755 def _CheckPrereq2(self):
10756 """Check prerequisites, second part.
10758 This function should always be part of CheckPrereq. It was separated and is
10759 now called from Exec because during node evacuation iallocator was only
10760 called with an unmodified cluster model, not taking planned changes into account.
10764 instance = self.instance
10765 secondary_node = instance.secondary_nodes[0]
10767 if self.iallocator_name is None:
10768 remote_node = self.remote_node
10770 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10771 instance.name, instance.secondary_nodes)
10773 if remote_node is None:
10774 self.remote_node_info = None
10776 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10777 "Remote node '%s' is not locked" % remote_node
10779 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10780 assert self.remote_node_info is not None, \
10781 "Cannot retrieve locked node %s" % remote_node
10783 if remote_node == self.instance.primary_node:
10784 raise errors.OpPrereqError("The specified node is the primary node of"
10785 " the instance", errors.ECODE_INVAL)
10787 if remote_node == secondary_node:
10788 raise errors.OpPrereqError("The specified node is already the"
10789 " secondary node of the instance",
10790 errors.ECODE_INVAL)
10792 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10793 constants.REPLACE_DISK_CHG):
10794 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10795 errors.ECODE_INVAL)
10797 if self.mode == constants.REPLACE_DISK_AUTO:
10798 if not self._CheckDisksActivated(instance):
10799 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10800 " first" % self.instance_name,
10801 errors.ECODE_STATE)
10802 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10803 faulty_secondary = self._FindFaultyDisks(secondary_node)
10805 if faulty_primary and faulty_secondary:
10806 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10807 " one node and can not be repaired"
10808 " automatically" % self.instance_name,
10809 errors.ECODE_STATE)
10812 self.disks = faulty_primary
10813 self.target_node = instance.primary_node
10814 self.other_node = secondary_node
10815 check_nodes = [self.target_node, self.other_node]
10816 elif faulty_secondary:
10817 self.disks = faulty_secondary
10818 self.target_node = secondary_node
10819 self.other_node = instance.primary_node
10820 check_nodes = [self.target_node, self.other_node]
10826 # Non-automatic modes
10827 if self.mode == constants.REPLACE_DISK_PRI:
10828 self.target_node = instance.primary_node
10829 self.other_node = secondary_node
10830 check_nodes = [self.target_node, self.other_node]
10832 elif self.mode == constants.REPLACE_DISK_SEC:
10833 self.target_node = secondary_node
10834 self.other_node = instance.primary_node
10835 check_nodes = [self.target_node, self.other_node]
10837 elif self.mode == constants.REPLACE_DISK_CHG:
10838 self.new_node = remote_node
10839 self.other_node = instance.primary_node
10840 self.target_node = secondary_node
10841 check_nodes = [self.new_node, self.other_node]
10843 _CheckNodeNotDrained(self.lu, remote_node)
10844 _CheckNodeVmCapable(self.lu, remote_node)
10846 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10847 assert old_node_info is not None
10848 if old_node_info.offline and not self.early_release:
10849 # doesn't make sense to delay the release
10850 self.early_release = True
10851 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10852 " early-release mode", secondary_node)
10855 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10858 # If not specified, all disks should be replaced
10860 self.disks = range(len(self.instance.disks))
10862 # TODO: This is ugly, but right now we can't distinguish between internally
10863 # submitted opcodes and external ones. We should fix that.
10864 if self.remote_node_info:
10865 # We change the node, lets verify it still meets instance policy
10866 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10867 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10869 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10870 ignore=self.ignore_ipolicy)
10872 for node in check_nodes:
10873 _CheckNodeOnline(self.lu, node)
10875 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10878 if node_name is not None)
10880 # Release unneeded node and node resource locks
10881 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10882 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10884 # Release any owned node group
10885 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10886 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10888 # Check whether disks are valid
10889 for disk_idx in self.disks:
10890 instance.FindDisk(disk_idx)
10892 # Get secondary node IP addresses
10893 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10894 in self.cfg.GetMultiNodeInfo(touched_nodes))
10896 def Exec(self, feedback_fn):
10897 """Execute disk replacement.
10899 This dispatches the disk replacement to the appropriate handler.
10902 if self.delay_iallocator:
10903 self._CheckPrereq2()
10906 # Verify owned locks before starting operation
10907 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10908 assert set(owned_nodes) == set(self.node_secondary_ip), \
10909 ("Incorrect node locks, owning %s, expected %s" %
10910 (owned_nodes, self.node_secondary_ip.keys()))
10911 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10912 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10914 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10915 assert list(owned_instances) == [self.instance_name], \
10916 "Instance '%s' not locked" % self.instance_name
10918 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10919 "Should not own any node group lock at this point"
10922 feedback_fn("No disks need replacement")
10925 feedback_fn("Replacing disk(s) %s for %s" %
10926 (utils.CommaJoin(self.disks), self.instance.name))
10928 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10930 # Activate the instance disks if we're replacing them on a down instance
10932 _StartInstanceDisks(self.lu, self.instance, True)
10935 # Should we replace the secondary node?
10936 if self.new_node is not None:
10937 fn = self._ExecDrbd8Secondary
10939 fn = self._ExecDrbd8DiskOnly
10941 result = fn(feedback_fn)
10943 # Deactivate the instance disks if we're replacing them on a
10946 _SafeShutdownInstanceDisks(self.lu, self.instance)
10948 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10951 # Verify owned locks
10952 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10953 nodes = frozenset(self.node_secondary_ip)
10954 assert ((self.early_release and not owned_nodes) or
10955 (not self.early_release and not (set(owned_nodes) - nodes))), \
10956 ("Not owning the correct locks, early_release=%s, owned=%r,"
10957 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10961 def _CheckVolumeGroup(self, nodes):
10962 self.lu.LogInfo("Checking volume groups")
10964 vgname = self.cfg.GetVGName()
10966 # Make sure volume group exists on all involved nodes
10967 results = self.rpc.call_vg_list(nodes)
10969 raise errors.OpExecError("Can't list volume groups on the nodes")
10972 res = results[node]
10973 res.Raise("Error checking node %s" % node)
10974 if vgname not in res.payload:
10975 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10978 def _CheckDisksExistence(self, nodes):
10979 # Check disk existence
10980 for idx, dev in enumerate(self.instance.disks):
10981 if idx not in self.disks:
10985 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10986 self.cfg.SetDiskID(dev, node)
10988 result = _BlockdevFind(self, node, dev, self.instance)
10990 msg = result.fail_msg
10991 if msg or not result.payload:
10993 msg = "disk not found"
10994 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10997 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10998 for idx, dev in enumerate(self.instance.disks):
10999 if idx not in self.disks:
11002 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11005 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11006 on_primary, ldisk=ldisk):
11007 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11008 " replace disks for instance %s" %
11009 (node_name, self.instance.name))
11011 def _CreateNewStorage(self, node_name):
11012 """Create new storage on the primary or secondary node.
11014 This is only used for same-node replaces, not for changing the
11015 secondary node, hence we don't want to modify the existing disk.
11020 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11021 for idx, dev in enumerate(disks):
11022 if idx not in self.disks:
11025 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
11027 self.cfg.SetDiskID(dev, node_name)
11029 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11030 names = _GenerateUniqueNames(self.lu, lv_names)
11032 (data_disk, meta_disk) = dev.children
11033 vg_data = data_disk.logical_id[0]
11034 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11035 logical_id=(vg_data, names[0]),
11036 params=data_disk.params)
11037 vg_meta = meta_disk.logical_id[0]
11038 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
11039 logical_id=(vg_meta, names[1]),
11040 params=meta_disk.params)
11042 new_lvs = [lv_data, lv_meta]
11043 old_lvs = [child.Copy() for child in dev.children]
11044 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11046 # we pass force_create=True to force the LVM creation
11047 for new_lv in new_lvs:
11048 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11049 _GetInstanceInfoText(self.instance), False)
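# Illustration (hypothetical unique ID): for disk index 0 the generated LV names
# take the form "<unique-id>.disk0_data" and "<unique-id>.disk0_meta", and
# iv_names ends up mapping e.g. "disk/0" -> (drbd_dev, old_lvs, new_lvs), which
# the rename/attach and cleanup steps below consume.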
11053 def _CheckDevices(self, node_name, iv_names):
11054 for name, (dev, _, _) in iv_names.iteritems():
11055 self.cfg.SetDiskID(dev, node_name)
11057 result = _BlockdevFind(self, node_name, dev, self.instance)
11059 msg = result.fail_msg
11060 if msg or not result.payload:
11062 msg = "disk not found"
11063 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11066 if result.payload.is_degraded:
11067 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11069 def _RemoveOldStorage(self, node_name, iv_names):
11070 for name, (_, old_lvs, _) in iv_names.iteritems():
11071 self.lu.LogInfo("Remove logical volumes for %s" % name)
11074 self.cfg.SetDiskID(lv, node_name)
11076 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11078 self.lu.LogWarning("Can't remove old LV: %s" % msg,
11079 hint="remove unused LVs manually")
11081 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11082 """Replace a disk on the primary or secondary for DRBD 8.
11084 The algorithm for replace is quite complicated:
11086 1. for each disk to be replaced:
11088 1. create new LVs on the target node with unique names
11089 1. detach old LVs from the drbd device
11090 1. rename old LVs to name_replaced.<time_t>
11091 1. rename new LVs to old LVs
11092 1. attach the new LVs (with the old names now) to the drbd device
11094 1. wait for sync across all devices
11096 1. for each modified disk:
11098 1. remove old LVs (which have the name name_replaced.<time_t>)
11100 Failures are not very well handled.
11105 # Step: check device activation
11106 self.lu.LogStep(1, steps_total, "Check device existence")
11107 self._CheckDisksExistence([self.other_node, self.target_node])
11108 self._CheckVolumeGroup([self.target_node, self.other_node])
11110 # Step: check other node consistency
11111 self.lu.LogStep(2, steps_total, "Check peer consistency")
11112 self._CheckDisksConsistency(self.other_node,
11113 self.other_node == self.instance.primary_node,
11116 # Step: create new storage
11117 self.lu.LogStep(3, steps_total, "Allocate new storage")
11118 iv_names = self._CreateNewStorage(self.target_node)
11120 # Step: for each lv, detach+rename*2+attach
11121 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11122 for dev, old_lvs, new_lvs in iv_names.itervalues():
11123 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11125 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11127 result.Raise("Can't detach drbd from local storage on node"
11128 " %s for device %s" % (self.target_node, dev.iv_name))
11130 #cfg.Update(instance)
11132 # ok, we created the new LVs, so now we know we have the needed
11133 # storage; as such, we proceed on the target node to rename
11134 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11135 # using the assumption that logical_id == physical_id (which in
11136 # turn is the unique_id on that node)
11138 # FIXME(iustin): use a better name for the replaced LVs
11139 temp_suffix = int(time.time())
11140 ren_fn = lambda d, suff: (d.physical_id[0],
11141 d.physical_id[1] + "_replaced-%s" % suff)
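# Illustration (hypothetical values): with temp_suffix = 1357000000, an old data
# LV with physical_id ("xenvg", "abc123.disk0_data") is renamed to
# ("xenvg", "abc123.disk0_data_replaced-1357000000"), which frees its original
# name for the newly created LV that replaces it.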
11143 # Build the rename list based on what LVs exist on the node
11144 rename_old_to_new = []
11145 for to_ren in old_lvs:
11146 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11147 if not result.fail_msg and result.payload:
11149 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11151 self.lu.LogInfo("Renaming the old LVs on the target node")
11152 result = self.rpc.call_blockdev_rename(self.target_node,
11154 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11156 # Now we rename the new LVs to the old LVs
11157 self.lu.LogInfo("Renaming the new LVs on the target node")
11158 rename_new_to_old = [(new, old.physical_id)
11159 for old, new in zip(old_lvs, new_lvs)]
11160 result = self.rpc.call_blockdev_rename(self.target_node,
11162 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11164 # Intermediate steps of in memory modifications
11165 for old, new in zip(old_lvs, new_lvs):
11166 new.logical_id = old.logical_id
11167 self.cfg.SetDiskID(new, self.target_node)
11169 # We need to modify old_lvs so that removal later removes the
11170 # right LVs, not the newly added ones; note that old_lvs is a
11172 for disk in old_lvs:
11173 disk.logical_id = ren_fn(disk, temp_suffix)
11174 self.cfg.SetDiskID(disk, self.target_node)
11176 # Now that the new lvs have the old name, we can add them to the device
11177 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11178 result = self.rpc.call_blockdev_addchildren(self.target_node,
11179 (dev, self.instance), new_lvs)
11180 msg = result.fail_msg
11182 for new_lv in new_lvs:
11183 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11186 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11187 hint=("cleanup manually the unused logical"
11189 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11191 cstep = itertools.count(5)
11193 if self.early_release:
11194 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11195 self._RemoveOldStorage(self.target_node, iv_names)
11196 # TODO: Check if releasing locks early still makes sense
11197 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11199 # Release all resource locks except those used by the instance
11200 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11201 keep=self.node_secondary_ip.keys())
11203 # Release all node locks while waiting for sync
11204 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11206 # TODO: Can the instance lock be downgraded here? Take the optional disk
11207 # shutdown in the caller into consideration.
11210 # This can fail as the old devices are degraded and _WaitForSync
11211 # does a combined result over all disks, so we don't check its return value
11212 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11213 _WaitForSync(self.lu, self.instance)
11215 # Check all devices manually
11216 self._CheckDevices(self.instance.primary_node, iv_names)
11218 # Step: remove old storage
11219 if not self.early_release:
11220 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11221 self._RemoveOldStorage(self.target_node, iv_names)
11223 def _ExecDrbd8Secondary(self, feedback_fn):
11224 """Replace the secondary node for DRBD 8.
11226 The algorithm for replace is quite complicated:
11227 - for all disks of the instance:
11228 - create new LVs on the new node with same names
11229 - shutdown the drbd device on the old secondary
11230 - disconnect the drbd network on the primary
11231 - create the drbd device on the new secondary
11232 - network attach the drbd on the primary, using an artifice:
11233 the drbd code for Attach() will connect to the network if it
11234 finds a device which is connected to the good local disks but
11235 not network enabled
11236 - wait for sync across all devices
11237 - remove all disks from the old secondary
11239 Failures are not very well handled.
11244 pnode = self.instance.primary_node
11246 # Step: check device activation
11247 self.lu.LogStep(1, steps_total, "Check device existence")
11248 self._CheckDisksExistence([self.instance.primary_node])
11249 self._CheckVolumeGroup([self.instance.primary_node])
11251 # Step: check other node consistency
11252 self.lu.LogStep(2, steps_total, "Check peer consistency")
11253 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11255 # Step: create new storage
11256 self.lu.LogStep(3, steps_total, "Allocate new storage")
11257 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11258 for idx, dev in enumerate(disks):
11259 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11260 (self.new_node, idx))
11261 # we pass force_create=True to force LVM creation
11262 for new_lv in dev.children:
11263 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11264 True, _GetInstanceInfoText(self.instance), False)
11266 # Step 4: drbd minors and drbd setup changes
11267 # after this, we must manually remove the drbd minors on both the
11268 # error and the success paths
11269 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11270 minors = self.cfg.AllocateDRBDMinor([self.new_node
11271 for dev in self.instance.disks],
11272 self.instance.name)
11273 logging.debug("Allocated minors %r", minors)
11276 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11277 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11278 (self.new_node, idx))
11279 # create new devices on new_node; note that we create two IDs:
11280 # one without port, so the drbd will be activated without
11281 # networking information on the new node at this stage, and one
11282 # with network, for the latter activation in step 4
11283 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11284 if self.instance.primary_node == o_node1:
11285 p_minor = o_minor1
11286 else:
11287 assert self.instance.primary_node == o_node2, "Three-node instance?"
11288 p_minor = o_minor2
11290 new_alone_id = (self.instance.primary_node, self.new_node, None,
11291 p_minor, new_minor, o_secret)
11292 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11293 p_minor, new_minor, o_secret)
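# Illustration (hypothetical values): if the old logical_id was
#   ("node1", "node2", 11000, 0, 3, "secret")
# and node1 is the primary, the device is first created on the new secondary as
#   ("node1", "node3", None, 0, new_minor, "secret")
# (no port, so it comes up without networking) and later reconfigured to
#   ("node1", "node3", 11000, 0, new_minor, "secret") for the attach phase.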
11295 iv_names[idx] = (dev, dev.children, new_net_id)
11296 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11298 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11299 logical_id=new_alone_id,
11300 children=dev.children,
11303 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11306 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11308 _GetInstanceInfoText(self.instance), False)
11309 except errors.GenericError:
11310 self.cfg.ReleaseDRBDMinors(self.instance.name)
11313 # We have new devices, shutdown the drbd on the old secondary
11314 for idx, dev in enumerate(self.instance.disks):
11315 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11316 self.cfg.SetDiskID(dev, self.target_node)
11317 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11318 (dev, self.instance)).fail_msg
11320 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11321 "node: %s" % (idx, msg),
11322 hint=("Please cleanup this device manually as"
11323 " soon as possible"))
11325 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11326 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11327 self.instance.disks)[pnode]
11329 msg = result.fail_msg
11331 # detaches didn't succeed (unlikely)
11332 self.cfg.ReleaseDRBDMinors(self.instance.name)
11333 raise errors.OpExecError("Can't detach the disks from the network on"
11334 " old node: %s" % (msg,))
11336 # if we managed to detach at least one, we update all the disks of
11337 # the instance to point to the new secondary
11338 self.lu.LogInfo("Updating instance configuration")
11339 for dev, _, new_logical_id in iv_names.itervalues():
11340 dev.logical_id = new_logical_id
11341 self.cfg.SetDiskID(dev, self.instance.primary_node)
11343 self.cfg.Update(self.instance, feedback_fn)
11345 # Release all node locks (the configuration has been updated)
11346 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11348 # and now perform the drbd attach
11349 self.lu.LogInfo("Attaching primary drbds to new secondary"
11350 " (standalone => connected)")
11351 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11353 self.node_secondary_ip,
11354 (self.instance.disks, self.instance),
11355 self.instance.name,
11357 for to_node, to_result in result.items():
11358 msg = to_result.fail_msg
11360 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11362 hint=("please do a gnt-instance info to see the"
11363 " status of disks"))
11365 cstep = itertools.count(5)
11367 if self.early_release:
11368 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11369 self._RemoveOldStorage(self.target_node, iv_names)
11370 # TODO: Check if releasing locks early still makes sense
11371 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11373 # Release all resource locks except those used by the instance
11374 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11375 keep=self.node_secondary_ip.keys())
11377 # TODO: Can the instance lock be downgraded here? Take the optional disk
11378 # shutdown in the caller into consideration.
11381 # This can fail as the old devices are degraded and _WaitForSync
11382 # does a combined result over all disks, so we don't check its return value
11383 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11384 _WaitForSync(self.lu, self.instance)
11386 # Check all devices manually
11387 self._CheckDevices(self.instance.primary_node, iv_names)
11389 # Step: remove old storage
11390 if not self.early_release:
11391 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11392 self._RemoveOldStorage(self.target_node, iv_names)
11395 class LURepairNodeStorage(NoHooksLU):
11396 """Repairs the volume group on a node.
11401 def CheckArguments(self):
11402 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11404 storage_type = self.op.storage_type
11406 if (constants.SO_FIX_CONSISTENCY not in
11407 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11408 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11409 " repaired" % storage_type,
11410 errors.ECODE_INVAL)
11412 def ExpandNames(self):
11413 self.needed_locks = {
11414 locking.LEVEL_NODE: [self.op.node_name],
11417 def _CheckFaultyDisks(self, instance, node_name):
11418 """Ensure faulty disks abort the opcode or at least warn."""
11420 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11422 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11423 " node '%s'" % (instance.name, node_name),
11424 errors.ECODE_STATE)
11425 except errors.OpPrereqError, err:
11426 if self.op.ignore_consistency:
11427 self.proc.LogWarning(str(err.args[0]))
11431 def CheckPrereq(self):
11432 """Check prerequisites.
11435 # Check whether any instance on this node has faulty disks
11436 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11437 if inst.admin_state != constants.ADMINST_UP:
11439 check_nodes = set(inst.all_nodes)
11440 check_nodes.discard(self.op.node_name)
11441 for inst_node_name in check_nodes:
11442 self._CheckFaultyDisks(inst, inst_node_name)
11444 def Exec(self, feedback_fn):
11445 feedback_fn("Repairing storage unit '%s' on %s ..." %
11446 (self.op.name, self.op.node_name))
11448 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11449 result = self.rpc.call_storage_execute(self.op.node_name,
11450 self.op.storage_type, st_args,
11452 constants.SO_FIX_CONSISTENCY)
11453 result.Raise("Failed to repair storage unit '%s' on %s" %
11454 (self.op.name, self.op.node_name))
11457 class LUNodeEvacuate(NoHooksLU):
11458 """Evacuates instances off a list of nodes.
11463 _MODE2IALLOCATOR = {
11464 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11465 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11466 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11468 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11469 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11470 constants.IALLOCATOR_NEVAC_MODES)
11472 def CheckArguments(self):
11473 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11475 def ExpandNames(self):
11476 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11478 if self.op.remote_node is not None:
11479 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11480 assert self.op.remote_node
11482 if self.op.remote_node == self.op.node_name:
11483 raise errors.OpPrereqError("Can not use evacuated node as a new"
11484 " secondary node", errors.ECODE_INVAL)
11486 if self.op.mode != constants.NODE_EVAC_SEC:
11487 raise errors.OpPrereqError("Without the use of an iallocator only"
11488 " secondary instances can be evacuated",
11489 errors.ECODE_INVAL)
11492 self.share_locks = _ShareAll()
11493 self.needed_locks = {
11494 locking.LEVEL_INSTANCE: [],
11495 locking.LEVEL_NODEGROUP: [],
11496 locking.LEVEL_NODE: [],
11499 # Determine nodes (via group) optimistically, needs verification once locks
11500 # have been acquired
11501 self.lock_nodes = self._DetermineNodes()
11503 def _DetermineNodes(self):
11504 """Gets the list of nodes to operate on.
11507 if self.op.remote_node is None:
11508 # Iallocator will choose any node(s) in the same group
11509 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11510 else:
11511 group_nodes = frozenset([self.op.remote_node])
11513 # Determine nodes to be locked
11514 return set([self.op.node_name]) | group_nodes
11516 def _DetermineInstances(self):
11517 """Builds list of instances to operate on.
11520 assert self.op.mode in constants.NODE_EVAC_MODES
11522 if self.op.mode == constants.NODE_EVAC_PRI:
11523 # Primary instances only
11524 inst_fn = _GetNodePrimaryInstances
11525 assert self.op.remote_node is None, \
11526 "Evacuating primary instances requires iallocator"
11527 elif self.op.mode == constants.NODE_EVAC_SEC:
11528 # Secondary instances only
11529 inst_fn = _GetNodeSecondaryInstances
11532 assert self.op.mode == constants.NODE_EVAC_ALL
11533 inst_fn = _GetNodeInstances
11534 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11536 raise errors.OpPrereqError("Due to an issue with the iallocator"
11537 " interface it is not possible to evacuate"
11538 " all instances at once; specify explicitly"
11539 " whether to evacuate primary or secondary"
11541 errors.ECODE_INVAL)
11543 return inst_fn(self.cfg, self.op.node_name)
11545 def DeclareLocks(self, level):
11546 if level == locking.LEVEL_INSTANCE:
11547 # Lock instances optimistically, needs verification once node and group
11548 # locks have been acquired
11549 self.needed_locks[locking.LEVEL_INSTANCE] = \
11550 set(i.name for i in self._DetermineInstances())
11552 elif level == locking.LEVEL_NODEGROUP:
11553 # Lock node groups for all potential target nodes optimistically, needs
11554 # verification once nodes have been acquired
11555 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11556 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11558 elif level == locking.LEVEL_NODE:
11559 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11561 def CheckPrereq(self):
11563 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11564 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11565 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11567 need_nodes = self._DetermineNodes()
11569 if not owned_nodes.issuperset(need_nodes):
11570 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11571 " locks were acquired, current nodes are"
11572 " are '%s', used to be '%s'; retry the"
11574 (self.op.node_name,
11575 utils.CommaJoin(need_nodes),
11576 utils.CommaJoin(owned_nodes)),
11577 errors.ECODE_STATE)
11579 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11580 if owned_groups != wanted_groups:
11581 raise errors.OpExecError("Node groups changed since locks were acquired,"
11582 " current groups are '%s', used to be '%s';"
11583 " retry the operation" %
11584 (utils.CommaJoin(wanted_groups),
11585 utils.CommaJoin(owned_groups)))
11587 # Determine affected instances
11588 self.instances = self._DetermineInstances()
11589 self.instance_names = [i.name for i in self.instances]
11591 if set(self.instance_names) != owned_instances:
11592 raise errors.OpExecError("Instances on node '%s' changed since locks"
11593 " were acquired, current instances are '%s',"
11594 " used to be '%s'; retry the operation" %
11595 (self.op.node_name,
11596 utils.CommaJoin(self.instance_names),
11597 utils.CommaJoin(owned_instances)))
11599 if self.instance_names:
11600 self.LogInfo("Evacuating instances from node '%s': %s",
11602 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11604 self.LogInfo("No instances to evacuate from node '%s'",
11607 if self.op.remote_node is not None:
11608 for i in self.instances:
11609 if i.primary_node == self.op.remote_node:
11610 raise errors.OpPrereqError("Node %s is the primary node of"
11611 " instance %s, cannot use it as"
11613 (self.op.remote_node, i.name),
11614 errors.ECODE_INVAL)
11616 def Exec(self, feedback_fn):
11617 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11619 if not self.instance_names:
11620 # No instances to evacuate
11623 elif self.op.iallocator is not None:
11624 # TODO: Implement relocation to other group
11625 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11626 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11627 instances=list(self.instance_names))
11629 ial.Run(self.op.iallocator)
11631 if not ial.success:
11632 raise errors.OpPrereqError("Can't compute node evacuation using"
11633 " iallocator '%s': %s" %
11634 (self.op.iallocator, ial.info),
11635 errors.ECODE_NORES)
11637 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11639 elif self.op.remote_node is not None:
11640 assert self.op.mode == constants.NODE_EVAC_SEC
11642 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11643 remote_node=self.op.remote_node,
11645 mode=constants.REPLACE_DISK_CHG,
11646 early_release=self.op.early_release)]
11647 for instance_name in self.instance_names
11651 raise errors.ProgrammerError("No iallocator or remote node")
11653 return ResultWithJobs(jobs)
11656 def _SetOpEarlyRelease(early_release, op):
11657 """Sets C{early_release} flag on opcodes if available.
11660 try:
11661 op.early_release = early_release
11662 except AttributeError:
11663 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11665 return op
11668 def _NodeEvacDest(use_nodes, group, nodes):
11669 """Returns group or nodes depending on caller's choice.
11672 if use_nodes:
11673 return utils.CommaJoin(nodes)
11674 else:
11675 return group
11678 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11679 """Unpacks the result of change-group and node-evacuate iallocator requests.
11681 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11682 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11684 @type lu: L{LogicalUnit}
11685 @param lu: Logical unit instance
11686 @type alloc_result: tuple/list
11687 @param alloc_result: Result from iallocator
11688 @type early_release: bool
11689 @param early_release: Whether to release locks early if possible
11690 @type use_nodes: bool
11691 @param use_nodes: Whether to display node names instead of groups
11694 (moved, failed, jobs) = alloc_result
11696 if failed:
11697 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11698 for (name, reason) in failed)
11699 lu.LogWarning("Unable to evacuate instances %s", failreason)
11700 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11703 lu.LogInfo("Instances to be moved: %s",
11704 utils.CommaJoin("%s (to %s)" %
11705 (name, _NodeEvacDest(use_nodes, group, nodes))
11706 for (name, group, nodes) in moved))
11708 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11709 map(opcodes.OpCode.LoadOpCode, ops))
11710 for ops in jobs]
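# Illustration (hypothetical data): for a node-evacuate request, alloc_result
# might look like
#   ([("inst1", "group1", ["node3"])],   # moved: (instance, group, target nodes)
#    [],                                 # failed: (instance, reason) pairs
#    [[op_dict1], [op_dict2]])           # jobs: lists of serialized opcodes
# from which this function builds one job per inner list, deserializing each
# opcode and applying _SetOpEarlyRelease to it.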
11713 class LUInstanceGrowDisk(LogicalUnit):
11714 """Grow a disk of an instance.
11717 HPATH = "disk-grow"
11718 HTYPE = constants.HTYPE_INSTANCE
11721 def ExpandNames(self):
11722 self._ExpandAndLockInstance()
11723 self.needed_locks[locking.LEVEL_NODE] = []
11724 self.needed_locks[locking.LEVEL_NODE_RES] = []
11725 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11726 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11728 def DeclareLocks(self, level):
11729 if level == locking.LEVEL_NODE:
11730 self._LockInstancesNodes()
11731 elif level == locking.LEVEL_NODE_RES:
11733 self.needed_locks[locking.LEVEL_NODE_RES] = \
11734 self.needed_locks[locking.LEVEL_NODE][:]
11736 def BuildHooksEnv(self):
11737 """Build hooks env.
11739 This runs on the master, the primary and all the secondaries.
11743 "DISK": self.op.disk,
11744 "AMOUNT": self.op.amount,
11745 "ABSOLUTE": self.op.absolute,
11747 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11750 def BuildHooksNodes(self):
11751 """Build hooks nodes.
11754 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11757 def CheckPrereq(self):
11758 """Check prerequisites.
11760 This checks that the instance is in the cluster.
11763 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11764 assert instance is not None, \
11765 "Cannot retrieve locked instance %s" % self.op.instance_name
11766 nodenames = list(instance.all_nodes)
11767 for node in nodenames:
11768 _CheckNodeOnline(self, node)
11770 self.instance = instance
11772 if instance.disk_template not in constants.DTS_GROWABLE:
11773 raise errors.OpPrereqError("Instance's disk layout does not support"
11774 " growing", errors.ECODE_INVAL)
11776 self.disk = instance.FindDisk(self.op.disk)
11778 if self.op.absolute:
11779 self.target = self.op.amount
11780 self.delta = self.target - self.disk.size
11782 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11783 "current disk size (%s)" %
11784 (utils.FormatUnit(self.target, "h"),
11785 utils.FormatUnit(self.disk.size, "h")),
11786 errors.ECODE_STATE)
11787 else:
11788 self.delta = self.op.amount
11789 self.target = self.disk.size + self.delta
11791 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11792 utils.FormatUnit(self.delta, "h"),
11793 errors.ECODE_INVAL)
11795 if instance.disk_template not in (constants.DT_FILE,
11796 constants.DT_SHARED_FILE,
11798 # TODO: check the free disk space for file, when that feature will be
11800 _CheckNodesFreeDiskPerVG(self, nodenames,
11801 self.disk.ComputeGrowth(self.delta))
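# Worked example (hypothetical sizes): for a 10240 MB disk,
#   OpInstanceGrowDisk(instance_name="inst1", disk=0, amount=20480, absolute=True)
# yields self.target = 20480 and self.delta = 20480 - 10240 = 10240, while the
# relative form amount=10240, absolute=False produces the same target; in both
# cases a negative delta is rejected by the checks above.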
11803 def Exec(self, feedback_fn):
11804 """Execute disk grow.
11807 instance = self.instance
11810 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11811 assert (self.owned_locks(locking.LEVEL_NODE) ==
11812 self.owned_locks(locking.LEVEL_NODE_RES))
11814 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11816 raise errors.OpExecError("Cannot activate block device to grow")
11818 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11819 (self.op.disk, instance.name,
11820 utils.FormatUnit(self.delta, "h"),
11821 utils.FormatUnit(self.target, "h")))
11823 # First run all grow ops in dry-run mode
11824 for node in instance.all_nodes:
11825 self.cfg.SetDiskID(disk, node)
11826 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11828 result.Raise("Grow request failed to node %s" % node)
11830 # We know that (as far as we can test) operations across different
11831 # nodes will succeed, time to run it for real on the backing storage
11832 for node in instance.all_nodes:
11833 self.cfg.SetDiskID(disk, node)
11834 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11836 result.Raise("Grow request failed to node %s" % node)
11838 # And now execute it for logical storage, on the primary node
11839 node = instance.primary_node
11840 self.cfg.SetDiskID(disk, node)
11841 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11843 result.Raise("Grow request failed to node %s" % node)
11845 disk.RecordGrow(self.delta)
11846 self.cfg.Update(instance, feedback_fn)
11848 # Changes have been recorded, release node lock
11849 _ReleaseLocks(self, locking.LEVEL_NODE)
11851 # Downgrade lock while waiting for sync
11852 self.glm.downgrade(locking.LEVEL_INSTANCE)
11854 if self.op.wait_for_sync:
11855 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11857 self.proc.LogWarning("Disk sync-ing has not returned a good"
11858 " status; please check the instance")
11859 if instance.admin_state != constants.ADMINST_UP:
11860 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11861 elif instance.admin_state != constants.ADMINST_UP:
11862 self.proc.LogWarning("Not shutting down the disk even though the instance"
11863 " is not supposed to be running, because wait-for-sync"
11864 " mode was not requested")
11866 assert self.owned_locks(locking.LEVEL_NODE_RES)
11867 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11870 class LUInstanceQueryData(NoHooksLU):
11871 """Query runtime instance data.
11876 def ExpandNames(self):
11877 self.needed_locks = {}
11879 # Use locking if requested or when non-static information is wanted
11880 if not (self.op.static or self.op.use_locking):
11881 self.LogWarning("Non-static data requested, locks need to be acquired")
11882 self.op.use_locking = True
11884 if self.op.instances or not self.op.use_locking:
11885 # Expand instance names right here
11886 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11888 # Will use acquired locks
11889 self.wanted_names = None
11891 if self.op.use_locking:
11892 self.share_locks = _ShareAll()
11894 if self.wanted_names is None:
11895 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11897 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11899 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11900 self.needed_locks[locking.LEVEL_NODE] = []
11901 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11903 def DeclareLocks(self, level):
11904 if self.op.use_locking:
11905 if level == locking.LEVEL_NODEGROUP:
11906 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11908 # Lock all groups used by instances optimistically; this requires going
11909 # via the node before it's locked, requiring verification later on
11910 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11911 frozenset(group_uuid
11912 for instance_name in owned_instances
11914 self.cfg.GetInstanceNodeGroups(instance_name))
11916 elif level == locking.LEVEL_NODE:
11917 self._LockInstancesNodes()
11919 def CheckPrereq(self):
11920 """Check prerequisites.
11922 This only checks the optional instance list against the existing names.
11925 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11926 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11927 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11929 if self.wanted_names is None:
11930 assert self.op.use_locking, "Locking was not used"
11931 self.wanted_names = owned_instances
11933 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11935 if self.op.use_locking:
11936 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11939 assert not (owned_instances or owned_groups or owned_nodes)
11941 self.wanted_instances = instances.values()
11943 def _ComputeBlockdevStatus(self, node, instance, dev):
11944 """Returns the status of a block device
11947 if self.op.static or not node:
11950 self.cfg.SetDiskID(dev, node)
11952 result = self.rpc.call_blockdev_find(node, dev)
11956 result.Raise("Can't compute disk status for %s" % instance.name)
11958 status = result.payload
11962 return (status.dev_path, status.major, status.minor,
11963 status.sync_percent, status.estimated_time,
11964 status.is_degraded, status.ldisk_status)
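# Illustration (hypothetical values): a healthy, fully synced DRBD disk might
# yield ("/dev/drbd0", 147, 0, None, None, False, constants.LDS_OKAY), while a
# resyncing one would report a sync percentage, an estimated time and a
# degraded state.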
11966 def _ComputeDiskStatus(self, instance, snode, dev):
11967 """Compute block device status.
11970 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11972 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11974 def _ComputeDiskStatusInner(self, instance, snode, dev):
11975 """Compute block device status.
11977 @attention: The device has to be annotated already.
11980 if dev.dev_type in constants.LDS_DRBD:
11981 # we change the snode then (otherwise we use the one passed in)
11982 if dev.logical_id[0] == instance.primary_node:
11983 snode = dev.logical_id[1]
11985 snode = dev.logical_id[0]
11987 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11989 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11992 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11999 "iv_name": dev.iv_name,
12000 "dev_type": dev.dev_type,
12001 "logical_id": dev.logical_id,
12002 "physical_id": dev.physical_id,
12003 "pstatus": dev_pstatus,
12004 "sstatus": dev_sstatus,
12005 "children": dev_children,
12010 def Exec(self, feedback_fn):
12011 """Gather and return data"""
12014 cluster = self.cfg.GetClusterInfo()
12016 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12017 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12019 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12020 for node in nodes.values()))
12022 group2name_fn = lambda uuid: groups[uuid].name
12024 for instance in self.wanted_instances:
12025 pnode = nodes[instance.primary_node]
12027 if self.op.static or pnode.offline:
12028 remote_state = None
12030 self.LogWarning("Primary node %s is marked offline, returning static"
12031 " information only for instance %s" %
12032 (pnode.name, instance.name))
12034 remote_info = self.rpc.call_instance_info(instance.primary_node,
12036 instance.hypervisor)
12037 remote_info.Raise("Error checking node %s" % instance.primary_node)
12038 remote_info = remote_info.payload
12039 if remote_info and "state" in remote_info:
12040 remote_state = "up"
12041 else:
12042 if instance.admin_state == constants.ADMINST_UP:
12043 remote_state = "down"
12044 else:
12045 remote_state = instance.admin_state
12047 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12050 snodes_group_uuids = [nodes[snode_name].group
12051 for snode_name in instance.secondary_nodes]
12053 result[instance.name] = {
12054 "name": instance.name,
12055 "config_state": instance.admin_state,
12056 "run_state": remote_state,
12057 "pnode": instance.primary_node,
12058 "pnode_group_uuid": pnode.group,
12059 "pnode_group_name": group2name_fn(pnode.group),
12060 "snodes": instance.secondary_nodes,
12061 "snodes_group_uuids": snodes_group_uuids,
12062 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12064 # this happens to be the same format used for hooks
12065 "nics": _NICListToTuple(self, instance.nics),
12066 "disk_template": instance.disk_template,
12068 "hypervisor": instance.hypervisor,
12069 "network_port": instance.network_port,
12070 "hv_instance": instance.hvparams,
12071 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12072 "be_instance": instance.beparams,
12073 "be_actual": cluster.FillBE(instance),
12074 "os_instance": instance.osparams,
12075 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12076 "serial_no": instance.serial_no,
12077 "mtime": instance.mtime,
12078 "ctime": instance.ctime,
12079 "uuid": instance.uuid,
12085 def PrepareContainerMods(mods, private_fn):
12086 """Prepares a list of container modifications by adding a private data field.
12088 @type mods: list of tuples; (operation, index, parameters)
12089 @param mods: List of modifications
12090 @type private_fn: callable or None
12091 @param private_fn: Callable for constructing a private data field for a
12096 if private_fn is None:
12097 fn = lambda: None
12098 else:
12099 fn = private_fn
12101 return [(op, idx, params, fn()) for (op, idx, params) in mods]
12104 #: Type description for changes as returned by L{ApplyContainerMods}'s
12106 _TApplyContModsCbChanges = \
12107 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12108 ht.TNonEmptyString,
12113 def ApplyContainerMods(kind, container, chgdesc, mods,
12114 create_fn, modify_fn, remove_fn):
12115 """Applies descriptions in C{mods} to C{container}.
12118 @param kind: One-word item description
12119 @type container: list
12120 @param container: Container to modify
12121 @type chgdesc: None or list
12122 @param chgdesc: List of applied changes
12124 @param mods: Modifications as returned by L{PrepareContainerMods}
12125 @type create_fn: callable
12126 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12127 receives absolute item index, parameters and private data object as added
12128 by L{PrepareContainerMods}, returns tuple containing new item and changes
12130 @type modify_fn: callable
12131 @param modify_fn: Callback for modifying an existing item
12132 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12133 and private data object as added by L{PrepareContainerMods}, returns
12135 @type remove_fn: callable
12136 @param remove_fn: Callback on removing item; receives absolute item index,
12137 item and private data object as added by L{PrepareContainerMods}
12140 for (op, idx, params, private) in mods:
12143 absidx = len(container) - 1
12145 raise IndexError("Not accepting negative indices other than -1")
12146 elif idx > len(container):
12147 raise IndexError("Got %s index %s, but there are only %s" %
12148 (kind, idx, len(container)))
12154 if op == constants.DDM_ADD:
12155 # Calculate where item will be added
12157 addidx = len(container)
12161 if create_fn is None:
12164 (item, changes) = create_fn(addidx, params, private)
12167 container.append(item)
12170 assert idx <= len(container)
12171 # list.insert does so before the specified index
12172 container.insert(idx, item)
12174 # Retrieve existing item
12176 item = container[absidx]
12178 raise IndexError("Invalid %s index %s" % (kind, idx))
12180 if op == constants.DDM_REMOVE:
12183 if remove_fn is not None:
12184 remove_fn(absidx, item, private)
12186 changes = [("%s/%s" % (kind, absidx), "remove")]
12188 assert container[absidx] == item
12189 del container[absidx]
12190 elif op == constants.DDM_MODIFY:
12191 if modify_fn is not None:
12192 changes = modify_fn(absidx, item, params, private)
12194 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12196 assert _TApplyContModsCbChanges(changes)
12198 if not (chgdesc is None or changes is None):
12199 chgdesc.extend(changes)
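# Minimal usage sketch (illustration only, using plain strings as items):
#   mods = PrepareContainerMods([(constants.DDM_ADD, -1, "baz")], None)
#   container = ["foo", "bar"]
#   ApplyContainerMods("test", container, [], mods, None, None, None)
#   # container is now ["foo", "bar", "baz"]; with create_fn=None the raw
#   # parameters become the new item and no change description is recorded.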
12202 def _UpdateIvNames(base_index, disks):
12203 """Updates the C{iv_name} attribute of disks.
12205 @type disks: list of L{objects.Disk}
12208 for (idx, disk) in enumerate(disks):
12209 disk.iv_name = "disk/%s" % (base_index + idx, )
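# Example: _UpdateIvNames(2, [disk_a, disk_b]) sets their iv_name to "disk/2"
# and "disk/3", keeping the per-instance disk numbering contiguous.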
12212 class _InstNicModPrivate:
12213 """Data structure for network interface modifications.
12215 Used by L{LUInstanceSetParams}.
12218 def __init__(self):
12223 class LUInstanceSetParams(LogicalUnit):
12224 """Modifies an instances's parameters.
12227 HPATH = "instance-modify"
12228 HTYPE = constants.HTYPE_INSTANCE
12232 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12233 assert ht.TList(mods)
12234 assert not mods or len(mods[0]) in (2, 3)
12236 if mods and len(mods[0]) == 2:
12240 for op, params in mods:
12241 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12242 result.append((op, -1, params))
12246 raise errors.OpPrereqError("Only one %s add or remove operation is"
12247 " supported at a time" % kind,
12248 errors.ECODE_INVAL)
12250 result.append((constants.DDM_MODIFY, op, params))
12252 assert verify_fn(result)
12259 def _CheckMods(kind, mods, key_types, item_fn):
12260 """Ensures requested disk/NIC modifications are valid.
12263 for (op, _, params) in mods:
12264 assert ht.TDict(params)
12266 utils.ForceDictType(params, key_types)
12268 if op == constants.DDM_REMOVE:
12270 raise errors.OpPrereqError("No settings should be passed when"
12271 " removing a %s" % kind,
12272 errors.ECODE_INVAL)
12273 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12274 item_fn(op, params)
12276 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12279 def _VerifyDiskModification(op, params):
12280 """Verifies a disk modification.
12283 if op == constants.DDM_ADD:
12284 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12285 if mode not in constants.DISK_ACCESS_SET:
12286 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12287 errors.ECODE_INVAL)
12289 size = params.get(constants.IDISK_SIZE, None)
12291 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12292 constants.IDISK_SIZE, errors.ECODE_INVAL)
12296 except (TypeError, ValueError), err:
12297 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12298 errors.ECODE_INVAL)
12300 params[constants.IDISK_SIZE] = size
12302 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12303 raise errors.OpPrereqError("Disk size change not possible, use"
12304 " grow-disk", errors.ECODE_INVAL)
12307 def _VerifyNicModification(op, params):
12308 """Verifies a network interface modification.
12311 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12312 ip = params.get(constants.INIC_IP, None)
12315 elif ip.lower() == constants.VALUE_NONE:
12316 params[constants.INIC_IP] = None
12317 elif not netutils.IPAddress.IsValid(ip):
12318 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12319 errors.ECODE_INVAL)
12321 bridge = params.get("bridge", None)
12322 link = params.get(constants.INIC_LINK, None)
12323 if bridge and link:
12324 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12325 " at the same time", errors.ECODE_INVAL)
12326 elif bridge and bridge.lower() == constants.VALUE_NONE:
12327 params["bridge"] = None
12328 elif link and link.lower() == constants.VALUE_NONE:
12329 params[constants.INIC_LINK] = None
12331 if op == constants.DDM_ADD:
12332 macaddr = params.get(constants.INIC_MAC, None)
12333 if macaddr is None:
12334 params[constants.INIC_MAC] = constants.VALUE_AUTO
12336 if constants.INIC_MAC in params:
12337 macaddr = params[constants.INIC_MAC]
12338 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12339 macaddr = utils.NormalizeAndValidateMac(macaddr)
12341 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12342 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12343 " modifying an existing NIC",
12344 errors.ECODE_INVAL)
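# Illustration: a NIC addition arriving as
#   (constants.DDM_ADD, -1, {"ip": "none", "link": "br0"})
# is normalized here so that params[constants.INIC_IP] becomes None and
# params[constants.INIC_MAC] defaults to constants.VALUE_AUTO, while passing
# both "bridge" and "link" in the same modification is rejected.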
12346 def CheckArguments(self):
12347 if not (self.op.nics or self.op.disks or self.op.disk_template or
12348 self.op.hvparams or self.op.beparams or self.op.os_name or
12349 self.op.offline is not None or self.op.runtime_mem):
12350 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12352 if self.op.hvparams:
12353 _CheckGlobalHvParams(self.op.hvparams)
12355 self.op.disks = self._UpgradeDiskNicMods(
12356 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12357 self.op.nics = self._UpgradeDiskNicMods(
12358 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12360 # Check disk modifications
12361 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12362 self._VerifyDiskModification)
12364 if self.op.disks and self.op.disk_template is not None:
12365 raise errors.OpPrereqError("Disk template conversion and other disk"
12366 " changes not supported at the same time",
12367 errors.ECODE_INVAL)
12369 if (self.op.disk_template and
12370 self.op.disk_template in constants.DTS_INT_MIRROR and
12371 self.op.remote_node is None):
12372 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12373 " one requires specifying a secondary node",
12374 errors.ECODE_INVAL)
12376 # Check NIC modifications
12377 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12378 self._VerifyNicModification)
12380 def ExpandNames(self):
12381 self._ExpandAndLockInstance()
12382 # Can't even acquire node locks in shared mode as upcoming changes in
12383 # Ganeti 2.6 will start to modify the node object on disk conversion
12384 self.needed_locks[locking.LEVEL_NODE] = []
12385 self.needed_locks[locking.LEVEL_NODE_RES] = []
12386 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12388 def DeclareLocks(self, level):
12389 # TODO: Acquire group lock in shared mode (disk parameters)
12390 if level == locking.LEVEL_NODE:
12391 self._LockInstancesNodes()
12392 if self.op.disk_template and self.op.remote_node:
12393 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12394 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12395 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12397 self.needed_locks[locking.LEVEL_NODE_RES] = \
12398 self.needed_locks[locking.LEVEL_NODE][:]
12400 def BuildHooksEnv(self):
12401 """Build hooks env.
12403 This runs on the master, primary and secondaries.
12407 if constants.BE_MINMEM in self.be_new:
12408 args["minmem"] = self.be_new[constants.BE_MINMEM]
12409 if constants.BE_MAXMEM in self.be_new:
12410 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12411 if constants.BE_VCPUS in self.be_new:
12412 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12413 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12414 # information at all.
12416 if self._new_nics is not None:
12419 for nic in self._new_nics:
12420 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12421 mode = nicparams[constants.NIC_MODE]
12422 link = nicparams[constants.NIC_LINK]
12423 nics.append((nic.ip, nic.mac, mode, link))
12425 args["nics"] = nics
12427 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12428 if self.op.disk_template:
12429 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12430 if self.op.runtime_mem:
12431 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12435 def BuildHooksNodes(self):
12436 """Build hooks nodes.
12439 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12442 def _PrepareNicModification(self, params, private, old_ip, old_params,
12444 update_params_dict = dict([(key, params[key])
12445 for key in constants.NICS_PARAMETERS
12448 if "bridge" in params:
12449 update_params_dict[constants.NIC_LINK] = params["bridge"]
12451 new_params = _GetUpdatedParams(old_params, update_params_dict)
12452 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12454 new_filled_params = cluster.SimpleFillNIC(new_params)
12455 objects.NIC.CheckParameterSyntax(new_filled_params)
12457 new_mode = new_filled_params[constants.NIC_MODE]
12458 if new_mode == constants.NIC_MODE_BRIDGED:
12459 bridge = new_filled_params[constants.NIC_LINK]
12460 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12462 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12464 self.warn.append(msg)
12466 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12468 elif new_mode == constants.NIC_MODE_ROUTED:
12469 ip = params.get(constants.INIC_IP, old_ip)
12471 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12472 " on a routed NIC", errors.ECODE_INVAL)
12474 if constants.INIC_MAC in params:
12475 mac = params[constants.INIC_MAC]
12477 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12478 errors.ECODE_INVAL)
12479 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12480 # otherwise generate the MAC address
12481 params[constants.INIC_MAC] = \
12482 self.cfg.GenerateMAC(self.proc.GetECId())
12484 # or validate/reserve the current one
12486 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12487 except errors.ReservationError:
12488 raise errors.OpPrereqError("MAC address '%s' already in use"
12489 " in cluster" % mac,
12490 errors.ECODE_NOTUNIQUE)
12492 private.params = new_params
12493 private.filled = new_filled_params
12495 def CheckPrereq(self):
12496 """Check prerequisites.
12498 This only checks the instance list against the existing names.
12501 # checking the new params on the primary/secondary nodes
12503 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12504 cluster = self.cluster = self.cfg.GetClusterInfo()
12505 assert self.instance is not None, \
12506 "Cannot retrieve locked instance %s" % self.op.instance_name
12507 pnode = instance.primary_node
12508 nodelist = list(instance.all_nodes)
12509 pnode_info = self.cfg.GetNodeInfo(pnode)
12510 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12512 # Prepare disk/NIC modifications
12513 self.diskmod = PrepareContainerMods(self.op.disks, None)
12514 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12517 if self.op.os_name and not self.op.force:
12518 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12519 self.op.force_variant)
12520 instance_os = self.op.os_name
12522 instance_os = instance.os
12524 assert not (self.op.disk_template and self.op.disks), \
12525 "Can't modify disk template and apply disk changes at the same time"
12527 if self.op.disk_template:
12528 if instance.disk_template == self.op.disk_template:
12529 raise errors.OpPrereqError("Instance already has disk template %s" %
12530 instance.disk_template, errors.ECODE_INVAL)
12532 if (instance.disk_template,
12533 self.op.disk_template) not in self._DISK_CONVERSIONS:
12534 raise errors.OpPrereqError("Unsupported disk template conversion from"
12535 " %s to %s" % (instance.disk_template,
12536 self.op.disk_template),
12537 errors.ECODE_INVAL)
12538 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12539 msg="cannot change disk template")
12540 if self.op.disk_template in constants.DTS_INT_MIRROR:
12541 if self.op.remote_node == pnode:
12542 raise errors.OpPrereqError("Given new secondary node %s is the same"
12543 " as the primary node of the instance" %
12544 self.op.remote_node, errors.ECODE_STATE)
12545 _CheckNodeOnline(self, self.op.remote_node)
12546 _CheckNodeNotDrained(self, self.op.remote_node)
12547 # FIXME: here we assume that the old instance type is DT_PLAIN
12548 assert instance.disk_template == constants.DT_PLAIN
12549 disks = [{constants.IDISK_SIZE: d.size,
12550 constants.IDISK_VG: d.logical_id[0]}
12551 for d in instance.disks]
12552 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12553 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12555 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12556 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12557 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12558 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12559 ignore=self.op.ignore_ipolicy)
12560 if pnode_info.group != snode_info.group:
12561 self.LogWarning("The primary and secondary nodes are in two"
12562 " different node groups; the disk parameters"
12563 " from the first disk's node group will be"
12566 # hvparams processing
12567 if self.op.hvparams:
12568 hv_type = instance.hypervisor
12569 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12570 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12571 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12574 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12575 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12576 self.hv_proposed = self.hv_new = hv_new # the new actual values
12577 self.hv_inst = i_hvdict # the new dict (without defaults)
12579 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12581 self.hv_new = self.hv_inst = {}
12583 # beparams processing
12584 if self.op.beparams:
12585 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12587 objects.UpgradeBeParams(i_bedict)
12588 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12589 be_new = cluster.SimpleFillBE(i_bedict)
12590 self.be_proposed = self.be_new = be_new # the new actual values
12591 self.be_inst = i_bedict # the new dict (without defaults)
12593 self.be_new = self.be_inst = {}
12594 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12595 be_old = cluster.FillBE(instance)
12597 # CPU param validation -- checking every time a parameter is
12598 # changed to cover all cases where either CPU mask or vcpus have
12600 if (constants.BE_VCPUS in self.be_proposed and
12601 constants.HV_CPU_MASK in self.hv_proposed):
12603 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12604 # Verify mask is consistent with number of vCPUs. Can skip this
12605 # test if only 1 entry in the CPU mask, which means same mask
12606 # is applied to all vCPUs.
12607 if (len(cpu_list) > 1 and
12608 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12609 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12611 (self.be_proposed[constants.BE_VCPUS],
12612 self.hv_proposed[constants.HV_CPU_MASK]),
12613 errors.ECODE_INVAL)
12615 # Only perform this test if a new CPU mask is given
12616 if constants.HV_CPU_MASK in self.hv_new:
12617 # Calculate the largest CPU number requested
12618 max_requested_cpu = max(map(max, cpu_list))
12619 # Check that all of the instance's nodes have enough physical CPUs to
12620 # satisfy the requested CPU mask
12621 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12622 max_requested_cpu + 1, instance.hypervisor)
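# Worked example (hypothetical mask): with BE_VCPUS = 2 and HV_CPU_MASK = "0-1:2-3",
# the parsed cpu_list has two entries (one per vCPU), so the lengths match, and
# max_requested_cpu is 3, so every node must expose at least 4 physical CPUs.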
12624 # osparams processing
12625 if self.op.osparams:
12626 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12627 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12628 self.os_inst = i_osdict # the new dict (without defaults)
12634 #TODO(dynmem): do the appropriate check involving MINMEM
12635 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12636 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12637 mem_check_list = [pnode]
12638 if be_new[constants.BE_AUTO_BALANCE]:
12639 # either we changed auto_balance to yes or it was from before
12640 mem_check_list.extend(instance.secondary_nodes)
12641 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12642 instance.hypervisor)
12643 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12644 [instance.hypervisor])
12645 pninfo = nodeinfo[pnode]
12646 msg = pninfo.fail_msg
12648 # Assume the primary node is unreachable and go ahead
12649 self.warn.append("Can't get info from primary node %s: %s" %
12652 (_, _, (pnhvinfo, )) = pninfo.payload
12653 if not isinstance(pnhvinfo.get("memory_free", None), int):
12654 self.warn.append("Node data from primary node %s doesn't contain"
12655 " free memory information" % pnode)
12656 elif instance_info.fail_msg:
12657 self.warn.append("Can't get instance runtime information: %s" %
12658 instance_info.fail_msg)
12660 if instance_info.payload:
12661 current_mem = int(instance_info.payload["memory"])
12663 # Assume instance not running
12664 # (there is a slight race condition here, but it's not very
12665 # probable, and we have no other way to check)
12666 # TODO: Describe race condition
12668 #TODO(dynmem): do the appropriate check involving MINMEM
12669 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12670 pnhvinfo["memory_free"])
12672 raise errors.OpPrereqError("This change will prevent the instance"
12673 " from starting, due to %d MB of memory"
12674 " missing on its primary node" %
12675 miss_mem, errors.ECODE_NORES)
12677 if be_new[constants.BE_AUTO_BALANCE]:
12678 for node, nres in nodeinfo.items():
12679 if node not in instance.secondary_nodes:
12681 nres.Raise("Can't get info from secondary node %s" % node,
12682 prereq=True, ecode=errors.ECODE_STATE)
12683 (_, _, (nhvinfo, )) = nres.payload
12684 if not isinstance(nhvinfo.get("memory_free", None), int):
12685 raise errors.OpPrereqError("Secondary node %s didn't return free"
12686 " memory information" % node,
12687 errors.ECODE_STATE)
12688 #TODO(dynmem): do the appropriate check involving MINMEM
12689 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12690 raise errors.OpPrereqError("This change will prevent the instance"
12691 " from failover to its secondary node"
12692 " %s, due to not enough memory" % node,
12693 errors.ECODE_STATE)
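# Worked example (illustrative numbers): raising BE_MAXMEM from 2048 to 4096
# while the instance currently uses 2048 MB and the primary node reports
# memory_free=1024 gives miss_mem = 4096 - 2048 - 1024 = 1024 > 0, so the
# change is rejected; with auto_balance enabled the new BE_MAXMEM is also
# checked against the free memory reported by each secondary node.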
12695 if self.op.runtime_mem:
12696 remote_info = self.rpc.call_instance_info(instance.primary_node,
12697 instance.name,
12698 instance.hypervisor)
12699 remote_info.Raise("Error checking node %s" % instance.primary_node)
12700 if not remote_info.payload: # not running already
12701 raise errors.OpPrereqError("Instance %s is not running" %
12702 instance.name, errors.ECODE_STATE)
12704 current_memory = remote_info.payload["memory"]
12705 if (not self.op.force and
12706 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12707 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12708 raise errors.OpPrereqError("Instance %s must have memory between %d"
12709 " and %d MB of memory unless --force is"
12712 self.be_proposed[constants.BE_MINMEM],
12713 self.be_proposed[constants.BE_MAXMEM]),
12714 errors.ECODE_INVAL)
12716 if self.op.runtime_mem > current_memory:
12717 _CheckNodeFreeMemory(self, instance.primary_node,
12718 "ballooning memory for instance %s" %
12719 instance.name,
12720 self.op.runtime_mem - current_memory,
12721 instance.hypervisor)
12723 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12724 raise errors.OpPrereqError("Disk operations not supported for"
12725 " diskless instances", errors.ECODE_INVAL)
12727 def _PrepareNicCreate(_, params, private):
12728 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12729 return (None, None)
12731 def _PrepareNicMod(_, nic, params, private):
12732 self._PrepareNicModification(params, private, nic.ip,
12733 nic.nicparams, cluster, pnode)
12736 # Verify NIC changes (operating on copy)
12737 nics = instance.nics[:]
12738 ApplyContainerMods("NIC", nics, None, self.nicmod,
12739 _PrepareNicCreate, _PrepareNicMod, None)
12740 if len(nics) > constants.MAX_NICS:
12741 raise errors.OpPrereqError("Instance has too many network interfaces"
12742 " (%d), cannot add more" % constants.MAX_NICS,
12743 errors.ECODE_STATE)
12745 # Verify disk changes (operating on a copy)
12746 disks = instance.disks[:]
12747 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12748 if len(disks) > constants.MAX_DISKS:
12749 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12750 " more" % constants.MAX_DISKS,
12751 errors.ECODE_STATE)
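# Note: the two ApplyContainerMods calls above operate on copies of the NIC
# and disk lists and pass either only the _Prepare* callbacks (NICs) or no
# callbacks at all (disks), so at this point they only validate the requested
# modifications and the resulting container sizes; the instance object itself
# is modified later, in Exec.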
12753 if self.op.offline is not None:
12754 if self.op.offline:
12755 msg = "can't change to offline"
12757 msg = "can't change to online"
12758 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12760 # Pre-compute NIC changes (necessary to use result in hooks)
12761 self._nic_chgdesc = []
12762 if self.nicmod:
12763 # Operate on copies as this is still in prereq
12764 nics = [nic.Copy() for nic in instance.nics]
12765 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12766 self._CreateNewNic, self._ApplyNicMods, None)
12767 self._new_nics = nics
12768 else:
12769 self._new_nics = None
12771 def _ConvertPlainToDrbd(self, feedback_fn):
12772 """Converts an instance from plain to drbd.
12775 feedback_fn("Converting template to drbd")
12776 instance = self.instance
12777 pnode = instance.primary_node
12778 snode = self.op.remote_node
12780 assert instance.disk_template == constants.DT_PLAIN
12782 # create a fake disk info for _GenerateDiskTemplate
12783 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12784 constants.IDISK_VG: d.logical_id[0]}
12785 for d in instance.disks]
12786 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12787 instance.name, pnode, [snode],
12788 disk_info, None, None, 0, feedback_fn,
12789 self.diskparams)
12790 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12791 self.diskparams)
12792 info = _GetInstanceInfoText(instance)
12793 feedback_fn("Creating additional volumes...")
12794 # first, create the missing data and meta devices
12795 for disk in anno_disks:
12796 # unfortunately this is... not too nice
12797 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12798 info, True)
12799 for child in disk.children:
12800 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12801 # at this stage, all new LVs have been created, we can rename the
12803 feedback_fn("Renaming original volumes...")
12804 rename_list = [(o, n.children[0].logical_id)
12805 for (o, n) in zip(instance.disks, new_disks)]
12806 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12807 result.Raise("Failed to rename original LVs")
12809 feedback_fn("Initializing DRBD devices...")
12810 # all child devices are in place, we can now create the DRBD devices
12811 for disk in anno_disks:
12812 for node in [pnode, snode]:
12813 f_create = node == pnode
12814 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12816 # at this point, the instance has been modified
12817 instance.disk_template = constants.DT_DRBD8
12818 instance.disks = new_disks
12819 self.cfg.Update(instance, feedback_fn)
12821 # Release node locks while waiting for sync
12822 _ReleaseLocks(self, locking.LEVEL_NODE)
12824 # disks are created, waiting for sync
12825 disk_abort = not _WaitForSync(self, instance,
12826 oneshot=not self.op.wait_for_sync)
12827 if disk_abort:
12828 raise errors.OpExecError("There are some degraded disks for"
12829 " this instance, please cleanup manually")
12831 # Node resource locks will be released by caller
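# Resulting layout sketch: every DRBD8 disk generated above has two children,
# [data LV, meta LV]. Only the meta LV (children[1]) and the secondary's LVs
# are created from scratch; each existing plain LV is renamed to the new data
# child's logical_id, so the instance data is preserved and merely has to be
# synced over to the newly added secondary node.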
12833 def _ConvertDrbdToPlain(self, feedback_fn):
12834 """Converts an instance from drbd to plain.
12837 instance = self.instance
12839 assert len(instance.secondary_nodes) == 1
12840 assert instance.disk_template == constants.DT_DRBD8
12842 pnode = instance.primary_node
12843 snode = instance.secondary_nodes[0]
12844 feedback_fn("Converting template to plain")
12846 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12847 new_disks = [d.children[0] for d in instance.disks]
12849 # copy over size and mode
12850 for parent, child in zip(old_disks, new_disks):
12851 child.size = parent.size
12852 child.mode = parent.mode
12854 # this is a DRBD disk, return its port to the pool
12855 # NOTE: this must be done right before the call to cfg.Update!
12856 for disk in old_disks:
12857 tcp_port = disk.logical_id[2]
12858 self.cfg.AddTcpUdpPort(tcp_port)
12860 # update instance structure
12861 instance.disks = new_disks
12862 instance.disk_template = constants.DT_PLAIN
12863 self.cfg.Update(instance, feedback_fn)
12865 # Release locks in case removing disks takes a while
12866 _ReleaseLocks(self, locking.LEVEL_NODE)
12868 feedback_fn("Removing volumes on the secondary node...")
12869 for disk in old_disks:
12870 self.cfg.SetDiskID(disk, snode)
12871 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12872 if msg:
12873 self.LogWarning("Could not remove block device %s on node %s,"
12874 " continuing anyway: %s", disk.iv_name, snode, msg)
12876 feedback_fn("Removing unneeded volumes on the primary node...")
12877 for idx, disk in enumerate(old_disks):
12878 meta = disk.children[1]
12879 self.cfg.SetDiskID(meta, pnode)
12880 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12881 if msg:
12882 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12883 " continuing anyway: %s", idx, pnode, msg)
12885 def _CreateNewDisk(self, idx, params, _):
12886 """Creates a new disk.
12889 instance = self.instance
12892 if instance.disk_template in constants.DTS_FILEBASED:
12893 (file_driver, file_path) = instance.disks[0].logical_id
12894 file_path = os.path.dirname(file_path)
12895 else:
12896 file_driver = file_path = None
12898 disk = \
12899 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12900 instance.primary_node, instance.secondary_nodes,
12901 [params], file_path, file_driver, idx,
12902 self.Log, self.diskparams)[0]
12904 info = _GetInstanceInfoText(instance)
12906 logging.info("Creating volume %s for instance %s",
12907 disk.iv_name, instance.name)
12908 # Note: this needs to be kept in sync with _CreateDisks
12910 for node in instance.all_nodes:
12911 f_create = (node == instance.primary_node)
12912 try:
12913 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12914 except errors.OpExecError, err:
12915 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12916 disk.iv_name, disk, node, err)
12918 return (disk, [
12919 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12920 ])
12923 def _ModifyDisk(idx, disk, params, _):
12924 """Modifies a disk.
12927 disk.mode = params[constants.IDISK_MODE]
12929 return [
12930 ("disk.mode/%d" % idx, disk.mode),
12931 ]
12933 def _RemoveDisk(self, idx, root, _):
12937 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
12938 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
12939 self.cfg.SetDiskID(disk, node)
12940 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12941 if msg:
12942 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12943 " continuing anyway", idx, node, msg)
12945 # if this is a DRBD disk, return its port to the pool
12946 if root.dev_type in constants.LDS_DRBD:
12947 self.cfg.AddTcpUdpPort(root.logical_id[2])
12950 def _CreateNewNic(idx, params, private):
12951 """Creates data structure for a new network interface.
12954 mac = params[constants.INIC_MAC]
12955 ip = params.get(constants.INIC_IP, None)
12956 nicparams = private.params
12958 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12959 ("nic.%d" % idx,
12960 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12961 (mac, ip, private.filled[constants.NIC_MODE],
12962 private.filled[constants.NIC_LINK])),
12963 ])
12966 def _ApplyNicMods(idx, nic, params, private):
12967 """Modifies a network interface.
12970 changes = []
12972 for key in [constants.INIC_MAC, constants.INIC_IP]:
12973 if key in params:
12974 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12975 setattr(nic, key, params[key])
12977 if private.params:
12978 nic.nicparams = private.params
12980 for (key, val) in params.items():
12981 changes.append(("nic.%s/%d" % (key, idx), val))
12983 return changes
12985 def Exec(self, feedback_fn):
12986 """Modifies an instance.
12988 All parameters take effect only at the next restart of the instance.
12991 # Process here the warnings from CheckPrereq, as we don't have a
12992 # feedback_fn there.
12993 # TODO: Replace with self.LogWarning
12994 for warn in self.warn:
12995 feedback_fn("WARNING: %s" % warn)
12997 assert ((self.op.disk_template is None) ^
12998 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12999 "Not owning any node resource locks"
13001 result = []
13002 instance = self.instance
13005 if self.op.runtime_mem:
13006 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13007 instance,
13008 self.op.runtime_mem)
13009 rpcres.Raise("Cannot modify instance runtime memory")
13010 result.append(("runtime_memory", self.op.runtime_mem))
13012 # Apply disk changes
13013 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13014 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13015 _UpdateIvNames(0, instance.disks)
13017 if self.op.disk_template:
13019 check_nodes = set(instance.all_nodes)
13020 if self.op.remote_node:
13021 check_nodes.add(self.op.remote_node)
13022 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13023 owned = self.owned_locks(level)
13024 assert not (check_nodes - owned), \
13025 ("Not owning the correct locks, owning %r, expected at least %r" %
13026 (owned, check_nodes))
13028 r_shut = _ShutdownInstanceDisks(self, instance)
13029 if not r_shut:
13030 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13031 " proceed with disk template conversion")
13032 mode = (instance.disk_template, self.op.disk_template)
13033 try:
13034 self._DISK_CONVERSIONS[mode](self, feedback_fn)
13035 except:
13036 self.cfg.ReleaseDRBDMinors(instance.name)
13037 raise
13038 result.append(("disk_template", self.op.disk_template))
13040 assert instance.disk_template == self.op.disk_template, \
13041 ("Expected disk template '%s', found '%s'" %
13042 (self.op.disk_template, instance.disk_template))
13044 # Release node and resource locks if there are any (they might already have
13045 # been released during disk conversion)
13046 _ReleaseLocks(self, locking.LEVEL_NODE)
13047 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13049 # Apply NIC changes
13050 if self._new_nics is not None:
13051 instance.nics = self._new_nics
13052 result.extend(self._nic_chgdesc)
13055 if self.op.hvparams:
13056 instance.hvparams = self.hv_inst
13057 for key, val in self.op.hvparams.iteritems():
13058 result.append(("hv/%s" % key, val))
13061 if self.op.beparams:
13062 instance.beparams = self.be_inst
13063 for key, val in self.op.beparams.iteritems():
13064 result.append(("be/%s" % key, val))
13067 if self.op.os_name:
13068 instance.os = self.op.os_name
13071 if self.op.osparams:
13072 instance.osparams = self.os_inst
13073 for key, val in self.op.osparams.iteritems():
13074 result.append(("os/%s" % key, val))
13076 if self.op.offline is None:
13077 # Ignore
13078 pass
13079 elif self.op.offline:
13080 # Mark instance as offline
13081 self.cfg.MarkInstanceOffline(instance.name)
13082 result.append(("admin_state", constants.ADMINST_OFFLINE))
13083 else:
13084 # Mark instance as online, but stopped
13085 self.cfg.MarkInstanceDown(instance.name)
13086 result.append(("admin_state", constants.ADMINST_DOWN))
13088 self.cfg.Update(instance, feedback_fn)
13090 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13091 self.owned_locks(locking.LEVEL_NODE)), \
13092 "All node locks should have been released by now"
13096 _DISK_CONVERSIONS = {
13097 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13098 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13099 }
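# Dispatch sketch: Exec() builds mode = (old_template, new_template) and looks
# it up in this map, e.g. (constants.DT_PLAIN, constants.DT_DRBD8) selects
# _ConvertPlainToDrbd; only the plain<->drbd8 pairs listed here have
# conversion helpers.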
13102 class LUInstanceChangeGroup(LogicalUnit):
13103 HPATH = "instance-change-group"
13104 HTYPE = constants.HTYPE_INSTANCE
13107 def ExpandNames(self):
13108 self.share_locks = _ShareAll()
13109 self.needed_locks = {
13110 locking.LEVEL_NODEGROUP: [],
13111 locking.LEVEL_NODE: [],
13114 self._ExpandAndLockInstance()
13116 if self.op.target_groups:
13117 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13118 self.op.target_groups)
13119 else:
13120 self.req_target_uuids = None
13122 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13124 def DeclareLocks(self, level):
13125 if level == locking.LEVEL_NODEGROUP:
13126 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13128 if self.req_target_uuids:
13129 lock_groups = set(self.req_target_uuids)
13131 # Lock all groups used by instance optimistically; this requires going
13132 # via the node before it's locked, requiring verification later on
13133 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13134 lock_groups.update(instance_groups)
13135 else:
13136 # No target groups, need to lock all of them
13137 lock_groups = locking.ALL_SET
13139 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13141 elif level == locking.LEVEL_NODE:
13142 if self.req_target_uuids:
13143 # Lock all nodes used by instances
13144 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13145 self._LockInstancesNodes()
13147 # Lock all nodes in all potential target groups
13148 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13149 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13150 member_nodes = [node_name
13151 for group in lock_groups
13152 for node_name in self.cfg.GetNodeGroup(group).members]
13153 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13154 else:
13155 # Lock all nodes as all groups are potential targets
13156 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13158 def CheckPrereq(self):
13159 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13160 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13161 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13163 assert (self.req_target_uuids is None or
13164 owned_groups.issuperset(self.req_target_uuids))
13165 assert owned_instances == set([self.op.instance_name])
13167 # Get instance information
13168 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13170 # Check if node groups for locked instance are still correct
13171 assert owned_nodes.issuperset(self.instance.all_nodes), \
13172 ("Instance %s's nodes changed while we kept the lock" %
13173 self.op.instance_name)
13175 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13176 owned_groups, owned_nodes)
13178 if self.req_target_uuids:
13179 # User requested specific target groups
13180 self.target_uuids = frozenset(self.req_target_uuids)
13181 else:
13182 # All groups except those used by the instance are potential targets
13183 self.target_uuids = owned_groups - inst_groups
13185 conflicting_groups = self.target_uuids & inst_groups
13186 if conflicting_groups:
13187 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13188 " used by the instance '%s'" %
13189 (utils.CommaJoin(conflicting_groups),
13190 self.op.instance_name),
13191 errors.ECODE_INVAL)
13193 if not self.target_uuids:
13194 raise errors.OpPrereqError("There are no possible target groups",
13195 errors.ECODE_INVAL)
13197 def BuildHooksEnv(self):
13198 """Build hooks env.
13201 assert self.target_uuids
13203 env = {
13204 "TARGET_GROUPS": " ".join(self.target_uuids),
13205 }
13207 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13209 return env
13211 def BuildHooksNodes(self):
13212 """Build hooks nodes.
13215 mn = self.cfg.GetMasterNode()
13216 return ([mn], [mn])
13218 def Exec(self, feedback_fn):
13219 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13221 assert instances == [self.op.instance_name], "Instance not locked"
13223 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13224 instances=instances, target_groups=list(self.target_uuids))
13226 ial.Run(self.op.iallocator)
13228 if not ial.success:
13229 raise errors.OpPrereqError("Can't compute solution for changing group of"
13230 " instance '%s' using iallocator '%s': %s" %
13231 (self.op.instance_name, self.op.iallocator,
13232 ial.info), errors.ECODE_NORES)
13234 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13236 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13237 " instance '%s'", len(jobs), self.op.instance_name)
13239 return ResultWithJobs(jobs)
13242 class LUBackupQuery(NoHooksLU):
13243 """Query the exports list
13248 def CheckArguments(self):
13249 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13250 ["node", "export"], self.op.use_locking)
13252 def ExpandNames(self):
13253 self.expq.ExpandNames(self)
13255 def DeclareLocks(self, level):
13256 self.expq.DeclareLocks(self, level)
13258 def Exec(self, feedback_fn):
13259 result = {}
13261 for (node, expname) in self.expq.OldStyleQuery(self):
13262 if expname is None:
13263 result[node] = False
13264 else:
13265 result.setdefault(node, []).append(expname)
13267 return result
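# Example of the returned structure (illustrative names): nodes without
# exports map to False, all others to the list of export names, e.g.
# {"node1.example.com": False, "node2.example.com": ["inst1.example.com"]}.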
13270 class _ExportQuery(_QueryBase):
13271 FIELDS = query.EXPORT_FIELDS
13273 #: The node name is not a unique key for this query
13274 SORT_FIELD = "node"
13276 def ExpandNames(self, lu):
13277 lu.needed_locks = {}
13279 # The following variables interact with _QueryBase._GetNames
13280 if self.names:
13281 self.wanted = _GetWantedNodes(lu, self.names)
13282 else:
13283 self.wanted = locking.ALL_SET
13285 self.do_locking = self.use_locking
13287 if self.do_locking:
13288 lu.share_locks = _ShareAll()
13289 lu.needed_locks = {
13290 locking.LEVEL_NODE: self.wanted,
13293 def DeclareLocks(self, lu, level):
13296 def _GetQueryData(self, lu):
13297 """Computes the list of nodes and their attributes.
13300 # Locking is not used
13302 assert not (compat.any(lu.glm.is_owned(level)
13303 for level in locking.LEVELS
13304 if level != locking.LEVEL_CLUSTER) or
13305 self.do_locking or self.use_locking)
13307 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13309 result = []
13311 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13312 if nres.fail_msg:
13313 result.append((node, None))
13314 else:
13315 result.extend((node, expname) for expname in nres.payload)
13317 return result
13320 class LUBackupPrepare(NoHooksLU):
13321 """Prepares an instance for an export and returns useful information.
13326 def ExpandNames(self):
13327 self._ExpandAndLockInstance()
13329 def CheckPrereq(self):
13330 """Check prerequisites.
13333 instance_name = self.op.instance_name
13335 self.instance = self.cfg.GetInstanceInfo(instance_name)
13336 assert self.instance is not None, \
13337 "Cannot retrieve locked instance %s" % self.op.instance_name
13338 _CheckNodeOnline(self, self.instance.primary_node)
13340 self._cds = _GetClusterDomainSecret()
13342 def Exec(self, feedback_fn):
13343 """Prepares an instance for an export.
13346 instance = self.instance
13348 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13349 salt = utils.GenerateSecret(8)
13351 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13352 result = self.rpc.call_x509_cert_create(instance.primary_node,
13353 constants.RIE_CERT_VALIDITY)
13354 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13356 (name, cert_pem) = result.payload
13358 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13359 cert_pem)
13361 return {
13362 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13363 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13364 salt),
13365 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13366 }
13371 class LUBackupExport(LogicalUnit):
13372 """Export an instance to an image in the cluster.
13375 HPATH = "instance-export"
13376 HTYPE = constants.HTYPE_INSTANCE
13379 def CheckArguments(self):
13380 """Check the arguments.
13383 self.x509_key_name = self.op.x509_key_name
13384 self.dest_x509_ca_pem = self.op.destination_x509_ca
13386 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13387 if not self.x509_key_name:
13388 raise errors.OpPrereqError("Missing X509 key name for encryption",
13389 errors.ECODE_INVAL)
13391 if not self.dest_x509_ca_pem:
13392 raise errors.OpPrereqError("Missing destination X509 CA",
13393 errors.ECODE_INVAL)
13395 def ExpandNames(self):
13396 self._ExpandAndLockInstance()
13398 # Lock all nodes for local exports
13399 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13400 # FIXME: lock only instance primary and destination node
13402 # Sad but true, for now we have do lock all nodes, as we don't know where
13403 # the previous export might be, and in this LU we search for it and
13404 # remove it from its current node. In the future we could fix this by:
13405 # - making a tasklet to search (share-lock all), then create the
13406 # new one, then one to remove, after
13407 # - removing the removal operation altogether
13408 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13410 def DeclareLocks(self, level):
13411 """Last minute lock declaration."""
13412 # All nodes are locked anyway, so nothing to do here.
13414 def BuildHooksEnv(self):
13415 """Build hooks env.
13417 This will run on the master, primary node and target node.
13421 "EXPORT_MODE": self.op.mode,
13422 "EXPORT_NODE": self.op.target_node,
13423 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13424 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13425 # TODO: Generic function for boolean env variables
13426 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13429 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13433 def BuildHooksNodes(self):
13434 """Build hooks nodes.
13437 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13439 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13440 nl.append(self.op.target_node)
13442 return nl
13444 def CheckPrereq(self):
13445 """Check prerequisites.
13447 This checks that the instance and node names are valid.
13450 instance_name = self.op.instance_name
13452 self.instance = self.cfg.GetInstanceInfo(instance_name)
13453 assert self.instance is not None, \
13454 "Cannot retrieve locked instance %s" % self.op.instance_name
13455 _CheckNodeOnline(self, self.instance.primary_node)
13457 if (self.op.remove_instance and
13458 self.instance.admin_state == constants.ADMINST_UP and
13459 not self.op.shutdown):
13460 raise errors.OpPrereqError("Can not remove instance without shutting it"
13461 " down before", errors.ECODE_STATE)
13463 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13464 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13465 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13466 assert self.dst_node is not None
13468 _CheckNodeOnline(self, self.dst_node.name)
13469 _CheckNodeNotDrained(self, self.dst_node.name)
13472 self.dest_disk_info = None
13473 self.dest_x509_ca = None
13475 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13476 self.dst_node = None
13478 if len(self.op.target_node) != len(self.instance.disks):
13479 raise errors.OpPrereqError(("Received destination information for %s"
13480 " disks, but instance %s has %s disks") %
13481 (len(self.op.target_node), instance_name,
13482 len(self.instance.disks)),
13483 errors.ECODE_INVAL)
13485 cds = _GetClusterDomainSecret()
13487 # Check X509 key name
13488 try:
13489 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13490 except (TypeError, ValueError), err:
13491 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
13492 errors.ECODE_INVAL)
13494 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13495 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13496 errors.ECODE_INVAL)
13498 # Load and verify CA
13499 try:
13500 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13501 except OpenSSL.crypto.Error, err:
13502 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13503 (err, ), errors.ECODE_INVAL)
13505 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13506 if errcode is not None:
13507 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13508 (msg, ), errors.ECODE_INVAL)
13510 self.dest_x509_ca = cert
13512 # Verify target information
13513 disk_info = []
13514 for idx, disk_data in enumerate(self.op.target_node):
13515 try:
13516 (host, port, magic) = \
13517 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13518 except errors.GenericError, err:
13519 raise errors.OpPrereqError("Target info for disk %s: %s" %
13520 (idx, err), errors.ECODE_INVAL)
13522 disk_info.append((host, port, magic))
13524 assert len(disk_info) == len(self.op.target_node)
13525 self.dest_disk_info = disk_info
13527 else:
13528 raise errors.ProgrammerError("Unhandled export mode %r" %
13529 self.op.mode)
13531 # instance disk type verification
13532 # TODO: Implement export support for file-based disks
13533 for disk in self.instance.disks:
13534 if disk.dev_type == constants.LD_FILE:
13535 raise errors.OpPrereqError("Export not supported for instances with"
13536 " file-based disks", errors.ECODE_INVAL)
13538 def _CleanupExports(self, feedback_fn):
13539 """Removes exports of current instance from all other nodes.
13541 If an instance in a cluster with nodes A..D was exported to node C, its
13542 exports will be removed from the nodes A, B and D.
13545 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13547 nodelist = self.cfg.GetNodeList()
13548 nodelist.remove(self.dst_node.name)
13550 # on one-node clusters nodelist will be empty after the removal
13551 # if we proceed the backup would be removed because OpBackupQuery
13552 # substitutes an empty list with the full cluster node list.
13553 iname = self.instance.name
13554 if nodelist:
13555 feedback_fn("Removing old exports for instance %s" % iname)
13556 exportlist = self.rpc.call_export_list(nodelist)
13557 for node in exportlist:
13558 if exportlist[node].fail_msg:
13559 continue
13560 if iname in exportlist[node].payload:
13561 msg = self.rpc.call_export_remove(node, iname).fail_msg
13562 if msg:
13563 self.LogWarning("Could not remove older export for instance %s"
13564 " on node %s: %s", iname, node, msg)
13566 def Exec(self, feedback_fn):
13567 """Export an instance to an image in the cluster.
13570 assert self.op.mode in constants.EXPORT_MODES
13572 instance = self.instance
13573 src_node = instance.primary_node
13575 if self.op.shutdown:
13576 # shutdown the instance, but not the disks
13577 feedback_fn("Shutting down instance %s" % instance.name)
13578 result = self.rpc.call_instance_shutdown(src_node, instance,
13579 self.op.shutdown_timeout)
13580 # TODO: Maybe ignore failures if ignore_remove_failures is set
13581 result.Raise("Could not shutdown instance %s on"
13582 " node %s" % (instance.name, src_node))
13584 # set the disks ID correctly since call_instance_start needs the
13585 # correct drbd minor to create the symlinks
13586 for disk in instance.disks:
13587 self.cfg.SetDiskID(disk, src_node)
13589 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13591 if activate_disks:
13592 # Activate the instance disks if we're exporting a stopped instance
13593 feedback_fn("Activating disks for %s" % instance.name)
13594 _StartInstanceDisks(self, instance, None)
13596 try:
13597 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13598 instance)
13600 helper.CreateSnapshots()
13601 try:
13602 if (self.op.shutdown and
13603 instance.admin_state == constants.ADMINST_UP and
13604 not self.op.remove_instance):
13605 assert not activate_disks
13606 feedback_fn("Starting instance %s" % instance.name)
13607 result = self.rpc.call_instance_start(src_node,
13608 (instance, None, None), False)
13609 msg = result.fail_msg
13610 if msg:
13611 feedback_fn("Failed to start instance: %s" % msg)
13612 _ShutdownInstanceDisks(self, instance)
13613 raise errors.OpExecError("Could not start instance: %s" % msg)
13615 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13616 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13617 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13618 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13619 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13621 (key_name, _, _) = self.x509_key_name
13623 dest_ca_pem = \
13624 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13625 self.dest_x509_ca)
13627 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13628 key_name, dest_ca_pem,
13629 timeouts)
13630 finally:
13631 helper.Cleanup()
13633 # Check for backwards compatibility
13634 assert len(dresults) == len(instance.disks)
13635 assert compat.all(isinstance(i, bool) for i in dresults), \
13636 "Not all results are boolean: %r" % dresults
13640 feedback_fn("Deactivating disks for %s" % instance.name)
13641 _ShutdownInstanceDisks(self, instance)
13643 if not (compat.all(dresults) and fin_resu):
13644 failures = []
13645 if not fin_resu:
13646 failures.append("export finalization")
13647 if not compat.all(dresults):
13648 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13649 if not dsk)
13650 failures.append("disk export: disk(s) %s" % fdsk)
13652 raise errors.OpExecError("Export failed, errors in %s" %
13653 utils.CommaJoin(failures))
13655 # At this point, the export was successful, we can cleanup/finish
13657 # Remove instance if requested
13658 if self.op.remove_instance:
13659 feedback_fn("Removing instance %s" % instance.name)
13660 _RemoveInstance(self, feedback_fn, instance,
13661 self.op.ignore_remove_failures)
13663 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13664 self._CleanupExports(feedback_fn)
13666 return fin_resu, dresults
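# Return value sketch: fin_resu is a boolean for the export finalization and
# dresults contains one boolean per instance disk, so a fully successful
# export of a two-disk instance returns (True, [True, True]); partial
# failures have already been turned into an OpExecError above.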
13669 class LUBackupRemove(NoHooksLU):
13670 """Remove exports related to the named instance.
13675 def ExpandNames(self):
13676 self.needed_locks = {}
13677 # We need all nodes to be locked in order for RemoveExport to work, but we
13678 # don't need to lock the instance itself, as nothing will happen to it (and
13679 # we can remove exports also for a removed instance)
13680 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13682 def Exec(self, feedback_fn):
13683 """Remove any export.
13686 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13687 # If the instance was not found we'll try with the name that was passed in.
13688 # This will only work if it was an FQDN, though.
13689 fqdn_warn = False
13690 if not instance_name:
13691 fqdn_warn = True
13692 instance_name = self.op.instance_name
13694 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13695 exportlist = self.rpc.call_export_list(locked_nodes)
13696 found = False
13697 for node in exportlist:
13698 msg = exportlist[node].fail_msg
13699 if msg:
13700 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13701 continue
13702 if instance_name in exportlist[node].payload:
13703 found = True
13704 result = self.rpc.call_export_remove(node, instance_name)
13705 msg = result.fail_msg
13706 if msg:
13707 logging.error("Could not remove export for instance %s"
13708 " on node %s: %s", instance_name, node, msg)
13710 if fqdn_warn and not found:
13711 feedback_fn("Export not found. If trying to remove an export belonging"
13712 " to a deleted instance please use its Fully Qualified"
13713 " Domain Name.")
13716 class LUGroupAdd(LogicalUnit):
13717 """Logical unit for creating node groups.
13720 HPATH = "group-add"
13721 HTYPE = constants.HTYPE_GROUP
13724 def ExpandNames(self):
13725 # We need the new group's UUID here so that we can create and acquire the
13726 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13727 # that it should not check whether the UUID exists in the configuration.
13728 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13729 self.needed_locks = {}
13730 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13732 def CheckPrereq(self):
13733 """Check prerequisites.
13735 This checks that the given group name is not an existing node group
13739 try:
13740 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13741 except errors.OpPrereqError:
13742 pass
13743 else:
13744 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13745 " node group (UUID: %s)" %
13746 (self.op.group_name, existing_uuid),
13747 errors.ECODE_EXISTS)
13749 if self.op.ndparams:
13750 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13752 if self.op.hv_state:
13753 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13755 self.new_hv_state = None
13757 if self.op.disk_state:
13758 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13760 self.new_disk_state = None
13762 if self.op.diskparams:
13763 for templ in constants.DISK_TEMPLATES:
13764 if templ in self.op.diskparams:
13765 utils.ForceDictType(self.op.diskparams[templ],
13766 constants.DISK_DT_TYPES)
13767 self.new_diskparams = self.op.diskparams
13768 try:
13769 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13770 except errors.OpPrereqError, err:
13771 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
13772 errors.ECODE_INVAL)
13773 else:
13774 self.new_diskparams = {}
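# Example opcode parameters (illustrative, assuming a DRBD disk parameter
# such as "metavg" exists): diskparams={constants.DT_DRBD8: {"metavg":
# "othervg"}} is type-checked per template via ForceDictType above and then
# validated against the known option names in constants.DISK_DT_DEFAULTS.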
13776 if self.op.ipolicy:
13777 cluster = self.cfg.GetClusterInfo()
13778 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13780 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13781 except errors.ConfigurationError, err:
13782 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13783 errors.ECODE_INVAL)
13785 def BuildHooksEnv(self):
13786 """Build hooks env.
13790 "GROUP_NAME": self.op.group_name,
13793 def BuildHooksNodes(self):
13794 """Build hooks nodes.
13797 mn = self.cfg.GetMasterNode()
13798 return ([mn], [mn])
13800 def Exec(self, feedback_fn):
13801 """Add the node group to the cluster.
13804 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13805 uuid=self.group_uuid,
13806 alloc_policy=self.op.alloc_policy,
13807 ndparams=self.op.ndparams,
13808 diskparams=self.new_diskparams,
13809 ipolicy=self.op.ipolicy,
13810 hv_state_static=self.new_hv_state,
13811 disk_state_static=self.new_disk_state)
13813 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13814 del self.remove_locks[locking.LEVEL_NODEGROUP]
13817 class LUGroupAssignNodes(NoHooksLU):
13818 """Logical unit for assigning nodes to groups.
13823 def ExpandNames(self):
13824 # These raise errors.OpPrereqError on their own:
13825 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13826 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13828 # We want to lock all the affected nodes and groups. We have readily
13829 # available the list of nodes, and the *destination* group. To gather the
13830 # list of "source" groups, we need to fetch node information later on.
13831 self.needed_locks = {
13832 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13833 locking.LEVEL_NODE: self.op.nodes,
13836 def DeclareLocks(self, level):
13837 if level == locking.LEVEL_NODEGROUP:
13838 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13840 # Try to get all affected nodes' groups without having the group or node
13841 # lock yet. Needs verification later in the code flow.
13842 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13844 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13846 def CheckPrereq(self):
13847 """Check prerequisites.
13850 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13851 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13852 frozenset(self.op.nodes))
13854 expected_locks = (set([self.group_uuid]) |
13855 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13856 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13857 if actual_locks != expected_locks:
13858 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13859 " current groups are '%s', used to be '%s'" %
13860 (utils.CommaJoin(expected_locks),
13861 utils.CommaJoin(actual_locks)))
13863 self.node_data = self.cfg.GetAllNodesInfo()
13864 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13865 instance_data = self.cfg.GetAllInstancesInfo()
13867 if self.group is None:
13868 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13869 (self.op.group_name, self.group_uuid))
13871 (new_splits, previous_splits) = \
13872 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13873 for node in self.op.nodes],
13874 self.node_data, instance_data)
13876 if new_splits:
13877 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13879 if not self.op.force:
13880 raise errors.OpExecError("The following instances get split by this"
13881 " change and --force was not given: %s" %
13882 fmt_new_splits)
13883 else:
13884 self.LogWarning("This operation will split the following instances: %s",
13885 fmt_new_splits)
13887 if previous_splits:
13888 self.LogWarning("In addition, these already-split instances continue"
13889 " to be split across groups: %s",
13890 utils.CommaJoin(utils.NiceSort(previous_splits)))
13892 def Exec(self, feedback_fn):
13893 """Assign nodes to a new group.
13896 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13898 self.cfg.AssignGroupNodes(mods)
13901 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13902 """Check for split instances after a node assignment.
13904 This method considers a series of node assignments as an atomic operation,
13905 and returns information about split instances after applying the set of
13908 In particular, it returns information about newly split instances, and
13909 instances that were already split, and remain so after the change.
13911 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13914 @type changes: list of (node_name, new_group_uuid) pairs.
13915 @param changes: list of node assignments to consider.
13916 @param node_data: a dict with data for all nodes
13917 @param instance_data: a dict with all instances to consider
13918 @rtype: a two-tuple
13919 @return: a list of instances that were previously okay and result split as a
13920 consequence of this change, and a list of instances that were previously
13921 split and this change does not fix.
13924 changed_nodes = dict((node, group) for node, group in changes
13925 if node_data[node].group != group)
13927 all_split_instances = set()
13928 previously_split_instances = set()
13930 def InstanceNodes(instance):
13931 return [instance.primary_node] + list(instance.secondary_nodes)
13933 for inst in instance_data.values():
13934 if inst.disk_template not in constants.DTS_INT_MIRROR:
13935 continue
13937 instance_nodes = InstanceNodes(inst)
13939 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13940 previously_split_instances.add(inst.name)
13942 if len(set(changed_nodes.get(node, node_data[node].group)
13943 for node in instance_nodes)) > 1:
13944 all_split_instances.add(inst.name)
13946 return (list(all_split_instances - previously_split_instances),
13947 list(previously_split_instances & all_split_instances))
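# Worked example (hypothetical nodes and groups): a DRBD instance with its
# primary on nodeA and secondary on nodeB, both in group1, is not split; if
# this assignment moves only nodeB to group2, the instance shows up in the
# first returned list (newly split). An instance already spanning group1 and
# group2 that still does so afterwards ends up in the second list instead.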
13950 class _GroupQuery(_QueryBase):
13951 FIELDS = query.GROUP_FIELDS
13953 def ExpandNames(self, lu):
13954 lu.needed_locks = {}
13956 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13957 self._cluster = lu.cfg.GetClusterInfo()
13958 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13960 if not self.names:
13961 self.wanted = [name_to_uuid[name]
13962 for name in utils.NiceSort(name_to_uuid.keys())]
13963 else:
13964 # Accept names to be either names or UUIDs.
13965 missing = []
13966 self.wanted = []
13967 all_uuid = frozenset(self._all_groups.keys())
13969 for name in self.names:
13970 if name in all_uuid:
13971 self.wanted.append(name)
13972 elif name in name_to_uuid:
13973 self.wanted.append(name_to_uuid[name])
13974 else:
13975 missing.append(name)
13977 if missing:
13978 raise errors.OpPrereqError("Some groups do not exist: %s" %
13979 utils.CommaJoin(missing),
13980 errors.ECODE_NOENT)
13982 def DeclareLocks(self, lu, level):
13985 def _GetQueryData(self, lu):
13986 """Computes the list of node groups and their attributes.
13989 do_nodes = query.GQ_NODE in self.requested_data
13990 do_instances = query.GQ_INST in self.requested_data
13992 group_to_nodes = None
13993 group_to_instances = None
13995 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13996 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13997 # latter GetAllInstancesInfo() is not enough, for we have to go through
13998 # instance->node. Hence, we will need to process nodes even if we only need
13999 # instance information.
14000 if do_nodes or do_instances:
14001 all_nodes = lu.cfg.GetAllNodesInfo()
14002 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
14003 node_to_group = {}
14005 for node in all_nodes.values():
14006 if node.group in group_to_nodes:
14007 group_to_nodes[node.group].append(node.name)
14008 node_to_group[node.name] = node.group
14010 if do_instances:
14011 all_instances = lu.cfg.GetAllInstancesInfo()
14012 group_to_instances = dict((uuid, []) for uuid in self.wanted)
14014 for instance in all_instances.values():
14015 node = instance.primary_node
14016 if node in node_to_group:
14017 group_to_instances[node_to_group[node]].append(instance.name)
14020 # Do not pass on node information if it was not requested.
14021 group_to_nodes = None
14023 return query.GroupQueryData(self._cluster,
14024 [self._all_groups[uuid]
14025 for uuid in self.wanted],
14026 group_to_nodes, group_to_instances,
14027 query.GQ_DISKPARAMS in self.requested_data)
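# Shape of the computed mappings (illustrative): group_to_nodes maps each
# requested group UUID to its node names, and group_to_instances attributes
# every instance to the group of its primary node; either mapping stays None
# when the corresponding data (GQ_NODE respectively GQ_INST) was not
# requested.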
14030 class LUGroupQuery(NoHooksLU):
14031 """Logical unit for querying node groups.
14036 def CheckArguments(self):
14037 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14038 self.op.output_fields, False)
14040 def ExpandNames(self):
14041 self.gq.ExpandNames(self)
14043 def DeclareLocks(self, level):
14044 self.gq.DeclareLocks(self, level)
14046 def Exec(self, feedback_fn):
14047 return self.gq.OldStyleQuery(self)
14050 class LUGroupSetParams(LogicalUnit):
14051 """Modifies the parameters of a node group.
14054 HPATH = "group-modify"
14055 HTYPE = constants.HTYPE_GROUP
14058 def CheckArguments(self):
14061 self.op.diskparams,
14062 self.op.alloc_policy,
14064 self.op.disk_state,
14068 if all_changes.count(None) == len(all_changes):
14069 raise errors.OpPrereqError("Please pass at least one modification",
14070 errors.ECODE_INVAL)
14072 def ExpandNames(self):
14073 # This raises errors.OpPrereqError on its own:
14074 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14076 self.needed_locks = {
14077 locking.LEVEL_INSTANCE: [],
14078 locking.LEVEL_NODEGROUP: [self.group_uuid],
14081 self.share_locks[locking.LEVEL_INSTANCE] = 1
14083 def DeclareLocks(self, level):
14084 if level == locking.LEVEL_INSTANCE:
14085 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14087 # Lock instances optimistically, needs verification once group lock has
14089 self.needed_locks[locking.LEVEL_INSTANCE] = \
14090 self.cfg.GetNodeGroupInstances(self.group_uuid)
14093 def _UpdateAndVerifyDiskParams(old, new):
14094 """Updates and verifies disk parameters.
14097 new_params = _GetUpdatedParams(old, new)
14098 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14099 return new_params
14101 def CheckPrereq(self):
14102 """Check prerequisites.
14105 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14107 # Check if locked instances are still correct
14108 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14110 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14111 cluster = self.cfg.GetClusterInfo()
14113 if self.group is None:
14114 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14115 (self.op.group_name, self.group_uuid))
14117 if self.op.ndparams:
14118 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14119 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14120 self.new_ndparams = new_ndparams
14122 if self.op.diskparams:
14123 diskparams = self.group.diskparams
14124 uavdp = self._UpdateAndVerifyDiskParams
14125 # For each disktemplate subdict update and verify the values
14126 new_diskparams = dict((dt,
14127 uavdp(diskparams.get(dt, {}),
14128 self.op.diskparams[dt]))
14129 for dt in constants.DISK_TEMPLATES
14130 if dt in self.op.diskparams)
14131 # As we've all subdicts of diskparams ready, lets merge the actual
14132 # dict with all updated subdicts
14133 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14135 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14136 except errors.OpPrereqError, err:
14137 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14138 errors.ECODE_INVAL)
14140 if self.op.hv_state:
14141 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14142 self.group.hv_state_static)
14144 if self.op.disk_state:
14145 self.new_disk_state = \
14146 _MergeAndVerifyDiskState(self.op.disk_state,
14147 self.group.disk_state_static)
14149 if self.op.ipolicy:
14150 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14154 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14155 inst_filter = lambda inst: inst.name in owned_instances
14156 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14158 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
14160 new_ipolicy, instances)
14163 self.LogWarning("After the ipolicy change the following instances"
14164 " violate them: %s",
14165 utils.CommaJoin(violations))
14167 def BuildHooksEnv(self):
14168 """Build hooks env.
14172 "GROUP_NAME": self.op.group_name,
14173 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14176 def BuildHooksNodes(self):
14177 """Build hooks nodes.
14180 mn = self.cfg.GetMasterNode()
14181 return ([mn], [mn])
14183 def Exec(self, feedback_fn):
14184 """Modifies the node group.
14188 result = []
14189 if self.op.ndparams:
14190 self.group.ndparams = self.new_ndparams
14191 result.append(("ndparams", str(self.group.ndparams)))
14193 if self.op.diskparams:
14194 self.group.diskparams = self.new_diskparams
14195 result.append(("diskparams", str(self.group.diskparams)))
14197 if self.op.alloc_policy:
14198 self.group.alloc_policy = self.op.alloc_policy
14200 if self.op.hv_state:
14201 self.group.hv_state_static = self.new_hv_state
14203 if self.op.disk_state:
14204 self.group.disk_state_static = self.new_disk_state
14206 if self.op.ipolicy:
14207 self.group.ipolicy = self.new_ipolicy
14209 self.cfg.Update(self.group, feedback_fn)
14211 return result
14213 class LUGroupRemove(LogicalUnit):
14214 HPATH = "group-remove"
14215 HTYPE = constants.HTYPE_GROUP
14218 def ExpandNames(self):
14219 # This will raises errors.OpPrereqError on its own:
14220 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14221 self.needed_locks = {
14222 locking.LEVEL_NODEGROUP: [self.group_uuid],
14225 def CheckPrereq(self):
14226 """Check prerequisites.
14228 This checks that the given group name exists as a node group, that is
14229 empty (i.e., contains no nodes), and that is not the last group of the
14233 # Verify that the group is empty.
14234 group_nodes = [node.name
14235 for node in self.cfg.GetAllNodesInfo().values()
14236 if node.group == self.group_uuid]
14238 if group_nodes:
14239 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14240 " nodes: %s" %
14241 (self.op.group_name,
14242 utils.CommaJoin(utils.NiceSort(group_nodes))),
14243 errors.ECODE_STATE)
14245 # Verify the cluster would not be left group-less.
14246 if len(self.cfg.GetNodeGroupList()) == 1:
14247 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
14248 " removed" % self.op.group_name,
14249 errors.ECODE_STATE)
14251 def BuildHooksEnv(self):
14252 """Build hooks env.
14256 "GROUP_NAME": self.op.group_name,
14259 def BuildHooksNodes(self):
14260 """Build hooks nodes.
14263 mn = self.cfg.GetMasterNode()
14264 return ([mn], [mn])
14266 def Exec(self, feedback_fn):
14267 """Remove the node group.
14271 self.cfg.RemoveNodeGroup(self.group_uuid)
14272 except errors.ConfigurationError:
14273 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14274 (self.op.group_name, self.group_uuid))
14276 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14279 class LUGroupRename(LogicalUnit):
14280 HPATH = "group-rename"
14281 HTYPE = constants.HTYPE_GROUP
14284 def ExpandNames(self):
14285 # This raises errors.OpPrereqError on its own:
14286 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14288 self.needed_locks = {
14289 locking.LEVEL_NODEGROUP: [self.group_uuid],
14292 def CheckPrereq(self):
14293 """Check prerequisites.
14295 Ensures requested new name is not yet used.
14299 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14300 except errors.OpPrereqError:
14303 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14304 " node group (UUID: %s)" %
14305 (self.op.new_name, new_name_uuid),
14306 errors.ECODE_EXISTS)
14308 def BuildHooksEnv(self):
14309 """Build hooks env.
14313 "OLD_NAME": self.op.group_name,
14314 "NEW_NAME": self.op.new_name,
14317 def BuildHooksNodes(self):
14318 """Build hooks nodes.
14321 mn = self.cfg.GetMasterNode()
14323 all_nodes = self.cfg.GetAllNodesInfo()
14324 all_nodes.pop(mn, None)
14326 run_nodes = [mn]
14327 run_nodes.extend(node.name for node in all_nodes.values()
14328 if node.group == self.group_uuid)
14330 return (run_nodes, run_nodes)
14332 def Exec(self, feedback_fn):
14333 """Rename the node group.
14336 group = self.cfg.GetNodeGroup(self.group_uuid)
14338 if group is None:
14339 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14340 (self.op.group_name, self.group_uuid))
14342 group.name = self.op.new_name
14343 self.cfg.Update(group, feedback_fn)
14345 return self.op.new_name
14348 class LUGroupEvacuate(LogicalUnit):
14349 HPATH = "group-evacuate"
14350 HTYPE = constants.HTYPE_GROUP
14353 def ExpandNames(self):
14354 # This raises errors.OpPrereqError on its own:
14355 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14357 if self.op.target_groups:
14358 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14359 self.op.target_groups)
14361 self.req_target_uuids = []
14363 if self.group_uuid in self.req_target_uuids:
14364 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14365 " as a target group (targets are %s)" %
14367 utils.CommaJoin(self.req_target_uuids)),
14368 errors.ECODE_INVAL)
14370 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14372 self.share_locks = _ShareAll()
14373 self.needed_locks = {
14374 locking.LEVEL_INSTANCE: [],
14375 locking.LEVEL_NODEGROUP: [],
14376 locking.LEVEL_NODE: [],
14379 def DeclareLocks(self, level):
14380 if level == locking.LEVEL_INSTANCE:
14381 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14383 # Lock instances optimistically, needs verification once node and group
14384 # locks have been acquired
14385 self.needed_locks[locking.LEVEL_INSTANCE] = \
14386 self.cfg.GetNodeGroupInstances(self.group_uuid)
14388 elif level == locking.LEVEL_NODEGROUP:
14389 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14391 if self.req_target_uuids:
14392 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14394 # Lock all groups used by instances optimistically; this requires going
14395 # via the node before it's locked, requiring verification later on
14396 lock_groups.update(group_uuid
14397 for instance_name in
14398 self.owned_locks(locking.LEVEL_INSTANCE)
14399 for group_uuid in
14400 self.cfg.GetInstanceNodeGroups(instance_name))
14401 else:
14402 # No target groups, need to lock all of them
14403 lock_groups = locking.ALL_SET
14405 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14407 elif level == locking.LEVEL_NODE:
14408 # This will only lock the nodes in the group to be evacuated which
14409 # contain actual instances
14410 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14411 self._LockInstancesNodes()
14413 # Lock all nodes in group to be evacuated and target groups
14414 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14415 assert self.group_uuid in owned_groups
14416 member_nodes = [node_name
14417 for group in owned_groups
14418 for node_name in self.cfg.GetNodeGroup(group).members]
14419 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14421 def CheckPrereq(self):
14422 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14423 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14424 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14426 assert owned_groups.issuperset(self.req_target_uuids)
14427 assert self.group_uuid in owned_groups
14429 # Check if locked instances are still correct
14430 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14432 # Get instance information
14433 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14435 # Check if node groups for locked instances are still correct
14436 _CheckInstancesNodeGroups(self.cfg, self.instances,
14437 owned_groups, owned_nodes, self.group_uuid)
14439 if self.req_target_uuids:
14440 # User requested specific target groups
14441 self.target_uuids = self.req_target_uuids
14443 # All groups except the one to be evacuated are potential targets
14444 self.target_uuids = [group_uuid for group_uuid in owned_groups
14445 if group_uuid != self.group_uuid]
14447 if not self.target_uuids:
14448 raise errors.OpPrereqError("There are no possible target groups",
14449 errors.ECODE_INVAL)
14451 def BuildHooksEnv(self):
14452 """Build hooks env.
14456 "GROUP_NAME": self.op.group_name,
14457 "TARGET_GROUPS": " ".join(self.target_uuids),
14460 def BuildHooksNodes(self):
14461 """Build hooks nodes.
14464 mn = self.cfg.GetMasterNode()
14466 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14468 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14470 return (run_nodes, run_nodes)
14472 def Exec(self, feedback_fn):
14473 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14475 assert self.group_uuid not in self.target_uuids
14477 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14478 instances=instances, target_groups=self.target_uuids)
14480 ial.Run(self.op.iallocator)
14482 if not ial.success:
14483 raise errors.OpPrereqError("Can't compute group evacuation using"
14484 " iallocator '%s': %s" %
14485 (self.op.iallocator, ial.info),
14486 errors.ECODE_NORES)
14488 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14490 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14491 len(jobs), self.op.group_name)
14493 return ResultWithJobs(jobs)
14496 class TagsLU(NoHooksLU): # pylint: disable=W0223
14497 """Generic tags LU.
14499 This is an abstract class which is the parent of all the other tags LUs.
14502 def ExpandNames(self):
14503 self.group_uuid = None
14504 self.needed_locks = {}
14506 if self.op.kind == constants.TAG_NODE:
14507 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14508 lock_level = locking.LEVEL_NODE
14509 lock_name = self.op.name
14510 elif self.op.kind == constants.TAG_INSTANCE:
14511 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14512 lock_level = locking.LEVEL_INSTANCE
14513 lock_name = self.op.name
14514 elif self.op.kind == constants.TAG_NODEGROUP:
14515 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14516 lock_level = locking.LEVEL_NODEGROUP
14517 lock_name = self.group_uuid
14518 else:
14519 lock_level = None
14520 lock_name = None
14522 if lock_level and getattr(self.op, "use_locking", True):
14523 self.needed_locks[lock_level] = lock_name
14525 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14526 # not possible to acquire the BGL based on opcode parameters)
14528 def CheckPrereq(self):
14529 """Check prerequisites.
14532 if self.op.kind == constants.TAG_CLUSTER:
14533 self.target = self.cfg.GetClusterInfo()
14534 elif self.op.kind == constants.TAG_NODE:
14535 self.target = self.cfg.GetNodeInfo(self.op.name)
14536 elif self.op.kind == constants.TAG_INSTANCE:
14537 self.target = self.cfg.GetInstanceInfo(self.op.name)
14538 elif self.op.kind == constants.TAG_NODEGROUP:
14539 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14540 else:
14541 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14542 str(self.op.kind), errors.ECODE_INVAL)
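# Editor's note, an illustrative sketch only: the concrete tag LUs below rely
# on the plumbing in TagsLU. A minimal hypothetical subclass would look
# roughly like:
#
#   class LUTagsExample(TagsLU):
#     REQ_BGL = False
#
#     def Exec(self, feedback_fn):
#       # self.target was resolved by TagsLU.CheckPrereq from op.kind/op.name
#       return sorted(self.target.GetTags())
#
# "LUTagsExample" is not part of Ganeti; it only shows how the base class is
# meant to be used.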
14545 class LUTagsGet(TagsLU):
14546 """Returns the tags of a given object.
14551 def ExpandNames(self):
14552 TagsLU.ExpandNames(self)
14554 # Share locks as this is only a read operation
14555 self.share_locks = _ShareAll()
14557 def Exec(self, feedback_fn):
14558 """Returns the tag list.
14561 return list(self.target.GetTags())
14564 class LUTagsSearch(NoHooksLU):
14565 """Searches the tags for a given pattern.
14570 def ExpandNames(self):
14571 self.needed_locks = {}
14573 def CheckPrereq(self):
14574 """Check prerequisites.
14576 This checks the pattern passed for validity by compiling it.
14578 """
14579 try:
14580 self.re = re.compile(self.op.pattern)
14581 except re.error, err:
14582 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14583 (self.op.pattern, err), errors.ECODE_INVAL)
14585 def Exec(self, feedback_fn):
14586 """Returns the tag list.
14588 """
14589 cfg = self.cfg
14590 tgts = [("/cluster", cfg.GetClusterInfo())]
14591 ilist = cfg.GetAllInstancesInfo().values()
14592 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14593 nlist = cfg.GetAllNodesInfo().values()
14594 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14595 tgts.extend(("/nodegroup/%s" % n.name, n)
14596 for n in cfg.GetAllNodeGroupsInfo().values())
14597 results = []
14598 for path, target in tgts:
14599 for tag in target.GetTags():
14600 if self.re.search(tag):
14601 results.append((path, tag))
14602 return results
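# Editor's note, an illustrative sketch only: LUTagsSearch.Exec returns
# (path, tag) pairs, one per matching tag, for example (names assumed):
#
#   [("/cluster", "production"),
#    ("/instances/inst1.example.com", "production"),
#    ("/nodes/node1.example.com", "production")]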
14605 class LUTagsSet(TagsLU):
14606 """Sets a tag on a given object.
14611 def CheckPrereq(self):
14612 """Check prerequisites.
14614 This checks the type and length of the tag name and value.
14617 TagsLU.CheckPrereq(self)
14618 for tag in self.op.tags:
14619 objects.TaggableObject.ValidateTag(tag)
14621 def Exec(self, feedback_fn):
14625 try:
14626 for tag in self.op.tags:
14627 self.target.AddTag(tag)
14628 except errors.TagError, err:
14629 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14630 self.cfg.Update(self.target, feedback_fn)
14633 class LUTagsDel(TagsLU):
14634 """Delete a list of tags from a given object.
14639 def CheckPrereq(self):
14640 """Check prerequisites.
14642 This checks that we have the given tag.
14645 TagsLU.CheckPrereq(self)
14646 for tag in self.op.tags:
14647 objects.TaggableObject.ValidateTag(tag)
14648 del_tags = frozenset(self.op.tags)
14649 cur_tags = self.target.GetTags()
14651 diff_tags = del_tags - cur_tags
14652 if diff_tags:
14653 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14654 raise errors.OpPrereqError("Tag(s) %s not found" %
14655 (utils.CommaJoin(diff_names), ),
14656 errors.ECODE_NOENT)
14658 def Exec(self, feedback_fn):
14659 """Remove the tag from the object.
14662 for tag in self.op.tags:
14663 self.target.RemoveTag(tag)
14664 self.cfg.Update(self.target, feedback_fn)
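# Editor's note, a worked example only: the frozenset difference used in
# LUTagsDel.CheckPrereq reports exactly the tags that cannot be removed:
#
#   del_tags = frozenset(["web", "db"])
#   cur_tags = frozenset(["web"])
#   del_tags - cur_tags        # -> frozenset(["db"]), reported as not found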
14667 class LUTestDelay(NoHooksLU):
14668 """Sleep for a specified amount of time.
14670 This LU sleeps on the master and/or nodes for a specified amount of
14671 time.
14676 def ExpandNames(self):
14677 """Expand names and set required locks.
14679 This expands the node list, if any.
14682 self.needed_locks = {}
14683 if self.op.on_nodes:
14684 # _GetWantedNodes can be used here, but is not always appropriate to use
14685 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14686 # more information.
14687 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14688 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14690 def _TestDelay(self):
14691 """Do the actual sleep.
14694 if self.op.on_master:
14695 if not utils.TestDelay(self.op.duration):
14696 raise errors.OpExecError("Error during master delay test")
14697 if self.op.on_nodes:
14698 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14699 for node, node_result in result.items():
14700 node_result.Raise("Failure during rpc call to node %s" % node)
14702 def Exec(self, feedback_fn):
14703 """Execute the test delay opcode, with the wanted repetitions.
14706 if self.op.repeat == 0:
14707 self._TestDelay()
14708 else:
14709 top_value = self.op.repeat - 1
14710 for i in range(self.op.repeat):
14711 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14712 self._TestDelay()
14715 class LUTestJqueue(NoHooksLU):
14716 """Utility LU to test some aspects of the job queue.
14721 # Must be lower than default timeout for WaitForJobChange to see whether it
14722 # notices changed jobs
14723 _CLIENT_CONNECT_TIMEOUT = 20.0
14724 _CLIENT_CONFIRM_TIMEOUT = 60.0
14726 @classmethod
14727 def _NotifyUsingSocket(cls, cb, errcls):
14728 """Opens a Unix socket and waits for another program to connect.
14731 @param cb: Callback to send socket name to client
14732 @type errcls: class
14733 @param errcls: Exception class to use for errors
14736 # Using a temporary directory as there's no easy way to create temporary
14737 # sockets without writing a custom loop around tempfile.mktemp and
14738 # socket.bind
14739 tmpdir = tempfile.mkdtemp()
14740 try:
14741 tmpsock = utils.PathJoin(tmpdir, "sock")
14743 logging.debug("Creating temporary socket at %s", tmpsock)
14744 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14745 try:
14746 sock.bind(tmpsock)
14747 sock.listen(1)
14749 # Send details to client
14750 cb(tmpsock)
14752 # Wait for client to connect before continuing
14753 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14754 try:
14755 (conn, _) = sock.accept()
14756 except socket.error, err:
14757 raise errcls("Client didn't connect in time (%s)" % err)
14758 finally:
14759 sock.close()
14760 finally:
14761 # Remove as soon as client is connected
14762 shutil.rmtree(tmpdir)
14764 # Wait for client to close
14765 try:
14766 try:
14767 # pylint: disable=E1101
14768 # Instance of '_socketobject' has no ... member
14769 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14770 conn.recv(1)
14771 except socket.error, err:
14772 raise errcls("Client failed to confirm notification (%s)" % err)
14773 finally:
14774 conn.close()
14776 def _SendNotification(self, test, arg, sockname):
14777 """Sends a notification to the client.
14780 @param test: Test name
14781 @param arg: Test argument (depends on test)
14782 @type sockname: string
14783 @param sockname: Socket path
14786 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14788 def _Notify(self, prereq, test, arg):
14789 """Notifies the client of a test.
14792 @param prereq: Whether this is a prereq-phase test
14794 @param test: Test name
14795 @param arg: Test argument (depends on test)
14798 if prereq:
14799 errcls = errors.OpPrereqError
14800 else:
14801 errcls = errors.OpExecError
14803 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14804 test, arg),
14805 errcls)
14807 def CheckArguments(self):
14808 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14809 self.expandnames_calls = 0
14811 def ExpandNames(self):
14812 checkargs_calls = getattr(self, "checkargs_calls", 0)
14813 if checkargs_calls < 1:
14814 raise errors.ProgrammerError("CheckArguments was not called")
14816 self.expandnames_calls += 1
14818 if self.op.notify_waitlock:
14819 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14821 self.LogInfo("Expanding names")
14823 # Get lock on master node (just to get a lock, not for a particular reason)
14824 self.needed_locks = {
14825 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14826 }
14828 def Exec(self, feedback_fn):
14829 if self.expandnames_calls < 1:
14830 raise errors.ProgrammerError("ExpandNames was not called")
14832 if self.op.notify_exec:
14833 self._Notify(False, constants.JQT_EXEC, None)
14835 self.LogInfo("Executing")
14837 if self.op.log_messages:
14838 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14839 for idx, msg in enumerate(self.op.log_messages):
14840 self.LogInfo("Sending log message %s", idx + 1)
14841 feedback_fn(constants.JQT_MSGPREFIX + msg)
14842 # Report how many test messages have been sent
14843 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14845 if self.op.fail:
14846 raise errors.OpExecError("Opcode failure was requested")
14848 return True
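# Editor's note, an illustrative sketch only: a test client driving
# LUTestJqueue receives the socket path via a JQT_* log message and confirms
# each notification by connecting to it and then closing the connection:
#
#   client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#   client.connect(sockname)   # unblocks sock.accept() in _NotifyUsingSocket
#   client.close()             # unblocks conn.recv(1), confirming the test
#
# "sockname" is the path sent through the callback; the actual client lives
# in the test/QA suite, not in this module.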
14851 class IAllocator(object):
14852 """IAllocator framework.
14854 An IAllocator instance has four sets of attributes:
14855 - cfg that is needed to query the cluster
14856 - input data (all members of the _KEYS class attribute are required)
14857 - four buffer attributes (in|out_data|text), that represent the
14858 input (to the external script) in text and data structure format,
14859 and the output from it, again in two formats
14860 - the result variables from the script (success, info, nodes) for
14861 easy usage
14864 # pylint: disable=R0902
14865 # lots of instance attributes
14867 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14868 self.cfg = cfg
14869 self.rpc = rpc_runner
14870 # init buffer variables
14871 self.in_text = self.out_text = self.in_data = self.out_data = None
14872 # init all input fields so that pylint is happy
14873 self.mode = mode
14874 self.memory = self.disks = self.disk_template = self.spindle_use = None
14875 self.os = self.tags = self.nics = self.vcpus = None
14876 self.hypervisor = None
14877 self.relocate_from = None
14878 self.name = None
14879 self.instances = None
14880 self.evac_mode = None
14881 self.target_groups = []
14883 self.required_nodes = None
14884 # init result fields
14885 self.success = self.info = self.result = None
14887 try:
14888 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14889 except KeyError:
14890 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14891 " IAllocator" % self.mode)
14893 keyset = [n for (n, _) in keydata]
14895 for key in kwargs:
14896 if key not in keyset:
14897 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14898 " IAllocator" % key)
14899 setattr(self, key, kwargs[key])
14901 for key in keyset:
14902 if key not in kwargs:
14903 raise errors.ProgrammerError("Missing input parameter '%s' to"
14904 " IAllocator" % key)
14905 self._BuildInputData(compat.partial(fn, self), keydata)
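# Editor's note, an illustrative sketch only: callers construct IAllocator
# with the keyword arguments registered for the chosen mode in _MODE_DATA
# below, e.g. for a relocation request roughly:
#
#   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_RELOC,
#                    name="inst1.example.com",
#                    relocate_from=["node2.example.com"])
#   ial.Run(self.op.iallocator)
#
# Instance and node names above are assumptions for illustration.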
14907 def _ComputeClusterData(self):
14908 """Compute the generic allocator input data.
14910 This is the data that is independent of the actual operation.
14912 """
14913 cfg = self.cfg
14914 cluster_info = cfg.GetClusterInfo()
14916 data = {
14917 "version": constants.IALLOCATOR_VERSION,
14918 "cluster_name": cfg.GetClusterName(),
14919 "cluster_tags": list(cluster_info.GetTags()),
14920 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14921 "ipolicy": cluster_info.ipolicy,
14922 }
14923 ninfo = cfg.GetAllNodesInfo()
14924 iinfo = cfg.GetAllInstancesInfo().values()
14925 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14928 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14930 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14931 hypervisor_name = self.hypervisor
14932 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14933 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14934 else:
14935 hypervisor_name = cluster_info.primary_hypervisor
14937 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14938 [hypervisor_name])
14939 node_iinfo = \
14940 self.rpc.call_all_instances_info(node_list,
14941 cluster_info.enabled_hypervisors)
14943 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14945 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14946 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14947 i_list, config_ndata)
14948 assert len(data["nodes"]) == len(ninfo), \
14949 "Incomplete node data computed"
14951 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14953 self.in_data = data
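# Editor's note, an illustrative sketch only: after _ComputeClusterData the
# self.in_data dictionary has roughly this shape (values abbreviated):
#
#   {"version": constants.IALLOCATOR_VERSION, "cluster_name": "...",
#    "cluster_tags": [...], "enabled_hypervisors": [...], "ipolicy": {...},
#    "nodegroups": {...}, "nodes": {...}, "instances": {...}}
#
# _BuildInputData later adds the per-mode "request" key before serializing.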
14955 @staticmethod
14956 def _ComputeNodeGroupData(cfg):
14957 """Compute node groups data.
14960 cluster = cfg.GetClusterInfo()
14961 ng = dict((guuid, {
14962 "name": gdata.name,
14963 "alloc_policy": gdata.alloc_policy,
14964 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14965 })
14966 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14968 return ng
14970 @staticmethod
14971 def _ComputeBasicNodeData(cfg, node_cfg):
14972 """Compute global node data.
14975 @returns: a dict of name: (node dict, node config)
14978 # fill in static (config-based) values
14979 node_results = dict((ninfo.name, {
14980 "tags": list(ninfo.GetTags()),
14981 "primary_ip": ninfo.primary_ip,
14982 "secondary_ip": ninfo.secondary_ip,
14983 "offline": ninfo.offline,
14984 "drained": ninfo.drained,
14985 "master_candidate": ninfo.master_candidate,
14986 "group": ninfo.group,
14987 "master_capable": ninfo.master_capable,
14988 "vm_capable": ninfo.vm_capable,
14989 "ndparams": cfg.GetNdParams(ninfo),
14990 })
14991 for ninfo in node_cfg.values())
14993 return node_results
14995 @staticmethod
14996 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14997 node_results):
14998 """Compute global node data.
15000 @param node_results: the basic node structures as filled from the config
15003 #TODO(dynmem): compute the right data on MAX and MIN memory
15004 # make a copy of the current dict
15005 node_results = dict(node_results)
15006 for nname, nresult in node_data.items():
15007 assert nname in node_results, "Missing basic data for node %s" % nname
15008 ninfo = node_cfg[nname]
15010 if not (ninfo.offline or ninfo.drained):
15011 nresult.Raise("Can't get data for node %s" % nname)
15012 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
15013 nname)
15014 remote_info = _MakeLegacyNodeInfo(nresult.payload)
15016 for attr in ["memory_total", "memory_free", "memory_dom0",
15017 "vg_size", "vg_free", "cpu_total"]:
15018 if attr not in remote_info:
15019 raise errors.OpExecError("Node '%s' didn't return attribute"
15020 " '%s'" % (nname, attr))
15021 if not isinstance(remote_info[attr], int):
15022 raise errors.OpExecError("Node '%s' returned invalid value"
15023 " for '%s': %s" %
15024 (nname, attr, remote_info[attr]))
15025 # compute memory used by primary instances
15026 i_p_mem = i_p_up_mem = 0
15027 for iinfo, beinfo in i_list:
15028 if iinfo.primary_node == nname:
15029 i_p_mem += beinfo[constants.BE_MAXMEM]
15030 if iinfo.name not in node_iinfo[nname].payload:
15031 i_used_mem = 0
15032 else:
15033 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
15034 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
15035 remote_info["memory_free"] -= max(0, i_mem_diff)
15037 if iinfo.admin_state == constants.ADMINST_UP:
15038 i_p_up_mem += beinfo[constants.BE_MAXMEM]
15040 # compute memory used by instances
15041 pnr_dyn = {
15042 "total_memory": remote_info["memory_total"],
15043 "reserved_memory": remote_info["memory_dom0"],
15044 "free_memory": remote_info["memory_free"],
15045 "total_disk": remote_info["vg_size"],
15046 "free_disk": remote_info["vg_free"],
15047 "total_cpus": remote_info["cpu_total"],
15048 "i_pri_memory": i_p_mem,
15049 "i_pri_up_memory": i_p_up_mem,
15050 }
15051 pnr_dyn.update(node_results[nname])
15052 node_results[nname] = pnr_dyn
15054 return node_results
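# Editor's note, a worked example only: the dynamic node data above plans
# against an instance's maximum memory. For an instance with
# BE_MAXMEM = 4096 MB that is currently using 1024 MB, i_mem_diff is
# 4096 - 1024 = 3072, so 3072 MB is subtracted from the node's reported
# "memory_free" before it is handed to the iallocator script.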
15056 @staticmethod
15057 def _ComputeInstanceData(cluster_info, i_list):
15058 """Compute global instance data.
15060 """
15061 instance_data = {}
15062 for iinfo, beinfo in i_list:
15063 nic_data = []
15064 for nic in iinfo.nics:
15065 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
15066 nic_dict = {
15067 "mac": nic.mac,
15068 "ip": nic.ip,
15069 "mode": filled_params[constants.NIC_MODE],
15070 "link": filled_params[constants.NIC_LINK],
15071 }
15072 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
15073 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
15074 nic_data.append(nic_dict)
15075 pir = {
15076 "tags": list(iinfo.GetTags()),
15077 "admin_state": iinfo.admin_state,
15078 "vcpus": beinfo[constants.BE_VCPUS],
15079 "memory": beinfo[constants.BE_MAXMEM],
15080 "spindle_use": beinfo[constants.BE_SPINDLE_USE],
15081 "os": iinfo.os,
15082 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
15083 "nics": nic_data,
15084 "disks": [{constants.IDISK_SIZE: dsk.size,
15085 constants.IDISK_MODE: dsk.mode}
15086 for dsk in iinfo.disks],
15087 "disk_template": iinfo.disk_template,
15088 "hypervisor": iinfo.hypervisor,
15089 }
15090 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
15091 pir["disks"])
15092 instance_data[iinfo.name] = pir
15094 return instance_data
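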
15096 def _AddNewInstance(self):
15097 """Add new instance data to allocator structure.
15099 This in combination with _AllocatorGetClusterData will create the
15100 correct structure needed as input for the allocator.
15102 The checks for the completeness of the opcode must have already been
15103 done.
15105 """
15106 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
15108 if self.disk_template in constants.DTS_INT_MIRROR:
15109 self.required_nodes = 2
15110 else:
15111 self.required_nodes = 1
15113 request = {
15114 "name": self.name,
15115 "disk_template": self.disk_template,
15116 "tags": self.tags,
15117 "os": self.os,
15118 "vcpus": self.vcpus,
15119 "memory": self.memory,
15120 "spindle_use": self.spindle_use,
15121 "disks": self.disks,
15122 "disk_space_total": disk_space,
15123 "nics": self.nics,
15124 "required_nodes": self.required_nodes,
15125 "hypervisor": self.hypervisor,
15126 }
15128 return request
15130 def _AddRelocateInstance(self):
15131 """Add relocate instance data to allocator structure.
15133 This in combination with _IAllocatorGetClusterData will create the
15134 correct structure needed as input for the allocator.
15136 The checks for the completeness of the opcode must have already been
15137 done.
15139 """
15140 instance = self.cfg.GetInstanceInfo(self.name)
15141 if instance is None:
15142 raise errors.ProgrammerError("Unknown instance '%s' passed to"
15143 " IAllocator" % self.name)
15145 if instance.disk_template not in constants.DTS_MIRRORED:
15146 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
15147 errors.ECODE_INVAL)
15149 if instance.disk_template in constants.DTS_INT_MIRROR and \
15150 len(instance.secondary_nodes) != 1:
15151 raise errors.OpPrereqError("Instance has not exactly one secondary node",
15152 errors.ECODE_STATE)
15154 self.required_nodes = 1
15155 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
15156 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
15158 request = {
15159 "name": self.name,
15160 "disk_space_total": disk_space,
15161 "required_nodes": self.required_nodes,
15162 "relocate_from": self.relocate_from,
15163 }
15164 return request
15166 def _AddNodeEvacuate(self):
15167 """Get data for node-evacuate requests.
15169 """
15170 return {
15171 "instances": self.instances,
15172 "evac_mode": self.evac_mode,
15173 }
15175 def _AddChangeGroup(self):
15176 """Get data for change-group requests.
15178 """
15179 return {
15180 "instances": self.instances,
15181 "target_groups": self.target_groups,
15182 }
15184 def _BuildInputData(self, fn, keydata):
15185 """Build input data structures.
15188 self._ComputeClusterData()
15190 request = fn()
15191 request["type"] = self.mode
15192 for keyname, keytype in keydata:
15193 if keyname not in request:
15194 raise errors.ProgrammerError("Request parameter %s is missing" %
15195 keyname)
15196 val = request[keyname]
15197 if not keytype(val):
15198 raise errors.ProgrammerError("Request parameter %s doesn't pass"
15199 " validation, value %s, expected"
15200 " type %s" % (keyname, val, keytype))
15201 self.in_data["request"] = request
15203 self.in_text = serializer.Dump(self.in_data)
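# Editor's note, an illustrative sketch only: for a change-group request the
# serialized "request" section produced by _BuildInputData looks roughly like
#
#   {"type": constants.IALLOCATOR_MODE_CHG_GROUP,
#    "instances": ["inst1.example.com"],
#    "target_groups": ["uuid-of-target-group"]}
#
# with the instance name and group UUID above being placeholders.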
15205 _STRING_LIST = ht.TListOf(ht.TString)
15206 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
15207 # pylint: disable=E1101
15208 # Class '...' has no 'OP_ID' member
15209 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
15210 opcodes.OpInstanceMigrate.OP_ID,
15211 opcodes.OpInstanceReplaceDisks.OP_ID])
15212 })))
15214 _NEVAC_MOVED = \
15215 ht.TListOf(ht.TAnd(ht.TIsLength(3),
15216 ht.TItems([ht.TNonEmptyString,
15217 ht.TNonEmptyString,
15218 ht.TListOf(ht.TNonEmptyString),
15219 ])))
15220 _NEVAC_FAILED = \
15221 ht.TListOf(ht.TAnd(ht.TIsLength(2),
15222 ht.TItems([ht.TNonEmptyString,
15223 ht.TMaybeString,
15224 ])))
15225 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
15226 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
15228 _MODE_DATA = {
15229 constants.IALLOCATOR_MODE_ALLOC:
15230 (_AddNewInstance, [
15232 ("name", ht.TString),
15233 ("memory", ht.TInt),
15234 ("spindle_use", ht.TInt),
15235 ("disks", ht.TListOf(ht.TDict)),
15236 ("disk_template", ht.TString),
15237 ("os", ht.TString),
15238 ("tags", _STRING_LIST),
15239 ("nics", ht.TListOf(ht.TDict)),
15240 ("vcpus", ht.TInt),
15241 ("hypervisor", ht.TString),
15242 ], ht.TList),
15243 constants.IALLOCATOR_MODE_RELOC:
15244 (_AddRelocateInstance,
15245 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
15246 ht.TList),
15247 constants.IALLOCATOR_MODE_NODE_EVAC:
15248 (_AddNodeEvacuate, [
15249 ("instances", _STRING_LIST),
15250 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
15251 ], _NEVAC_RESULT),
15252 constants.IALLOCATOR_MODE_CHG_GROUP:
15253 (_AddChangeGroup, [
15254 ("instances", _STRING_LIST),
15255 ("target_groups", _STRING_LIST),
15256 ], _NEVAC_RESULT),
15257 }
15259 def Run(self, name, validate=True, call_fn=None):
15260 """Run an instance allocator and return the results.
15263 if call_fn is None:
15264 call_fn = self.rpc.call_iallocator_runner
15266 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
15267 result.Raise("Failure while running the iallocator script")
15269 self.out_text = result.payload
15270 if validate:
15271 self._ValidateResult()
15273 def _ValidateResult(self):
15274 """Process the allocator results.
15276 This will process and if successful save the result in
15277 self.out_data and the other parameters.
15279 """
15280 try:
15281 rdict = serializer.Load(self.out_text)
15282 except Exception, err:
15283 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
15285 if not isinstance(rdict, dict):
15286 raise errors.OpExecError("Can't parse iallocator results: not a dict")
15288 # TODO: remove backwards compatibility in later versions
15289 if "nodes" in rdict and "result" not in rdict:
15290 rdict["result"] = rdict["nodes"]
15291 del rdict["nodes"]
15293 for key in "success", "info", "result":
15294 if key not in rdict:
15295 raise errors.OpExecError("Can't parse iallocator results:"
15296 " missing key '%s'" % key)
15297 setattr(self, key, rdict[key])
15299 if not self._result_check(self.result):
15300 raise errors.OpExecError("Iallocator returned invalid result,"
15301 " expected %s, got %s" %
15302 (self._result_check, self.result),
15303 errors.ECODE_INVAL)
15305 if self.mode == constants.IALLOCATOR_MODE_RELOC:
15306 assert self.relocate_from is not None
15307 assert self.required_nodes == 1
15309 node2group = dict((name, ndata["group"])
15310 for (name, ndata) in self.in_data["nodes"].items())
15312 fn = compat.partial(self._NodesToGroups, node2group,
15313 self.in_data["nodegroups"])
15315 instance = self.cfg.GetInstanceInfo(self.name)
15316 request_groups = fn(self.relocate_from + [instance.primary_node])
15317 result_groups = fn(rdict["result"] + [instance.primary_node])
15319 if self.success and not set(result_groups).issubset(request_groups):
15320 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15321 " differ from original groups (%s)" %
15322 (utils.CommaJoin(result_groups),
15323 utils.CommaJoin(request_groups)))
15325 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15326 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15328 self.out_data = rdict
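# Editor's note, an illustrative sketch only: a well-formed iallocator reply
# parsed by _ValidateResult must at least contain the three keys checked
# above, e.g. (contents abbreviated):
#
#   {"success": true, "info": "allocation successful", "result": [...]}
#
# where "result" must additionally satisfy the mode's _result_check type.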
15330 @staticmethod
15331 def _NodesToGroups(node2group, groups, nodes):
15332 """Returns a list of unique group names for a list of nodes.
15334 @type node2group: dict
15335 @param node2group: Map from node name to group UUID
15337 @param groups: Group information
15339 @param nodes: Node names
15341 """
15342 result = set()
15344 for node in nodes:
15345 try:
15346 group_uuid = node2group[node]
15347 except KeyError:
15348 # Ignore unknown node
15349 continue
15350 else:
15351 try:
15352 group = groups[group_uuid]
15353 except KeyError:
15354 # Can't find group, let's use UUID
15355 group_name = group_uuid
15356 else:
15357 group_name = group["name"]
15359 result.add(group_name)
15361 return sorted(result)
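# Editor's note, an illustrative sketch only: _NodesToGroups maps node names
# to a sorted list of unique group names, silently skipping unknown nodes:
#
#   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
#   groups = {"uuid-a": {"name": "default"}, "uuid-b": {"name": "rack2"}}
#   IAllocator._NodesToGroups(node2group, groups, ["node1", "node2", "ghost"])
#   # -> ["default", "rack2"]
#
# Node and group names above are made up for illustration.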
15364 class LUTestAllocator(NoHooksLU):
15365 """Run allocator tests.
15367 This LU runs the allocator tests
15370 def CheckPrereq(self):
15371 """Check prerequisites.
15373 This checks the opcode parameters depending on the direction and mode of the test.
15376 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15377 for attr in ["memory", "disks", "disk_template",
15378 "os", "tags", "nics", "vcpus"]:
15379 if not hasattr(self.op, attr):
15380 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15381 attr, errors.ECODE_INVAL)
15382 iname = self.cfg.ExpandInstanceName(self.op.name)
15383 if iname is not None:
15384 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15385 iname, errors.ECODE_EXISTS)
15386 if not isinstance(self.op.nics, list):
15387 raise errors.OpPrereqError("Invalid parameter 'nics'",
15388 errors.ECODE_INVAL)
15389 if not isinstance(self.op.disks, list):
15390 raise errors.OpPrereqError("Invalid parameter 'disks'",
15391 errors.ECODE_INVAL)
15392 for row in self.op.disks:
15393 if (not isinstance(row, dict) or
15394 constants.IDISK_SIZE not in row or
15395 not isinstance(row[constants.IDISK_SIZE], int) or
15396 constants.IDISK_MODE not in row or
15397 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15398 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15399 " parameter", errors.ECODE_INVAL)
15400 if self.op.hypervisor is None:
15401 self.op.hypervisor = self.cfg.GetHypervisorType()
15402 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15403 fname = _ExpandInstanceName(self.cfg, self.op.name)
15404 self.op.name = fname
15405 self.relocate_from = \
15406 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15407 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15408 constants.IALLOCATOR_MODE_NODE_EVAC):
15409 if not self.op.instances:
15410 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15411 self.op.instances = _GetWantedInstances(self, self.op.instances)
15412 else:
15413 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15414 self.op.mode, errors.ECODE_INVAL)
15416 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15417 if self.op.allocator is None:
15418 raise errors.OpPrereqError("Missing allocator name",
15419 errors.ECODE_INVAL)
15420 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15421 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15422 self.op.direction, errors.ECODE_INVAL)
15424 def Exec(self, feedback_fn):
15425 """Run the allocator test.
15428 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15429 ial = IAllocator(self.cfg, self.rpc,
15430 mode=self.op.mode,
15431 name=self.op.name,
15432 memory=self.op.memory,
15433 disks=self.op.disks,
15434 disk_template=self.op.disk_template,
15435 os=self.op.os,
15436 tags=self.op.tags,
15437 nics=self.op.nics,
15438 vcpus=self.op.vcpus,
15439 hypervisor=self.op.hypervisor,
15440 )
15441 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15442 ial = IAllocator(self.cfg, self.rpc,
15443 mode=self.op.mode,
15444 name=self.op.name,
15445 relocate_from=list(self.relocate_from),
15446 )
15447 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15448 ial = IAllocator(self.cfg, self.rpc,
15449 mode=self.op.mode,
15450 instances=self.op.instances,
15451 target_groups=self.op.target_groups)
15452 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15453 ial = IAllocator(self.cfg, self.rpc,
15454 mode=self.op.mode,
15455 instances=self.op.instances,
15456 evac_mode=self.op.evac_mode)
15457 else:
15458 raise errors.ProgrammerError("Unhandled mode %s in"
15459 " LUTestAllocator.Exec", self.op.mode)
15461 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15462 result = ial.in_text
15463 else:
15464 ial.Run(self.op.allocator, validate=False)
15465 result = ial.out_text
15466 return result
15469 #: Query type implementations
15470 _QUERY_IMPL = {
15471 constants.QR_CLUSTER: _ClusterQuery,
15472 constants.QR_INSTANCE: _InstanceQuery,
15473 constants.QR_NODE: _NodeQuery,
15474 constants.QR_GROUP: _GroupQuery,
15475 constants.QR_OS: _OsQuery,
15476 constants.QR_EXPORT: _ExportQuery,
15477 }
15479 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15482 def _GetQueryImplementation(name):
15483 """Returns the implementation for a query type.
15485 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15487 """
15488 try:
15489 return _QUERY_IMPL[name]
15490 except KeyError:
15491 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15492 errors.ECODE_INVAL)
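# Editor's note, an illustrative sketch only: query opcodes resolve their
# backend class through _GetQueryImplementation, roughly:
#
#   impl = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
#   impl = _GetQueryImplementation("bogus")             # raises OpPrereqError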