4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
61 from ganeti import rpc
62 from ganeti import runtime
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95   @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
141 # Dicts used to declare locking needs to mcpu
142 self.needed_locks = None
143 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.remove_locks = {}
146 # Used to force good behavior when calling helper functions
147 self.recalculate_locks = {}
149 self.Log = processor.Log # pylint: disable=C0103
150 self.LogWarning = processor.LogWarning # pylint: disable=C0103
151 self.LogInfo = processor.LogInfo # pylint: disable=C0103
152 self.LogStep = processor.LogStep # pylint: disable=C0103
153 # support for dry-run
154 self.dry_run_result = None
155 # support for generic debug attribute
156 if (not hasattr(self.op, "debug_level") or
157 not isinstance(self.op.debug_level, int)):
158 self.op.debug_level = 0
163 # Validate opcode parameters and set defaults
164 self.op.Validate(True)
166 self.CheckArguments()
168 def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments.
171     This method is for doing a simple syntactic check and ensuring
172 validity of opcode parameters, without any cluster-related
173 checks. While the same can be accomplished in ExpandNames and/or
174     CheckPrereq, doing these separately is better because:
176       - ExpandNames is left as purely a lock-related function
177 - CheckPrereq is run after we have acquired locks (and possible
180 The function is allowed to change the self.op attribute so that
181 later methods can no longer worry about missing parameters.
186 def ExpandNames(self):
187 """Expand names for this LU.
189 This method is called before starting to execute the opcode, and it should
190 update all the parameters of the opcode to their canonical form (e.g. a
191 short node name must be fully expanded after this method has successfully
192 completed). This way locking, hooks, logging, etc. can work correctly.
194 LUs which implement this method must also populate the self.needed_locks
195 member, as a dict with lock levels as keys, and a list of needed lock names
198 - use an empty dict if you don't need any lock
199 - if you don't need any lock at a particular level omit that
200 level (note that in this case C{DeclareLocks} won't be called
201 at all for that level)
202 - if you need locks at a level, but you can't calculate it in
203 this function, initialise that level with an empty list and do
204 further processing in L{LogicalUnit.DeclareLocks} (see that
205 function's docstring)
206 - don't put anything for the BGL level
207 - if you want all locks at a level use L{locking.ALL_SET} as a value
209 If you need to share locks (rather than acquire them exclusively) at one
210 level you can modify self.share_locks, setting a true value (usually 1) for
211 that level. By default locks are not shared.
213 This function can also define a list of tasklets, which then will be
214 executed in order instead of the usual LU-level CheckPrereq and Exec
215 functions, if those are not defined by the LU.
219 # Acquire all nodes and one instance
220 self.needed_locks = {
221 locking.LEVEL_NODE: locking.ALL_SET,
222 locking.LEVEL_INSTANCE: ['instance1.example.com'],
224 # Acquire just two nodes
225 self.needed_locks = {
226 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
229 self.needed_locks = {} # No, you can't leave it to the default value None
232 # The implementation of this method is mandatory only if the new LU is
233 # concurrent, so that old LUs don't need to be changed all at the same
236 self.needed_locks = {} # Exclusive LUs don't need locks.
238 raise NotImplementedError
240 def DeclareLocks(self, level):
241 """Declare LU locking needs for a level
243 While most LUs can just declare their locking needs at ExpandNames time,
244 sometimes there's the need to calculate some locks after having acquired
245 the ones before. This function is called just before acquiring locks at a
246 particular level, but after acquiring the ones at lower levels, and permits
247 such calculations. It can be used to modify self.needed_locks, and by
248 default it does nothing.
250 This function is only called if you have something already set in
251 self.needed_locks for the level.
253 @param level: Locking level which is going to be locked
254 @type level: member of L{ganeti.locking.LEVELS}
258 def CheckPrereq(self):
259 """Check prerequisites for this LU.
261 This method should check that the prerequisites for the execution
262 of this LU are fulfilled. It can do internode communication, but
263 it should be idempotent - no cluster or system changes are
266 The method should raise errors.OpPrereqError in case something is
267 not fulfilled. Its return value is ignored.
269 This method should also update all the parameters of the opcode to
270 their canonical form if it hasn't been done by ExpandNames before.
273 if self.tasklets is not None:
274 for (idx, tl) in enumerate(self.tasklets):
275 logging.debug("Checking prerequisites for tasklet %s/%s",
276 idx + 1, len(self.tasklets))
281 def Exec(self, feedback_fn):
284 This method should implement the actual work. It should raise
285 errors.OpExecError for failures that are somewhat dealt with in
289 if self.tasklets is not None:
290 for (idx, tl) in enumerate(self.tasklets):
291 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
294 raise NotImplementedError
296 def BuildHooksEnv(self):
297 """Build hooks environment for this LU.
300 @return: Dictionary containing the environment that will be used for
301 running the hooks for this LU. The keys of the dict must not be prefixed
302 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
303 will extend the environment with additional variables. If no environment
304 should be defined, an empty dictionary should be returned (not C{None}).
305 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
309 raise NotImplementedError
311 def BuildHooksNodes(self):
312 """Build list of nodes to run LU's hooks.
314 @rtype: tuple; (list, list)
315 @return: Tuple containing a list of node names on which the hook
316 should run before the execution and a list of node names on which the
317 hook should run after the execution. No nodes should be returned as an
318 empty list (and not None).
319 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
323 raise NotImplementedError
325 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
326 """Notify the LU about the results of its hooks.
328 This method is called every time a hooks phase is executed, and notifies
329 the Logical Unit about the hooks' result. The LU can then use it to alter
330 its result based on the hooks. By default the method does nothing and the
331 previous result is passed back unchanged but any LU can define it if it
332 wants to use the local cluster hook-scripts somehow.
334 @param phase: one of L{constants.HOOKS_PHASE_POST} or
335 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
336 @param hook_results: the results of the multi-node hooks rpc call
337     @param feedback_fn: function used to send feedback back to the caller
338 @param lu_result: the previous Exec result this LU had, or None
340 @return: the new Exec result, based on the previous result
344     # API must be kept, thus we ignore the unused argument and the
345     # "could be a function" warnings
346 # pylint: disable=W0613,R0201
349 def _ExpandAndLockInstance(self):
350 """Helper function to expand and lock an instance.
352 Many LUs that work on an instance take its name in self.op.instance_name
353 and need to expand it and then declare the expanded name for locking. This
354 function does it, and then updates self.op.instance_name to the expanded
355 name. It also initializes needed_locks as a dict, if this hasn't been done
359 if self.needed_locks is None:
360 self.needed_locks = {}
362 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
363 "_ExpandAndLockInstance called with instance-level locks set"
364 self.op.instance_name = _ExpandInstanceName(self.cfg,
365 self.op.instance_name)
366 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
368 def _LockInstancesNodes(self, primary_only=False,
369 level=locking.LEVEL_NODE):
370 """Helper function to declare instances' nodes for locking.
372 This function should be called after locking one or more instances to lock
373 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
374 with all primary or secondary nodes for instances already locked and
375 present in self.needed_locks[locking.LEVEL_INSTANCE].
377 It should be called from DeclareLocks, and for safety only works if
378 self.recalculate_locks[locking.LEVEL_NODE] is set.
380 In the future it may grow parameters to just lock some instance's nodes, or
381 to just lock primaries or secondary nodes, if needed.
383     It should be called in DeclareLocks in a way similar to::
385 if level == locking.LEVEL_NODE:
386 self._LockInstancesNodes()
388 @type primary_only: boolean
389 @param primary_only: only lock primary nodes of locked instances
390 @param level: Which lock level to use for locking nodes
393 assert level in self.recalculate_locks, \
394 "_LockInstancesNodes helper function called with no nodes to recalculate"
396     # TODO: check if we've really been called with the instance locks held
398 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
399 # future we might want to have different behaviors depending on the value
400 # of self.recalculate_locks[locking.LEVEL_NODE]
402 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
403 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
404 wanted_nodes.append(instance.primary_node)
406 wanted_nodes.extend(instance.secondary_nodes)
408 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
409 self.needed_locks[level] = wanted_nodes
410 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
411 self.needed_locks[level].extend(wanted_nodes)
413 raise errors.ProgrammerError("Unknown recalculation mode")
415 del self.recalculate_locks[level]
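# Illustrative sketch (not part of the original module): a typical way for a
# hypothetical LU to combine ExpandNames and DeclareLocks with the
# _LockInstancesNodes helper; the instance name below is made up.
#
#   def ExpandNames(self):
#     self.needed_locks = {
#       locking.LEVEL_INSTANCE: ["instance1.example.com"],
#       locking.LEVEL_NODE: [],
#     }
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()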
418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
419 """Simple LU which runs no hooks.
421 This LU is intended as a parent for other LogicalUnits which will
422 run no hooks, in order to reduce duplicate code.
428 def BuildHooksEnv(self):
429 """Empty BuildHooksEnv for NoHooksLu.
431 This just raises an error.
434 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
436 def BuildHooksNodes(self):
437 """Empty BuildHooksNodes for NoHooksLU.
440 raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU,
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
463 """Check prerequisites for this tasklets.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or
486 raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
499 def __init__(self, qfilter, fields, use_locking):
500 """Initializes this class.
503 self.use_locking = use_locking
505 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
506 namefield=self.SORT_FIELD)
507 self.requested_data = self.query.RequestedData()
508 self.names = self.query.RequestedNames()
510 # Sort only if no names were requested
511 self.sort_by_name = not self.names
513 self.do_locking = None
516 def _GetNames(self, lu, all_names, lock_level):
517 """Helper function to determine names asked for in the query.
521 names = lu.owned_locks(lock_level)
525 if self.wanted == locking.ALL_SET:
526 assert not self.names
527 # caller didn't specify names, so ordering is not important
528 return utils.NiceSort(names)
530 # caller specified names and we must keep the same order
532 assert not self.do_locking or lu.glm.is_owned(lock_level)
534 missing = set(self.wanted).difference(names)
536 raise errors.OpExecError("Some items were removed before retrieving"
537 " their data: %s" % missing)
539 # Return expanded names
542 def ExpandNames(self, lu):
543 """Expand names for this query.
545 See L{LogicalUnit.ExpandNames}.
548 raise NotImplementedError()
550 def DeclareLocks(self, lu, level):
551 """Declare locks for this query.
553 See L{LogicalUnit.DeclareLocks}.
556 raise NotImplementedError()
558 def _GetQueryData(self, lu):
559 """Collects all data for this query.
561 @return: Query data object
564 raise NotImplementedError()
566 def NewStyleQuery(self, lu):
567 """Collect data and execute query.
570 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
571 sort_by_name=self.sort_by_name)
573 def OldStyleQuery(self, lu):
574 """Collect data and execute query.
577 return self.query.OldStyleQuery(self._GetQueryData(lu),
578 sort_by_name=self.sort_by_name)
582 """Returns a dict declaring all lock levels shared.
585 return dict.fromkeys(locking.LEVELS, 1)
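# Illustrative usage: LUs that only need shared locks can simply assign the
# result in ExpandNames (cf. LUClusterVerifyConfig.ExpandNames below):
#
#   self.share_locks = _ShareAll()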
588 def _MakeLegacyNodeInfo(data):
589 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
591 Converts the data into a single dictionary. This is fine for most use cases,
592 but some require information from more than one volume group or hypervisor.
595 (bootid, (vg_info, ), (hv_info, )) = data
597 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
602 def _AnnotateDiskParams(instance, devs, cfg):
603 """Little helper wrapper to the rpc annotation method.
605 @param instance: The instance object
606 @type devs: List of L{objects.Disk}
607 @param devs: The root devices (not any of its children!)
608 @param cfg: The config object
609   @return: The annotated disk copies
610 @see L{rpc.AnnotateDiskParams}
613 return rpc.AnnotateDiskParams(instance.disk_template, devs,
614 cfg.GetInstanceDiskParams(instance))
617 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
619 """Checks if node groups for locked instances are still correct.
621 @type cfg: L{config.ConfigWriter}
622 @param cfg: Cluster configuration
623 @type instances: dict; string as key, L{objects.Instance} as value
624 @param instances: Dictionary, instance name as key, instance object as value
625 @type owned_groups: iterable of string
626 @param owned_groups: List of owned groups
627 @type owned_nodes: iterable of string
628 @param owned_nodes: List of owned nodes
629 @type cur_group_uuid: string or None
630 @param cur_group_uuid: Optional group UUID to check against instance's groups
633 for (name, inst) in instances.items():
634 assert owned_nodes.issuperset(inst.all_nodes), \
635 "Instance %s's nodes changed while we kept the lock" % name
637 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
639 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
640 "Instance %s has no node in group %s" % (name, cur_group_uuid)
643 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
644 """Checks if the owned node groups are still correct for an instance.
646 @type cfg: L{config.ConfigWriter}
647 @param cfg: The cluster configuration
648 @type instance_name: string
649 @param instance_name: Instance name
650 @type owned_groups: set or frozenset
651 @param owned_groups: List of currently owned node groups
654 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
656 if not owned_groups.issuperset(inst_groups):
657 raise errors.OpPrereqError("Instance %s's node groups changed since"
658 " locks were acquired, current groups are"
659 " are '%s', owning groups '%s'; retry the"
662 utils.CommaJoin(inst_groups),
663 utils.CommaJoin(owned_groups)),
669 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
670 """Checks if the instances in a node group are still correct.
672 @type cfg: L{config.ConfigWriter}
673 @param cfg: The cluster configuration
674 @type group_uuid: string
675 @param group_uuid: Node group UUID
676 @type owned_instances: set or frozenset
677 @param owned_instances: List of currently owned instances
680 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
681 if owned_instances != wanted_instances:
682 raise errors.OpPrereqError("Instances in node group '%s' changed since"
683 " locks were acquired, wanted '%s', have '%s';"
684 " retry the operation" %
686 utils.CommaJoin(wanted_instances),
687 utils.CommaJoin(owned_instances)),
690 return wanted_instances
693 def _SupportsOob(cfg, node):
694 """Tells if node supports OOB.
696 @type cfg: L{config.ConfigWriter}
697 @param cfg: The cluster configuration
698 @type node: L{objects.Node}
699 @param node: The node
700 @return: The OOB script if supported or an empty string otherwise
703 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
706 def _CopyLockList(names):
707 """Makes a copy of a list of lock names.
709 Handles L{locking.ALL_SET} correctly.
712 if names == locking.ALL_SET:
713 return locking.ALL_SET
718 def _GetWantedNodes(lu, nodes):
719 """Returns list of checked and expanded node names.
721 @type lu: L{LogicalUnit}
722 @param lu: the logical unit on whose behalf we execute
724 @param nodes: list of node names or None for all nodes
726 @return: the list of nodes, sorted
727 @raise errors.ProgrammerError: if the nodes parameter is wrong type
731 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
733 return utils.NiceSort(lu.cfg.GetNodeList())
736 def _GetWantedInstances(lu, instances):
737 """Returns list of checked and expanded instance names.
739 @type lu: L{LogicalUnit}
740 @param lu: the logical unit on whose behalf we execute
741 @type instances: list
742 @param instances: list of instance names or None for all instances
744 @return: the list of instances, sorted
745 @raise errors.OpPrereqError: if the instances parameter is wrong type
746 @raise errors.OpPrereqError: if any of the passed instances is not found
750 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
752 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
756 def _GetUpdatedParams(old_params, update_dict,
757 use_default=True, use_none=False):
758 """Return the new version of a parameter dictionary.
760 @type old_params: dict
761 @param old_params: old parameters
762 @type update_dict: dict
763 @param update_dict: dict containing new parameter values, or
764 constants.VALUE_DEFAULT to reset the parameter to its default
766   @type use_default: boolean
767   @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
768       values as 'to be deleted' values
769   @type use_none: boolean
770   @param use_none: whether to recognise C{None} values as 'to be
773 @return: the new parameter dictionary
776 params_copy = copy.deepcopy(old_params)
777 for key, val in update_dict.iteritems():
778 if ((use_default and val == constants.VALUE_DEFAULT) or
779 (use_none and val is None)):
785 params_copy[key] = val
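# Illustrative sketch of the intended semantics (made-up values): with
# use_default=True a value of constants.VALUE_DEFAULT drops the key, while
# other keys are overridden or added:
#
#   _GetUpdatedParams({"a": 1, "b": 2},
#                     {"a": constants.VALUE_DEFAULT, "c": 3})
#   # -> {"b": 2, "c": 3}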
789 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
790 """Return the new version of a instance policy.
792 @param group_policy: whether this policy applies to a group and thus
793 we should support removal of policy entries
796 use_none = use_default = group_policy
797 ipolicy = copy.deepcopy(old_ipolicy)
798 for key, value in new_ipolicy.items():
799 if key not in constants.IPOLICY_ALL_KEYS:
800 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
802 if key in constants.IPOLICY_ISPECS:
803 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
804 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
806 use_default=use_default)
808 if (not value or value == [constants.VALUE_DEFAULT] or
809 value == constants.VALUE_DEFAULT):
813 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
814 " on the cluster'" % key,
817 if key in constants.IPOLICY_PARAMETERS:
818 # FIXME: we assume all such values are float
820 ipolicy[key] = float(value)
821 except (TypeError, ValueError), err:
822 raise errors.OpPrereqError("Invalid value for attribute"
823 " '%s': '%s', error: %s" %
824 (key, value, err), errors.ECODE_INVAL)
826 # FIXME: we assume all others are lists; this should be redone
828 ipolicy[key] = list(value)
830 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
831 except errors.ConfigurationError, err:
832 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
837 def _UpdateAndVerifySubDict(base, updates, type_check):
838 """Updates and verifies a dict with sub dicts of the same type.
840 @param base: The dict with the old data
841 @param updates: The dict with the new data
842 @param type_check: Dict suitable to ForceDictType to verify correct types
843 @returns: A new dict with updated and verified values
847 new = _GetUpdatedParams(old, value)
848 utils.ForceDictType(new, type_check)
851 ret = copy.deepcopy(base)
852 ret.update(dict((key, fn(base.get(key, {}), value))
853 for key, value in updates.items()))
857 def _MergeAndVerifyHvState(op_input, obj_input):
858 """Combines the hv state from an opcode with the one of the object
860 @param op_input: The input dict from the opcode
861 @param obj_input: The input dict from the objects
862 @return: The verified and updated dict
866 invalid_hvs = set(op_input) - constants.HYPER_TYPES
868 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
869 " %s" % utils.CommaJoin(invalid_hvs),
871 if obj_input is None:
873 type_check = constants.HVSTS_PARAMETER_TYPES
874 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
879 def _MergeAndVerifyDiskState(op_input, obj_input):
880 """Combines the disk state from an opcode with the one of the object
882 @param op_input: The input dict from the opcode
883 @param obj_input: The input dict from the objects
884 @return: The verified and updated dict
887 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
889 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
890 utils.CommaJoin(invalid_dst),
892 type_check = constants.DSS_PARAMETER_TYPES
893 if obj_input is None:
895 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
897 for key, value in op_input.items())
902 def _ReleaseLocks(lu, level, names=None, keep=None):
903 """Releases locks owned by an LU.
905 @type lu: L{LogicalUnit}
906 @param level: Lock level
907 @type names: list or None
908 @param names: Names of locks to release
909 @type keep: list or None
910 @param keep: Names of locks to retain
913 assert not (keep is not None and names is not None), \
914 "Only one of the 'names' and the 'keep' parameters can be given"
916 if names is not None:
917 should_release = names.__contains__
919 should_release = lambda name: name not in keep
921 should_release = None
923 owned = lu.owned_locks(level)
925 # Not owning any lock at this level, do nothing
932 # Determine which locks to release
934 if should_release(name):
939 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
941 # Release just some locks
942 lu.glm.release(level, names=release)
944 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
947 lu.glm.release(level)
949 assert not lu.glm.is_owned(level), "No locks should be owned"
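# Illustrative sketch (hypothetical LU code): either keep only the node locks
# still needed, or release an explicit list of lock names; "unneeded_nodes"
# and "self.op.node_name" are made-up names.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
#   _ReleaseLocks(self, locking.LEVEL_NODE, names=unneeded_nodes)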
952 def _MapInstanceDisksToNodes(instances):
953 """Creates a map from (node, volume) to instance name.
955 @type instances: list of L{objects.Instance}
956 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
959 return dict(((node, vol), inst.name)
960 for inst in instances
961 for (node, vols) in inst.MapLVsByNode().items()
965 def _RunPostHook(lu, node_name):
966 """Runs the post-hook for an opcode on a single node.
969 hm = lu.proc.BuildHooksManager(lu)
971 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
972 except Exception, err: # pylint: disable=W0703
973 lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
976 def _CheckOutputFields(static, dynamic, selected):
977 """Checks whether all selected fields are valid.
979 @type static: L{utils.FieldSet}
980 @param static: static fields set
981 @type dynamic: L{utils.FieldSet}
982 @param dynamic: dynamic fields set
989 delta = f.NonMatching(selected)
991 raise errors.OpPrereqError("Unknown output fields selected: %s"
992 % ",".join(delta), errors.ECODE_INVAL)
995 def _CheckGlobalHvParams(params):
996 """Validates that given hypervisor params are not global ones.
998 This will ensure that instances don't get customised versions of
1002 used_globals = constants.HVC_GLOBALS.intersection(params)
1004 msg = ("The following hypervisor parameters are global and cannot"
1005 " be customized at instance level, please modify them at"
1006 " cluster level: %s" % utils.CommaJoin(used_globals))
1007 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1010 def _CheckNodeOnline(lu, node, msg=None):
1011 """Ensure that a given node is online.
1013 @param lu: the LU on behalf of which we make the check
1014 @param node: the node to check
1015 @param msg: if passed, should be a message to replace the default one
1016 @raise errors.OpPrereqError: if the node is offline
1020 msg = "Can't use offline node"
1021 if lu.cfg.GetNodeInfo(node).offline:
1022 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1025 def _CheckNodeNotDrained(lu, node):
1026 """Ensure that a given node is not drained.
1028 @param lu: the LU on behalf of which we make the check
1029 @param node: the node to check
1030 @raise errors.OpPrereqError: if the node is drained
1033 if lu.cfg.GetNodeInfo(node).drained:
1034 raise errors.OpPrereqError("Can't use drained node %s" % node,
1038 def _CheckNodeVmCapable(lu, node):
1039 """Ensure that a given node is vm capable.
1041 @param lu: the LU on behalf of which we make the check
1042 @param node: the node to check
1043 @raise errors.OpPrereqError: if the node is not vm capable
1046 if not lu.cfg.GetNodeInfo(node).vm_capable:
1047 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1051 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1052 """Ensure that a node supports a given OS.
1054 @param lu: the LU on behalf of which we make the check
1055 @param node: the node to check
1056 @param os_name: the OS to query about
1057 @param force_variant: whether to ignore variant errors
1058 @raise errors.OpPrereqError: if the node is not supporting the OS
1061 result = lu.rpc.call_os_get(node, os_name)
1062 result.Raise("OS '%s' not in supported OS list for node %s" %
1064 prereq=True, ecode=errors.ECODE_INVAL)
1065 if not force_variant:
1066 _CheckOSVariant(result.payload, os_name)
1069 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1070 """Ensure that a node has the given secondary ip.
1072 @type lu: L{LogicalUnit}
1073 @param lu: the LU on behalf of which we make the check
1075 @param node: the node to check
1076 @type secondary_ip: string
1077 @param secondary_ip: the ip to check
1078 @type prereq: boolean
1079 @param prereq: whether to throw a prerequisite or an execute error
1080 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1081 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1084 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1085 result.Raise("Failure checking secondary ip on node %s" % node,
1086 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1087 if not result.payload:
1088 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1089 " please fix and re-run this command" % secondary_ip)
1091 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1093 raise errors.OpExecError(msg)
1096 def _GetClusterDomainSecret():
1097 """Reads the cluster domain secret.
1100 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1104 def _CheckInstanceState(lu, instance, req_states, msg=None):
1105 """Ensure that an instance is in one of the required states.
1107 @param lu: the LU on behalf of which we make the check
1108 @param instance: the instance to check
1109 @param msg: if passed, should be a message to replace the default one
1110 @raise errors.OpPrereqError: if the instance is not in the required state
1114 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1115 if instance.admin_state not in req_states:
1116 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1117 (instance.name, instance.admin_state, msg),
1120 if constants.ADMINST_UP not in req_states:
1121 pnode = instance.primary_node
1122 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1123 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1124 prereq=True, ecode=errors.ECODE_ENVIRON)
1126 if instance.name in ins_l.payload:
1127 raise errors.OpPrereqError("Instance %s is running, %s" %
1128 (instance.name, msg), errors.ECODE_STATE)
1131 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1132 """Computes if value is in the desired range.
1134 @param name: name of the parameter for which we perform the check
1135 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1137 @param ipolicy: dictionary containing min, max and std values
1138 @param value: actual value that we want to use
1139 @return: None or element not meeting the criteria
1143 if value in [None, constants.VALUE_AUTO]:
1145 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1146 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1147 if value > max_v or min_v > value:
1149 fqn = "%s/%s" % (name, qualifier)
1152 return ("%s value %s is not in range [%s, %s]" %
1153 (fqn, value, min_v, max_v))
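# Illustrative sketch (made-up ipolicy values): a value outside the
# [min, max] range yields a violation message, a value inside it yields None.
#
#   ipolicy = {constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
#              constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 4096}}
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 8192)
#   # -> a message such as "... value 8192 is not in range [128, 4096]"
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 1024)
#   # -> None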
1157 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1158 nic_count, disk_sizes, spindle_use,
1159 _compute_fn=_ComputeMinMaxSpec):
1160 """Verifies ipolicy against provided specs.
1163 @param ipolicy: The ipolicy
1165 @param mem_size: The memory size
1166 @type cpu_count: int
1167 @param cpu_count: Used cpu cores
1168 @type disk_count: int
1169 @param disk_count: Number of disks used
1170 @type nic_count: int
1171 @param nic_count: Number of nics used
1172 @type disk_sizes: list of ints
1173 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1174 @type spindle_use: int
1175 @param spindle_use: The number of spindles this instance uses
1176 @param _compute_fn: The compute function (unittest only)
1177   @return: A list of violations, or an empty list if no violations are found
1180 assert disk_count == len(disk_sizes)
1183 (constants.ISPEC_MEM_SIZE, "", mem_size),
1184 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1185 (constants.ISPEC_DISK_COUNT, "", disk_count),
1186 (constants.ISPEC_NIC_COUNT, "", nic_count),
1187 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1188 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1189 for idx, d in enumerate(disk_sizes)]
1192 (_compute_fn(name, qualifier, ipolicy, value)
1193 for (name, qualifier, value) in test_settings))
1196 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1197 _compute_fn=_ComputeIPolicySpecViolation):
1198 """Compute if instance meets the specs of ipolicy.
1201 @param ipolicy: The ipolicy to verify against
1202 @type instance: L{objects.Instance}
1203 @param instance: The instance to verify
1204 @param _compute_fn: The function to verify ipolicy (unittest only)
1205 @see: L{_ComputeIPolicySpecViolation}
1208 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1209 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1210 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1211 disk_count = len(instance.disks)
1212 disk_sizes = [disk.size for disk in instance.disks]
1213 nic_count = len(instance.nics)
1215 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1216 disk_sizes, spindle_use)
1219 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1220 _compute_fn=_ComputeIPolicySpecViolation):
1221 """Compute if instance specs meets the specs of ipolicy.
1224 @param ipolicy: The ipolicy to verify against
1225   @type instance_spec: dict
1226 @param instance_spec: The instance spec to verify
1227 @param _compute_fn: The function to verify ipolicy (unittest only)
1228 @see: L{_ComputeIPolicySpecViolation}
1231 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1232 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1233 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1234 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1235 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1236 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1238 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1239 disk_sizes, spindle_use)
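# Illustrative sketch (values made up): shape of the instance_spec dict
# expected by _ComputeIPolicyInstanceSpecViolation.
#
#   spec = {constants.ISPEC_MEM_SIZE: 512,
#           constants.ISPEC_CPU_COUNT: 2,
#           constants.ISPEC_DISK_COUNT: 1,
#           constants.ISPEC_DISK_SIZE: [10240],
#           constants.ISPEC_NIC_COUNT: 1,
#           constants.ISPEC_SPINDLE_USE: 1}
#   violations = _ComputeIPolicyInstanceSpecViolation(ipolicy, spec)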
1242 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1244 _compute_fn=_ComputeIPolicyInstanceViolation):
1245 """Compute if instance meets the specs of the new target group.
1247 @param ipolicy: The ipolicy to verify
1248 @param instance: The instance object to verify
1249 @param current_group: The current group of the instance
1250 @param target_group: The new group of the instance
1251 @param _compute_fn: The function to verify ipolicy (unittest only)
1252 @see: L{_ComputeIPolicySpecViolation}
1255 if current_group == target_group:
1258 return _compute_fn(ipolicy, instance)
1261 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1262 _compute_fn=_ComputeIPolicyNodeViolation):
1263 """Checks that the target node is correct in terms of instance policy.
1265 @param ipolicy: The ipolicy to verify
1266 @param instance: The instance object to verify
1267 @param node: The new node to relocate
1268 @param ignore: Ignore violations of the ipolicy
1269 @param _compute_fn: The function to verify ipolicy (unittest only)
1270 @see: L{_ComputeIPolicySpecViolation}
1273 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1274 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1277 msg = ("Instance does not meet target node group's (%s) instance"
1278 " policy: %s") % (node.group, utils.CommaJoin(res))
1282 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1285 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1286 """Computes a set of any instances that would violate the new ipolicy.
1288 @param old_ipolicy: The current (still in-place) ipolicy
1289 @param new_ipolicy: The new (to become) ipolicy
1290 @param instances: List of instances to verify
1291   @return: A list of instances which violate the new ipolicy but
1295 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1296 _ComputeViolatingInstances(old_ipolicy, instances))
1299 def _ExpandItemName(fn, name, kind):
1300 """Expand an item name.
1302 @param fn: the function to use for expansion
1303 @param name: requested item name
1304 @param kind: text description ('Node' or 'Instance')
1305 @return: the resolved (full) name
1306 @raise errors.OpPrereqError: if the item is not found
1309 full_name = fn(name)
1310 if full_name is None:
1311 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1316 def _ExpandNodeName(cfg, name):
1317 """Wrapper over L{_ExpandItemName} for nodes."""
1318 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1321 def _ExpandInstanceName(cfg, name):
1322 """Wrapper over L{_ExpandItemName} for instance."""
1323 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1326 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1327 minmem, maxmem, vcpus, nics, disk_template, disks,
1328 bep, hvp, hypervisor_name, tags):
1329 """Builds instance related env variables for hooks
1331 This builds the hook environment from individual variables.
1334 @param name: the name of the instance
1335 @type primary_node: string
1336 @param primary_node: the name of the instance's primary node
1337 @type secondary_nodes: list
1338 @param secondary_nodes: list of secondary nodes as strings
1339 @type os_type: string
1340 @param os_type: the name of the instance's OS
1341 @type status: string
1342 @param status: the desired status of the instance
1343 @type minmem: string
1344 @param minmem: the minimum memory size of the instance
1345 @type maxmem: string
1346 @param maxmem: the maximum memory size of the instance
1348 @param vcpus: the count of VCPUs the instance has
1350 @param nics: list of tuples (ip, mac, mode, link) representing
1351 the NICs the instance has
1352 @type disk_template: string
1353 @param disk_template: the disk template of the instance
1355 @param disks: the list of (size, mode) pairs
1357 @param bep: the backend parameters for the instance
1359 @param hvp: the hypervisor parameters for the instance
1360 @type hypervisor_name: string
1361 @param hypervisor_name: the hypervisor for the instance
1363 @param tags: list of instance tags as strings
1365 @return: the hook environment for this instance
1370 "INSTANCE_NAME": name,
1371 "INSTANCE_PRIMARY": primary_node,
1372 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1373 "INSTANCE_OS_TYPE": os_type,
1374 "INSTANCE_STATUS": status,
1375 "INSTANCE_MINMEM": minmem,
1376 "INSTANCE_MAXMEM": maxmem,
1377 # TODO(2.7) remove deprecated "memory" value
1378 "INSTANCE_MEMORY": maxmem,
1379 "INSTANCE_VCPUS": vcpus,
1380 "INSTANCE_DISK_TEMPLATE": disk_template,
1381 "INSTANCE_HYPERVISOR": hypervisor_name,
1384 nic_count = len(nics)
1385 for idx, (ip, mac, mode, link) in enumerate(nics):
1388 env["INSTANCE_NIC%d_IP" % idx] = ip
1389 env["INSTANCE_NIC%d_MAC" % idx] = mac
1390 env["INSTANCE_NIC%d_MODE" % idx] = mode
1391 env["INSTANCE_NIC%d_LINK" % idx] = link
1392 if mode == constants.NIC_MODE_BRIDGED:
1393 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1397 env["INSTANCE_NIC_COUNT"] = nic_count
1400 disk_count = len(disks)
1401 for idx, (size, mode) in enumerate(disks):
1402 env["INSTANCE_DISK%d_SIZE" % idx] = size
1403 env["INSTANCE_DISK%d_MODE" % idx] = mode
1407 env["INSTANCE_DISK_COUNT"] = disk_count
1412 env["INSTANCE_TAGS"] = " ".join(tags)
1414 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1415 for key, value in source.items():
1416 env["INSTANCE_%s_%s" % (kind, key)] = value
1421 def _NICListToTuple(lu, nics):
1422 """Build a list of nic information tuples.
1424 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1425 value in LUInstanceQueryData.
1427 @type lu: L{LogicalUnit}
1428 @param lu: the logical unit on whose behalf we execute
1429 @type nics: list of L{objects.NIC}
1430 @param nics: list of nics to convert to hooks tuples
1434 cluster = lu.cfg.GetClusterInfo()
1438 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1439 mode = filled_params[constants.NIC_MODE]
1440 link = filled_params[constants.NIC_LINK]
1441 hooks_nics.append((ip, mac, mode, link))
1445 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1446 """Builds instance related env variables for hooks from an object.
1448 @type lu: L{LogicalUnit}
1449 @param lu: the logical unit on whose behalf we execute
1450 @type instance: L{objects.Instance}
1451 @param instance: the instance for which we should build the
1453 @type override: dict
1454 @param override: dictionary with key/values that will override
1457 @return: the hook environment dictionary
1460 cluster = lu.cfg.GetClusterInfo()
1461 bep = cluster.FillBE(instance)
1462 hvp = cluster.FillHV(instance)
1464 "name": instance.name,
1465 "primary_node": instance.primary_node,
1466 "secondary_nodes": instance.secondary_nodes,
1467 "os_type": instance.os,
1468 "status": instance.admin_state,
1469 "maxmem": bep[constants.BE_MAXMEM],
1470 "minmem": bep[constants.BE_MINMEM],
1471 "vcpus": bep[constants.BE_VCPUS],
1472 "nics": _NICListToTuple(lu, instance.nics),
1473 "disk_template": instance.disk_template,
1474 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1477 "hypervisor_name": instance.hypervisor,
1478 "tags": instance.tags,
1481 args.update(override)
1482 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1485 def _AdjustCandidatePool(lu, exceptions):
1486 """Adjust the candidate pool after node operations.
1489 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1491 lu.LogInfo("Promoted nodes to master candidate role: %s",
1492 utils.CommaJoin(node.name for node in mod_list))
1493 for name in mod_list:
1494 lu.context.ReaddNode(name)
1495 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1497 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1501 def _DecideSelfPromotion(lu, exceptions=None):
1502 """Decide whether I should promote myself as a master candidate.
1505 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1506 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1507   # the new node will increase mc_max by one, so:
1508 mc_should = min(mc_should + 1, cp_size)
1509 return mc_now < mc_should
1512 def _CalculateGroupIPolicy(cluster, group):
1513 """Calculate instance policy for group.
1516 return cluster.SimpleFillIPolicy(group.ipolicy)
1519 def _ComputeViolatingInstances(ipolicy, instances):
1520 """Computes a set of instances who violates given ipolicy.
1522 @param ipolicy: The ipolicy to verify
1523   @type instances: iterable of L{objects.Instance}
1524 @param instances: List of instances to verify
1525 @return: A frozenset of instance names violating the ipolicy
1528 return frozenset([inst.name for inst in instances
1529 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1532 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1533 """Check that the brigdes needed by a list of nics exist.
1536 cluster = lu.cfg.GetClusterInfo()
1537 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1538 brlist = [params[constants.NIC_LINK] for params in paramslist
1539 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1541 result = lu.rpc.call_bridges_exist(target_node, brlist)
1542 result.Raise("Error checking bridges on destination node '%s'" %
1543 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1546 def _CheckInstanceBridgesExist(lu, instance, node=None):
1547 """Check that the brigdes needed by an instance exist.
1551 node = instance.primary_node
1552 _CheckNicsBridgesExist(lu, instance.nics, node)
1555 def _CheckOSVariant(os_obj, name):
1556 """Check whether an OS name conforms to the os variants specification.
1558 @type os_obj: L{objects.OS}
1559 @param os_obj: OS object to check
1561 @param name: OS name passed by the user, to check for validity
1564 variant = objects.OS.GetVariant(name)
1565 if not os_obj.supported_variants:
1567 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1568 " passed)" % (os_obj.name, variant),
1572 raise errors.OpPrereqError("OS name must include a variant",
1575 if variant not in os_obj.supported_variants:
1576 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1579 def _GetNodeInstancesInner(cfg, fn):
1580 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1583 def _GetNodeInstances(cfg, node_name):
1584 """Returns a list of all primary and secondary instances on a node.
1588 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1591 def _GetNodePrimaryInstances(cfg, node_name):
1592 """Returns primary instances on a node.
1595 return _GetNodeInstancesInner(cfg,
1596 lambda inst: node_name == inst.primary_node)
1599 def _GetNodeSecondaryInstances(cfg, node_name):
1600 """Returns secondary instances on a node.
1603 return _GetNodeInstancesInner(cfg,
1604 lambda inst: node_name in inst.secondary_nodes)
1607 def _GetStorageTypeArgs(cfg, storage_type):
1608 """Returns the arguments for a storage type.
1611 # Special case for file storage
1612 if storage_type == constants.ST_FILE:
1613 # storage.FileStorage wants a list of storage directories
1614 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1619 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1622 for dev in instance.disks:
1623 cfg.SetDiskID(dev, node_name)
1625 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1627 result.Raise("Failed to get disk status from node %s" % node_name,
1628 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1630 for idx, bdev_status in enumerate(result.payload):
1631 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1637 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1638 """Check the sanity of iallocator and node arguments and use the
1639 cluster-wide iallocator if appropriate.
1641 Check that at most one of (iallocator, node) is specified. If none is
1642 specified, then the LU's opcode's iallocator slot is filled with the
1643 cluster-wide default iallocator.
1645 @type iallocator_slot: string
1646 @param iallocator_slot: the name of the opcode iallocator slot
1647 @type node_slot: string
1648 @param node_slot: the name of the opcode target node slot
1651 node = getattr(lu.op, node_slot, None)
1652 iallocator = getattr(lu.op, iallocator_slot, None)
1654 if node is not None and iallocator is not None:
1655 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1657 elif node is None and iallocator is None:
1658 default_iallocator = lu.cfg.GetDefaultIAllocator()
1659 if default_iallocator:
1660 setattr(lu.op, iallocator_slot, default_iallocator)
1662 raise errors.OpPrereqError("No iallocator or node given and no"
1663 " cluster-wide default iallocator found;"
1664 " please specify either an iallocator or a"
1665 " node, or set a cluster-wide default"
1669 def _GetDefaultIAllocator(cfg, iallocator):
1670 """Decides on which iallocator to use.
1672 @type cfg: L{config.ConfigWriter}
1673 @param cfg: Cluster configuration object
1674 @type iallocator: string or None
1675 @param iallocator: Iallocator specified in opcode
1677 @return: Iallocator name
1681 # Use default iallocator
1682 iallocator = cfg.GetDefaultIAllocator()
1685 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1686 " opcode nor as a cluster-wide default",
1692 class LUClusterPostInit(LogicalUnit):
1693 """Logical unit for running hooks after cluster initialization.
1696 HPATH = "cluster-init"
1697 HTYPE = constants.HTYPE_CLUSTER
1699 def BuildHooksEnv(self):
1704 "OP_TARGET": self.cfg.GetClusterName(),
1707 def BuildHooksNodes(self):
1708 """Build hooks nodes.
1711 return ([], [self.cfg.GetMasterNode()])
1713 def Exec(self, feedback_fn):
1720 class LUClusterDestroy(LogicalUnit):
1721 """Logical unit for destroying the cluster.
1724 HPATH = "cluster-destroy"
1725 HTYPE = constants.HTYPE_CLUSTER
1727 def BuildHooksEnv(self):
1732 "OP_TARGET": self.cfg.GetClusterName(),
1735 def BuildHooksNodes(self):
1736 """Build hooks nodes.
1741 def CheckPrereq(self):
1742 """Check prerequisites.
1744 This checks whether the cluster is empty.
1746 Any errors are signaled by raising errors.OpPrereqError.
1749 master = self.cfg.GetMasterNode()
1751 nodelist = self.cfg.GetNodeList()
1752 if len(nodelist) != 1 or nodelist[0] != master:
1753 raise errors.OpPrereqError("There are still %d node(s) in"
1754 " this cluster." % (len(nodelist) - 1),
1756 instancelist = self.cfg.GetInstanceList()
1758 raise errors.OpPrereqError("There are still %d instance(s) in"
1759 " this cluster." % len(instancelist),
1762 def Exec(self, feedback_fn):
1763 """Destroys the cluster.
1766 master_params = self.cfg.GetMasterNetworkParameters()
1768 # Run post hooks on master node before it's removed
1769 _RunPostHook(self, master_params.name)
1771 ems = self.cfg.GetUseExternalMipScript()
1772 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1775 self.LogWarning("Error disabling the master IP address: %s",
1778 return master_params.name
1781 def _VerifyCertificate(filename):
1782 """Verifies a certificate for L{LUClusterVerifyConfig}.
1784 @type filename: string
1785 @param filename: Path to PEM file
1789 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1790 utils.ReadFile(filename))
1791 except Exception, err: # pylint: disable=W0703
1792 return (LUClusterVerifyConfig.ETYPE_ERROR,
1793 "Failed to load X509 certificate %s: %s" % (filename, err))
1796 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1797 constants.SSL_CERT_EXPIRATION_ERROR)
1800 fnamemsg = "While verifying %s: %s" % (filename, msg)
1805 return (None, fnamemsg)
1806 elif errcode == utils.CERT_WARNING:
1807 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1808 elif errcode == utils.CERT_ERROR:
1809 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1811 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1814 def _GetAllHypervisorParameters(cluster, instances):
1815 """Compute the set of all hypervisor parameters.
1817 @type cluster: L{objects.Cluster}
1818 @param cluster: the cluster object
1819   @type instances: list of L{objects.Instance}
1820 @param instances: additional instances from which to obtain parameters
1821 @rtype: list of (origin, hypervisor, parameters)
1822 @return: a list with all parameters found, indicating the hypervisor they
1823 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1828 for hv_name in cluster.enabled_hypervisors:
1829 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1831 for os_name, os_hvp in cluster.os_hvp.items():
1832 for hv_name, hv_params in os_hvp.items():
1834 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1835 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1837 # TODO: collapse identical parameter values in a single one
1838 for instance in instances:
1839 if instance.hvparams:
1840 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1841 cluster.FillHV(instance)))
1846 class _VerifyErrors(object):
1847 """Mix-in for cluster/group verify LUs.
1849 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1850 self.op and self._feedback_fn to be available.)
1854 ETYPE_FIELD = "code"
1855 ETYPE_ERROR = "ERROR"
1856 ETYPE_WARNING = "WARNING"
1858 def _Error(self, ecode, item, msg, *args, **kwargs):
1859 """Format an error message.
1861 Based on the opcode's error_codes parameter, either format a
1862 parseable error code, or a simpler error string.
1864 This must be called only from Exec and functions called from Exec.
1867 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1868 itype, etxt, _ = ecode
1869 # first complete the msg
1872 # then format the whole message
1873 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1874 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1880 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1881 # and finally report it via the feedback_fn
1882 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1884 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1885 """Log an error message if the passed condition is True.
1889 or self.op.debug_simulate_errors) # pylint: disable=E1101
1891 # If the error code is in the list of ignored errors, demote the error to a
1893 (_, etxt, _) = ecode
1894 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1895 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1898 self._Error(ecode, *args, **kwargs)
1900 # do not mark the operation as failed for WARN cases only
1901 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1902 self.bad = self.bad or cond
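# Illustrative sketch (hypothetical condition and message names): how the
# verify LUs below use _ErrorIf; the optional "code" keyword (ETYPE_FIELD)
# selects ETYPE_ERROR or ETYPE_WARNING.
#
#   self._ErrorIf(test, constants.CV_ECLUSTERCFG, None,
#                 "configuration problem: %s", details)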
1905 class LUClusterVerify(NoHooksLU):
1906 """Submits all jobs necessary to verify the cluster.
1911 def ExpandNames(self):
1912 self.needed_locks = {}
1914 def Exec(self, feedback_fn):
1917 if self.op.group_name:
1918 groups = [self.op.group_name]
1919 depends_fn = lambda: None
1921 groups = self.cfg.GetNodeGroupList()
1923 # Verify global configuration
1925 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1928 # Always depend on global verification
1929 depends_fn = lambda: [(-len(jobs), [])]
1931 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1932 ignore_errors=self.op.ignore_errors,
1933 depends=depends_fn())]
1934 for group in groups)
1936 # Fix up all parameters
1937 for op in itertools.chain(*jobs): # pylint: disable=W0142
1938 op.debug_simulate_errors = self.op.debug_simulate_errors
1939 op.verbose = self.op.verbose
1940 op.error_codes = self.op.error_codes
1942 op.skip_checks = self.op.skip_checks
1943 except AttributeError:
1944 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1946 return ResultWithJobs(jobs)
1949 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1950 """Verifies the cluster config.
1955 def _VerifyHVP(self, hvp_data):
1956 """Verifies locally the syntax of the hypervisor parameters.
1959 for item, hv_name, hv_params in hvp_data:
1960 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1963 hv_class = hypervisor.GetHypervisor(hv_name)
1964 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1965 hv_class.CheckParameterSyntax(hv_params)
1966 except errors.GenericError, err:
1967 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1969 def ExpandNames(self):
1970 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1971 self.share_locks = _ShareAll()
1973 def CheckPrereq(self):
1974 """Check prerequisites.
1977 # Retrieve all information
1978 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1979 self.all_node_info = self.cfg.GetAllNodesInfo()
1980 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1982 def Exec(self, feedback_fn):
1983 """Verify integrity of cluster, performing various test on nodes.
1987 self._feedback_fn = feedback_fn
1989 feedback_fn("* Verifying cluster config")
1991 for msg in self.cfg.VerifyConfig():
1992 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1994 feedback_fn("* Verifying cluster certificate files")
1996 for cert_filename in constants.ALL_CERT_FILES:
1997 (errcode, msg) = _VerifyCertificate(cert_filename)
1998 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2000 feedback_fn("* Verifying hypervisor parameters")
2002 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2003 self.all_inst_info.values()))
2005 feedback_fn("* Verifying all nodes belong to an existing group")
2007 # We do this verification here because, should this bogus circumstance
2008 # occur, it would never be caught by VerifyGroup, which only acts on
2009 # nodes/instances reachable from existing node groups.
2011 dangling_nodes = set(node.name for node in self.all_node_info.values()
2012 if node.group not in self.all_group_info)
2014 dangling_instances = {}
2015 no_node_instances = []
2017 for inst in self.all_inst_info.values():
2018 if inst.primary_node in dangling_nodes:
2019 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2020 elif inst.primary_node not in self.all_node_info:
2021 no_node_instances.append(inst.name)
2026 utils.CommaJoin(dangling_instances.get(node.name,
2028 for node in dangling_nodes]
2030 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2032 "the following nodes (and their instances) belong to a non"
2033 " existing group: %s", utils.CommaJoin(pretty_dangling))
2035 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2037 "the following instances have a non-existing primary-node:"
2038 " %s", utils.CommaJoin(no_node_instances))
2043 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2044 """Verifies the status of a node group.
2047 HPATH = "cluster-verify"
2048 HTYPE = constants.HTYPE_CLUSTER
2051 _HOOKS_INDENT_RE = re.compile("^", re.M)
2053 class NodeImage(object):
2054 """A class representing the logical and physical status of a node.
2057 @ivar name: the node name to which this object refers
2058 @ivar volumes: a structure as returned from
2059 L{ganeti.backend.GetVolumeList} (runtime)
2060 @ivar instances: a list of running instances (runtime)
2061 @ivar pinst: list of configured primary instances (config)
2062 @ivar sinst: list of configured secondary instances (config)
2063 @ivar sbp: dictionary of {primary-node: list of instances} for all
2064 instances for which this node is secondary (config)
2065 @ivar mfree: free memory, as reported by hypervisor (runtime)
2066 @ivar dfree: free disk, as reported by the node (runtime)
2067 @ivar offline: the offline status (config)
2068 @type rpc_fail: boolean
2069 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2070 not whether the individual keys were correct) (runtime)
2071 @type lvm_fail: boolean
2072 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2073 @type hyp_fail: boolean
2074 @ivar hyp_fail: whether the RPC call didn't return the instance list
2075 @type ghost: boolean
2076 @ivar ghost: whether this is a ghost node, i.e. unknown to the config (config)
2077 @type os_fail: boolean
2078 @ivar os_fail: whether the RPC call didn't return valid OS data
2080 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2081 @type vm_capable: boolean
2082 @ivar vm_capable: whether the node can host instances
2085 def __init__(self, offline=False, name=None, vm_capable=True):
2094 self.offline = offline
2095 self.vm_capable = vm_capable
2096 self.rpc_fail = False
2097 self.lvm_fail = False
2098 self.hyp_fail = False
2100 self.os_fail = False
2103 def ExpandNames(self):
2104 # This raises errors.OpPrereqError on its own:
2105 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2107 # Get instances in node group; this is unsafe and needs verification later
2109 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2111 self.needed_locks = {
2112 locking.LEVEL_INSTANCE: inst_names,
2113 locking.LEVEL_NODEGROUP: [self.group_uuid],
2114 locking.LEVEL_NODE: [],
2117 self.share_locks = _ShareAll()
2119 def DeclareLocks(self, level):
2120 if level == locking.LEVEL_NODE:
2121 # Get members of node group; this is unsafe and needs verification later
2122 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2124 all_inst_info = self.cfg.GetAllInstancesInfo()
2126 # In Exec(), we warn about mirrored instances that have primary and
2127 # secondary living in separate node groups. To fully verify that
2128 # volumes for these instances are healthy, we will need to do an
2129 # extra call to their secondaries. We ensure here those nodes will
2131 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2132 # Important: access only the instances whose lock is owned
2133 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2134 nodes.update(all_inst_info[inst].secondary_nodes)
2136 self.needed_locks[locking.LEVEL_NODE] = nodes
2138 def CheckPrereq(self):
2139 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2140 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2142 group_nodes = set(self.group_info.members)
2144 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2147 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2149 unlocked_instances = \
2150 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2153 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2154 utils.CommaJoin(unlocked_nodes),
2157 if unlocked_instances:
2158 raise errors.OpPrereqError("Missing lock for instances: %s" %
2159 utils.CommaJoin(unlocked_instances),
2162 self.all_node_info = self.cfg.GetAllNodesInfo()
2163 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2165 self.my_node_names = utils.NiceSort(group_nodes)
2166 self.my_inst_names = utils.NiceSort(group_instances)
2168 self.my_node_info = dict((name, self.all_node_info[name])
2169 for name in self.my_node_names)
2171 self.my_inst_info = dict((name, self.all_inst_info[name])
2172 for name in self.my_inst_names)
2174 # We detect here the nodes that will need the extra RPC calls for verifying
2175 # split LV volumes; they should be locked.
2176 extra_lv_nodes = set()
2178 for inst in self.my_inst_info.values():
2179 if inst.disk_template in constants.DTS_INT_MIRROR:
2180 for nname in inst.all_nodes:
2181 if self.all_node_info[nname].group != self.group_uuid:
2182 extra_lv_nodes.add(nname)
2184 unlocked_lv_nodes = \
2185 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2187 if unlocked_lv_nodes:
2188 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2189 utils.CommaJoin(unlocked_lv_nodes),
2191 self.extra_lv_nodes = list(extra_lv_nodes)
2193 def _VerifyNode(self, ninfo, nresult):
2194 """Perform some basic validation on data returned from a node.
2196 - check the result data structure is well formed and has all the expected keys
2198 - check ganeti version
2200 @type ninfo: L{objects.Node}
2201 @param ninfo: the node to check
2202 @param nresult: the results from the node
2204 @return: whether overall this call was successful (and we can expect
2205 reasonable values in the response)
2209 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2211 # main result, nresult should be a non-empty dict
2212 test = not nresult or not isinstance(nresult, dict)
2213 _ErrorIf(test, constants.CV_ENODERPC, node,
2214 "unable to verify node: no data returned")
2218 # compares ganeti version
2219 local_version = constants.PROTOCOL_VERSION
2220 remote_version = nresult.get("version", None)
2221 test = not (remote_version and
2222 isinstance(remote_version, (list, tuple)) and
2223 len(remote_version) == 2)
2224 _ErrorIf(test, constants.CV_ENODERPC, node,
2225 "connection to node returned invalid data")
2229 test = local_version != remote_version[0]
2230 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2231 "incompatible protocol versions: master %s,"
2232 " node %s", local_version, remote_version[0])
2236 # node seems compatible, we can actually try to look into its results
2238 # full package version
2239 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2240 constants.CV_ENODEVERSION, node,
2241 "software version mismatch: master %s, node %s",
2242 constants.RELEASE_VERSION, remote_version[1],
2243 code=self.ETYPE_WARNING)
2245 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2246 if ninfo.vm_capable and isinstance(hyp_result, dict):
2247 for hv_name, hv_result in hyp_result.iteritems():
2248 test = hv_result is not None
2249 _ErrorIf(test, constants.CV_ENODEHV, node,
2250 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2252 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2253 if ninfo.vm_capable and isinstance(hvp_result, list):
2254 for item, hv_name, hv_result in hvp_result:
2255 _ErrorIf(True, constants.CV_ENODEHV, node,
2256 "hypervisor %s parameter verify failure (source %s): %s",
2257 hv_name, item, hv_result)
2259 test = nresult.get(constants.NV_NODESETUP,
2260 ["Missing NODESETUP results"])
2261 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2266 def _VerifyNodeTime(self, ninfo, nresult,
2267 nvinfo_starttime, nvinfo_endtime):
2268 """Check the node time.
2270 @type ninfo: L{objects.Node}
2271 @param ninfo: the node to check
2272 @param nresult: the remote results for the node
2273 @param nvinfo_starttime: the start time of the RPC call
2274 @param nvinfo_endtime: the end time of the RPC call
2278 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2280 ntime = nresult.get(constants.NV_TIME, None)
2282 ntime_merged = utils.MergeTime(ntime)
2283 except (ValueError, TypeError):
2284 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2287 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2288 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2289 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2290 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2294 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2295 "Node time diverges by at least %s from master node time",
2298 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2299 """Check the node LVM results.
2301 @type ninfo: L{objects.Node}
2302 @param ninfo: the node to check
2303 @param nresult: the remote results for the node
2304 @param vg_name: the configured VG name
2311 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2313 # checks vg existence and size > 20G
2314 vglist = nresult.get(constants.NV_VGLIST, None)
2316 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2318 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2319 constants.MIN_VG_SIZE)
2320 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2323 pvlist = nresult.get(constants.NV_PVLIST, None)
2324 test = pvlist is None
2325 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2327 # check that ':' is not present in PV names, since it's a
2328 # special character for lvcreate (denotes the range of PEs to
2330 for _, pvname, owner_vg in pvlist:
2331 test = ":" in pvname
2332 _ErrorIf(test, constants.CV_ENODELVM, node,
2333 "Invalid character ':' in PV '%s' of VG '%s'",
2336 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2337 """Check the node bridges.
2339 @type ninfo: L{objects.Node}
2340 @param ninfo: the node to check
2341 @param nresult: the remote results for the node
2342 @param bridges: the expected list of bridges
2349 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2351 missing = nresult.get(constants.NV_BRIDGES, None)
2352 test = not isinstance(missing, list)
2353 _ErrorIf(test, constants.CV_ENODENET, node,
2354 "did not return valid bridge information")
2356 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2357 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2359 def _VerifyNodeUserScripts(self, ninfo, nresult):
2360 """Check the results of user scripts presence and executability on the node
2362 @type ninfo: L{objects.Node}
2363 @param ninfo: the node to check
2364 @param nresult: the remote results for the node
2369 test = not constants.NV_USERSCRIPTS in nresult
2370 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2371 "did not return user scripts information")
2373 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2375 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2376 "user scripts not present or not executable: %s" %
2377 utils.CommaJoin(sorted(broken_scripts)))
2379 def _VerifyNodeNetwork(self, ninfo, nresult):
2380 """Check the node network connectivity results.
2382 @type ninfo: L{objects.Node}
2383 @param ninfo: the node to check
2384 @param nresult: the remote results for the node
2388 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2390 test = constants.NV_NODELIST not in nresult
2391 _ErrorIf(test, constants.CV_ENODESSH, node,
2392 "node hasn't returned node ssh connectivity data")
2394 if nresult[constants.NV_NODELIST]:
2395 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2396 _ErrorIf(True, constants.CV_ENODESSH, node,
2397 "ssh communication with node '%s': %s", a_node, a_msg)
2399 test = constants.NV_NODENETTEST not in nresult
2400 _ErrorIf(test, constants.CV_ENODENET, node,
2401 "node hasn't returned node tcp connectivity data")
2403 if nresult[constants.NV_NODENETTEST]:
2404 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2406 _ErrorIf(True, constants.CV_ENODENET, node,
2407 "tcp communication with node '%s': %s",
2408 anode, nresult[constants.NV_NODENETTEST][anode])
2410 test = constants.NV_MASTERIP not in nresult
2411 _ErrorIf(test, constants.CV_ENODENET, node,
2412 "node hasn't returned node master IP reachability data")
2414 if not nresult[constants.NV_MASTERIP]:
2415 if node == self.master_node:
2416 msg = "the master node cannot reach the master IP (not configured?)"
2418 msg = "cannot reach the master IP"
2419 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2421 def _VerifyInstance(self, instance, instanceconfig, node_image,
2423 """Verify an instance.
2425 This function checks to see if the required block devices are
2426 available on the instance's node.
2429 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2430 node_current = instanceconfig.primary_node
2432 node_vol_should = {}
2433 instanceconfig.MapLVsByNode(node_vol_should)
2435 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2436 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2437 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2439 for node in node_vol_should:
2440 n_img = node_image[node]
2441 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2442 # ignore missing volumes on offline or broken nodes
2444 for volume in node_vol_should[node]:
2445 test = volume not in n_img.volumes
2446 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2447 "volume %s missing on node %s", volume, node)
2449 if instanceconfig.admin_state == constants.ADMINST_UP:
2450 pri_img = node_image[node_current]
2451 test = instance not in pri_img.instances and not pri_img.offline
2452 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2453 "instance not running on its primary node %s",
2456 diskdata = [(nname, success, status, idx)
2457 for (nname, disks) in diskstatus.items()
2458 for idx, (success, status) in enumerate(disks)]
2460 for nname, success, bdev_status, idx in diskdata:
2461 # the 'ghost node' construction in Exec() ensures that we have a
2463 snode = node_image[nname]
2464 bad_snode = snode.ghost or snode.offline
2465 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2466 not success and not bad_snode,
2467 constants.CV_EINSTANCEFAULTYDISK, instance,
2468 "couldn't retrieve status for disk/%s on %s: %s",
2469 idx, nname, bdev_status)
2470 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2471 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2472 constants.CV_EINSTANCEFAULTYDISK, instance,
2473 "disk/%s on %s is faulty", idx, nname)
2475 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2476 """Verify if there are any unknown volumes in the cluster.
2478 The .os, .swap and backup volumes are ignored. All other volumes are
2479 reported as unknown.
2481 @type reserved: L{ganeti.utils.FieldSet}
2482 @param reserved: a FieldSet of reserved volume names
2485 for node, n_img in node_image.items():
2486 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2487 self.all_node_info[node].group != self.group_uuid):
2488 # skip non-healthy nodes
2490 for volume in n_img.volumes:
2491 test = ((node not in node_vol_should or
2492 volume not in node_vol_should[node]) and
2493 not reserved.Matches(volume))
2494 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2495 "volume %s is unknown", volume)
2497 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2498 """Verify N+1 Memory Resilience.
2500 Check that if one single node dies we can still start all the
2501 instances it was primary for.
2504 cluster_info = self.cfg.GetClusterInfo()
2505 for node, n_img in node_image.items():
2506 # This code checks that every node which is now listed as
2507 # secondary has enough memory to host all instances it is
2508 # supposed to should a single other node in the cluster fail.
2509 # FIXME: not ready for failover to an arbitrary node
2510 # FIXME: does not support file-backed instances
2511 # WARNING: we currently take into account down instances as well
2512 # as up ones, considering that even if they're down someone
2513 # might want to start them even in the event of a node failure.
2514 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2515 # we're skipping nodes marked offline and nodes in other groups from
2516 # the N+1 warning, since most likely we don't have good memory
2517 # information from them; we already list instances living on such
2518 # nodes, and that's enough warning
2520 #TODO(dynmem): also consider ballooning out other instances
2521 for prinode, instances in n_img.sbp.items():
2523 for instance in instances:
2524 bep = cluster_info.FillBE(instance_cfg[instance])
2525 if bep[constants.BE_AUTO_BALANCE]:
2526 needed_mem += bep[constants.BE_MINMEM]
2527 test = n_img.mfree < needed_mem
2528 self._ErrorIf(test, constants.CV_ENODEN1, node,
2529 "not enough memory to accomodate instance failovers"
2530 " should node %s fail (%dMiB needed, %dMiB available)",
2531 prinode, needed_mem, n_img.mfree)
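# Rough illustration of the check above: n_img.sbp maps each primary node to
# the instances that have this node as secondary; for every such primary, the
# BE_MINMEM of its auto-balanced instances is summed and compared against
# n_img.mfree, so e.g. two auto-balanced instances with minmem 1024 and
# 2048 MiB would need at least 3072 MiB free on this node.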
2534 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2535 (files_all, files_opt, files_mc, files_vm)):
2536 """Verifies file checksums collected from all nodes.
2538 @param errorif: Callback for reporting errors
2539 @param nodeinfo: List of L{objects.Node} objects
2540 @param master_node: Name of master node
2541 @param all_nvinfo: RPC results
2544 # Define functions determining which nodes to consider for a file
2547 (files_mc, lambda node: (node.master_candidate or
2548 node.name == master_node)),
2549 (files_vm, lambda node: node.vm_capable),
2552 # Build mapping from filename to list of nodes which should have the file
2554 for (files, fn) in files2nodefn:
2556 filenodes = nodeinfo
2558 filenodes = filter(fn, nodeinfo)
2559 nodefiles.update((filename,
2560 frozenset(map(operator.attrgetter("name"), filenodes)))
2561 for filename in files)
2563 assert set(nodefiles) == (files_all | files_mc | files_vm)
2565 fileinfo = dict((filename, {}) for filename in nodefiles)
2566 ignore_nodes = set()
2568 for node in nodeinfo:
2570 ignore_nodes.add(node.name)
2573 nresult = all_nvinfo[node.name]
2575 if nresult.fail_msg or not nresult.payload:
2578 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2580 test = not (node_files and isinstance(node_files, dict))
2581 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2582 "Node did not return file checksum data")
2584 ignore_nodes.add(node.name)
2587 # Build per-checksum mapping from filename to nodes having it
2588 for (filename, checksum) in node_files.items():
2589 assert filename in nodefiles
2590 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2592 for (filename, checksums) in fileinfo.items():
2593 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2595 # Nodes having the file
2596 with_file = frozenset(node_name
2597 for nodes in fileinfo[filename].values()
2598 for node_name in nodes) - ignore_nodes
2600 expected_nodes = nodefiles[filename] - ignore_nodes
2602 # Nodes missing file
2603 missing_file = expected_nodes - with_file
2605 if filename in files_opt:
2607 errorif(missing_file and missing_file != expected_nodes,
2608 constants.CV_ECLUSTERFILECHECK, None,
2609 "File %s is optional, but it must exist on all or no"
2610 " nodes (not found on %s)",
2611 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2613 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2614 "File %s is missing from node(s) %s", filename,
2615 utils.CommaJoin(utils.NiceSort(missing_file)))
2617 # Warn if a node has a file it shouldn't
2618 unexpected = with_file - expected_nodes
2620 constants.CV_ECLUSTERFILECHECK, None,
2621 "File %s should not exist on node(s) %s",
2622 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2624 # See if there are multiple versions of the file
2625 test = len(checksums) > 1
2627 variants = ["variant %s on %s" %
2628 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2629 for (idx, (checksum, nodes)) in
2630 enumerate(sorted(checksums.items()))]
2634 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2635 "File %s found with %s different checksums (%s)",
2636 filename, len(checksums), "; ".join(variants))
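# Summary of the rules applied above: mandatory files must be present on every
# expected node, optional files must exist either on all expected nodes or on
# none, unexpected copies are flagged, and any file seen with more than one
# checksum is reported together with its per-node variants.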
2638 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2640 """Verifies and the node DRBD status.
2642 @type ninfo: L{objects.Node}
2643 @param ninfo: the node to check
2644 @param nresult: the remote results for the node
2645 @param instanceinfo: the dict of instances
2646 @param drbd_helper: the configured DRBD usermode helper
2647 @param drbd_map: the DRBD map as returned by
2648 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2652 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2655 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2656 test = (helper_result is None)
2657 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2658 "no drbd usermode helper returned")
2660 status, payload = helper_result
2662 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2663 "drbd usermode helper check unsuccessful: %s", payload)
2664 test = status and (payload != drbd_helper)
2665 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2666 "wrong drbd usermode helper: %s", payload)
2668 # compute the DRBD minors
2670 for minor, instance in drbd_map[node].items():
2671 test = instance not in instanceinfo
2672 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2673 "ghost instance '%s' in temporary DRBD map", instance)
2674 # ghost instance should not be running, but otherwise we
2675 # don't give double warnings (both ghost instance and
2676 # unallocated minor in use)
2678 node_drbd[minor] = (instance, False)
2680 instance = instanceinfo[instance]
2681 node_drbd[minor] = (instance.name,
2682 instance.admin_state == constants.ADMINST_UP)
2684 # and now check them
2685 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2686 test = not isinstance(used_minors, (tuple, list))
2687 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2688 "cannot parse drbd status file: %s", str(used_minors))
2690 # we cannot check drbd status
2693 for minor, (iname, must_exist) in node_drbd.items():
2694 test = minor not in used_minors and must_exist
2695 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2696 "drbd minor %d of instance %s is not active", minor, iname)
2697 for minor in used_minors:
2698 test = minor not in node_drbd
2699 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2700 "unallocated drbd minor %d is in use", minor)
2702 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2703 """Builds the node OS structures.
2705 @type ninfo: L{objects.Node}
2706 @param ninfo: the node to check
2707 @param nresult: the remote results for the node
2708 @param nimg: the node image object
2712 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2714 remote_os = nresult.get(constants.NV_OSLIST, None)
2715 test = (not isinstance(remote_os, list) or
2716 not compat.all(isinstance(v, list) and len(v) == 7
2717 for v in remote_os))
2719 _ErrorIf(test, constants.CV_ENODEOS, node,
2720 "node hasn't returned valid OS data")
2729 for (name, os_path, status, diagnose,
2730 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2732 if name not in os_dict:
2735 # parameters is a list of lists instead of list of tuples due to
2736 # JSON lacking a real tuple type, fix it:
2737 parameters = [tuple(v) for v in parameters]
2738 os_dict[name].append((os_path, status, diagnose,
2739 set(variants), set(parameters), set(api_ver)))
2741 nimg.oslist = os_dict
2743 def _VerifyNodeOS(self, ninfo, nimg, base):
2744 """Verifies the node OS list.
2746 @type ninfo: L{objects.Node}
2747 @param ninfo: the node to check
2748 @param nimg: the node image object
2749 @param base: the 'template' node we match against (e.g. from the master)
2753 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2755 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2757 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2758 for os_name, os_data in nimg.oslist.items():
2759 assert os_data, "Empty OS status for OS %s?!" % os_name
2760 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2761 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2762 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2763 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2764 "OS '%s' has multiple entries (first one shadows the rest): %s",
2765 os_name, utils.CommaJoin([v[0] for v in os_data]))
2766 # comparisons with the 'base' image
2767 test = os_name not in base.oslist
2768 _ErrorIf(test, constants.CV_ENODEOS, node,
2769 "Extra OS %s not present on reference node (%s)",
2773 assert base.oslist[os_name], "Base node has empty OS status?"
2774 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2776 # base OS is invalid, skipping
2778 for kind, a, b in [("API version", f_api, b_api),
2779 ("variants list", f_var, b_var),
2780 ("parameters", beautify_params(f_param),
2781 beautify_params(b_param))]:
2782 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2783 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2784 kind, os_name, base.name,
2785 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2787 # check any missing OSes
2788 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2789 _ErrorIf(missing, constants.CV_ENODEOS, node,
2790 "OSes present on reference node %s but missing on this node: %s",
2791 base.name, utils.CommaJoin(missing))
2793 def _VerifyOob(self, ninfo, nresult):
2794 """Verifies out of band functionality of a node.
2796 @type ninfo: L{objects.Node}
2797 @param ninfo: the node to check
2798 @param nresult: the remote results for the node
2802 # We just have to verify the paths on master and/or master candidates
2803 # as the oob helper is invoked on the master
2804 if ((ninfo.master_candidate or ninfo.master_capable) and
2805 constants.NV_OOB_PATHS in nresult):
2806 for path_result in nresult[constants.NV_OOB_PATHS]:
2807 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2809 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2810 """Verifies and updates the node volume data.
2812 This function will update a L{NodeImage}'s internal structures
2813 with data from the remote call.
2815 @type ninfo: L{objects.Node}
2816 @param ninfo: the node to check
2817 @param nresult: the remote results for the node
2818 @param nimg: the node image object
2819 @param vg_name: the configured VG name
2823 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2825 nimg.lvm_fail = True
2826 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2829 elif isinstance(lvdata, basestring):
2830 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2831 utils.SafeEncode(lvdata))
2832 elif not isinstance(lvdata, dict):
2833 _ErrorIf(True, constants.CV_ENODELVM, node,
2834 "rpc call to node failed (lvlist)")
2836 nimg.volumes = lvdata
2837 nimg.lvm_fail = False
2839 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2840 """Verifies and updates the node instance list.
2842 If the listing was successful, then updates this node's instance
2843 list. Otherwise, it marks the RPC call as failed for the instance
2846 @type ninfo: L{objects.Node}
2847 @param ninfo: the node to check
2848 @param nresult: the remote results for the node
2849 @param nimg: the node image object
2852 idata = nresult.get(constants.NV_INSTANCELIST, None)
2853 test = not isinstance(idata, list)
2854 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2855 "rpc call to node failed (instancelist): %s",
2856 utils.SafeEncode(str(idata)))
2858 nimg.hyp_fail = True
2860 nimg.instances = idata
2862 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2863 """Verifies and computes a node information map
2865 @type ninfo: L{objects.Node}
2866 @param ninfo: the node to check
2867 @param nresult: the remote results for the node
2868 @param nimg: the node image object
2869 @param vg_name: the configured VG name
2873 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2875 # try to read free memory (from the hypervisor)
2876 hv_info = nresult.get(constants.NV_HVINFO, None)
2877 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2878 _ErrorIf(test, constants.CV_ENODEHV, node,
2879 "rpc call to node failed (hvinfo)")
2882 nimg.mfree = int(hv_info["memory_free"])
2883 except (ValueError, TypeError):
2884 _ErrorIf(True, constants.CV_ENODERPC, node,
2885 "node returned invalid nodeinfo, check hypervisor")
2887 # FIXME: devise a free space model for file based instances as well
2888 if vg_name is not None:
2889 test = (constants.NV_VGLIST not in nresult or
2890 vg_name not in nresult[constants.NV_VGLIST])
2891 _ErrorIf(test, constants.CV_ENODELVM, node,
2892 "node didn't return data for the volume group '%s'"
2893 " - it is either missing or broken", vg_name)
2896 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2897 except (ValueError, TypeError):
2898 _ErrorIf(True, constants.CV_ENODERPC, node,
2899 "node returned invalid LVM info, check LVM status")
2901 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2902 """Gets per-disk status information for all instances.
2904 @type nodelist: list of strings
2905 @param nodelist: Node names
2906 @type node_image: dict of (name, L{objects.Node})
2907 @param node_image: Node objects
2908 @type instanceinfo: dict of (name, L{objects.Instance})
2909 @param instanceinfo: Instance objects
2910 @rtype: {instance: {node: [(success, payload)]}}
2911 @return: a dictionary of per-instance dictionaries with nodes as
2912 keys and disk information as values; the disk information is a
2913 list of tuples (success, payload)
2916 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2919 node_disks_devonly = {}
2920 diskless_instances = set()
2921 diskless = constants.DT_DISKLESS
2923 for nname in nodelist:
2924 node_instances = list(itertools.chain(node_image[nname].pinst,
2925 node_image[nname].sinst))
2926 diskless_instances.update(inst for inst in node_instances
2927 if instanceinfo[inst].disk_template == diskless)
2928 disks = [(inst, disk)
2929 for inst in node_instances
2930 for disk in instanceinfo[inst].disks]
2933 # No need to collect data
2936 node_disks[nname] = disks
2938 # _AnnotateDiskParams makes already copies of the disks
2940 for (inst, dev) in disks:
2941 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2942 self.cfg.SetDiskID(anno_disk, nname)
2943 devonly.append(anno_disk)
2945 node_disks_devonly[nname] = devonly
2947 assert len(node_disks) == len(node_disks_devonly)
2949 # Collect data from all nodes with disks
2950 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2953 assert len(result) == len(node_disks)
2957 for (nname, nres) in result.items():
2958 disks = node_disks[nname]
2961 # No data from this node
2962 data = len(disks) * [(False, "node offline")]
2965 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2966 "while getting disk information: %s", msg)
2968 # No data from this node
2969 data = len(disks) * [(False, msg)]
2972 for idx, i in enumerate(nres.payload):
2973 if isinstance(i, (tuple, list)) and len(i) == 2:
2976 logging.warning("Invalid result from node %s, entry %d: %s",
2978 data.append((False, "Invalid result from the remote node"))
2980 for ((inst, _), status) in zip(disks, data):
2981 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2983 # Add empty entries for diskless instances.
2984 for inst in diskless_instances:
2985 assert inst not in instdisk
2988 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2989 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2990 compat.all(isinstance(s, (tuple, list)) and
2991 len(s) == 2 for s in statuses)
2992 for inst, nnames in instdisk.items()
2993 for nname, statuses in nnames.items())
2994 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
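# Illustrative shape of the instdisk mapping built by _CollectDiskInfo above,
# using hypothetical instance/node names:
#   {"inst1": {"node1": [(True, status_disk0), (True, status_disk1)]},
#    "inst2": {"node2": [(False, "node offline")]}}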
2999 def _SshNodeSelector(group_uuid, all_nodes):
3000 """Create endless iterators for all potential SSH check hosts.
3003 nodes = [node for node in all_nodes
3004 if (node.group != group_uuid and
3006 keyfunc = operator.attrgetter("group")
3008 return map(itertools.cycle,
3009 [sorted(map(operator.attrgetter("name"), names))
3010 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3014 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3015 """Choose which nodes should talk to which other nodes.
3017 We will make nodes contact all nodes in their group, and one node from every other node group.
3020 @warning: This algorithm has a known issue if one node group is much
3021 smaller than others (e.g. just one node). In such a case all other
3022 nodes will talk to the single node.
3025 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3026 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3028 return (online_nodes,
3029 dict((name, sorted([i.next() for i in sel]))
3030 for name in online_nodes))
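# Hypothetical example: verifying a group whose online nodes are
# {node-a1, node-a2} while another group holds {node-b1, node-b2} yields a
# per-node mapping such as
#   {"node-a1": ["node-b1"], "node-a2": ["node-b2"]}
# i.e. one target per foreign group, cycled so different local nodes probe
# different foreign nodes.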
3032 def BuildHooksEnv(self):
3035 Cluster-Verify hooks just ran in the post phase and their failure makes
3036 the output be logged in the verify output and the verification to fail.
3040 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3043 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3044 for node in self.my_node_info.values())
3048 def BuildHooksNodes(self):
3049 """Build hooks nodes.
3052 return ([], self.my_node_names)
3054 def Exec(self, feedback_fn):
3055 """Verify integrity of the node group, performing various test on nodes.
3058 # This method has too many local variables. pylint: disable=R0914
3059 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3061 if not self.my_node_names:
3063 feedback_fn("* Empty node group, skipping verification")
3067 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3068 verbose = self.op.verbose
3069 self._feedback_fn = feedback_fn
3071 vg_name = self.cfg.GetVGName()
3072 drbd_helper = self.cfg.GetDRBDHelper()
3073 cluster = self.cfg.GetClusterInfo()
3074 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3075 hypervisors = cluster.enabled_hypervisors
3076 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3078 i_non_redundant = [] # Non redundant instances
3079 i_non_a_balanced = [] # Non auto-balanced instances
3080 i_offline = 0 # Count of offline instances
3081 n_offline = 0 # Count of offline nodes
3082 n_drained = 0 # Count of nodes being drained
3083 node_vol_should = {}
3085 # FIXME: verify OS list
3088 filemap = _ComputeAncillaryFiles(cluster, False)
3090 # do local checksums
3091 master_node = self.master_node = self.cfg.GetMasterNode()
3092 master_ip = self.cfg.GetMasterIP()
3094 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3097 if self.cfg.GetUseExternalMipScript():
3098 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3100 node_verify_param = {
3101 constants.NV_FILELIST:
3102 utils.UniqueSequence(filename
3103 for files in filemap
3104 for filename in files),
3105 constants.NV_NODELIST:
3106 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3107 self.all_node_info.values()),
3108 constants.NV_HYPERVISOR: hypervisors,
3109 constants.NV_HVPARAMS:
3110 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3111 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3112 for node in node_data_list
3113 if not node.offline],
3114 constants.NV_INSTANCELIST: hypervisors,
3115 constants.NV_VERSION: None,
3116 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3117 constants.NV_NODESETUP: None,
3118 constants.NV_TIME: None,
3119 constants.NV_MASTERIP: (master_node, master_ip),
3120 constants.NV_OSLIST: None,
3121 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3122 constants.NV_USERSCRIPTS: user_scripts,
3125 if vg_name is not None:
3126 node_verify_param[constants.NV_VGLIST] = None
3127 node_verify_param[constants.NV_LVLIST] = vg_name
3128 node_verify_param[constants.NV_PVLIST] = [vg_name]
3129 node_verify_param[constants.NV_DRBDLIST] = None
3132 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3135 # FIXME: this needs to be changed per node-group, not cluster-wide
3137 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3138 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3139 bridges.add(default_nicpp[constants.NIC_LINK])
3140 for instance in self.my_inst_info.values():
3141 for nic in instance.nics:
3142 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3143 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3144 bridges.add(full_nic[constants.NIC_LINK])
3147 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3149 # Build our expected cluster state
3150 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3152 vm_capable=node.vm_capable))
3153 for node in node_data_list)
3157 for node in self.all_node_info.values():
3158 path = _SupportsOob(self.cfg, node)
3159 if path and path not in oob_paths:
3160 oob_paths.append(path)
3163 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3165 for instance in self.my_inst_names:
3166 inst_config = self.my_inst_info[instance]
3167 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3170 for nname in inst_config.all_nodes:
3171 if nname not in node_image:
3172 gnode = self.NodeImage(name=nname)
3173 gnode.ghost = (nname not in self.all_node_info)
3174 node_image[nname] = gnode
3176 inst_config.MapLVsByNode(node_vol_should)
3178 pnode = inst_config.primary_node
3179 node_image[pnode].pinst.append(instance)
3181 for snode in inst_config.secondary_nodes:
3182 nimg = node_image[snode]
3183 nimg.sinst.append(instance)
3184 if pnode not in nimg.sbp:
3185 nimg.sbp[pnode] = []
3186 nimg.sbp[pnode].append(instance)
3188 # At this point, we have the in-memory data structures complete,
3189 # except for the runtime information, which we'll gather next
3191 # Due to the way our RPC system works, exact response times cannot be
3192 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3193 # time before and after executing the request, we can at least have a time
3195 nvinfo_starttime = time.time()
3196 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3198 self.cfg.GetClusterName())
3199 nvinfo_endtime = time.time()
3201 if self.extra_lv_nodes and vg_name is not None:
3203 self.rpc.call_node_verify(self.extra_lv_nodes,
3204 {constants.NV_LVLIST: vg_name},
3205 self.cfg.GetClusterName())
3207 extra_lv_nvinfo = {}
3209 all_drbd_map = self.cfg.ComputeDRBDMap()
3211 feedback_fn("* Gathering disk information (%s nodes)" %
3212 len(self.my_node_names))
3213 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3216 feedback_fn("* Verifying configuration file consistency")
3218 # If not all nodes are being checked, we need to make sure the master node
3219 # and a non-checked vm_capable node are in the list.
3220 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3222 vf_nvinfo = all_nvinfo.copy()
3223 vf_node_info = list(self.my_node_info.values())
3224 additional_nodes = []
3225 if master_node not in self.my_node_info:
3226 additional_nodes.append(master_node)
3227 vf_node_info.append(self.all_node_info[master_node])
3228 # Add the first vm_capable node we find which is not included,
3229 # excluding the master node (which we already have)
3230 for node in absent_nodes:
3231 nodeinfo = self.all_node_info[node]
3232 if (nodeinfo.vm_capable and not nodeinfo.offline and
3233 node != master_node):
3234 additional_nodes.append(node)
3235 vf_node_info.append(self.all_node_info[node])
3237 key = constants.NV_FILELIST
3238 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3239 {key: node_verify_param[key]},
3240 self.cfg.GetClusterName()))
3242 vf_nvinfo = all_nvinfo
3243 vf_node_info = self.my_node_info.values()
3245 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3247 feedback_fn("* Verifying node status")
3251 for node_i in node_data_list:
3253 nimg = node_image[node]
3257 feedback_fn("* Skipping offline node %s" % (node,))
3261 if node == master_node:
3263 elif node_i.master_candidate:
3264 ntype = "master candidate"
3265 elif node_i.drained:
3271 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3273 msg = all_nvinfo[node].fail_msg
3274 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3277 nimg.rpc_fail = True
3280 nresult = all_nvinfo[node].payload
3282 nimg.call_ok = self._VerifyNode(node_i, nresult)
3283 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3284 self._VerifyNodeNetwork(node_i, nresult)
3285 self._VerifyNodeUserScripts(node_i, nresult)
3286 self._VerifyOob(node_i, nresult)
3289 self._VerifyNodeLVM(node_i, nresult, vg_name)
3290 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3293 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3294 self._UpdateNodeInstances(node_i, nresult, nimg)
3295 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3296 self._UpdateNodeOS(node_i, nresult, nimg)
3298 if not nimg.os_fail:
3299 if refos_img is None:
3301 self._VerifyNodeOS(node_i, nimg, refos_img)
3302 self._VerifyNodeBridges(node_i, nresult, bridges)
3304 # Check whether all running instances are primary for the node. (This
3305 # can no longer be done from _VerifyInstance below, since some of the
3306 # wrong instances could be from other node groups.)
3307 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3309 for inst in non_primary_inst:
3310 test = inst in self.all_inst_info
3311 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3312 "instance should not run on node %s", node_i.name)
3313 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3314 "node is running unknown instance %s", inst)
3316 for node, result in extra_lv_nvinfo.items():
3317 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3318 node_image[node], vg_name)
3320 feedback_fn("* Verifying instance status")
3321 for instance in self.my_inst_names:
3323 feedback_fn("* Verifying instance %s" % instance)
3324 inst_config = self.my_inst_info[instance]
3325 self._VerifyInstance(instance, inst_config, node_image,
3327 inst_nodes_offline = []
3329 pnode = inst_config.primary_node
3330 pnode_img = node_image[pnode]
3331 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3332 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3333 " primary node failed", instance)
3335 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3337 constants.CV_EINSTANCEBADNODE, instance,
3338 "instance is marked as running and lives on offline node %s",
3339 inst_config.primary_node)
3341 # If the instance is non-redundant we cannot survive losing its primary
3342 # node, so we are not N+1 compliant. On the other hand we have no disk
3343 # templates with more than one secondary so that situation is not well handled yet.
3345 # FIXME: does not support file-backed instances
3346 if not inst_config.secondary_nodes:
3347 i_non_redundant.append(instance)
3349 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3350 constants.CV_EINSTANCELAYOUT,
3351 instance, "instance has multiple secondary nodes: %s",
3352 utils.CommaJoin(inst_config.secondary_nodes),
3353 code=self.ETYPE_WARNING)
3355 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3356 pnode = inst_config.primary_node
3357 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3358 instance_groups = {}
3360 for node in instance_nodes:
3361 instance_groups.setdefault(self.all_node_info[node].group,
3365 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3366 # Sort so that we always list the primary node first.
3367 for group, nodes in sorted(instance_groups.items(),
3368 key=lambda (_, nodes): pnode in nodes,
3371 self._ErrorIf(len(instance_groups) > 1,
3372 constants.CV_EINSTANCESPLITGROUPS,
3373 instance, "instance has primary and secondary nodes in"
3374 " different groups: %s", utils.CommaJoin(pretty_list),
3375 code=self.ETYPE_WARNING)
3377 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3378 i_non_a_balanced.append(instance)
3380 for snode in inst_config.secondary_nodes:
3381 s_img = node_image[snode]
3382 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3383 snode, "instance %s, connection to secondary node failed",
3387 inst_nodes_offline.append(snode)
3389 # warn that the instance lives on offline nodes
3390 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3391 "instance has offline secondary node(s) %s",
3392 utils.CommaJoin(inst_nodes_offline))
3393 # ... or ghost/non-vm_capable nodes
3394 for node in inst_config.all_nodes:
3395 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3396 instance, "instance lives on ghost node %s", node)
3397 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3398 instance, "instance lives on non-vm_capable node %s", node)
3400 feedback_fn("* Verifying orphan volumes")
3401 reserved = utils.FieldSet(*cluster.reserved_lvs)
3403 # We will get spurious "unknown volume" warnings if any node of this group
3404 # is secondary for an instance whose primary is in another group. To avoid
3405 # them, we find these instances and add their volumes to node_vol_should.
3406 for inst in self.all_inst_info.values():
3407 for secondary in inst.secondary_nodes:
3408 if (secondary in self.my_node_info
3409 and inst.name not in self.my_inst_info):
3410 inst.MapLVsByNode(node_vol_should)
3413 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3415 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3416 feedback_fn("* Verifying N+1 Memory redundancy")
3417 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3419 feedback_fn("* Other Notes")
3421 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3422 % len(i_non_redundant))
3424 if i_non_a_balanced:
3425 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3426 % len(i_non_a_balanced))
3429 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3432 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3435 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3439 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3440 """Analyze the post-hooks' result
3442 This method analyses the hook result, handles it, and sends some
3443 nicely-formatted feedback back to the user.
3445 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3446 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3447 @param hooks_results: the results of the multi-node hooks rpc call
3448 @param feedback_fn: function used to send feedback back to the caller
3449 @param lu_result: previous Exec result
3450 @return: the new Exec result, based on the previous result
3454 # We only really run POST phase hooks, only for non-empty groups,
3455 # and are only interested in their results
3456 if not self.my_node_names:
3459 elif phase == constants.HOOKS_PHASE_POST:
3460 # Used to change hooks' output to proper indentation
3461 feedback_fn("* Hooks Results")
3462 assert hooks_results, "invalid result from hooks"
3464 for node_name in hooks_results:
3465 res = hooks_results[node_name]
3467 test = msg and not res.offline
3468 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3469 "Communication failure in hooks execution: %s", msg)
3470 if res.offline or msg:
3471 # No need to investigate payload if node is offline or gave
3474 for script, hkr, output in res.payload:
3475 test = hkr == constants.HKR_FAIL
3476 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3477 "Script %s failed, output:", script)
3479 output = self._HOOKS_INDENT_RE.sub(" ", output)
3480 feedback_fn("%s" % output)
3486 class LUClusterVerifyDisks(NoHooksLU):
3487 """Verifies the cluster disks status.
3492 def ExpandNames(self):
3493 self.share_locks = _ShareAll()
3494 self.needed_locks = {
3495 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3498 def Exec(self, feedback_fn):
3499 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3501 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3502 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3503 for group in group_names])
3506 class LUGroupVerifyDisks(NoHooksLU):
3507 """Verifies the status of all disks in a node group.
3512 def ExpandNames(self):
3513 # Raises errors.OpPrereqError on its own if group can't be found
3514 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3516 self.share_locks = _ShareAll()
3517 self.needed_locks = {
3518 locking.LEVEL_INSTANCE: [],
3519 locking.LEVEL_NODEGROUP: [],
3520 locking.LEVEL_NODE: [],
3523 def DeclareLocks(self, level):
3524 if level == locking.LEVEL_INSTANCE:
3525 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3527 # Lock instances optimistically, needs verification once node and group
3528 # locks have been acquired
3529 self.needed_locks[locking.LEVEL_INSTANCE] = \
3530 self.cfg.GetNodeGroupInstances(self.group_uuid)
3532 elif level == locking.LEVEL_NODEGROUP:
3533 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3535 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3536 set([self.group_uuid] +
3537 # Lock all groups used by instances optimistically; this requires
3538 # going via the node before it's locked, requiring verification
3541 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3542 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3544 elif level == locking.LEVEL_NODE:
3545 # This will only lock the nodes in the group to be verified which contain
3547 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3548 self._LockInstancesNodes()
3550 # Lock all nodes in group to be verified
3551 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3552 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3553 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3555 def CheckPrereq(self):
3556 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3557 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3558 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3560 assert self.group_uuid in owned_groups
3562 # Check if locked instances are still correct
3563 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3565 # Get instance information
3566 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3568 # Check if node groups for locked instances are still correct
3569 _CheckInstancesNodeGroups(self.cfg, self.instances,
3570 owned_groups, owned_nodes, self.group_uuid)
3572 def Exec(self, feedback_fn):
3573 """Verify integrity of cluster disks.
3575 @rtype: tuple of three items
3576 @return: a tuple of (dict of node-to-node_error, list of instances
3577 which need activate-disks, dict of instance: (node, volume) for
3582 res_instances = set()
3585 nv_dict = _MapInstanceDisksToNodes([inst
3586 for inst in self.instances.values()
3587 if inst.admin_state == constants.ADMINST_UP])
3590 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3591 set(self.cfg.GetVmCapableNodeList()))
3593 node_lvs = self.rpc.call_lv_list(nodes, [])
3595 for (node, node_res) in node_lvs.items():
3596 if node_res.offline:
3599 msg = node_res.fail_msg
3601 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3602 res_nodes[node] = msg
3605 for lv_name, (_, _, lv_online) in node_res.payload.items():
3606 inst = nv_dict.pop((node, lv_name), None)
3607 if not (lv_online or inst is None):
3608 res_instances.add(inst)
3610 # any leftover items in nv_dict are missing LVs, let's arrange the data
3612 for key, inst in nv_dict.iteritems():
3613 res_missing.setdefault(inst, []).append(list(key))
3615 return (res_nodes, list(res_instances), res_missing)
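# Illustrative return value, with hypothetical names: a tuple like
#   ({"node3": "Error enumerating LVs"},        # per-node RPC errors
#    ["inst-web"],                               # instances needing activate-disks
#    {"inst-db": [["node4", "xenvg/disk0"]]})    # missing LVs per instance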
3618 class LUClusterRepairDiskSizes(NoHooksLU):
3619 """Verifies the cluster disks sizes.
3624 def ExpandNames(self):
3625 if self.op.instances:
3626 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3627 self.needed_locks = {
3628 locking.LEVEL_NODE_RES: [],
3629 locking.LEVEL_INSTANCE: self.wanted_names,
3631 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3633 self.wanted_names = None
3634 self.needed_locks = {
3635 locking.LEVEL_NODE_RES: locking.ALL_SET,
3636 locking.LEVEL_INSTANCE: locking.ALL_SET,
3638 self.share_locks = {
3639 locking.LEVEL_NODE_RES: 1,
3640 locking.LEVEL_INSTANCE: 0,
3643 def DeclareLocks(self, level):
3644 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3645 self._LockInstancesNodes(primary_only=True, level=level)
3647 def CheckPrereq(self):
3648 """Check prerequisites.
3650 This only checks the optional instance list against the existing names.
3653 if self.wanted_names is None:
3654 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3656 self.wanted_instances = \
3657 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3659 def _EnsureChildSizes(self, disk):
3660 """Ensure children of the disk have the needed disk size.
3662 This is valid mainly for DRBD8 and fixes an issue where the
3663 children have smaller disk size.
3665 @param disk: an L{ganeti.objects.Disk} object
3668 if disk.dev_type == constants.LD_DRBD8:
3669 assert disk.children, "Empty children for DRBD8?"
3670 fchild = disk.children[0]
3671 mismatch = fchild.size < disk.size
3673 self.LogInfo("Child disk has size %d, parent %d, fixing",
3674 fchild.size, disk.size)
3675 fchild.size = disk.size
3677 # and we recurse on this child only, not on the metadev
3678 return self._EnsureChildSizes(fchild) or mismatch
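# Sketch of the intended behaviour: for a DRBD8 disk of e.g. 10240 MiB whose
# first child (the data device) only records 10236 MiB, the child's size is
# bumped to 10240 and True is returned, so that Exec() below persists the
# corrected configuration via cfg.Update().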
3682 def Exec(self, feedback_fn):
3683 """Verify the size of cluster disks.
3686 # TODO: check child disks too
3687 # TODO: check differences in size between primary/secondary nodes
3689 for instance in self.wanted_instances:
3690 pnode = instance.primary_node
3691 if pnode not in per_node_disks:
3692 per_node_disks[pnode] = []
3693 for idx, disk in enumerate(instance.disks):
3694 per_node_disks[pnode].append((instance, idx, disk))
3696 assert not (frozenset(per_node_disks.keys()) -
3697 self.owned_locks(locking.LEVEL_NODE_RES)), \
3698 "Not owning correct locks"
3699 assert not self.owned_locks(locking.LEVEL_NODE)
3702 for node, dskl in per_node_disks.items():
3703 newl = [v[2].Copy() for v in dskl]
3705 self.cfg.SetDiskID(dsk, node)
3706 result = self.rpc.call_blockdev_getsize(node, newl)
3708 self.LogWarning("Failure in blockdev_getsize call to node"
3709 " %s, ignoring", node)
3711 if len(result.payload) != len(dskl):
3712 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3713 " result.payload=%s", node, len(dskl), result.payload)
3714 self.LogWarning("Invalid result from node %s, ignoring node results",
3717 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3718 if size is None:
3719 self.LogWarning("Disk %d of instance %s did not return size"
3720 " information, ignoring", idx, instance.name)
3721 continue
3722 if not isinstance(size, (int, long)):
3723 self.LogWarning("Disk %d of instance %s did not return valid"
3724 " size information, ignoring", idx, instance.name)
3725 continue
3727 if size != disk.size:
3728 self.LogInfo("Disk %d of instance %s has mismatched size,"
3729 " correcting: recorded %d, actual %d", idx,
3730 instance.name, disk.size, size)
3731 disk.size = size
3732 self.cfg.Update(instance, feedback_fn)
3733 changed.append((instance.name, idx, size))
3734 if self._EnsureChildSizes(disk):
3735 self.cfg.Update(instance, feedback_fn)
3736 changed.append((instance.name, idx, disk.size))
3738 return changed
3740 class LUClusterRename(LogicalUnit):
3741 """Rename the cluster.
3744 HPATH = "cluster-rename"
3745 HTYPE = constants.HTYPE_CLUSTER
3747 def BuildHooksEnv(self):
3752 "OP_TARGET": self.cfg.GetClusterName(),
3753 "NEW_NAME": self.op.name,
3756 def BuildHooksNodes(self):
3757 """Build hooks nodes.
3760 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3762 def CheckPrereq(self):
3763 """Verify that the passed name is a valid one.
3766 hostname = netutils.GetHostname(name=self.op.name,
3767 family=self.cfg.GetPrimaryIPFamily())
3769 new_name = hostname.name
3770 self.ip = new_ip = hostname.ip
3771 old_name = self.cfg.GetClusterName()
3772 old_ip = self.cfg.GetMasterIP()
3773 if new_name == old_name and new_ip == old_ip:
3774 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3775 " cluster has changed",
3777 if new_ip != old_ip:
3778 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3779 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3780 " reachable on the network" %
3781 new_ip, errors.ECODE_NOTUNIQUE)
3783 self.op.name = new_name
3785 def Exec(self, feedback_fn):
3786 """Rename the cluster.
3789 clustername = self.op.name
3790 new_ip = self.ip
3792 # shutdown the master IP
3793 master_params = self.cfg.GetMasterNetworkParameters()
3794 ems = self.cfg.GetUseExternalMipScript()
3795 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3797 result.Raise("Could not disable the master role")
3800 cluster = self.cfg.GetClusterInfo()
3801 cluster.cluster_name = clustername
3802 cluster.master_ip = new_ip
3803 self.cfg.Update(cluster, feedback_fn)
3805 # update the known hosts file
3806 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3807 node_list = self.cfg.GetOnlineNodeList()
3808 try:
3809 node_list.remove(master_params.name)
3810 except ValueError:
3811 pass
3812 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3814 master_params.ip = new_ip
3815 result = self.rpc.call_node_activate_master_ip(master_params.name,
3817 msg = result.fail_msg
3819 self.LogWarning("Could not re-enable the master role on"
3820 " the master, please restart manually: %s", msg)
3825 def _ValidateNetmask(cfg, netmask):
3826 """Checks if a netmask is valid.
3828 @type cfg: L{config.ConfigWriter}
3829 @param cfg: The cluster configuration
3831 @param netmask: the netmask to be verified
3832 @raise errors.OpPrereqError: if the validation fails
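Illustrative example: on a cluster whose primary IP family is IPv4, a CIDR
prefix length such as 24 would normally be accepted, while a value the
address class rejects (e.g. 33) makes ValidateNetmask() return False and
an OpPrereqError is raised.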
3835 ip_family = cfg.GetPrimaryIPFamily()
3837 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3838 except errors.ProgrammerError:
3839 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3841 if not ipcls.ValidateNetmask(netmask):
3842 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3846 class LUClusterSetParams(LogicalUnit):
3847 """Change the parameters of the cluster.
3850 HPATH = "cluster-modify"
3851 HTYPE = constants.HTYPE_CLUSTER
3854 def CheckArguments(self):
3858 if self.op.uid_pool:
3859 uidpool.CheckUidPool(self.op.uid_pool)
3861 if self.op.add_uids:
3862 uidpool.CheckUidPool(self.op.add_uids)
3864 if self.op.remove_uids:
3865 uidpool.CheckUidPool(self.op.remove_uids)
3867 if self.op.master_netmask is not None:
3868 _ValidateNetmask(self.cfg, self.op.master_netmask)
3870 if self.op.diskparams:
3871 for dt_params in self.op.diskparams.values():
3872 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3873 try:
3874 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3875 except errors.OpPrereqError, err:
3876 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
3879 def ExpandNames(self):
3880 # FIXME: in the future maybe other cluster params won't require checking on
3881 # all nodes to be modified.
3882 self.needed_locks = {
3883 locking.LEVEL_NODE: locking.ALL_SET,
3884 locking.LEVEL_INSTANCE: locking.ALL_SET,
3885 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3887 self.share_locks = {
3888 locking.LEVEL_NODE: 1,
3889 locking.LEVEL_INSTANCE: 1,
3890 locking.LEVEL_NODEGROUP: 1,
3893 def BuildHooksEnv(self):
3898 "OP_TARGET": self.cfg.GetClusterName(),
3899 "NEW_VG_NAME": self.op.vg_name,
3902 def BuildHooksNodes(self):
3903 """Build hooks nodes.
3906 mn = self.cfg.GetMasterNode()
3909 def CheckPrereq(self):
3910 """Check prerequisites.
3912 This checks that the given parameters do not conflict and that
3913 the given volume group is valid.
3916 if self.op.vg_name is not None and not self.op.vg_name:
3917 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3918 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3919 " instances exist", errors.ECODE_INVAL)
3921 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3922 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3923 raise errors.OpPrereqError("Cannot disable drbd helper while"
3924 " drbd-based instances exist",
3927 node_list = self.owned_locks(locking.LEVEL_NODE)
3929 # if vg_name not None, checks given volume group on all nodes
3930 if self.op.vg_name:
3931 vglist = self.rpc.call_vg_list(node_list)
3932 for node in node_list:
3933 msg = vglist[node].fail_msg
3934 if msg:
3935 # ignoring down node
3936 self.LogWarning("Error while gathering data on node %s"
3937 " (ignoring node): %s", node, msg)
3938 continue
3939 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3940 self.op.vg_name,
3941 constants.MIN_VG_SIZE)
3942 if vgstatus:
3943 raise errors.OpPrereqError("Error on node '%s': %s" %
3944 (node, vgstatus), errors.ECODE_ENVIRON)
3946 if self.op.drbd_helper:
3947 # checks given drbd helper on all nodes
3948 helpers = self.rpc.call_drbd_helper(node_list)
3949 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3951 self.LogInfo("Not checking drbd helper on offline node %s", node)
3953 msg = helpers[node].fail_msg
3955 raise errors.OpPrereqError("Error checking drbd helper on node"
3956 " '%s': %s" % (node, msg),
3957 errors.ECODE_ENVIRON)
3958 node_helper = helpers[node].payload
3959 if node_helper != self.op.drbd_helper:
3960 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3961 (node, node_helper), errors.ECODE_ENVIRON)
3963 self.cluster = cluster = self.cfg.GetClusterInfo()
3964 # validate params changes
3965 if self.op.beparams:
3966 objects.UpgradeBeParams(self.op.beparams)
3967 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3968 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3970 if self.op.ndparams:
3971 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3972 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3974 # TODO: we need a more general way to handle resetting
3975 # cluster-level parameters to default values
3976 if self.new_ndparams["oob_program"] == "":
3977 self.new_ndparams["oob_program"] = \
3978 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3980 if self.op.hv_state:
3981 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3982 self.cluster.hv_state_static)
3983 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3984 for hv, values in new_hv_state.items())
3986 if self.op.disk_state:
3987 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3988 self.cluster.disk_state_static)
3989 self.new_disk_state = \
3990 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3991 for name, values in svalues.items()))
3992 for storage, svalues in new_disk_state.items())
3994 if self.op.ipolicy:
3995 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3998 all_instances = self.cfg.GetAllInstancesInfo().values()
4000 for group in self.cfg.GetAllNodeGroupsInfo().values():
4001 instances = frozenset([inst for inst in all_instances
4002 if compat.any(node in group.members
4003 for node in inst.all_nodes)])
4004 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4005 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
4007 new_ipolicy, instances)
4009 violations.update(new)
4012 self.LogWarning("After the ipolicy change the following instances"
4013 " violate them: %s",
4014 utils.CommaJoin(utils.NiceSort(violations)))
4016 if self.op.nicparams:
4017 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4018 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4019 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4020 nic_errors = []
4022 # check all instances for consistency
4023 for instance in self.cfg.GetAllInstancesInfo().values():
4024 for nic_idx, nic in enumerate(instance.nics):
4025 params_copy = copy.deepcopy(nic.nicparams)
4026 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4028 # check parameter syntax
4029 try:
4030 objects.NIC.CheckParameterSyntax(params_filled)
4031 except errors.ConfigurationError, err:
4032 nic_errors.append("Instance %s, nic/%d: %s" %
4033 (instance.name, nic_idx, err))
4035 # if we're moving instances to routed, check that they have an ip
4036 target_mode = params_filled[constants.NIC_MODE]
4037 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4038 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4039 " address" % (instance.name, nic_idx))
4041 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4042 "\n".join(nic_errors))
4044 # hypervisor list/parameters
4045 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4046 if self.op.hvparams:
4047 for hv_name, hv_dict in self.op.hvparams.items():
4048 if hv_name not in self.new_hvparams:
4049 self.new_hvparams[hv_name] = hv_dict
4050 else:
4051 self.new_hvparams[hv_name].update(hv_dict)
4053 # disk template parameters
4054 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4055 if self.op.diskparams:
4056 for dt_name, dt_params in self.op.diskparams.items():
4057 if dt_name not in self.new_diskparams:
4058 self.new_diskparams[dt_name] = dt_params
4059 else:
4060 self.new_diskparams[dt_name].update(dt_params)
4062 # os hypervisor parameters
4063 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4064 if self.op.os_hvp:
4065 for os_name, hvs in self.op.os_hvp.items():
4066 if os_name not in self.new_os_hvp:
4067 self.new_os_hvp[os_name] = hvs
4068 else:
4069 for hv_name, hv_dict in hvs.items():
4070 if hv_name not in self.new_os_hvp[os_name]:
4071 self.new_os_hvp[os_name][hv_name] = hv_dict
4072 else:
4073 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4076 self.new_osp = objects.FillDict(cluster.osparams, {})
4077 if self.op.osparams:
4078 for os_name, osp in self.op.osparams.items():
4079 if os_name not in self.new_osp:
4080 self.new_osp[os_name] = {}
4082 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4085 if not self.new_osp[os_name]:
4086 # we removed all parameters
4087 del self.new_osp[os_name]
4089 # check the parameter validity (remote check)
4090 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4091 os_name, self.new_osp[os_name])
4093 # changes to the hypervisor list
4094 if self.op.enabled_hypervisors is not None:
4095 self.hv_list = self.op.enabled_hypervisors
4096 for hv in self.hv_list:
4097 # if the hypervisor doesn't already exist in the cluster
4098 # hvparams, we initialize it to empty, and then (in both
4099 # cases) we make sure to fill the defaults, as we might not
4100 # have a complete defaults list if the hypervisor wasn't
4102 if hv not in new_hvp:
4103 new_hvp[hv] = {}
4104 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4105 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4106 else:
4107 self.hv_list = cluster.enabled_hypervisors
4109 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4110 # either the enabled list has changed, or the parameters have, validate
4111 for hv_name, hv_params in self.new_hvparams.items():
4112 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4113 (self.op.enabled_hypervisors and
4114 hv_name in self.op.enabled_hypervisors)):
4115 # either this is a new hypervisor, or its parameters have changed
4116 hv_class = hypervisor.GetHypervisor(hv_name)
4117 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4118 hv_class.CheckParameterSyntax(hv_params)
4119 _CheckHVParams(self, node_list, hv_name, hv_params)
4122 # no need to check any newly-enabled hypervisors, since the
4123 # defaults have already been checked in the above code-block
4124 for os_name, os_hvp in self.new_os_hvp.items():
4125 for hv_name, hv_params in os_hvp.items():
4126 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4127 # we need to fill in the new os_hvp on top of the actual hv_p
4128 cluster_defaults = self.new_hvparams.get(hv_name, {})
4129 new_osp = objects.FillDict(cluster_defaults, hv_params)
4130 hv_class = hypervisor.GetHypervisor(hv_name)
4131 hv_class.CheckParameterSyntax(new_osp)
4132 _CheckHVParams(self, node_list, hv_name, new_osp)
4134 if self.op.default_iallocator:
4135 alloc_script = utils.FindFile(self.op.default_iallocator,
4136 constants.IALLOCATOR_SEARCH_PATH,
4138 if alloc_script is None:
4139 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4140 " specified" % self.op.default_iallocator,
4143 def Exec(self, feedback_fn):
4144 """Change the parameters of the cluster.
4147 if self.op.vg_name is not None:
4148 new_volume = self.op.vg_name
4149 if not new_volume:
4150 new_volume = None
4151 if new_volume != self.cfg.GetVGName():
4152 self.cfg.SetVGName(new_volume)
4153 else:
4154 feedback_fn("Cluster LVM configuration already in desired"
4155 " state, not changing")
4156 if self.op.drbd_helper is not None:
4157 new_helper = self.op.drbd_helper
4158 if not new_helper:
4159 new_helper = None
4160 if new_helper != self.cfg.GetDRBDHelper():
4161 self.cfg.SetDRBDHelper(new_helper)
4162 else:
4163 feedback_fn("Cluster DRBD helper already in desired state,"
4164 " not changing")
4165 if self.op.hvparams:
4166 self.cluster.hvparams = self.new_hvparams
4167 if self.op.os_hvp:
4168 self.cluster.os_hvp = self.new_os_hvp
4169 if self.op.enabled_hypervisors is not None:
4170 self.cluster.hvparams = self.new_hvparams
4171 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4172 if self.op.beparams:
4173 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4174 if self.op.nicparams:
4175 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4176 if self.op.ipolicy:
4177 self.cluster.ipolicy = self.new_ipolicy
4178 if self.op.osparams:
4179 self.cluster.osparams = self.new_osp
4180 if self.op.ndparams:
4181 self.cluster.ndparams = self.new_ndparams
4182 if self.op.diskparams:
4183 self.cluster.diskparams = self.new_diskparams
4184 if self.op.hv_state:
4185 self.cluster.hv_state_static = self.new_hv_state
4186 if self.op.disk_state:
4187 self.cluster.disk_state_static = self.new_disk_state
4189 if self.op.candidate_pool_size is not None:
4190 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4191 # we need to update the pool size here, otherwise the save will fail
4192 _AdjustCandidatePool(self, [])
4194 if self.op.maintain_node_health is not None:
4195 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4196 feedback_fn("Note: CONFD was disabled at build time, node health"
4197 " maintenance is not useful (still enabling it)")
4198 self.cluster.maintain_node_health = self.op.maintain_node_health
4200 if self.op.prealloc_wipe_disks is not None:
4201 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4203 if self.op.add_uids is not None:
4204 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4206 if self.op.remove_uids is not None:
4207 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4209 if self.op.uid_pool is not None:
4210 self.cluster.uid_pool = self.op.uid_pool
4212 if self.op.default_iallocator is not None:
4213 self.cluster.default_iallocator = self.op.default_iallocator
4215 if self.op.reserved_lvs is not None:
4216 self.cluster.reserved_lvs = self.op.reserved_lvs
4218 if self.op.use_external_mip_script is not None:
4219 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4221 def helper_os(aname, mods, desc):
4222 desc += " OS list"
4223 lst = getattr(self.cluster, aname)
4224 for key, val in mods:
4225 if key == constants.DDM_ADD:
4226 if val in lst:
4227 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4228 else:
4229 lst.append(val)
4230 elif key == constants.DDM_REMOVE:
4231 if val in lst:
4232 lst.remove(val)
4233 else:
4234 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4235 else:
4236 raise errors.ProgrammerError("Invalid modification '%s'" % key)
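# Both hidden_os and blacklisted_os arrive as lists of
# (DDM_ADD | DDM_REMOVE, os_name) pairs; helper_os above applies them to
# the corresponding cluster-level OS lists and only warns on no-ops.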
4238 if self.op.hidden_os:
4239 helper_os("hidden_os", self.op.hidden_os, "hidden")
4241 if self.op.blacklisted_os:
4242 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4244 if self.op.master_netdev:
4245 master_params = self.cfg.GetMasterNetworkParameters()
4246 ems = self.cfg.GetUseExternalMipScript()
4247 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4248 self.cluster.master_netdev)
4249 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4251 result.Raise("Could not disable the master ip")
4252 feedback_fn("Changing master_netdev from %s to %s" %
4253 (master_params.netdev, self.op.master_netdev))
4254 self.cluster.master_netdev = self.op.master_netdev
4256 if self.op.master_netmask:
4257 master_params = self.cfg.GetMasterNetworkParameters()
4258 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4259 result = self.rpc.call_node_change_master_netmask(master_params.name,
4260 master_params.netmask,
4261 self.op.master_netmask,
4263 master_params.netdev)
4265 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4268 self.cluster.master_netmask = self.op.master_netmask
4270 self.cfg.Update(self.cluster, feedback_fn)
4272 if self.op.master_netdev:
4273 master_params = self.cfg.GetMasterNetworkParameters()
4274 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4275 self.op.master_netdev)
4276 ems = self.cfg.GetUseExternalMipScript()
4277 result = self.rpc.call_node_activate_master_ip(master_params.name,
4280 self.LogWarning("Could not re-enable the master ip on"
4281 " the master, please restart manually: %s",
4285 def _UploadHelper(lu, nodes, fname):
4286 """Helper for uploading a file and showing warnings.
4289 if os.path.exists(fname):
4290 result = lu.rpc.call_upload_file(nodes, fname)
4291 for to_node, to_result in result.items():
4292 msg = to_result.fail_msg
4294 msg = ("Copy of file %s to node %s failed: %s" %
4295 (fname, to_node, msg))
4296 lu.proc.LogWarning(msg)
4299 def _ComputeAncillaryFiles(cluster, redist):
4300 """Compute files external to Ganeti which need to be consistent.
4302 @type redist: boolean
4303 @param redist: Whether to include files which need to be redistributed
4306 # Compute files for all nodes
4308 constants.SSH_KNOWN_HOSTS_FILE,
4309 constants.CONFD_HMAC_KEY,
4310 constants.CLUSTER_DOMAIN_SECRET_FILE,
4311 constants.SPICE_CERT_FILE,
4312 constants.SPICE_CACERT_FILE,
4313 constants.RAPI_USERS_FILE,
4316 if not redist:
4317 files_all.update(constants.ALL_CERT_FILES)
4318 files_all.update(ssconf.SimpleStore().GetFileList())
4319 else:
4320 # we need to ship at least the RAPI certificate
4321 files_all.add(constants.RAPI_CERT_FILE)
4323 if cluster.modify_etc_hosts:
4324 files_all.add(constants.ETC_HOSTS)
4326 if cluster.use_external_mip_script:
4327 files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
4329 # Files which are optional, these must:
4330 # - be present in one other category as well
4331 # - either exist or not exist on all nodes of that category (mc, vm all)
4333 constants.RAPI_USERS_FILE,
4336 # Files which should only be on master candidates
4337 files_mc = set()
4339 if not redist:
4340 files_mc.add(constants.CLUSTER_CONF_FILE)
4342 # Files which should only be on VM-capable nodes
4343 files_vm = set(filename
4344 for hv_name in cluster.enabled_hypervisors
4345 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4347 files_opt |= set(filename
4348 for hv_name in cluster.enabled_hypervisors
4349 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4351 # Filenames in each category must be unique
4352 all_files_set = files_all | files_mc | files_vm
4353 assert (len(all_files_set) ==
4354 sum(map(len, [files_all, files_mc, files_vm]))), \
4355 "Found file listed in more than one file list"
4357 # Optional files must be present in one other category
4358 assert all_files_set.issuperset(files_opt), \
4359 "Optional file not in a different required list"
4361 return (files_all, files_opt, files_mc, files_vm)
4364 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4365 """Distribute additional files which are part of the cluster configuration.
4367 ConfigWriter takes care of distributing the config and ssconf files, but
4368 there are more files which should be distributed to all nodes. This function
4369 makes sure those are copied.
4371 @param lu: calling logical unit
4372 @param additional_nodes: list of nodes not in the config to distribute to
4373 @type additional_vm: boolean
4374 @param additional_vm: whether the additional nodes are vm-capable or not
4377 # Gather target nodes
4378 cluster = lu.cfg.GetClusterInfo()
4379 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4381 online_nodes = lu.cfg.GetOnlineNodeList()
4382 online_set = frozenset(online_nodes)
4383 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4385 if additional_nodes is not None:
4386 online_nodes.extend(additional_nodes)
4387 if additional_vm:
4388 vm_nodes.extend(additional_nodes)
4390 # Never distribute to master node
4391 for nodelist in [online_nodes, vm_nodes]:
4392 if master_info.name in nodelist:
4393 nodelist.remove(master_info.name)
4396 (files_all, _, files_mc, files_vm) = \
4397 _ComputeAncillaryFiles(cluster, True)
4399 # Never re-distribute configuration file from here
4400 assert not (constants.CLUSTER_CONF_FILE in files_all or
4401 constants.CLUSTER_CONF_FILE in files_vm)
4402 assert not files_mc, "Master candidates not handled in this function"
4404 filemap = [
4405 (online_nodes, files_all),
4406 (vm_nodes, files_vm),
4407 ]
4410 for (node_list, files) in filemap:
4411 for fname in files:
4412 _UploadHelper(lu, node_list, fname)
4415 class LUClusterRedistConf(NoHooksLU):
4416 """Force the redistribution of cluster configuration.
4418 This is a very simple LU.
4423 def ExpandNames(self):
4424 self.needed_locks = {
4425 locking.LEVEL_NODE: locking.ALL_SET,
4427 self.share_locks[locking.LEVEL_NODE] = 1
4429 def Exec(self, feedback_fn):
4430 """Redistribute the configuration.
4433 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4434 _RedistributeAncillaryFiles(self)
4437 class LUClusterActivateMasterIp(NoHooksLU):
4438 """Activate the master IP on the master node.
4441 def Exec(self, feedback_fn):
4442 """Activate the master IP.
4445 master_params = self.cfg.GetMasterNetworkParameters()
4446 ems = self.cfg.GetUseExternalMipScript()
4447 result = self.rpc.call_node_activate_master_ip(master_params.name,
4449 result.Raise("Could not activate the master IP")
4452 class LUClusterDeactivateMasterIp(NoHooksLU):
4453 """Deactivate the master IP on the master node.
4456 def Exec(self, feedback_fn):
4457 """Deactivate the master IP.
4460 master_params = self.cfg.GetMasterNetworkParameters()
4461 ems = self.cfg.GetUseExternalMipScript()
4462 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4464 result.Raise("Could not deactivate the master IP")
4467 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4468 """Sleep and poll for an instance's disk to sync.
4471 if not instance.disks or disks is not None and not disks:
4472 return True
4474 disks = _ExpandCheckDisks(instance, disks)
4477 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4479 node = instance.primary_node
4482 lu.cfg.SetDiskID(dev, node)
4484 # TODO: Convert to utils.Retry
4486 retries = 0
4487 degr_retries = 10 # in seconds, as we sleep 1 second each time
4488 while True:
4489 max_time = 0
4490 done = True
4491 cumul_degraded = False
4492 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4493 msg = rstats.fail_msg
4495 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4498 raise errors.RemoteError("Can't contact node %s for mirror data,"
4499 " aborting." % node)
4502 rstats = rstats.payload
4504 for i, mstat in enumerate(rstats):
4506 lu.LogWarning("Can't compute data for node %s/%s",
4507 node, disks[i].iv_name)
4510 cumul_degraded = (cumul_degraded or
4511 (mstat.is_degraded and mstat.sync_percent is None))
4512 if mstat.sync_percent is not None:
4513 done = False
4514 if mstat.estimated_time is not None:
4515 rem_time = ("%s remaining (estimated)" %
4516 utils.FormatSeconds(mstat.estimated_time))
4517 max_time = mstat.estimated_time
4519 rem_time = "no time estimate"
4520 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4521 (disks[i].iv_name, mstat.sync_percent, rem_time))
4523 # if we're done but degraded, let's do a few small retries, to
4524 # make sure we see a stable and not transient situation; therefore
4525 # we force restart of the loop
4526 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4527 logging.info("Degraded disks found, %d retries left", degr_retries)
4528 degr_retries -= 1
4529 time.sleep(1)
4530 continue
4532 if done or oneshot:
4533 break
4535 time.sleep(min(60, max_time))
4537 if done:
4538 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4539 return not cumul_degraded
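# Note on the helper above: it returns True only when no disk is left in a
# degraded state after the wait loop, so a False return means at least one
# mirror never reached a clean state within the polling period.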
4542 def _BlockdevFind(lu, node, dev, instance):
4543 """Wrapper around call_blockdev_find to annotate diskparams.
4545 @param lu: A reference to the lu object
4546 @param node: The node to call out
4547 @param dev: The device to find
4548 @param instance: The instance object the device belongs to
4549 @returns The result of the rpc call
4552 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4553 return lu.rpc.call_blockdev_find(node, disk)
4556 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4557 """Wrapper around L{_CheckDiskConsistencyInner}.
4560 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4561 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4562 ldisk=ldisk)
4565 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4566 ldisk=False):
4567 """Check that mirrors are not degraded.
4569 @attention: The device has to be annotated already.
4571 The ldisk parameter, if True, will change the test from the
4572 is_degraded attribute (which represents overall non-ok status for
4573 the device(s)) to the ldisk (representing the local storage status).
4576 lu.cfg.SetDiskID(dev, node)
4578 result = True
4580 if on_primary or dev.AssembleOnSecondary():
4581 rstats = lu.rpc.call_blockdev_find(node, dev)
4582 msg = rstats.fail_msg
4583 if msg:
4584 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4585 result = False
4586 elif not rstats.payload:
4587 lu.LogWarning("Can't find disk on node %s", node)
4588 result = False
4589 else:
4590 if ldisk:
4591 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4592 else:
4593 result = result and not rstats.payload.is_degraded
4595 if dev.children:
4596 for child in dev.children:
4597 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4598 on_primary)
4600 return result
4603 class LUOobCommand(NoHooksLU):
4604 """Logical unit for OOB handling.
4608 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4610 def ExpandNames(self):
4611 """Gather locks we need.
4614 if self.op.node_names:
4615 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4616 lock_names = self.op.node_names
4617 else:
4618 lock_names = locking.ALL_SET
4620 self.needed_locks = {
4621 locking.LEVEL_NODE: lock_names,
4624 def CheckPrereq(self):
4625 """Check prerequisites.
4628 - the node exists in the configuration
4631 Any errors are signaled by raising errors.OpPrereqError.
4635 self.master_node = self.cfg.GetMasterNode()
4637 assert self.op.power_delay >= 0.0
4639 if self.op.node_names:
4640 if (self.op.command in self._SKIP_MASTER and
4641 self.master_node in self.op.node_names):
4642 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4643 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4645 if master_oob_handler:
4646 additional_text = ("run '%s %s %s' if you want to operate on the"
4647 " master regardless") % (master_oob_handler,
4651 additional_text = "it does not support out-of-band operations"
4653 raise errors.OpPrereqError(("Operating on the master node %s is not"
4654 " allowed for %s; %s") %
4655 (self.master_node, self.op.command,
4656 additional_text), errors.ECODE_INVAL)
4657 else:
4658 self.op.node_names = self.cfg.GetNodeList()
4659 if self.op.command in self._SKIP_MASTER:
4660 self.op.node_names.remove(self.master_node)
4662 if self.op.command in self._SKIP_MASTER:
4663 assert self.master_node not in self.op.node_names
4664 self.nodes = []
4665 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4666 if node is None:
4667 raise errors.OpPrereqError("Node %s not found" % node_name,
4668 errors.ECODE_NOENT)
4670 self.nodes.append(node)
4672 if (not self.op.ignore_status and
4673 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4674 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4675 " not marked offline") % node_name,
4678 def Exec(self, feedback_fn):
4679 """Execute OOB and return result if we expect any.
4682 master_node = self.master_node
4683 ret = []
4685 for idx, node in enumerate(utils.NiceSort(self.nodes,
4686 key=lambda node: node.name)):
4687 node_entry = [(constants.RS_NORMAL, node.name)]
4688 ret.append(node_entry)
4690 oob_program = _SupportsOob(self.cfg, node)
4692 if not oob_program:
4693 node_entry.append((constants.RS_UNAVAIL, None))
4694 continue
4696 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4697 self.op.command, oob_program, node.name)
4698 result = self.rpc.call_run_oob(master_node, oob_program,
4699 self.op.command, node.name,
4703 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4704 node.name, result.fail_msg)
4705 node_entry.append((constants.RS_NODATA, None))
4706 else:
4707 try:
4708 self._CheckPayload(result)
4709 except errors.OpExecError, err:
4710 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4712 node_entry.append((constants.RS_NODATA, None))
4714 if self.op.command == constants.OOB_HEALTH:
4715 # For health we should log important events
4716 for item, status in result.payload:
4717 if status in [constants.OOB_STATUS_WARNING,
4718 constants.OOB_STATUS_CRITICAL]:
4719 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4720 item, node.name, status)
4722 if self.op.command == constants.OOB_POWER_ON:
4723 node.powered = True
4724 elif self.op.command == constants.OOB_POWER_OFF:
4725 node.powered = False
4726 elif self.op.command == constants.OOB_POWER_STATUS:
4727 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4728 if powered != node.powered:
4729 logging.warning(("Recorded power state (%s) of node '%s' does not"
4730 " match actual power state (%s)"), node.powered,
4733 # For configuration changing commands we should update the node
4734 if self.op.command in (constants.OOB_POWER_ON,
4735 constants.OOB_POWER_OFF):
4736 self.cfg.Update(node, feedback_fn)
4738 node_entry.append((constants.RS_NORMAL, result.payload))
4740 if (self.op.command == constants.OOB_POWER_ON and
4741 idx < len(self.nodes) - 1):
4742 time.sleep(self.op.power_delay)
4744 return ret
4746 def _CheckPayload(self, result):
4747 """Checks if the payload is valid.
4749 @param result: RPC result
4750 @raises errors.OpExecError: If payload is not valid
4753 errs = []
4754 if self.op.command == constants.OOB_HEALTH:
4755 if not isinstance(result.payload, list):
4756 errs.append("command 'health' is expected to return a list but got %s" %
4757 type(result.payload))
4758 else:
4759 for item, status in result.payload:
4760 if status not in constants.OOB_STATUSES:
4761 errs.append("health item '%s' has invalid status '%s'" %
4764 if self.op.command == constants.OOB_POWER_STATUS:
4765 if not isinstance(result.payload, dict):
4766 errs.append("power-status is expected to return a dict but got %s" %
4767 type(result.payload))
4769 if self.op.command in [
4770 constants.OOB_POWER_ON,
4771 constants.OOB_POWER_OFF,
4772 constants.OOB_POWER_CYCLE,
4774 if result.payload is not None:
4775 errs.append("%s is expected to not return payload but got '%s'" %
4776 (self.op.command, result.payload))
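# Contract checked above, per command: OOB_HEALTH must return a list of
# (item, status) pairs, OOB_POWER_STATUS a dict carrying the
# OOB_POWER_STATUS_POWERED flag, and power-on/off/cycle no payload at all.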
4779 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4780 utils.CommaJoin(errs))
4783 class _OsQuery(_QueryBase):
4784 FIELDS = query.OS_FIELDS
4786 def ExpandNames(self, lu):
4787 # Lock all nodes in shared mode
4788 # Temporary removal of locks, should be reverted later
4789 # TODO: reintroduce locks when they are lighter-weight
4790 lu.needed_locks = {}
4791 #self.share_locks[locking.LEVEL_NODE] = 1
4792 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4794 # The following variables interact with _QueryBase._GetNames
4795 if self.names:
4796 self.wanted = self.names
4797 else:
4798 self.wanted = locking.ALL_SET
4800 self.do_locking = self.use_locking
4802 def DeclareLocks(self, lu, level):
4806 def _DiagnoseByOS(rlist):
4807 """Remaps a per-node return list into an a per-os per-node dictionary
4809 @param rlist: a map with node names as keys and OS objects as values
4812 @return: a dictionary with osnames as keys and as value another
4813 map, with nodes as keys and tuples of (path, status, diagnose,
4814 variants, parameters, api_versions) as values, eg::
4816 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4817 (/srv/..., False, "invalid api")],
4818 "node2": [(/srv/..., True, "", [], [])]}
4822 all_os = {}
4823 # we build here the list of nodes that didn't fail the RPC (at RPC
4824 # level), so that nodes with a non-responding node daemon don't
4825 # make all OSes invalid
4826 good_nodes = [node_name for node_name in rlist
4827 if not rlist[node_name].fail_msg]
4828 for node_name, nr in rlist.items():
4829 if nr.fail_msg or not nr.payload:
4830 continue
4831 for (name, path, status, diagnose, variants,
4832 params, api_versions) in nr.payload:
4833 if name not in all_os:
4834 # build a list of nodes for this os containing empty lists
4835 # for each node in node_list
4836 all_os[name] = {}
4837 for nname in good_nodes:
4838 all_os[name][nname] = []
4839 # convert params from [name, help] to (name, help)
4840 params = [tuple(v) for v in params]
4841 all_os[name][node_name].append((path, status, diagnose,
4842 variants, params, api_versions))
4843 return all_os
4845 def _GetQueryData(self, lu):
4846 """Computes the list of nodes and their attributes.
4849 # Locking is not used
4850 assert not (compat.any(lu.glm.is_owned(level)
4851 for level in locking.LEVELS
4852 if level != locking.LEVEL_CLUSTER) or
4853 self.do_locking or self.use_locking)
4855 valid_nodes = [node.name
4856 for node in lu.cfg.GetAllNodesInfo().values()
4857 if not node.offline and node.vm_capable]
4858 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4859 cluster = lu.cfg.GetClusterInfo()
4861 data = {}
4863 for (os_name, os_data) in pol.items():
4864 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4865 hidden=(os_name in cluster.hidden_os),
4866 blacklisted=(os_name in cluster.blacklisted_os))
4868 variants = set()
4869 parameters = set()
4870 api_versions = set()
4872 for idx, osl in enumerate(os_data.values()):
4873 info.valid = bool(info.valid and osl and osl[0][1])
4874 if not info.valid:
4875 break
4877 (node_variants, node_params, node_api) = osl[0][3:6]
4878 if idx == 0:
4879 # First entry
4880 variants.update(node_variants)
4881 parameters.update(node_params)
4882 api_versions.update(node_api)
4883 else:
4884 # Filter out inconsistent values
4885 variants.intersection_update(node_variants)
4886 parameters.intersection_update(node_params)
4887 api_versions.intersection_update(node_api)
4889 info.variants = list(variants)
4890 info.parameters = list(parameters)
4891 info.api_versions = list(api_versions)
4893 data[os_name] = info
4895 # Prepare data in requested order
4896 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4897 if name in data]
4900 class LUOsDiagnose(NoHooksLU):
4901 """Logical unit for OS diagnose/query.
4907 def _BuildFilter(fields, names):
4908 """Builds a filter for querying OSes.
4911 name_filter = qlang.MakeSimpleFilter("name", names)
4913 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4914 # respective field is not requested
4915 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4916 for fname in ["hidden", "blacklisted"]
4917 if fname not in fields]
4918 if "valid" not in fields:
4919 status_filter.append([qlang.OP_TRUE, "valid"])
4921 if status_filter:
4922 status_filter.insert(0, qlang.OP_AND)
4923 else:
4924 status_filter = None
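# Illustrative example: a query that requests only the "name" field and no
# particular names ends up with a status filter along the lines of
# [OP_AND, [OP_NOT, [OP_TRUE, "hidden"]], [OP_NOT, [OP_TRUE, "blacklisted"]],
# [OP_TRUE, "valid"]], i.e. hidden, blacklisted and invalid OSes are dropped
# unless those fields were asked for explicitly.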
4926 if name_filter and status_filter:
4927 return [qlang.OP_AND, name_filter, status_filter]
4928 elif name_filter:
4929 return name_filter
4930 else:
4931 return status_filter
4933 def CheckArguments(self):
4934 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4935 self.op.output_fields, False)
4937 def ExpandNames(self):
4938 self.oq.ExpandNames(self)
4940 def Exec(self, feedback_fn):
4941 return self.oq.OldStyleQuery(self)
4944 class LUNodeRemove(LogicalUnit):
4945 """Logical unit for removing a node.
4948 HPATH = "node-remove"
4949 HTYPE = constants.HTYPE_NODE
4951 def BuildHooksEnv(self):
4956 "OP_TARGET": self.op.node_name,
4957 "NODE_NAME": self.op.node_name,
4960 def BuildHooksNodes(self):
4961 """Build hooks nodes.
4963 This doesn't run on the target node in the pre phase as a failed
4964 node would then be impossible to remove.
4967 all_nodes = self.cfg.GetNodeList()
4968 try:
4969 all_nodes.remove(self.op.node_name)
4970 except ValueError:
4971 pass
4972 return (all_nodes, all_nodes)
4974 def CheckPrereq(self):
4975 """Check prerequisites.
4978 - the node exists in the configuration
4979 - it does not have primary or secondary instances
4980 - it's not the master
4982 Any errors are signaled by raising errors.OpPrereqError.
4985 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4986 node = self.cfg.GetNodeInfo(self.op.node_name)
4987 assert node is not None
4989 masternode = self.cfg.GetMasterNode()
4990 if node.name == masternode:
4991 raise errors.OpPrereqError("Node is the master node, failover to another"
4992 " node is required", errors.ECODE_INVAL)
4994 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4995 if node.name in instance.all_nodes:
4996 raise errors.OpPrereqError("Instance %s is still running on the node,"
4997 " please remove first" % instance_name,
4999 self.op.node_name = node.name
5002 def Exec(self, feedback_fn):
5003 """Removes the node from the cluster.
5007 logging.info("Stopping the node daemon and removing configs from node %s",
5010 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5012 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5015 # Promote nodes to master candidate as needed
5016 _AdjustCandidatePool(self, exceptions=[node.name])
5017 self.context.RemoveNode(node.name)
5019 # Run post hooks on the node before it's removed
5020 _RunPostHook(self, node.name)
5022 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5023 msg = result.fail_msg
5025 self.LogWarning("Errors encountered on the remote node while leaving"
5026 " the cluster: %s", msg)
5028 # Remove node from our /etc/hosts
5029 if self.cfg.GetClusterInfo().modify_etc_hosts:
5030 master_node = self.cfg.GetMasterNode()
5031 result = self.rpc.call_etc_hosts_modify(master_node,
5032 constants.ETC_HOSTS_REMOVE,
5034 result.Raise("Can't update hosts file with new host data")
5035 _RedistributeAncillaryFiles(self)
5038 class _NodeQuery(_QueryBase):
5039 FIELDS = query.NODE_FIELDS
5041 def ExpandNames(self, lu):
5042 lu.needed_locks = {}
5043 lu.share_locks = _ShareAll()
5045 if self.names:
5046 self.wanted = _GetWantedNodes(lu, self.names)
5047 else:
5048 self.wanted = locking.ALL_SET
5050 self.do_locking = (self.use_locking and
5051 query.NQ_LIVE in self.requested_data)
5053 if self.do_locking:
5054 # If any non-static field is requested we need to lock the nodes
5055 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5057 def DeclareLocks(self, lu, level):
5060 def _GetQueryData(self, lu):
5061 """Computes the list of nodes and their attributes.
5064 all_info = lu.cfg.GetAllNodesInfo()
5066 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5068 # Gather data as requested
5069 if query.NQ_LIVE in self.requested_data:
5070 # filter out non-vm_capable nodes
5071 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5073 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5074 [lu.cfg.GetHypervisorType()])
5075 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5076 for (name, nresult) in node_data.items()
5077 if not nresult.fail_msg and nresult.payload)
5078 else:
5079 live_data = None
5081 if query.NQ_INST in self.requested_data:
5082 node_to_primary = dict([(name, set()) for name in nodenames])
5083 node_to_secondary = dict([(name, set()) for name in nodenames])
5085 inst_data = lu.cfg.GetAllInstancesInfo()
5087 for inst in inst_data.values():
5088 if inst.primary_node in node_to_primary:
5089 node_to_primary[inst.primary_node].add(inst.name)
5090 for secnode in inst.secondary_nodes:
5091 if secnode in node_to_secondary:
5092 node_to_secondary[secnode].add(inst.name)
5093 else:
5094 node_to_primary = None
5095 node_to_secondary = None
5097 if query.NQ_OOB in self.requested_data:
5098 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5099 for name, node in all_info.iteritems())
5103 if query.NQ_GROUP in self.requested_data:
5104 groups = lu.cfg.GetAllNodeGroupsInfo()
5108 return query.NodeQueryData([all_info[name] for name in nodenames],
5109 live_data, lu.cfg.GetMasterNode(),
5110 node_to_primary, node_to_secondary, groups,
5111 oob_support, lu.cfg.GetClusterInfo())
5114 class LUNodeQuery(NoHooksLU):
5115 """Logical unit for querying nodes.
5118 # pylint: disable=W0142
5121 def CheckArguments(self):
5122 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5123 self.op.output_fields, self.op.use_locking)
5125 def ExpandNames(self):
5126 self.nq.ExpandNames(self)
5128 def DeclareLocks(self, level):
5129 self.nq.DeclareLocks(self, level)
5131 def Exec(self, feedback_fn):
5132 return self.nq.OldStyleQuery(self)
5135 class LUNodeQueryvols(NoHooksLU):
5136 """Logical unit for getting volumes on node(s).
5140 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5141 _FIELDS_STATIC = utils.FieldSet("node")
5143 def CheckArguments(self):
5144 _CheckOutputFields(static=self._FIELDS_STATIC,
5145 dynamic=self._FIELDS_DYNAMIC,
5146 selected=self.op.output_fields)
5148 def ExpandNames(self):
5149 self.share_locks = _ShareAll()
5150 self.needed_locks = {}
5152 if not self.op.nodes:
5153 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5155 self.needed_locks[locking.LEVEL_NODE] = \
5156 _GetWantedNodes(self, self.op.nodes)
5158 def Exec(self, feedback_fn):
5159 """Computes the list of nodes and their attributes.
5162 nodenames = self.owned_locks(locking.LEVEL_NODE)
5163 volumes = self.rpc.call_node_volumes(nodenames)
5165 ilist = self.cfg.GetAllInstancesInfo()
5166 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5168 output = []
5169 for node in nodenames:
5170 nresult = volumes[node]
5171 if nresult.offline:
5172 continue
5173 msg = nresult.fail_msg
5174 if msg:
5175 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5176 continue
5178 node_vols = sorted(nresult.payload,
5179 key=operator.itemgetter("dev"))
5181 for vol in node_vols:
5182 node_output = []
5183 for field in self.op.output_fields:
5184 if field == "node":
5185 val = node
5186 elif field == "phys":
5187 val = vol["dev"]
5188 elif field == "vg":
5189 val = vol["vg"]
5190 elif field == "name":
5191 val = vol["name"]
5192 elif field == "size":
5193 val = int(float(vol["size"]))
5194 elif field == "instance":
5195 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5196 else:
5197 raise errors.ParameterError(field)
5198 node_output.append(str(val))
5200 output.append(node_output)
5202 return output
5205 class LUNodeQueryStorage(NoHooksLU):
5206 """Logical unit for getting information on storage units on node(s).
5209 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5212 def CheckArguments(self):
5213 _CheckOutputFields(static=self._FIELDS_STATIC,
5214 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5215 selected=self.op.output_fields)
5217 def ExpandNames(self):
5218 self.share_locks = _ShareAll()
5219 self.needed_locks = {}
5221 if self.op.nodes:
5222 self.needed_locks[locking.LEVEL_NODE] = \
5223 _GetWantedNodes(self, self.op.nodes)
5224 else:
5225 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5227 def Exec(self, feedback_fn):
5228 """Computes the list of nodes and their attributes.
5231 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5233 # Always get name to sort by
5234 if constants.SF_NAME in self.op.output_fields:
5235 fields = self.op.output_fields[:]
5236 else:
5237 fields = [constants.SF_NAME] + self.op.output_fields
5239 # Never ask for node or type as it's only known to the LU
5240 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5241 while extra in fields:
5242 fields.remove(extra)
5244 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5245 name_idx = field_idx[constants.SF_NAME]
5247 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5248 data = self.rpc.call_storage_list(self.nodes,
5249 self.op.storage_type, st_args,
5250 self.op.name, fields)
5254 for node in utils.NiceSort(self.nodes):
5255 nresult = data[node]
5256 if nresult.offline:
5257 continue
5259 msg = nresult.fail_msg
5260 if msg:
5261 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5262 continue
5264 rows = dict([(row[name_idx], row) for row in nresult.payload])
5266 for name in utils.NiceSort(rows.keys()):
5271 for field in self.op.output_fields:
5272 if field == constants.SF_NODE:
5273 val = node
5274 elif field == constants.SF_TYPE:
5275 val = self.op.storage_type
5276 elif field in field_idx:
5277 val = row[field_idx[field]]
5278 else:
5279 raise errors.ParameterError(field)
5288 class _InstanceQuery(_QueryBase):
5289 FIELDS = query.INSTANCE_FIELDS
5291 def ExpandNames(self, lu):
5292 lu.needed_locks = {}
5293 lu.share_locks = _ShareAll()
5295 if self.names:
5296 self.wanted = _GetWantedInstances(lu, self.names)
5297 else:
5298 self.wanted = locking.ALL_SET
5300 self.do_locking = (self.use_locking and
5301 query.IQ_LIVE in self.requested_data)
5302 if self.do_locking:
5303 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5304 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5305 lu.needed_locks[locking.LEVEL_NODE] = []
5306 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5308 self.do_grouplocks = (self.do_locking and
5309 query.IQ_NODES in self.requested_data)
5311 def DeclareLocks(self, lu, level):
5313 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5314 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5316 # Lock all groups used by instances optimistically; this requires going
5317 # via the node before it's locked, requiring verification later on
5318 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5320 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5321 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5322 elif level == locking.LEVEL_NODE:
5323 lu._LockInstancesNodes() # pylint: disable=W0212
5326 def _CheckGroupLocks(lu):
5327 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5328 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5330 # Check if node groups for locked instances are still correct
5331 for instance_name in owned_instances:
5332 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5334 def _GetQueryData(self, lu):
5335 """Computes the list of instances and their attributes.
5338 if self.do_grouplocks:
5339 self._CheckGroupLocks(lu)
5341 cluster = lu.cfg.GetClusterInfo()
5342 all_info = lu.cfg.GetAllInstancesInfo()
5344 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5346 instance_list = [all_info[name] for name in instance_names]
5347 nodes = frozenset(itertools.chain(*(inst.all_nodes
5348 for inst in instance_list)))
5349 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5350 bad_nodes = []
5351 offline_nodes = []
5352 wrongnode_inst = set()
5354 # Gather data as requested
5355 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5356 live_data = {}
5357 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5358 for name in nodes:
5359 result = node_data[name]
5360 if result.offline:
5361 # offline nodes will be in both lists
5362 assert result.fail_msg
5363 offline_nodes.append(name)
5364 if result.fail_msg:
5365 bad_nodes.append(name)
5366 elif result.payload:
5367 for inst in result.payload:
5368 if inst in all_info:
5369 if all_info[inst].primary_node == name:
5370 live_data.update(result.payload)
5371 else:
5372 wrongnode_inst.add(inst)
5374 # orphan instance; we don't list it here as we don't
5375 # handle this case yet in the output of instance listing
5376 logging.warning("Orphan instance '%s' found on node %s",
5378 # else no instance is alive
5382 if query.IQ_DISKUSAGE in self.requested_data:
5383 disk_usage = dict((inst.name,
5384 _ComputeDiskSize(inst.disk_template,
5385 [{constants.IDISK_SIZE: disk.size}
5386 for disk in inst.disks]))
5387 for inst in instance_list)
5391 if query.IQ_CONSOLE in self.requested_data:
5393 for inst in instance_list:
5394 if inst.name in live_data:
5395 # Instance is running
5396 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5397 else:
5398 consinfo[inst.name] = None
5399 assert set(consinfo.keys()) == set(instance_names)
5403 if query.IQ_NODES in self.requested_data:
5404 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5406 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5407 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5408 for uuid in set(map(operator.attrgetter("group"),
5414 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5415 disk_usage, offline_nodes, bad_nodes,
5416 live_data, wrongnode_inst, consinfo,
5420 class LUQuery(NoHooksLU):
5421 """Query for resources/items of a certain kind.
5424 # pylint: disable=W0142
5427 def CheckArguments(self):
5428 qcls = _GetQueryImplementation(self.op.what)
5430 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5432 def ExpandNames(self):
5433 self.impl.ExpandNames(self)
5435 def DeclareLocks(self, level):
5436 self.impl.DeclareLocks(self, level)
5438 def Exec(self, feedback_fn):
5439 return self.impl.NewStyleQuery(self)
5442 class LUQueryFields(NoHooksLU):
5443 """Query for resources/items of a certain kind.
5446 # pylint: disable=W0142
5449 def CheckArguments(self):
5450 self.qcls = _GetQueryImplementation(self.op.what)
5452 def ExpandNames(self):
5453 self.needed_locks = {}
5455 def Exec(self, feedback_fn):
5456 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5459 class LUNodeModifyStorage(NoHooksLU):
5460 """Logical unit for modifying a storage volume on a node.
5465 def CheckArguments(self):
5466 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5468 storage_type = self.op.storage_type
5470 try:
5471 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5472 except KeyError:
5473 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5474 " modified" % storage_type,
5477 diff = set(self.op.changes.keys()) - modifiable
5478 if diff:
5479 raise errors.OpPrereqError("The following fields can not be modified for"
5480 " storage units of type '%s': %r" %
5481 (storage_type, list(diff)),
5484 def ExpandNames(self):
5485 self.needed_locks = {
5486 locking.LEVEL_NODE: self.op.node_name,
5489 def Exec(self, feedback_fn):
5490 """Computes the list of nodes and their attributes.
5493 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5494 result = self.rpc.call_storage_modify(self.op.node_name,
5495 self.op.storage_type, st_args,
5496 self.op.name, self.op.changes)
5497 result.Raise("Failed to modify storage unit '%s' on %s" %
5498 (self.op.name, self.op.node_name))
5501 class LUNodeAdd(LogicalUnit):
5502 """Logical unit for adding node to the cluster.
5505 HPATH = "node-add"
5506 HTYPE = constants.HTYPE_NODE
5507 _NFLAGS = ["master_capable", "vm_capable"]
5509 def CheckArguments(self):
5510 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5511 # validate/normalize the node name
5512 self.hostname = netutils.GetHostname(name=self.op.node_name,
5513 family=self.primary_ip_family)
5514 self.op.node_name = self.hostname.name
5516 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5517 raise errors.OpPrereqError("Cannot readd the master node",
5520 if self.op.readd and self.op.group:
5521 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5522 " being readded", errors.ECODE_INVAL)
5524 def BuildHooksEnv(self):
5527 This will run on all nodes before, and on all nodes + the new node after.
5531 "OP_TARGET": self.op.node_name,
5532 "NODE_NAME": self.op.node_name,
5533 "NODE_PIP": self.op.primary_ip,
5534 "NODE_SIP": self.op.secondary_ip,
5535 "MASTER_CAPABLE": str(self.op.master_capable),
5536 "VM_CAPABLE": str(self.op.vm_capable),
5539 def BuildHooksNodes(self):
5540 """Build hooks nodes.
5543 # Exclude added node
5544 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5545 post_nodes = pre_nodes + [self.op.node_name, ]
5547 return (pre_nodes, post_nodes)
5549 def CheckPrereq(self):
5550 """Check prerequisites.
5553 - the new node is not already in the config
5555 - its parameters (single/dual homed) matches the cluster
5557 Any errors are signaled by raising errors.OpPrereqError.
5560 cfg = self.cfg
5561 hostname = self.hostname
5562 node = hostname.name
5563 primary_ip = self.op.primary_ip = hostname.ip
5564 if self.op.secondary_ip is None:
5565 if self.primary_ip_family == netutils.IP6Address.family:
5566 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5567 " IPv4 address must be given as secondary",
5569 self.op.secondary_ip = primary_ip
5571 secondary_ip = self.op.secondary_ip
5572 if not netutils.IP4Address.IsValid(secondary_ip):
5573 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5574 " address" % secondary_ip, errors.ECODE_INVAL)
5576 node_list = cfg.GetNodeList()
5577 if not self.op.readd and node in node_list:
5578 raise errors.OpPrereqError("Node %s is already in the configuration" %
5579 node, errors.ECODE_EXISTS)
5580 elif self.op.readd and node not in node_list:
5581 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5584 self.changed_primary_ip = False
5586 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5587 if self.op.readd and node == existing_node_name:
5588 if existing_node.secondary_ip != secondary_ip:
5589 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5590 " address configuration as before",
5592 if existing_node.primary_ip != primary_ip:
5593 self.changed_primary_ip = True
5595 continue
5597 if (existing_node.primary_ip == primary_ip or
5598 existing_node.secondary_ip == primary_ip or
5599 existing_node.primary_ip == secondary_ip or
5600 existing_node.secondary_ip == secondary_ip):
5601 raise errors.OpPrereqError("New node ip address(es) conflict with"
5602 " existing node %s" % existing_node.name,
5603 errors.ECODE_NOTUNIQUE)
5605 # After this 'if' block, None is no longer a valid value for the
5606 # _capable op attributes
5607 if self.op.readd:
5608 old_node = self.cfg.GetNodeInfo(node)
5609 assert old_node is not None, "Can't retrieve locked node %s" % node
5610 for attr in self._NFLAGS:
5611 if getattr(self.op, attr) is None:
5612 setattr(self.op, attr, getattr(old_node, attr))
5613 else:
5614 for attr in self._NFLAGS:
5615 if getattr(self.op, attr) is None:
5616 setattr(self.op, attr, True)
5618 if self.op.readd and not self.op.vm_capable:
5619 pri, sec = cfg.GetNodeInstances(node)
5621 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5622 " flag set to false, but it already holds"
5623 " instances" % node,
5626 # check that the type of the node (single versus dual homed) is the
5627 # same as for the master
5628 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5629 master_singlehomed = myself.secondary_ip == myself.primary_ip
5630 newbie_singlehomed = secondary_ip == primary_ip
5631 if master_singlehomed != newbie_singlehomed:
5632 if master_singlehomed:
5633 raise errors.OpPrereqError("The master has no secondary ip but the"
5634 " new node has one",
5637 raise errors.OpPrereqError("The master has a secondary ip but the"
5638 " new node doesn't have one",
5641 # checks reachability
5642 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5643 raise errors.OpPrereqError("Node not reachable by ping",
5644 errors.ECODE_ENVIRON)
5646 if not newbie_singlehomed:
5647 # check reachability from my secondary ip to newbie's secondary ip
5648 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5649 source=myself.secondary_ip):
5650 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5651 " based ping to node daemon port",
5652 errors.ECODE_ENVIRON)
5654 if self.op.readd:
5655 exceptions = [node]
5656 else:
5657 exceptions = []
5659 if self.op.master_capable:
5660 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5661 else:
5662 self.master_candidate = False
5664 if self.op.readd:
5665 self.new_node = old_node
5666 else:
5667 node_group = cfg.LookupNodeGroup(self.op.group)
5668 self.new_node = objects.Node(name=node,
5669 primary_ip=primary_ip,
5670 secondary_ip=secondary_ip,
5671 master_candidate=self.master_candidate,
5672 offline=False, drained=False,
5675 if self.op.ndparams:
5676 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5678 if self.op.hv_state:
5679 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5681 if self.op.disk_state:
5682 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5684 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5685 # it a property on the base class.
5686 result = rpc.DnsOnlyRunner().call_version([node])[node]
5687 result.Raise("Can't get version information from node %s" % node)
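# (Presumably a DNS-only RPC runner is used because the new node is not yet
# part of the cluster configuration, so its address cannot be looked up
# there; this rationale is an assumption, not stated in the original code.)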
5688 if constants.PROTOCOL_VERSION == result.payload:
5689 logging.info("Communication to node %s fine, sw version %s match",
5690 node, result.payload)
5692 raise errors.OpPrereqError("Version mismatch master version %s,"
5693 " node version %s" %
5694 (constants.PROTOCOL_VERSION, result.payload),
5695 errors.ECODE_ENVIRON)
5697 def Exec(self, feedback_fn):
5698 """Adds the new node to the cluster.
5701 new_node = self.new_node
5702 node = new_node.name
5704 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5707 # We are adding a new node, so we assume it's powered
5708 new_node.powered = True
5710 # for re-adds, reset the offline/drained/master-candidate flags;
5711 # we need to reset here, otherwise offline would prevent RPC calls
5712 # later in the procedure; this also means that if the re-add
5713 # fails, we are left with a non-offlined, broken node
5715 new_node.drained = new_node.offline = False # pylint: disable=W0201
5716 self.LogInfo("Readding a node, the offline/drained flags were reset")
5717 # if we demote the node, we do cleanup later in the procedure
5718 new_node.master_candidate = self.master_candidate
5719 if self.changed_primary_ip:
5720 new_node.primary_ip = self.op.primary_ip
5722 # copy the master/vm_capable flags
5723 for attr in self._NFLAGS:
5724 setattr(new_node, attr, getattr(self.op, attr))
5726 # notify the user about any possible mc promotion
5727 if new_node.master_candidate:
5728 self.LogInfo("Node will be a master candidate")
5730 if self.op.ndparams:
5731 new_node.ndparams = self.op.ndparams
5733 new_node.ndparams = {}
5735 if self.op.hv_state:
5736 new_node.hv_state_static = self.new_hv_state
5738 if self.op.disk_state:
5739 new_node.disk_state_static = self.new_disk_state
5741 # Add node to our /etc/hosts, and add key to known_hosts
5742 if self.cfg.GetClusterInfo().modify_etc_hosts:
5743 master_node = self.cfg.GetMasterNode()
5744 result = self.rpc.call_etc_hosts_modify(master_node,
5745 constants.ETC_HOSTS_ADD,
5748 result.Raise("Can't update hosts file with new host data")
5750 if new_node.secondary_ip != new_node.primary_ip:
5751 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5754 node_verify_list = [self.cfg.GetMasterNode()]
5755 node_verify_param = {
5756 constants.NV_NODELIST: ([node], {}),
5757 # TODO: do a node-net-test as well?
5760 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5761 self.cfg.GetClusterName())
5762 for verifier in node_verify_list:
5763 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5764 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5766 for failed in nl_payload:
5767 feedback_fn("ssh/hostname verification failed"
5768 " (checking from %s): %s" %
5769 (verifier, nl_payload[failed]))
5770 raise errors.OpExecError("ssh/hostname verification failed")
5773 _RedistributeAncillaryFiles(self)
5774 self.context.ReaddNode(new_node)
5775 # make sure we redistribute the config
5776 self.cfg.Update(new_node, feedback_fn)
5777 # and make sure the new node will not have old files around
5778 if not new_node.master_candidate:
5779 result = self.rpc.call_node_demote_from_mc(new_node.name)
5780 msg = result.fail_msg
5782 self.LogWarning("Node failed to demote itself from master"
5783 " candidate status: %s" % msg)
5785 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5786 additional_vm=self.op.vm_capable)
5787 self.context.AddNode(new_node, self.proc.GetECId())
5790 class LUNodeSetParams(LogicalUnit):
5791 """Modifies the parameters of a node.
5793 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5794 to the node role (as _ROLE_*)
5795 @cvar _R2F: a dictionary from node role to tuples of flags
5796 @cvar _FLAGS: a list of attribute names corresponding to the flags
5799 HPATH = "node-modify"
5800 HTYPE = constants.HTYPE_NODE
5802 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5804 (True, False, False): _ROLE_CANDIDATE,
5805 (False, True, False): _ROLE_DRAINED,
5806 (False, False, True): _ROLE_OFFLINE,
5807 (False, False, False): _ROLE_REGULAR,
5809 _R2F = dict((v, k) for k, v in _F2R.items())
5810 _FLAGS = ["master_candidate", "drained", "offline"]
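# Example of the mapping above: a node with flags
# (master_candidate=True, drained=False, offline=False) has role
# _ROLE_CANDIDATE, and _R2F[_ROLE_CANDIDATE] yields that flag tuple back.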
5812 def CheckArguments(self):
5813 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5814 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5815 self.op.master_capable, self.op.vm_capable,
5816 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5818 if all_mods.count(None) == len(all_mods):
5819 raise errors.OpPrereqError("Please pass at least one modification",
5821 if all_mods.count(True) > 1:
5822 raise errors.OpPrereqError("Can't set the node into more than one"
5823 " state at the same time",
5826 # Boolean value that tells us whether we might be demoting from MC
5827 self.might_demote = (self.op.master_candidate == False or
5828 self.op.offline == True or
5829 self.op.drained == True or
5830 self.op.master_capable == False)
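# The explicit "== False" / "== True" comparisons are intentional: these op
# attributes are tri-state, with None meaning "leave unchanged".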
5832 if self.op.secondary_ip:
5833 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5834 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5835 " address" % self.op.secondary_ip,
5838 self.lock_all = self.op.auto_promote and self.might_demote
5839 self.lock_instances = self.op.secondary_ip is not None
5841 def _InstanceFilter(self, instance):
5842 """Filter for getting affected instances.
5845 return (instance.disk_template in constants.DTS_INT_MIRROR and
5846 self.op.node_name in instance.all_nodes)
5848 def ExpandNames(self):
5850 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5852 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5854 # Since modifying a node can have severe effects on currently running
5855 # operations, the resource lock is at least acquired in shared mode
5856 self.needed_locks[locking.LEVEL_NODE_RES] = \
5857 self.needed_locks[locking.LEVEL_NODE]
5859 # Get node resource and instance locks in shared mode; they are not used
5860 # for anything but read-only access
5861 self.share_locks[locking.LEVEL_NODE_RES] = 1
5862 self.share_locks[locking.LEVEL_INSTANCE] = 1
5864 if self.lock_instances:
5865 self.needed_locks[locking.LEVEL_INSTANCE] = \
5866 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5868 def BuildHooksEnv(self):
5871 This runs on the master node.
5875 "OP_TARGET": self.op.node_name,
5876 "MASTER_CANDIDATE": str(self.op.master_candidate),
5877 "OFFLINE": str(self.op.offline),
5878 "DRAINED": str(self.op.drained),
5879 "MASTER_CAPABLE": str(self.op.master_capable),
5880 "VM_CAPABLE": str(self.op.vm_capable),
5883 def BuildHooksNodes(self):
5884 """Build hooks nodes.
5887 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5890 def CheckPrereq(self):
5891 """Check prerequisites.
5893 This only checks the instance list against the existing names.
5896 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5898 if self.lock_instances:
5899 affected_instances = \
5900 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5902 # Verify instance locks
5903 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5904 wanted_instances = frozenset(affected_instances.keys())
5905 if wanted_instances - owned_instances:
5906 raise errors.OpPrereqError("Instances affected by changing node %s's"
5907 " secondary IP address have changed since"
5908 " locks were acquired, wanted '%s', have"
5909 " '%s'; retry the operation" %
5911 utils.CommaJoin(wanted_instances),
5912 utils.CommaJoin(owned_instances)),
5915 affected_instances = None
5917 if (self.op.master_candidate is not None or
5918 self.op.drained is not None or
5919 self.op.offline is not None):
5920 # we can't change the master's node flags
5921 if self.op.node_name == self.cfg.GetMasterNode():
5922 raise errors.OpPrereqError("The master role can be changed"
5923 " only via master-failover",
5926 if self.op.master_candidate and not node.master_capable:
5927 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5928 " it a master candidate" % node.name,
5931 if self.op.vm_capable == False:
5932 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5934 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5935 " the vm_capable flag" % node.name,
5938 if node.master_candidate and self.might_demote and not self.lock_all:
5939 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5940 # check if, after removing the current node, we're missing master candidates
5942 (mc_remaining, mc_should, _) = \
5943 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5944 if mc_remaining < mc_should:
5945 raise errors.OpPrereqError("Not enough master candidates, please"
5946 " pass auto promote option to allow"
5947 " promotion (--auto-promote or RAPI"
5948 " auto_promote=True)", errors.ECODE_STATE)
5950 self.old_flags = old_flags = (node.master_candidate,
5951 node.drained, node.offline)
5952 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5953 self.old_role = old_role = self._F2R[old_flags]
5955 # Check for ineffective changes
5956 for attr in self._FLAGS:
5957 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5958 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5959 setattr(self.op, attr, None)
5961 # Past this point, any flag change to False means a transition
5962 # away from the respective state, as only real changes are kept
5964 # TODO: We might query the real power state if it supports OOB
5965 if _SupportsOob(self.cfg, node):
5966 if self.op.offline is False and not (node.powered or
5967 self.op.powered == True):
5968 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5969 " offline status can be reset") %
5971 elif self.op.powered is not None:
5972 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5973 " as it does not support out-of-band"
5974 " handling") % self.op.node_name)
5976 # If we're being de-offlined or drained, we'll make ourselves a master candidate if needed
5977 if (self.op.drained == False or self.op.offline == False or
5978 (self.op.master_capable and not node.master_capable)):
5979 if _DecideSelfPromotion(self):
5980 self.op.master_candidate = True
5981 self.LogInfo("Auto-promoting node to master candidate")
5983 # If we're no longer master capable, we'll demote ourselves from MC
5984 if self.op.master_capable == False and node.master_candidate:
5985 self.LogInfo("Demoting from master candidate")
5986 self.op.master_candidate = False
5989 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5990 if self.op.master_candidate:
5991 new_role = self._ROLE_CANDIDATE
5992 elif self.op.drained:
5993 new_role = self._ROLE_DRAINED
5994 elif self.op.offline:
5995 new_role = self._ROLE_OFFLINE
5996 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5997 # False is still in new flags, which means we're un-setting (the offline state, for example)
5999 new_role = self._ROLE_REGULAR
6000 else: # no new flags, nothing, keep old role
6003 self.new_role = new_role
6005 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6006 # Trying to transition out of offline status
6007 result = self.rpc.call_version([node.name])[node.name]
6009 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6010 " to report its version: %s" %
6011 (node.name, result.fail_msg),
6014 self.LogWarning("Transitioning node from offline to online state"
6015 " without using re-add. Please make sure the node"
6018 if self.op.secondary_ip:
6019 # Ok even without locking, because this can't be changed by any LU
6020 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6021 master_singlehomed = master.secondary_ip == master.primary_ip
6022 if master_singlehomed and self.op.secondary_ip:
6023 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
6024 " homed cluster", errors.ECODE_INVAL)
6026 assert not (frozenset(affected_instances) -
6027 self.owned_locks(locking.LEVEL_INSTANCE))
6030 if affected_instances:
6031 raise errors.OpPrereqError("Cannot change secondary IP address:"
6032 " offline node has instances (%s)"
6033 " configured to use it" %
6034 utils.CommaJoin(affected_instances.keys()))
6036 # On online nodes, check that no instances are running, and that
6037 # the node has the new ip and we can reach it.
6038 for instance in affected_instances.values():
6039 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6040 msg="cannot change secondary ip")
6042 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6043 if master.name != node.name:
6044 # check reachability from master secondary ip to new secondary ip
6045 if not netutils.TcpPing(self.op.secondary_ip,
6046 constants.DEFAULT_NODED_PORT,
6047 source=master.secondary_ip):
6048 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6049 " based ping to node daemon port",
6050 errors.ECODE_ENVIRON)
6052 if self.op.ndparams:
6053 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6054 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6055 self.new_ndparams = new_ndparams
6057 if self.op.hv_state:
6058 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6059 self.node.hv_state_static)
6061 if self.op.disk_state:
6062 self.new_disk_state = \
6063 _MergeAndVerifyDiskState(self.op.disk_state,
6064 self.node.disk_state_static)
6066 def Exec(self, feedback_fn):
6071 old_role = self.old_role
6072 new_role = self.new_role
6076 if self.op.ndparams:
6077 node.ndparams = self.new_ndparams
6079 if self.op.powered is not None:
6080 node.powered = self.op.powered
6082 if self.op.hv_state:
6083 node.hv_state_static = self.new_hv_state
6085 if self.op.disk_state:
6086 node.disk_state_static = self.new_disk_state
6088 for attr in ["master_capable", "vm_capable"]:
6089 val = getattr(self.op, attr)
6091 setattr(node, attr, val)
6092 result.append((attr, str(val)))
6094 if new_role != old_role:
6095 # Tell the node to demote itself, if no longer MC and not offline
6096 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6097 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6099 self.LogWarning("Node failed to demote itself: %s", msg)
6101 new_flags = self._R2F[new_role]
6102 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6104 result.append((desc, str(nf)))
6105 (node.master_candidate, node.drained, node.offline) = new_flags
6107 # we locked all nodes, we adjust the CP before updating this node
6109 _AdjustCandidatePool(self, [node.name])
6111 if self.op.secondary_ip:
6112 node.secondary_ip = self.op.secondary_ip
6113 result.append(("secondary_ip", self.op.secondary_ip))
6115 # this will trigger configuration file update, if needed
6116 self.cfg.Update(node, feedback_fn)
6118 # this will trigger job queue propagation or cleanup if the mc flag changed
6120 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6121 self.context.ReaddNode(node)
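# (the count() == 1 test above is true exactly when the node either gained
# or lost the master candidate role)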
6126 class LUNodePowercycle(NoHooksLU):
6127 """Powercycles a node.
6132 def CheckArguments(self):
6133 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6134 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6135 raise errors.OpPrereqError("The node is the master and the force"
6136 " parameter was not set",
6139 def ExpandNames(self):
6140 """Locking for PowercycleNode.
6142 This is a last-resort option and shouldn't block on other
6143 jobs. Therefore, we grab no locks.
6146 self.needed_locks = {}
6148 def Exec(self, feedback_fn):
6152 result = self.rpc.call_node_powercycle(self.op.node_name,
6153 self.cfg.GetHypervisorType())
6154 result.Raise("Failed to schedule the reboot")
6155 return result.payload
6158 class LUClusterQuery(NoHooksLU):
6159 """Query cluster configuration.
6164 def ExpandNames(self):
6165 self.needed_locks = {}
6167 def Exec(self, feedback_fn):
6168 """Return cluster config.
6171 cluster = self.cfg.GetClusterInfo()
6174 # Filter just for enabled hypervisors
6175 for os_name, hv_dict in cluster.os_hvp.items():
6176 os_hvp[os_name] = {}
6177 for hv_name, hv_params in hv_dict.items():
6178 if hv_name in cluster.enabled_hypervisors:
6179 os_hvp[os_name][hv_name] = hv_params
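# After this loop, os_hvp only contains per-OS hypervisor parameters for
# hypervisors that are enabled on the cluster.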
6181 # Convert ip_family to ip_version
6182 primary_ip_version = constants.IP4_VERSION
6183 if cluster.primary_ip_family == netutils.IP6Address.family:
6184 primary_ip_version = constants.IP6_VERSION
6187 "software_version": constants.RELEASE_VERSION,
6188 "protocol_version": constants.PROTOCOL_VERSION,
6189 "config_version": constants.CONFIG_VERSION,
6190 "os_api_version": max(constants.OS_API_VERSIONS),
6191 "export_version": constants.EXPORT_VERSION,
6192 "architecture": runtime.GetArchInfo(),
6193 "name": cluster.cluster_name,
6194 "master": cluster.master_node,
6195 "default_hypervisor": cluster.primary_hypervisor,
6196 "enabled_hypervisors": cluster.enabled_hypervisors,
6197 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6198 for hypervisor_name in cluster.enabled_hypervisors]),
6200 "beparams": cluster.beparams,
6201 "osparams": cluster.osparams,
6202 "ipolicy": cluster.ipolicy,
6203 "nicparams": cluster.nicparams,
6204 "ndparams": cluster.ndparams,
6205 "diskparams": cluster.diskparams,
6206 "candidate_pool_size": cluster.candidate_pool_size,
6207 "master_netdev": cluster.master_netdev,
6208 "master_netmask": cluster.master_netmask,
6209 "use_external_mip_script": cluster.use_external_mip_script,
6210 "volume_group_name": cluster.volume_group_name,
6211 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6212 "file_storage_dir": cluster.file_storage_dir,
6213 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6214 "maintain_node_health": cluster.maintain_node_health,
6215 "ctime": cluster.ctime,
6216 "mtime": cluster.mtime,
6217 "uuid": cluster.uuid,
6218 "tags": list(cluster.GetTags()),
6219 "uid_pool": cluster.uid_pool,
6220 "default_iallocator": cluster.default_iallocator,
6221 "reserved_lvs": cluster.reserved_lvs,
6222 "primary_ip_version": primary_ip_version,
6223 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6224 "hidden_os": cluster.hidden_os,
6225 "blacklisted_os": cluster.blacklisted_os,
6231 class LUClusterConfigQuery(NoHooksLU):
6232 """Return configuration values.
6237 def CheckArguments(self):
6238 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6240 def ExpandNames(self):
6241 self.cq.ExpandNames(self)
6243 def DeclareLocks(self, level):
6244 self.cq.DeclareLocks(self, level)
6246 def Exec(self, feedback_fn):
6247 result = self.cq.OldStyleQuery(self)
6249 assert len(result) == 1
6254 class _ClusterQuery(_QueryBase):
6255 FIELDS = query.CLUSTER_FIELDS
6257 #: Do not sort (there is only one item)
6260 def ExpandNames(self, lu):
6261 lu.needed_locks = {}
6263 # The following variables interact with _QueryBase._GetNames
6264 self.wanted = locking.ALL_SET
6265 self.do_locking = self.use_locking
6268 raise errors.OpPrereqError("Can not use locking for cluster queries",
6271 def DeclareLocks(self, lu, level):
6274 def _GetQueryData(self, lu):
6275 """Collects the data for the cluster query.
6278 # Locking is not used
6279 assert not (compat.any(lu.glm.is_owned(level)
6280 for level in locking.LEVELS
6281 if level != locking.LEVEL_CLUSTER) or
6282 self.do_locking or self.use_locking)
6284 if query.CQ_CONFIG in self.requested_data:
6285 cluster = lu.cfg.GetClusterInfo()
6287 cluster = NotImplemented
6289 if query.CQ_QUEUE_DRAINED in self.requested_data:
6290 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6292 drain_flag = NotImplemented
6294 if query.CQ_WATCHER_PAUSE in self.requested_data:
6295 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6297 watcher_pause = NotImplemented
6299 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6302 class LUInstanceActivateDisks(NoHooksLU):
6303 """Bring up an instance's disks.
6308 def ExpandNames(self):
6309 self._ExpandAndLockInstance()
6310 self.needed_locks[locking.LEVEL_NODE] = []
6311 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6313 def DeclareLocks(self, level):
6314 if level == locking.LEVEL_NODE:
6315 self._LockInstancesNodes()
6317 def CheckPrereq(self):
6318 """Check prerequisites.
6320 This checks that the instance is in the cluster.
6323 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6324 assert self.instance is not None, \
6325 "Cannot retrieve locked instance %s" % self.op.instance_name
6326 _CheckNodeOnline(self, self.instance.primary_node)
6328 def Exec(self, feedback_fn):
6329 """Activate the disks.
6332 disks_ok, disks_info = \
6333 _AssembleInstanceDisks(self, self.instance,
6334 ignore_size=self.op.ignore_size)
6336 raise errors.OpExecError("Cannot activate block devices")
6341 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6343 """Prepare the block devices for an instance.
6345 This sets up the block devices on all nodes.
6347 @type lu: L{LogicalUnit}
6348 @param lu: the logical unit on whose behalf we execute
6349 @type instance: L{objects.Instance}
6350 @param instance: the instance for whose disks we assemble
6351 @type disks: list of L{objects.Disk} or None
6352 @param disks: which disks to assemble (or all, if None)
6353 @type ignore_secondaries: boolean
6354 @param ignore_secondaries: if true, errors on secondary nodes
6355 won't result in an error return from the function
6356 @type ignore_size: boolean
6357 @param ignore_size: if true, the current known size of the disk
6358 will not be used during the disk activation, useful for cases
6359 when the size is wrong
6360 @return: False if the operation failed, otherwise a list of
6361 (host, instance_visible_name, node_visible_name)
6362 with the mapping from node devices to instance devices
6367 iname = instance.name
6368 disks = _ExpandCheckDisks(instance, disks)
6370 # With the two-pass mechanism we try to reduce the window of
6371 # opportunity for the race condition of switching DRBD to primary
6372 # before handshaking has occurred, but we do not eliminate it
6374 # The proper fix would be to wait (with some limits) until the
6375 # connection has been made and drbd transitions from WFConnection
6376 # into any other network-connected state (Connected, SyncTarget, SyncSource, etc.)
6379 # 1st pass, assemble on all nodes in secondary mode
6380 for idx, inst_disk in enumerate(disks):
6381 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6383 node_disk = node_disk.Copy()
6384 node_disk.UnsetSize()
6385 lu.cfg.SetDiskID(node_disk, node)
6386 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6388 msg = result.fail_msg
6390 is_offline_secondary = (node in instance.secondary_nodes and
6392 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6393 " (is_primary=False, pass=1): %s",
6394 inst_disk.iv_name, node, msg)
6395 if not (ignore_secondaries or is_offline_secondary):
6398 # FIXME: race condition on drbd migration to primary
6400 # 2nd pass, do only the primary node
6401 for idx, inst_disk in enumerate(disks):
6404 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6405 if node != instance.primary_node:
6408 node_disk = node_disk.Copy()
6409 node_disk.UnsetSize()
6410 lu.cfg.SetDiskID(node_disk, node)
6411 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6413 msg = result.fail_msg
6415 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6416 " (is_primary=True, pass=2): %s",
6417 inst_disk.iv_name, node, msg)
6420 dev_path = result.payload
6422 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6424 # leave the disks configured for the primary node
6425 # this is a workaround that would be fixed better by
6426 # improving the logical/physical id handling
6428 lu.cfg.SetDiskID(disk, instance.primary_node)
6430 return disks_ok, device_info
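# Typical usage (cf. LUInstanceActivateDisks.Exec above):
#   disks_ok, disks_info = _AssembleInstanceDisks(lu, instance, ignore_size=...)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")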
6433 def _StartInstanceDisks(lu, instance, force):
6434 """Start the disks of an instance.
6437 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6438 ignore_secondaries=force)
6440 _ShutdownInstanceDisks(lu, instance)
6441 if force is not None and not force:
6442 lu.proc.LogWarning("", hint="If the message above refers to a"
6444 " you can retry the operation using '--force'.")
6445 raise errors.OpExecError("Disk consistency error")
6448 class LUInstanceDeactivateDisks(NoHooksLU):
6449 """Shutdown an instance's disks.
6454 def ExpandNames(self):
6455 self._ExpandAndLockInstance()
6456 self.needed_locks[locking.LEVEL_NODE] = []
6457 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6459 def DeclareLocks(self, level):
6460 if level == locking.LEVEL_NODE:
6461 self._LockInstancesNodes()
6463 def CheckPrereq(self):
6464 """Check prerequisites.
6466 This checks that the instance is in the cluster.
6469 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6470 assert self.instance is not None, \
6471 "Cannot retrieve locked instance %s" % self.op.instance_name
6473 def Exec(self, feedback_fn):
6474 """Deactivate the disks
6477 instance = self.instance
6479 _ShutdownInstanceDisks(self, instance)
6481 _SafeShutdownInstanceDisks(self, instance)
6484 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6485 """Shutdown block devices of an instance.
6487 This function checks if an instance is running, before calling
6488 _ShutdownInstanceDisks.
6491 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6492 _ShutdownInstanceDisks(lu, instance, disks=disks)
6495 def _ExpandCheckDisks(instance, disks):
6496 """Return the instance disks selected by the disks list
6498 @type disks: list of L{objects.Disk} or None
6499 @param disks: selected disks
6500 @rtype: list of L{objects.Disk}
6501 @return: selected instance disks to act on
6505 return instance.disks
6507 if not set(disks).issubset(instance.disks):
6508 raise errors.ProgrammerError("Can only act on disks belonging to the"
6513 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6514 """Shutdown block devices of an instance.
6516 This does the shutdown on all nodes of the instance.
6518 If ignore_primary is false, errors on the primary node are ignored.
6523 disks = _ExpandCheckDisks(instance, disks)
6526 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6527 lu.cfg.SetDiskID(top_disk, node)
6528 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6529 msg = result.fail_msg
6531 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6532 disk.iv_name, node, msg)
6533 if ((node == instance.primary_node and not ignore_primary) or
6534 (node != instance.primary_node and not result.offline)):
6539 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6540 """Checks if a node has enough free memory.
6542 This function checks if a given node has the needed amount of free
6543 memory. In case the node has less memory or we cannot get the
6544 information from the node, this function raises an OpPrereqError
6547 @type lu: C{LogicalUnit}
6548 @param lu: a logical unit from which we get configuration data
6550 @param node: the node to check
6551 @type reason: C{str}
6552 @param reason: string to use in the error message
6553 @type requested: C{int}
6554 @param requested: the amount of memory in MiB to check for
6555 @type hypervisor_name: C{str}
6556 @param hypervisor_name: the hypervisor to ask for memory stats
6558 @return: node current free memory
6559 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6560 we cannot check the node
6563 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6564 nodeinfo[node].Raise("Can't get data from node %s" % node,
6565 prereq=True, ecode=errors.ECODE_ENVIRON)
6566 (_, _, (hv_info, )) = nodeinfo[node].payload
6568 free_mem = hv_info.get("memory_free", None)
6569 if not isinstance(free_mem, int):
6570 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6571 " was '%s'" % (node, free_mem),
6572 errors.ECODE_ENVIRON)
6573 if requested > free_mem:
6574 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6575 " needed %s MiB, available %s MiB" %
6576 (node, reason, requested, free_mem),
6581 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6582 """Checks if nodes have enough free disk space in the all VGs.
6584 This function checks if all given nodes have the needed amount of
6585 free disk. In case any node has less disk or we cannot get the
6586 information from the node, this function raises an OpPrereqError
6589 @type lu: C{LogicalUnit}
6590 @param lu: a logical unit from which we get configuration data
6591 @type nodenames: C{list}
6592 @param nodenames: the list of node names to check
6593 @type req_sizes: C{dict}
6594 @param req_sizes: the hash of vg and corresponding amount of disk in
6596 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6597 or we cannot check the node
6600 for vg, req_size in req_sizes.items():
6601 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
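# Example (hypothetical VG names): req_sizes = {"xenvg": 2048, "data": 512}
# requires 2048 MiB free on "xenvg" and 512 MiB free on "data" on every
# node in nodenames.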
6604 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6605 """Checks if nodes have enough free disk space in the specified VG.
6607 This function checks if all given nodes have the needed amount of
6608 free disk. In case any node has less disk or we cannot get the
6609 information from the node, this function raises an OpPrereqError
6612 @type lu: C{LogicalUnit}
6613 @param lu: a logical unit from which we get configuration data
6614 @type nodenames: C{list}
6615 @param nodenames: the list of node names to check
6617 @param vg: the volume group to check
6618 @type requested: C{int}
6619 @param requested: the amount of disk in MiB to check for
6620 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6621 or we cannot check the node
6624 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6625 for node in nodenames:
6626 info = nodeinfo[node]
6627 info.Raise("Cannot get current information from node %s" % node,
6628 prereq=True, ecode=errors.ECODE_ENVIRON)
6629 (_, (vg_info, ), _) = info.payload
6630 vg_free = vg_info.get("vg_free", None)
6631 if not isinstance(vg_free, int):
6632 raise errors.OpPrereqError("Can't compute free disk space on node"
6633 " %s for vg %s, result was '%s'" %
6634 (node, vg, vg_free), errors.ECODE_ENVIRON)
6635 if requested > vg_free:
6636 raise errors.OpPrereqError("Not enough disk space on target node %s"
6637 " vg %s: required %d MiB, available %d MiB" %
6638 (node, vg, requested, vg_free),
6642 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6643 """Checks if nodes have enough physical CPUs
6645 This function checks if all given nodes have the needed number of
6646 physical CPUs. In case any node has fewer CPUs or we cannot get the
6647 information from the node, this function raises an OpPrereqError
6650 @type lu: C{LogicalUnit}
6651 @param lu: a logical unit from which we get configuration data
6652 @type nodenames: C{list}
6653 @param nodenames: the list of node names to check
6654 @type requested: C{int}
6655 @param requested: the minimum acceptable number of physical CPUs
6656 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6657 or we cannot check the node
6660 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6661 for node in nodenames:
6662 info = nodeinfo[node]
6663 info.Raise("Cannot get current information from node %s" % node,
6664 prereq=True, ecode=errors.ECODE_ENVIRON)
6665 (_, _, (hv_info, )) = info.payload
6666 num_cpus = hv_info.get("cpu_total", None)
6667 if not isinstance(num_cpus, int):
6668 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6669 " on node %s, result was '%s'" %
6670 (node, num_cpus), errors.ECODE_ENVIRON)
6671 if requested > num_cpus:
6672 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6673 "required" % (node, num_cpus, requested),
6677 class LUInstanceStartup(LogicalUnit):
6678 """Starts an instance.
6681 HPATH = "instance-start"
6682 HTYPE = constants.HTYPE_INSTANCE
6685 def CheckArguments(self):
6687 if self.op.beparams:
6688 # fill the beparams dict
6689 objects.UpgradeBeParams(self.op.beparams)
6690 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6692 def ExpandNames(self):
6693 self._ExpandAndLockInstance()
6694 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6696 def DeclareLocks(self, level):
6697 if level == locking.LEVEL_NODE_RES:
6698 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6700 def BuildHooksEnv(self):
6703 This runs on master, primary and secondary nodes of the instance.
6707 "FORCE": self.op.force,
6710 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6714 def BuildHooksNodes(self):
6715 """Build hooks nodes.
6718 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6721 def CheckPrereq(self):
6722 """Check prerequisites.
6724 This checks that the instance is in the cluster.
6727 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6728 assert self.instance is not None, \
6729 "Cannot retrieve locked instance %s" % self.op.instance_name
6732 if self.op.hvparams:
6733 # check hypervisor parameter syntax (locally)
6734 cluster = self.cfg.GetClusterInfo()
6735 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6736 filled_hvp = cluster.FillHV(instance)
6737 filled_hvp.update(self.op.hvparams)
6738 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6739 hv_type.CheckParameterSyntax(filled_hvp)
6740 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6742 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6744 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6746 if self.primary_offline and self.op.ignore_offline_nodes:
6747 self.proc.LogWarning("Ignoring offline primary node")
6749 if self.op.hvparams or self.op.beparams:
6750 self.proc.LogWarning("Overridden parameters are ignored")
6752 _CheckNodeOnline(self, instance.primary_node)
6754 bep = self.cfg.GetClusterInfo().FillBE(instance)
6755 bep.update(self.op.beparams)
6757 # check that the bridges used by the instance exist
6758 _CheckInstanceBridgesExist(self, instance)
6760 remote_info = self.rpc.call_instance_info(instance.primary_node,
6762 instance.hypervisor)
6763 remote_info.Raise("Error checking node %s" % instance.primary_node,
6764 prereq=True, ecode=errors.ECODE_ENVIRON)
6765 if not remote_info.payload: # not running already
6766 _CheckNodeFreeMemory(self, instance.primary_node,
6767 "starting instance %s" % instance.name,
6768 bep[constants.BE_MINMEM], instance.hypervisor)
6770 def Exec(self, feedback_fn):
6771 """Start the instance.
6774 instance = self.instance
6775 force = self.op.force
6777 if not self.op.no_remember:
6778 self.cfg.MarkInstanceUp(instance.name)
6780 if self.primary_offline:
6781 assert self.op.ignore_offline_nodes
6782 self.proc.LogInfo("Primary node offline, marked instance as started")
6784 node_current = instance.primary_node
6786 _StartInstanceDisks(self, instance, force)
6789 self.rpc.call_instance_start(node_current,
6790 (instance, self.op.hvparams,
6792 self.op.startup_paused)
6793 msg = result.fail_msg
6795 _ShutdownInstanceDisks(self, instance)
6796 raise errors.OpExecError("Could not start instance: %s" % msg)
6799 class LUInstanceReboot(LogicalUnit):
6800 """Reboot an instance.
6803 HPATH = "instance-reboot"
6804 HTYPE = constants.HTYPE_INSTANCE
6807 def ExpandNames(self):
6808 self._ExpandAndLockInstance()
6810 def BuildHooksEnv(self):
6813 This runs on master, primary and secondary nodes of the instance.
6817 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6818 "REBOOT_TYPE": self.op.reboot_type,
6819 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6822 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6826 def BuildHooksNodes(self):
6827 """Build hooks nodes.
6830 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6833 def CheckPrereq(self):
6834 """Check prerequisites.
6836 This checks that the instance is in the cluster.
6839 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6840 assert self.instance is not None, \
6841 "Cannot retrieve locked instance %s" % self.op.instance_name
6842 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6843 _CheckNodeOnline(self, instance.primary_node)
6845 # check that the bridges used by the instance exist
6846 _CheckInstanceBridgesExist(self, instance)
6848 def Exec(self, feedback_fn):
6849 """Reboot the instance.
6852 instance = self.instance
6853 ignore_secondaries = self.op.ignore_secondaries
6854 reboot_type = self.op.reboot_type
6856 remote_info = self.rpc.call_instance_info(instance.primary_node,
6858 instance.hypervisor)
6859 remote_info.Raise("Error checking node %s" % instance.primary_node)
6860 instance_running = bool(remote_info.payload)
6862 node_current = instance.primary_node
6864 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6865 constants.INSTANCE_REBOOT_HARD]:
6866 for disk in instance.disks:
6867 self.cfg.SetDiskID(disk, node_current)
6868 result = self.rpc.call_instance_reboot(node_current, instance,
6870 self.op.shutdown_timeout)
6871 result.Raise("Could not reboot instance")
6873 if instance_running:
6874 result = self.rpc.call_instance_shutdown(node_current, instance,
6875 self.op.shutdown_timeout)
6876 result.Raise("Could not shutdown instance for full reboot")
6877 _ShutdownInstanceDisks(self, instance)
6879 self.LogInfo("Instance %s was already stopped, starting now",
6881 _StartInstanceDisks(self, instance, ignore_secondaries)
6882 result = self.rpc.call_instance_start(node_current,
6883 (instance, None, None), False)
6884 msg = result.fail_msg
6886 _ShutdownInstanceDisks(self, instance)
6887 raise errors.OpExecError("Could not start instance for"
6888 " full reboot: %s" % msg)
6890 self.cfg.MarkInstanceUp(instance.name)
6893 class LUInstanceShutdown(LogicalUnit):
6894 """Shutdown an instance.
6897 HPATH = "instance-stop"
6898 HTYPE = constants.HTYPE_INSTANCE
6901 def ExpandNames(self):
6902 self._ExpandAndLockInstance()
6904 def BuildHooksEnv(self):
6907 This runs on master, primary and secondary nodes of the instance.
6910 env = _BuildInstanceHookEnvByObject(self, self.instance)
6911 env["TIMEOUT"] = self.op.timeout
6914 def BuildHooksNodes(self):
6915 """Build hooks nodes.
6918 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6921 def CheckPrereq(self):
6922 """Check prerequisites.
6924 This checks that the instance is in the cluster.
6927 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6928 assert self.instance is not None, \
6929 "Cannot retrieve locked instance %s" % self.op.instance_name
6931 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6933 self.primary_offline = \
6934 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6936 if self.primary_offline and self.op.ignore_offline_nodes:
6937 self.proc.LogWarning("Ignoring offline primary node")
6939 _CheckNodeOnline(self, self.instance.primary_node)
6941 def Exec(self, feedback_fn):
6942 """Shutdown the instance.
6945 instance = self.instance
6946 node_current = instance.primary_node
6947 timeout = self.op.timeout
6949 if not self.op.no_remember:
6950 self.cfg.MarkInstanceDown(instance.name)
6952 if self.primary_offline:
6953 assert self.op.ignore_offline_nodes
6954 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6956 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6957 msg = result.fail_msg
6959 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6961 _ShutdownInstanceDisks(self, instance)
6964 class LUInstanceReinstall(LogicalUnit):
6965 """Reinstall an instance.
6968 HPATH = "instance-reinstall"
6969 HTYPE = constants.HTYPE_INSTANCE
6972 def ExpandNames(self):
6973 self._ExpandAndLockInstance()
6975 def BuildHooksEnv(self):
6978 This runs on master, primary and secondary nodes of the instance.
6981 return _BuildInstanceHookEnvByObject(self, self.instance)
6983 def BuildHooksNodes(self):
6984 """Build hooks nodes.
6987 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6990 def CheckPrereq(self):
6991 """Check prerequisites.
6993 This checks that the instance is in the cluster and is not running.
6996 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6997 assert instance is not None, \
6998 "Cannot retrieve locked instance %s" % self.op.instance_name
6999 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7000 " offline, cannot reinstall")
7002 if instance.disk_template == constants.DT_DISKLESS:
7003 raise errors.OpPrereqError("Instance '%s' has no disks" %
7004 self.op.instance_name,
7006 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7008 if self.op.os_type is not None:
7010 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7011 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7012 instance_os = self.op.os_type
7014 instance_os = instance.os
7016 nodelist = list(instance.all_nodes)
7018 if self.op.osparams:
7019 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7020 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7021 self.os_inst = i_osdict # the new dict (without defaults)
7025 self.instance = instance
7027 def Exec(self, feedback_fn):
7028 """Reinstall the instance.
7031 inst = self.instance
7033 if self.op.os_type is not None:
7034 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7035 inst.os = self.op.os_type
7036 # Write to configuration
7037 self.cfg.Update(inst, feedback_fn)
7039 _StartInstanceDisks(self, inst, None)
7041 feedback_fn("Running the instance OS create scripts...")
7042 # FIXME: pass debug option from opcode to backend
7043 result = self.rpc.call_instance_os_add(inst.primary_node,
7044 (inst, self.os_inst), True,
7045 self.op.debug_level)
7046 result.Raise("Could not install OS for instance %s on node %s" %
7047 (inst.name, inst.primary_node))
7049 _ShutdownInstanceDisks(self, inst)
7052 class LUInstanceRecreateDisks(LogicalUnit):
7053 """Recreate an instance's missing disks.
7056 HPATH = "instance-recreate-disks"
7057 HTYPE = constants.HTYPE_INSTANCE
7060 _MODIFYABLE = frozenset([
7061 constants.IDISK_SIZE,
7062 constants.IDISK_MODE,
7065 # New or changed disk parameters may have different semantics
7066 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7067 constants.IDISK_ADOPT,
7069 # TODO: Implement support changing VG while recreating
7071 constants.IDISK_METAVG,
7074 def CheckArguments(self):
7075 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7076 # Normalize and convert deprecated list of disk indices
7077 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
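# e.g. a legacy request with disks=[2, 0] is normalized to [(0, {}), (2, {})]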
7079 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7081 raise errors.OpPrereqError("Some disks have been specified more than"
7082 " once: %s" % utils.CommaJoin(duplicates),
7085 for (idx, params) in self.op.disks:
7086 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7087 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7089 raise errors.OpPrereqError("Parameters for disk %s try to change"
7090 " unmodifyable parameter(s): %s" %
7091 (idx, utils.CommaJoin(unsupported)),
7094 def ExpandNames(self):
7095 self._ExpandAndLockInstance()
7096 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7098 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7099 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7101 self.needed_locks[locking.LEVEL_NODE] = []
7102 self.needed_locks[locking.LEVEL_NODE_RES] = []
7104 def DeclareLocks(self, level):
7105 if level == locking.LEVEL_NODE:
7106 # if we replace the nodes, we only need to lock the old primary,
7107 # otherwise we need to lock all nodes for disk re-creation
7108 primary_only = bool(self.op.nodes)
7109 self._LockInstancesNodes(primary_only=primary_only)
7110 elif level == locking.LEVEL_NODE_RES:
7112 self.needed_locks[locking.LEVEL_NODE_RES] = \
7113 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7115 def BuildHooksEnv(self):
7118 This runs on master, primary and secondary nodes of the instance.
7121 return _BuildInstanceHookEnvByObject(self, self.instance)
7123 def BuildHooksNodes(self):
7124 """Build hooks nodes.
7127 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7130 def CheckPrereq(self):
7131 """Check prerequisites.
7133 This checks that the instance is in the cluster and is not running.
7136 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7137 assert instance is not None, \
7138 "Cannot retrieve locked instance %s" % self.op.instance_name
7140 if len(self.op.nodes) != len(instance.all_nodes):
7141 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7142 " %d replacement nodes were specified" %
7143 (instance.name, len(instance.all_nodes),
7144 len(self.op.nodes)),
7146 assert instance.disk_template != constants.DT_DRBD8 or \
7147 len(self.op.nodes) == 2
7148 assert instance.disk_template != constants.DT_PLAIN or \
7149 len(self.op.nodes) == 1
7150 primary_node = self.op.nodes[0]
7152 primary_node = instance.primary_node
7153 _CheckNodeOnline(self, primary_node)
7155 if instance.disk_template == constants.DT_DISKLESS:
7156 raise errors.OpPrereqError("Instance '%s' has no disks" %
7157 self.op.instance_name, errors.ECODE_INVAL)
7159 # if we replace nodes *and* the old primary is offline, we don't check the instance state
7161 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7162 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7163 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7164 if not (self.op.nodes and old_pnode.offline):
7165 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7166 msg="cannot recreate disks")
7169 self.disks = dict(self.op.disks)
7171 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7173 maxidx = max(self.disks.keys())
7174 if maxidx >= len(instance.disks):
7175 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7178 if (self.op.nodes and
7179 sorted(self.disks.keys()) != range(len(instance.disks))):
7180 raise errors.OpPrereqError("Can't recreate disks partially and"
7181 " change the nodes at the same time",
7184 self.instance = instance
7186 def Exec(self, feedback_fn):
7187 """Recreate the disks.
7190 instance = self.instance
7192 assert (self.owned_locks(locking.LEVEL_NODE) ==
7193 self.owned_locks(locking.LEVEL_NODE_RES))
7196 mods = [] # keeps track of needed changes
7198 for idx, disk in enumerate(instance.disks):
7200 changes = self.disks[idx]
7202 # Disk should not be recreated
7206 # update secondaries for disks, if needed
7207 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7208 # need to update the nodes and minors
7209 assert len(self.op.nodes) == 2
7210 assert len(disk.logical_id) == 6 # otherwise disk internals
7212 (_, _, old_port, _, _, old_secret) = disk.logical_id
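# DRBD8 logical_id layout: (node_a, node_b, port, minor_a, minor_b, secret);
# the port and shared secret are kept, new minors are allocated below for
# the new node pair.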
7213 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7214 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7215 new_minors[0], new_minors[1], old_secret)
7216 assert len(disk.logical_id) == len(new_id)
7220 mods.append((idx, new_id, changes))
7222 # now that we have passed all asserts above, we can apply the mods
7223 # in a single run (to avoid partial changes)
7224 for idx, new_id, changes in mods:
7225 disk = instance.disks[idx]
7226 if new_id is not None:
7227 assert disk.dev_type == constants.LD_DRBD8
7228 disk.logical_id = new_id
7230 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7231 mode=changes.get(constants.IDISK_MODE, None))
7233 # change primary node, if needed
7235 instance.primary_node = self.op.nodes[0]
7236 self.LogWarning("Changing the instance's nodes, you will have to"
7237 " remove any disks left on the older nodes manually")
7240 self.cfg.Update(instance, feedback_fn)
7242 _CreateDisks(self, instance, to_skip=to_skip)
7245 class LUInstanceRename(LogicalUnit):
7246 """Rename an instance.
7249 HPATH = "instance-rename"
7250 HTYPE = constants.HTYPE_INSTANCE
7252 def CheckArguments(self):
7256 if self.op.ip_check and not self.op.name_check:
7257 # TODO: make the ip check more flexible and not depend on the name check
7258 raise errors.OpPrereqError("IP address check requires a name check",
7261 def BuildHooksEnv(self):
7264 This runs on master, primary and secondary nodes of the instance.
7267 env = _BuildInstanceHookEnvByObject(self, self.instance)
7268 env["INSTANCE_NEW_NAME"] = self.op.new_name
7271 def BuildHooksNodes(self):
7272 """Build hooks nodes.
7275 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7278 def CheckPrereq(self):
7279 """Check prerequisites.
7281 This checks that the instance is in the cluster and is not running.
7284 self.op.instance_name = _ExpandInstanceName(self.cfg,
7285 self.op.instance_name)
7286 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7287 assert instance is not None
7288 _CheckNodeOnline(self, instance.primary_node)
7289 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7290 msg="cannot rename")
7291 self.instance = instance
7293 new_name = self.op.new_name
7294 if self.op.name_check:
7295 hostname = netutils.GetHostname(name=new_name)
7296 if hostname.name != new_name:
7297 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7299 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7300 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7301 " same as given hostname '%s'") %
7302 (hostname.name, self.op.new_name),
7304 new_name = self.op.new_name = hostname.name
7305 if (self.op.ip_check and
7306 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7307 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7308 (hostname.ip, new_name),
7309 errors.ECODE_NOTUNIQUE)
7311 instance_list = self.cfg.GetInstanceList()
7312 if new_name in instance_list and new_name != instance.name:
7313 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7314 new_name, errors.ECODE_EXISTS)
7316 def Exec(self, feedback_fn):
7317 """Rename the instance.
7320 inst = self.instance
7321 old_name = inst.name
7323 rename_file_storage = False
7324 if (inst.disk_template in constants.DTS_FILEBASED and
7325 self.op.new_name != inst.name):
7326 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7327 rename_file_storage = True
7329 self.cfg.RenameInstance(inst.name, self.op.new_name)
7330 # Change the instance lock. This is definitely safe while we hold the BGL.
7331 # Otherwise the new lock would have to be added in acquired mode.
7333 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7334 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7336 # re-read the instance from the configuration after rename
7337 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7339 if rename_file_storage:
7340 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7341 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7342 old_file_storage_dir,
7343 new_file_storage_dir)
7344 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7345 " (but the instance has been renamed in Ganeti)" %
7346 (inst.primary_node, old_file_storage_dir,
7347 new_file_storage_dir))
7349 _StartInstanceDisks(self, inst, None)
7351 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7352 old_name, self.op.debug_level)
7353 msg = result.fail_msg
7355 msg = ("Could not run OS rename script for instance %s on node %s"
7356 " (but the instance has been renamed in Ganeti): %s" %
7357 (inst.name, inst.primary_node, msg))
7358 self.proc.LogWarning(msg)
7360 _ShutdownInstanceDisks(self, inst)
7365 class LUInstanceRemove(LogicalUnit):
7366 """Remove an instance.
7369 HPATH = "instance-remove"
7370 HTYPE = constants.HTYPE_INSTANCE
7373 def ExpandNames(self):
7374 self._ExpandAndLockInstance()
7375 self.needed_locks[locking.LEVEL_NODE] = []
7376 self.needed_locks[locking.LEVEL_NODE_RES] = []
7377 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7379 def DeclareLocks(self, level):
7380 if level == locking.LEVEL_NODE:
7381 self._LockInstancesNodes()
7382 elif level == locking.LEVEL_NODE_RES:
7384 self.needed_locks[locking.LEVEL_NODE_RES] = \
7385 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7387 def BuildHooksEnv(self):
7390 This runs on master, primary and secondary nodes of the instance.
7393 env = _BuildInstanceHookEnvByObject(self, self.instance)
7394 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7397 def BuildHooksNodes(self):
7398 """Build hooks nodes.
7401 nl = [self.cfg.GetMasterNode()]
7402 nl_post = list(self.instance.all_nodes) + nl
7403 return (nl, nl_post)
7405 def CheckPrereq(self):
7406 """Check prerequisites.
7408 This checks that the instance is in the cluster.
7411 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7412 assert self.instance is not None, \
7413 "Cannot retrieve locked instance %s" % self.op.instance_name
7415 def Exec(self, feedback_fn):
7416 """Remove the instance.
7419 instance = self.instance
7420 logging.info("Shutting down instance %s on node %s",
7421 instance.name, instance.primary_node)
7423 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7424 self.op.shutdown_timeout)
7425 msg = result.fail_msg
7427 if self.op.ignore_failures:
7428 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7430 raise errors.OpExecError("Could not shutdown instance %s on"
7432 (instance.name, instance.primary_node, msg))
7434 assert (self.owned_locks(locking.LEVEL_NODE) ==
7435 self.owned_locks(locking.LEVEL_NODE_RES))
7436 assert not (set(instance.all_nodes) -
7437 self.owned_locks(locking.LEVEL_NODE)), \
7438 "Not owning correct locks"
7440 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7443 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7444 """Utility function to remove an instance.
7447 logging.info("Removing block devices for instance %s", instance.name)
7449 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7450 if not ignore_failures:
7451 raise errors.OpExecError("Can't remove instance's disks")
7452 feedback_fn("Warning: can't remove instance's disks")
7454 logging.info("Removing instance %s out of cluster config", instance.name)
7456 lu.cfg.RemoveInstance(instance.name)
7458 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7459 "Instance lock removal conflict"
7461 # Remove lock for the instance
7462 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7465 class LUInstanceQuery(NoHooksLU):
7466 """Logical unit for querying instances.
7469 # pylint: disable=W0142
7472 def CheckArguments(self):
7473 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7474 self.op.output_fields, self.op.use_locking)
7476 def ExpandNames(self):
7477 self.iq.ExpandNames(self)
7479 def DeclareLocks(self, level):
7480 self.iq.DeclareLocks(self, level)
7482 def Exec(self, feedback_fn):
7483 return self.iq.OldStyleQuery(self)
7486 class LUInstanceFailover(LogicalUnit):
7487 """Failover an instance.
7490 HPATH = "instance-failover"
7491 HTYPE = constants.HTYPE_INSTANCE
7494 def CheckArguments(self):
7495 """Check the arguments.
7498 self.iallocator = getattr(self.op, "iallocator", None)
7499 self.target_node = getattr(self.op, "target_node", None)
7501 def ExpandNames(self):
7502 self._ExpandAndLockInstance()
7504 if self.op.target_node is not None:
7505 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7507 self.needed_locks[locking.LEVEL_NODE] = []
7508 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7510 self.needed_locks[locking.LEVEL_NODE_RES] = []
7511 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7513 ignore_consistency = self.op.ignore_consistency
7514 shutdown_timeout = self.op.shutdown_timeout
7515 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7518 ignore_consistency=ignore_consistency,
7519 shutdown_timeout=shutdown_timeout,
7520 ignore_ipolicy=self.op.ignore_ipolicy)
7521 self.tasklets = [self._migrater]
7523 def DeclareLocks(self, level):
7524 if level == locking.LEVEL_NODE:
7525 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7526 if instance.disk_template in constants.DTS_EXT_MIRROR:
7527 if self.op.target_node is None:
7528 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7530 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7531 self.op.target_node]
7532 del self.recalculate_locks[locking.LEVEL_NODE]
7534 self._LockInstancesNodes()
7535 elif level == locking.LEVEL_NODE_RES:
7537 self.needed_locks[locking.LEVEL_NODE_RES] = \
7538 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
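# Editor's sketch (not original code): _CopyLockList, defined elsewhere in
# this module, is assumed to return an independent copy of a lock-name list
# while passing sentinels such as locking.ALL_SET through unchanged,
# roughly:
#
#   def _CopyLockList(names):
#     if names == locking.ALL_SET:
#       return locking.ALL_SET
#     return names[:]
#
# Copying here keeps LEVEL_NODE_RES from aliasing the LEVEL_NODE list, so
# later changes to one level do not silently affect the other.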
7540 def BuildHooksEnv(self):
7543 This runs on master, primary and secondary nodes of the instance.
7546 instance = self._migrater.instance
7547 source_node = instance.primary_node
7548 target_node = self.op.target_node
7550 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7551 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7552 "OLD_PRIMARY": source_node,
7553 "NEW_PRIMARY": target_node,
7556 if instance.disk_template in constants.DTS_INT_MIRROR:
7557 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7558 env["NEW_SECONDARY"] = source_node
7560 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7562 env.update(_BuildInstanceHookEnvByObject(self, instance))
7566 def BuildHooksNodes(self):
7567 """Build hooks nodes.
7570 instance = self._migrater.instance
7571 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7572 return (nl, nl + [instance.primary_node])
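# Illustrative reading of the hooks environment built above (node names
# invented for the example): when failing over a DRBD instance whose
# primary is "nodeA" and secondary is "nodeB", OLD_PRIMARY is "nodeA",
# OLD_SECONDARY is "nodeB" and NEW_SECONDARY becomes "nodeA"; NEW_PRIMARY
# is whatever target node the opcode requested (unset for the usual
# failover onto the existing secondary).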
7575 class LUInstanceMigrate(LogicalUnit):
7576 """Migrate an instance.
7578 This is migration without shutting down, compared to the failover,
7579 which is done with shutdown.
7582 HPATH = "instance-migrate"
7583 HTYPE = constants.HTYPE_INSTANCE
7586 def ExpandNames(self):
7587 self._ExpandAndLockInstance()
7589 if self.op.target_node is not None:
7590 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7592 self.needed_locks[locking.LEVEL_NODE] = []
7593 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7595 self.needed_locks[locking.LEVEL_NODE] = []
7596 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7599 TLMigrateInstance(self, self.op.instance_name,
7600 cleanup=self.op.cleanup,
7602 fallback=self.op.allow_failover,
7603 allow_runtime_changes=self.op.allow_runtime_changes,
7604 ignore_ipolicy=self.op.ignore_ipolicy)
7605 self.tasklets = [self._migrater]
7607 def DeclareLocks(self, level):
7608 if level == locking.LEVEL_NODE:
7609 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7610 if instance.disk_template in constants.DTS_EXT_MIRROR:
7611 if self.op.target_node is None:
7612 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7614 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7615 self.op.target_node]
7616 del self.recalculate_locks[locking.LEVEL_NODE]
7618 self._LockInstancesNodes()
7619 elif level == locking.LEVEL_NODE_RES:
7621 self.needed_locks[locking.LEVEL_NODE_RES] = \
7622 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7624 def BuildHooksEnv(self):
7627 This runs on master, primary and secondary nodes of the instance.
7630 instance = self._migrater.instance
7631 source_node = instance.primary_node
7632 target_node = self.op.target_node
7633 env = _BuildInstanceHookEnvByObject(self, instance)
7635 "MIGRATE_LIVE": self._migrater.live,
7636 "MIGRATE_CLEANUP": self.op.cleanup,
7637 "OLD_PRIMARY": source_node,
7638 "NEW_PRIMARY": target_node,
7639 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7642 if instance.disk_template in constants.DTS_INT_MIRROR:
7643 env["OLD_SECONDARY"] = target_node
7644 env["NEW_SECONDARY"] = source_node
7646 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7650 def BuildHooksNodes(self):
7651 """Build hooks nodes.
7654 instance = self._migrater.instance
7655 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7656 return (nl, nl + [instance.primary_node])
7659 class LUInstanceMove(LogicalUnit):
7660 """Move an instance by data-copying.
7663 HPATH = "instance-move"
7664 HTYPE = constants.HTYPE_INSTANCE
7667 def ExpandNames(self):
7668 self._ExpandAndLockInstance()
7669 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7670 self.op.target_node = target_node
7671 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7672 self.needed_locks[locking.LEVEL_NODE_RES] = []
7673 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7675 def DeclareLocks(self, level):
7676 if level == locking.LEVEL_NODE:
7677 self._LockInstancesNodes(primary_only=True)
7678 elif level == locking.LEVEL_NODE_RES:
7680 self.needed_locks[locking.LEVEL_NODE_RES] = \
7681 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7683 def BuildHooksEnv(self):
7686 This runs on master, primary and secondary nodes of the instance.
7690 "TARGET_NODE": self.op.target_node,
7691 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7693 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7696 def BuildHooksNodes(self):
7697 """Build hooks nodes.
7701 self.cfg.GetMasterNode(),
7702 self.instance.primary_node,
7703 self.op.target_node,
7707 def CheckPrereq(self):
7708 """Check prerequisites.
7710 This checks that the instance is in the cluster.
7713 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7714 assert self.instance is not None, \
7715 "Cannot retrieve locked instance %s" % self.op.instance_name
7717 node = self.cfg.GetNodeInfo(self.op.target_node)
7718 assert node is not None, \
7719 "Cannot retrieve locked node %s" % self.op.target_node
7721 self.target_node = target_node = node.name
7723 if target_node == instance.primary_node:
7724 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7725 (instance.name, target_node),
7728 bep = self.cfg.GetClusterInfo().FillBE(instance)
7730 for idx, dsk in enumerate(instance.disks):
7731 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7732 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7733 " cannot copy" % idx, errors.ECODE_STATE)
7735 _CheckNodeOnline(self, target_node)
7736 _CheckNodeNotDrained(self, target_node)
7737 _CheckNodeVmCapable(self, target_node)
7738 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7739 self.cfg.GetNodeGroup(node.group))
7740 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7741 ignore=self.op.ignore_ipolicy)
7743 if instance.admin_state == constants.ADMINST_UP:
7744 # check memory requirements on the secondary node
7745 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7746 instance.name, bep[constants.BE_MAXMEM],
7747 instance.hypervisor)
7749 self.LogInfo("Not checking memory on the secondary node as"
7750 " instance will not be started")
7752 # check bridge existence
7753 _CheckInstanceBridgesExist(self, instance, node=target_node)
7755 def Exec(self, feedback_fn):
7756 """Move an instance.
7758 The move is done by shutting it down on its present node, copying
7759 the data over (slow) and starting it on the new node.
7762 instance = self.instance
7764 source_node = instance.primary_node
7765 target_node = self.target_node
7767 self.LogInfo("Shutting down instance %s on source node %s",
7768 instance.name, source_node)
7770 assert (self.owned_locks(locking.LEVEL_NODE) ==
7771 self.owned_locks(locking.LEVEL_NODE_RES))
7773 result = self.rpc.call_instance_shutdown(source_node, instance,
7774 self.op.shutdown_timeout)
7775 msg = result.fail_msg
7777 if self.op.ignore_consistency:
7778 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7779 " Proceeding anyway. Please make sure node"
7780 " %s is down. Error details: %s",
7781 instance.name, source_node, source_node, msg)
7783 raise errors.OpExecError("Could not shutdown instance %s on"
7785 (instance.name, source_node, msg))
7787 # create the target disks
7789 _CreateDisks(self, instance, target_node=target_node)
7790 except errors.OpExecError:
7791 self.LogWarning("Device creation failed, reverting...")
7793 _RemoveDisks(self, instance, target_node=target_node)
7795 self.cfg.ReleaseDRBDMinors(instance.name)
7798 cluster_name = self.cfg.GetClusterInfo().cluster_name
7801 # activate, get path, copy the data over
7802 for idx, disk in enumerate(instance.disks):
7803 self.LogInfo("Copying data for disk %d", idx)
7804 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7805 instance.name, True, idx)
7807 self.LogWarning("Can't assemble newly created disk %d: %s",
7808 idx, result.fail_msg)
7809 errs.append(result.fail_msg)
7811 dev_path = result.payload
7812 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7813 target_node, dev_path,
7816 self.LogWarning("Can't copy data over for disk %d: %s",
7817 idx, result.fail_msg)
7818 errs.append(result.fail_msg)
7822 self.LogWarning("Some disks failed to copy, aborting")
7824 _RemoveDisks(self, instance, target_node=target_node)
7826 self.cfg.ReleaseDRBDMinors(instance.name)
7827 raise errors.OpExecError("Errors during disk copy: %s" %
7830 instance.primary_node = target_node
7831 self.cfg.Update(instance, feedback_fn)
7833 self.LogInfo("Removing the disks on the original node")
7834 _RemoveDisks(self, instance, target_node=source_node)
7836 # Only start the instance if it's marked as up
7837 if instance.admin_state == constants.ADMINST_UP:
7838 self.LogInfo("Starting instance %s on node %s",
7839 instance.name, target_node)
7841 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7842 ignore_secondaries=True)
7844 _ShutdownInstanceDisks(self, instance)
7845 raise errors.OpExecError("Can't activate the instance's disks")
7847 result = self.rpc.call_instance_start(target_node,
7848 (instance, None, None), False)
7849 msg = result.fail_msg
7851 _ShutdownInstanceDisks(self, instance)
7852 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7853 (instance.name, target_node, msg))
7856 class LUNodeMigrate(LogicalUnit):
7857 """Migrate all instances from a node.
7860 HPATH = "node-migrate"
7861 HTYPE = constants.HTYPE_NODE
7864 def CheckArguments(self):
7867 def ExpandNames(self):
7868 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7870 self.share_locks = _ShareAll()
7871 self.needed_locks = {
7872 locking.LEVEL_NODE: [self.op.node_name],
7875 def BuildHooksEnv(self):
7878 This runs on the master, the primary and all the secondaries.
7882 "NODE_NAME": self.op.node_name,
7883 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7886 def BuildHooksNodes(self):
7887 """Build hooks nodes.
7890 nl = [self.cfg.GetMasterNode()]
7893 def CheckPrereq(self):
7896 def Exec(self, feedback_fn):
7897 # Prepare jobs for migrating instances
7898 allow_runtime_changes = self.op.allow_runtime_changes
7900 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7903 iallocator=self.op.iallocator,
7904 target_node=self.op.target_node,
7905 allow_runtime_changes=allow_runtime_changes,
7906 ignore_ipolicy=self.op.ignore_ipolicy)]
7907 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7910 # TODO: Run iallocator in this opcode and pass correct placement options to
7911 # OpInstanceMigrate. Since other jobs can modify the cluster between
7912 # running the iallocator and the actual migration, a good consistency model
7913 # will have to be found.
7915 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7916 frozenset([self.op.node_name]))
7918 return ResultWithJobs(jobs)
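# Illustrative note (instance names invented): "jobs" is a list of
# single-opcode job definitions, one per primary instance on the node
# being evacuated, e.g.
#   [[opcodes.OpInstanceMigrate(instance_name="inst1", ...)],
#    [opcodes.OpInstanceMigrate(instance_name="inst2", ...)]]
# so each instance is migrated in its own job and can succeed or fail
# independently of the others.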
7921 class TLMigrateInstance(Tasklet):
7922 """Tasklet class for instance migration.
7925 @ivar live: whether the migration will be done live or non-live;
7926 this variable is initialized only after CheckPrereq has run
7927 @type cleanup: boolean
7928 @ivar cleanup: Whether we are cleaning up after a failed migration
7929 @type iallocator: string
7930 @ivar iallocator: The iallocator used to determine target_node
7931 @type target_node: string
7932 @ivar target_node: If given, the target_node to reallocate the instance to
7933 @type failover: boolean
7934 @ivar failover: Whether operation results in failover or migration
7935 @type fallback: boolean
7936 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
7938 @type ignore_consistency: boolean
7939 @ivar ignore_consistency: Whether we should ignore consistency between source and target node
7941 @type shutdown_timeout: int
7942 @ivar shutdown_timeout: In case of failover, the timeout used for the shutdown
7943 @type ignore_ipolicy: bool
7944 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7949 _MIGRATION_POLL_INTERVAL = 1 # seconds
7950 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7952 def __init__(self, lu, instance_name, cleanup=False,
7953 failover=False, fallback=False,
7954 ignore_consistency=False,
7955 allow_runtime_changes=True,
7956 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7957 ignore_ipolicy=False):
7958 """Initializes this class.
7961 Tasklet.__init__(self, lu)
7964 self.instance_name = instance_name
7965 self.cleanup = cleanup
7966 self.live = False # will be overridden later
7967 self.failover = failover
7968 self.fallback = fallback
7969 self.ignore_consistency = ignore_consistency
7970 self.shutdown_timeout = shutdown_timeout
7971 self.ignore_ipolicy = ignore_ipolicy
7972 self.allow_runtime_changes = allow_runtime_changes
7974 def CheckPrereq(self):
7975 """Check prerequisites.
7977 This checks that the instance is in the cluster.
7980 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7981 instance = self.cfg.GetInstanceInfo(instance_name)
7982 assert instance is not None
7983 self.instance = instance
7984 cluster = self.cfg.GetClusterInfo()
7986 if (not self.cleanup and
7987 not instance.admin_state == constants.ADMINST_UP and
7988 not self.failover and self.fallback):
7989 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7990 " switching to failover")
7991 self.failover = True
7993 if instance.disk_template not in constants.DTS_MIRRORED:
7998 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7999 " %s" % (instance.disk_template, text),
8002 if instance.disk_template in constants.DTS_EXT_MIRROR:
8003 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8005 if self.lu.op.iallocator:
8006 self._RunAllocator()
8008 # We set self.target_node as it is required by
8010 self.target_node = self.lu.op.target_node
8012 # Check that the target node is correct in terms of instance policy
8013 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8014 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8015 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8016 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8017 ignore=self.ignore_ipolicy)
8019 # self.target_node is already populated, either directly or by the
8021 target_node = self.target_node
8022 if self.target_node == instance.primary_node:
8023 raise errors.OpPrereqError("Cannot migrate instance %s"
8024 " to its primary (%s)" %
8025 (instance.name, instance.primary_node))
8027 if len(self.lu.tasklets) == 1:
8028 # It is safe to release locks only when we're the only tasklet
8030 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8031 keep=[instance.primary_node, self.target_node])
8034 secondary_nodes = instance.secondary_nodes
8035 if not secondary_nodes:
8036 raise errors.ConfigurationError("No secondary node but using"
8037 " %s disk template" %
8038 instance.disk_template)
8039 target_node = secondary_nodes[0]
8040 if self.lu.op.iallocator or (self.lu.op.target_node and
8041 self.lu.op.target_node != target_node):
8043 text = "failed over"
8046 raise errors.OpPrereqError("Instances with disk template %s cannot"
8047 " be %s to arbitrary nodes"
8048 " (neither an iallocator nor a target"
8049 " node can be passed)" %
8050 (instance.disk_template, text),
8052 nodeinfo = self.cfg.GetNodeInfo(target_node)
8053 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8054 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8055 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8056 ignore=self.ignore_ipolicy)
8058 i_be = cluster.FillBE(instance)
8060 # check memory requirements on the secondary node
8061 if (not self.cleanup and
8062 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8063 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8064 "migrating instance %s" %
8066 i_be[constants.BE_MINMEM],
8067 instance.hypervisor)
8069 self.lu.LogInfo("Not checking memory on the secondary node as"
8070 " instance will not be started")
8072 # check if failover must be forced instead of migration
8073 if (not self.cleanup and not self.failover and
8074 i_be[constants.BE_ALWAYS_FAILOVER]):
8076 self.lu.LogInfo("Instance configured to always failover; fallback"
8078 self.failover = True
8080 raise errors.OpPrereqError("This instance has been configured to"
8081 " always failover, please allow failover",
8084 # check bridge existence
8085 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8087 if not self.cleanup:
8088 _CheckNodeNotDrained(self.lu, target_node)
8089 if not self.failover:
8090 result = self.rpc.call_instance_migratable(instance.primary_node,
8092 if result.fail_msg and self.fallback:
8093 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8095 self.failover = True
8097 result.Raise("Can't migrate, please use failover",
8098 prereq=True, ecode=errors.ECODE_STATE)
8100 assert not (self.failover and self.cleanup)
8102 if not self.failover:
8103 if self.lu.op.live is not None and self.lu.op.mode is not None:
8104 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8105 " parameters are accepted",
8107 if self.lu.op.live is not None:
8109 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8111 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8112 # reset the 'live' parameter to None so that repeated
8113 # invocations of CheckPrereq do not raise an exception
8114 self.lu.op.live = None
8115 elif self.lu.op.mode is None:
8116 # read the default value from the hypervisor
8117 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8118 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8120 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8122 # Failover is never live
8125 if not (self.failover or self.cleanup):
8126 remote_info = self.rpc.call_instance_info(instance.primary_node,
8128 instance.hypervisor)
8129 remote_info.Raise("Error checking instance on node %s" %
8130 instance.primary_node)
8131 instance_running = bool(remote_info.payload)
8132 if instance_running:
8133 self.current_mem = int(remote_info.payload["memory"])
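# Recap of the live/mode resolution above (editor's note, not original
# code): at most one of op.live and op.mode may be given; live=True maps to
# HT_MIGRATION_LIVE, live=False to HT_MIGRATION_NONLIVE, and if neither is
# given the hypervisor's HV_MIGRATION_MODE default is used.  For failovers
# self.live is always False, and current_mem is only recorded when the
# instance is actually running on its primary node.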
8135 def _RunAllocator(self):
8136 """Run the allocator based on input opcode.
8139 # FIXME: add a self.ignore_ipolicy option
8140 ial = IAllocator(self.cfg, self.rpc,
8141 mode=constants.IALLOCATOR_MODE_RELOC,
8142 name=self.instance_name,
8143 relocate_from=[self.instance.primary_node],
8146 ial.Run(self.lu.op.iallocator)
8149 raise errors.OpPrereqError("Can't compute nodes using"
8150 " iallocator '%s': %s" %
8151 (self.lu.op.iallocator, ial.info),
8153 if len(ial.result) != ial.required_nodes:
8154 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8155 " of nodes (%s), required %s" %
8156 (self.lu.op.iallocator, len(ial.result),
8157 ial.required_nodes), errors.ECODE_FAULT)
8158 self.target_node = ial.result[0]
8159 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8160 self.instance_name, self.lu.op.iallocator,
8161 utils.CommaJoin(ial.result))
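# Illustrative example (node name invented): for a relocation request the
# iallocator must return exactly ial.required_nodes entries, so a
# successful run might yield ial.result == ["node2.example.com"], which
# then becomes self.target_node.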
8163 def _WaitUntilSync(self):
8164 """Poll with custom rpc for disk sync.
8166 This uses our own step-based rpc call.
8169 self.feedback_fn("* wait until resync is done")
8173 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8175 (self.instance.disks,
8178 for node, nres in result.items():
8179 nres.Raise("Cannot resync disks on node %s" % node)
8180 node_done, node_percent = nres.payload
8181 all_done = all_done and node_done
8182 if node_percent is not None:
8183 min_percent = min(min_percent, node_percent)
8185 if min_percent < 100:
8186 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8189 def _EnsureSecondary(self, node):
8190 """Demote a node to secondary.
8193 self.feedback_fn("* switching node %s to secondary mode" % node)
8195 for dev in self.instance.disks:
8196 self.cfg.SetDiskID(dev, node)
8198 result = self.rpc.call_blockdev_close(node, self.instance.name,
8199 self.instance.disks)
8200 result.Raise("Cannot change disk to secondary on node %s" % node)
8202 def _GoStandalone(self):
8203 """Disconnect from the network.
8206 self.feedback_fn("* changing into standalone mode")
8207 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8208 self.instance.disks)
8209 for node, nres in result.items():
8210 nres.Raise("Cannot disconnect disks node %s" % node)
8212 def _GoReconnect(self, multimaster):
8213 """Reconnect to the network.
8219 msg = "single-master"
8220 self.feedback_fn("* changing disks into %s mode" % msg)
8221 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8222 (self.instance.disks, self.instance),
8223 self.instance.name, multimaster)
8224 for node, nres in result.items():
8225 nres.Raise("Cannot change disks config on node %s" % node)
8227 def _ExecCleanup(self):
8228 """Try to cleanup after a failed migration.
8230 The cleanup is done by:
8231 - check that the instance is running only on one node
8232 (and update the config if needed)
8233 - change disks on its secondary node to secondary
8234 - wait until disks are fully synchronized
8235 - disconnect from the network
8236 - change disks into single-master mode
8237 - wait again until disks are fully synchronized
8240 instance = self.instance
8241 target_node = self.target_node
8242 source_node = self.source_node
8244 # check running on only one node
8245 self.feedback_fn("* checking where the instance actually runs"
8246 " (if this hangs, the hypervisor might be in"
8248 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8249 for node, result in ins_l.items():
8250 result.Raise("Can't contact node %s" % node)
8252 runningon_source = instance.name in ins_l[source_node].payload
8253 runningon_target = instance.name in ins_l[target_node].payload
8255 if runningon_source and runningon_target:
8256 raise errors.OpExecError("Instance seems to be running on two nodes,"
8257 " or the hypervisor is confused; you will have"
8258 " to ensure manually that it runs only on one"
8259 " and restart this operation")
8261 if not (runningon_source or runningon_target):
8262 raise errors.OpExecError("Instance does not seem to be running at all;"
8263 " in this case it's safer to repair by"
8264 " running 'gnt-instance stop' to ensure disk"
8265 " shutdown, and then restarting it")
8267 if runningon_target:
8268 # the migration has actually succeeded, we need to update the config
8269 self.feedback_fn("* instance running on secondary node (%s),"
8270 " updating config" % target_node)
8271 instance.primary_node = target_node
8272 self.cfg.Update(instance, self.feedback_fn)
8273 demoted_node = source_node
8275 self.feedback_fn("* instance confirmed to be running on its"
8276 " primary node (%s)" % source_node)
8277 demoted_node = target_node
8279 if instance.disk_template in constants.DTS_INT_MIRROR:
8280 self._EnsureSecondary(demoted_node)
8282 self._WaitUntilSync()
8283 except errors.OpExecError:
8284 # we ignore errors here, since if the device is standalone, it
8285 # won't be able to sync
8287 self._GoStandalone()
8288 self._GoReconnect(False)
8289 self._WaitUntilSync()
8291 self.feedback_fn("* done")
8293 def _RevertDiskStatus(self):
8294 """Try to revert the disk status after a failed migration.
8297 target_node = self.target_node
8298 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8302 self._EnsureSecondary(target_node)
8303 self._GoStandalone()
8304 self._GoReconnect(False)
8305 self._WaitUntilSync()
8306 except errors.OpExecError, err:
8307 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8308 " please try to recover the instance manually;"
8309 " error '%s'" % str(err))
8311 def _AbortMigration(self):
8312 """Call the hypervisor code to abort a started migration.
8315 instance = self.instance
8316 target_node = self.target_node
8317 source_node = self.source_node
8318 migration_info = self.migration_info
8320 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8324 abort_msg = abort_result.fail_msg
8326 logging.error("Aborting migration failed on target node %s: %s",
8327 target_node, abort_msg)
8328 # Don't raise an exception here, as we still have to try to revert the
8329 # disk status, even if this step failed.
8331 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8332 instance, False, self.live)
8333 abort_msg = abort_result.fail_msg
8335 logging.error("Aborting migration failed on source node %s: %s",
8336 source_node, abort_msg)
8338 def _ExecMigration(self):
8339 """Migrate an instance.
8341 The migrate is done by:
8342 - change the disks into dual-master mode
8343 - wait until disks are fully synchronized again
8344 - migrate the instance
8345 - change disks on the new secondary node (the old primary) to secondary
8346 - wait until disks are fully synchronized
8347 - change disks into single-master mode
8350 instance = self.instance
8351 target_node = self.target_node
8352 source_node = self.source_node
8354 # Check for hypervisor version mismatch and warn the user.
8355 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8356 None, [self.instance.hypervisor])
8357 for ninfo in nodeinfo.values():
8358 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8360 (_, _, (src_info, )) = nodeinfo[source_node].payload
8361 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8363 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8364 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8365 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8366 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8367 if src_version != dst_version:
8368 self.feedback_fn("* warning: hypervisor version mismatch between"
8369 " source (%s) and target (%s) node" %
8370 (src_version, dst_version))
8372 self.feedback_fn("* checking disk consistency between source and target")
8373 for (idx, dev) in enumerate(instance.disks):
8374 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8375 raise errors.OpExecError("Disk %s is degraded or not fully"
8376 " synchronized on target node,"
8377 " aborting migration" % idx)
8379 if self.current_mem > self.tgt_free_mem:
8380 if not self.allow_runtime_changes:
8381 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8382 " free memory to fit instance %s on target"
8383 " node %s (have %dMB, need %dMB)" %
8384 (instance.name, target_node,
8385 self.tgt_free_mem, self.current_mem))
8386 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8387 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8390 rpcres.Raise("Cannot modify instance runtime memory")
8392 # First get the migration information from the remote node
8393 result = self.rpc.call_migration_info(source_node, instance)
8394 msg = result.fail_msg
8396 log_err = ("Failed fetching source migration information from %s: %s" %
8398 logging.error(log_err)
8399 raise errors.OpExecError(log_err)
8401 self.migration_info = migration_info = result.payload
8403 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8404 # Then switch the disks to master/master mode
8405 self._EnsureSecondary(target_node)
8406 self._GoStandalone()
8407 self._GoReconnect(True)
8408 self._WaitUntilSync()
8410 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8411 result = self.rpc.call_accept_instance(target_node,
8414 self.nodes_ip[target_node])
8416 msg = result.fail_msg
8418 logging.error("Instance pre-migration failed, trying to revert"
8419 " disk status: %s", msg)
8420 self.feedback_fn("Pre-migration failed, aborting")
8421 self._AbortMigration()
8422 self._RevertDiskStatus()
8423 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8424 (instance.name, msg))
8426 self.feedback_fn("* migrating instance to %s" % target_node)
8427 result = self.rpc.call_instance_migrate(source_node, instance,
8428 self.nodes_ip[target_node],
8430 msg = result.fail_msg
8432 logging.error("Instance migration failed, trying to revert"
8433 " disk status: %s", msg)
8434 self.feedback_fn("Migration failed, aborting")
8435 self._AbortMigration()
8436 self._RevertDiskStatus()
8437 raise errors.OpExecError("Could not migrate instance %s: %s" %
8438 (instance.name, msg))
8440 self.feedback_fn("* starting memory transfer")
8441 last_feedback = time.time()
8443 result = self.rpc.call_instance_get_migration_status(source_node,
8445 msg = result.fail_msg
8446 ms = result.payload # MigrationStatus instance
8447 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8448 logging.error("Instance migration failed, trying to revert"
8449 " disk status: %s", msg)
8450 self.feedback_fn("Migration failed, aborting")
8451 self._AbortMigration()
8452 self._RevertDiskStatus()
8453 raise errors.OpExecError("Could not migrate instance %s: %s" %
8454 (instance.name, msg))
8456 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8457 self.feedback_fn("* memory transfer complete")
8460 if (utils.TimeoutExpired(last_feedback,
8461 self._MIGRATION_FEEDBACK_INTERVAL) and
8462 ms.transferred_ram is not None):
8463 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8464 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8465 last_feedback = time.time()
8467 time.sleep(self._MIGRATION_POLL_INTERVAL)
8469 result = self.rpc.call_instance_finalize_migration_src(source_node,
8473 msg = result.fail_msg
8475 logging.error("Instance migration succeeded, but finalization failed"
8476 " on the source node: %s", msg)
8477 raise errors.OpExecError("Could not finalize instance migration: %s" %
8480 instance.primary_node = target_node
8482 # distribute new instance config to the other nodes
8483 self.cfg.Update(instance, self.feedback_fn)
8485 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8489 msg = result.fail_msg
8491 logging.error("Instance migration succeeded, but finalization failed"
8492 " on the target node: %s", msg)
8493 raise errors.OpExecError("Could not finalize instance migration: %s" %
8496 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8497 self._EnsureSecondary(source_node)
8498 self._WaitUntilSync()
8499 self._GoStandalone()
8500 self._GoReconnect(False)
8501 self._WaitUntilSync()
8503 # If the instance's disk template is `rbd' and there was a successful
8504 # migration, unmap the device from the source node.
8505 if self.instance.disk_template == constants.DT_RBD:
8506 disks = _ExpandCheckDisks(instance, instance.disks)
8507 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8509 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8510 msg = result.fail_msg
8512 logging.error("Migration was successful, but couldn't unmap the"
8513 " block device %s on source node %s: %s",
8514 disk.iv_name, source_node, msg)
8515 logging.error("You need to unmap the device %s manually on %s",
8516 disk.iv_name, source_node)
8518 self.feedback_fn("* done")
8520 def _ExecFailover(self):
8521 """Failover an instance.
8523 The failover is done by shutting it down on its present node and
8524 starting it on the secondary.
8527 instance = self.instance
8528 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8530 source_node = instance.primary_node
8531 target_node = self.target_node
8533 if instance.admin_state == constants.ADMINST_UP:
8534 self.feedback_fn("* checking disk consistency between source and target")
8535 for (idx, dev) in enumerate(instance.disks):
8536 # for drbd, these are drbd over lvm
8537 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8539 if primary_node.offline:
8540 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8542 (primary_node.name, idx, target_node))
8543 elif not self.ignore_consistency:
8544 raise errors.OpExecError("Disk %s is degraded on target node,"
8545 " aborting failover" % idx)
8547 self.feedback_fn("* not checking disk consistency as instance is not"
8550 self.feedback_fn("* shutting down instance on source node")
8551 logging.info("Shutting down instance %s on node %s",
8552 instance.name, source_node)
8554 result = self.rpc.call_instance_shutdown(source_node, instance,
8555 self.shutdown_timeout)
8556 msg = result.fail_msg
8558 if self.ignore_consistency or primary_node.offline:
8559 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8560 " proceeding anyway; please make sure node"
8561 " %s is down; error details: %s",
8562 instance.name, source_node, source_node, msg)
8564 raise errors.OpExecError("Could not shutdown instance %s on"
8566 (instance.name, source_node, msg))
8568 self.feedback_fn("* deactivating the instance's disks on source node")
8569 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8570 raise errors.OpExecError("Can't shut down the instance's disks")
8572 instance.primary_node = target_node
8573 # distribute new instance config to the other nodes
8574 self.cfg.Update(instance, self.feedback_fn)
8576 # Only start the instance if it's marked as up
8577 if instance.admin_state == constants.ADMINST_UP:
8578 self.feedback_fn("* activating the instance's disks on target node %s" %
8580 logging.info("Starting instance %s on node %s",
8581 instance.name, target_node)
8583 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8584 ignore_secondaries=True)
8586 _ShutdownInstanceDisks(self.lu, instance)
8587 raise errors.OpExecError("Can't activate the instance's disks")
8589 self.feedback_fn("* starting the instance on the target node %s" %
8591 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8593 msg = result.fail_msg
8595 _ShutdownInstanceDisks(self.lu, instance)
8596 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8597 (instance.name, target_node, msg))
8599 def Exec(self, feedback_fn):
8600 """Perform the migration.
8603 self.feedback_fn = feedback_fn
8604 self.source_node = self.instance.primary_node
8606 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8607 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8608 self.target_node = self.instance.secondary_nodes[0]
8609 # Otherwise self.target_node has been populated either
8610 # directly, or through an iallocator.
8612 self.all_nodes = [self.source_node, self.target_node]
8613 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8614 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8617 feedback_fn("Failover instance %s" % self.instance.name)
8618 self._ExecFailover()
8620 feedback_fn("Migrating instance %s" % self.instance.name)
8623 return self._ExecCleanup()
8625 return self._ExecMigration()
8628 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8630 """Wrapper around L{_CreateBlockDevInner}.
8632 This method annotates the root device first.
8635 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8636 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8640 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8642 """Create a tree of block devices on a given node.
8644 If this device type has to be created on secondaries, create it and
8647 If not, just recurse to children keeping the same 'force' value.
8649 @attention: The device has to be annotated already.
8651 @param lu: the lu on whose behalf we execute
8652 @param node: the node on which to create the device
8653 @type instance: L{objects.Instance}
8654 @param instance: the instance which owns the device
8655 @type device: L{objects.Disk}
8656 @param device: the device to create
8657 @type force_create: boolean
8658 @param force_create: whether to force creation of this device; this
8659 will be changed to True whenever we find a device which has
8660 CreateOnSecondary() attribute
8661 @param info: the extra 'metadata' we should attach to the device
8662 (this will be represented as a LVM tag)
8663 @type force_open: boolean
8664 @param force_open: this parameter will be passed to the
8665 L{backend.BlockdevCreate} function where it specifies
8666 whether we run on primary or not, and it affects both
8667 the child assembly and the device's own Open() execution
8670 if device.CreateOnSecondary():
8674 for child in device.children:
8675 _CreateBlockDevInner(lu, node, instance, child, force_create,
8678 if not force_create:
8681 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8684 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8685 """Create a single block device on a given node.
8687 This will not recurse over children of the device, so they must be
8690 @param lu: the lu on whose behalf we execute
8691 @param node: the node on which to create the device
8692 @type instance: L{objects.Instance}
8693 @param instance: the instance which owns the device
8694 @type device: L{objects.Disk}
8695 @param device: the device to create
8696 @param info: the extra 'metadata' we should attach to the device
8697 (this will be represented as a LVM tag)
8698 @type force_open: boolean
8699 @param force_open: this parameter will be passed to the
8700 L{backend.BlockdevCreate} function where it specifies
8701 whether we run on primary or not, and it affects both
8702 the child assembly and the device's own Open() execution
8705 lu.cfg.SetDiskID(device, node)
8706 result = lu.rpc.call_blockdev_create(node, device, device.size,
8707 instance.name, force_open, info)
8708 result.Raise("Can't create block device %s on"
8709 " node %s for instance %s" % (device, node, instance.name))
8710 if device.physical_id is None:
8711 device.physical_id = result.payload
8714 def _GenerateUniqueNames(lu, exts):
8715 """Generate a suitable LV name.
8717 This will generate a logical volume name for the given instance.
8722 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8723 results.append("%s%s" % (new_id, val))
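# Illustrative example (placeholder IDs): called with
# exts == [".disk0_data", ".disk0_meta"] this produces something like
#   ["<uuid-1>.disk0_data", "<uuid-2>.disk0_meta"]
# where each <uuid-N> is a fresh cluster-unique ID reserved against the
# current execution context.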
8727 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8728 iv_name, p_minor, s_minor):
8729 """Generate a drbd8 device complete with its children.
8732 assert len(vgnames) == len(names) == 2
8733 port = lu.cfg.AllocatePort()
8734 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8736 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8737 logical_id=(vgnames[0], names[0]),
8739 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8740 logical_id=(vgnames[1], names[1]),
8742 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8743 logical_id=(primary, secondary, port,
8746 children=[dev_data, dev_meta],
8747 iv_name=iv_name, params={})
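# Illustrative result: the drbd_dev built above describes an LD_DRBD8
# device of `size` MiB whose two LD_LV children are the data volume
# (vgnames[0]/names[0], `size` MiB) and the metadata volume
# (vgnames[1]/names[1], DRBD_META_SIZE MiB, the 128 MB accounted for in the
# size computations further below), with the primary/secondary nodes, the
# allocated port, the two DRBD minors and the shared secret encoded in its
# logical_id.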
8751 _DISK_TEMPLATE_NAME_PREFIX = {
8752 constants.DT_PLAIN: "",
8753 constants.DT_RBD: ".rbd",
8757 _DISK_TEMPLATE_DEVICE_TYPE = {
8758 constants.DT_PLAIN: constants.LD_LV,
8759 constants.DT_FILE: constants.LD_FILE,
8760 constants.DT_SHARED_FILE: constants.LD_FILE,
8761 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8762 constants.DT_RBD: constants.LD_RBD,
8766 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8767 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8768 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8769 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8770 """Generate the entire disk layout for a given template type.
8773 #TODO: compute space requirements
8775 vgname = lu.cfg.GetVGName()
8776 disk_count = len(disk_info)
8779 if template_name == constants.DT_DISKLESS:
8781 elif template_name == constants.DT_DRBD8:
8782 if len(secondary_nodes) != 1:
8783 raise errors.ProgrammerError("Wrong template configuration")
8784 remote_node = secondary_nodes[0]
8785 minors = lu.cfg.AllocateDRBDMinor(
8786 [primary_node, remote_node] * len(disk_info), instance_name)
8788 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8790 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8793 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8794 for i in range(disk_count)]):
8795 names.append(lv_prefix + "_data")
8796 names.append(lv_prefix + "_meta")
8797 for idx, disk in enumerate(disk_info):
8798 disk_index = idx + base_index
8799 data_vg = disk.get(constants.IDISK_VG, vgname)
8800 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8801 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8802 disk[constants.IDISK_SIZE],
8804 names[idx * 2:idx * 2 + 2],
8805 "disk/%d" % disk_index,
8806 minors[idx * 2], minors[idx * 2 + 1])
8807 disk_dev.mode = disk[constants.IDISK_MODE]
8808 disks.append(disk_dev)
8811 raise errors.ProgrammerError("Wrong template configuration")
8813 if template_name == constants.DT_FILE:
8815 elif template_name == constants.DT_SHARED_FILE:
8816 _req_shr_file_storage()
8818 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8819 if name_prefix is None:
8822 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8823 (name_prefix, base_index + i)
8824 for i in range(disk_count)])
8826 if template_name == constants.DT_PLAIN:
8827 def logical_id_fn(idx, _, disk):
8828 vg = disk.get(constants.IDISK_VG, vgname)
8829 return (vg, names[idx])
8830 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8832 lambda _, disk_index, disk: (file_driver,
8833 "%s/disk%d" % (file_storage_dir,
8835 elif template_name == constants.DT_BLOCK:
8837 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8838 disk[constants.IDISK_ADOPT])
8839 elif template_name == constants.DT_RBD:
8840 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8842 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8844 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8846 for idx, disk in enumerate(disk_info):
8847 disk_index = idx + base_index
8848 size = disk[constants.IDISK_SIZE]
8849 feedback_fn("* disk %s, size %s" %
8850 (disk_index, utils.FormatUnit(size, "h")))
8851 disks.append(objects.Disk(dev_type=dev_type, size=size,
8852 logical_id=logical_id_fn(idx, disk_index, disk),
8853 iv_name="disk/%d" % disk_index,
8854 mode=disk[constants.IDISK_MODE],
8860 def _GetInstanceInfoText(instance):
8861 Compute the text that should be added to the disk's metadata.
8864 return "originstname+%s" % instance.name
8867 def _CalcEta(time_taken, written, total_size):
8868 """Calculates the ETA based on size written and total size.
8870 @param time_taken: The time taken so far
8871 @param written: amount written so far
8872 @param total_size: The total size of data to be written
8873 @return: The remaining time in seconds
8876 avg_time = time_taken / float(written)
8877 return (total_size - written) * avg_time
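# Worked example: if 120 MiB have been written in 60 seconds out of a
# 1024 MiB total, the average is 0.5 s/MiB, so
#   _CalcEta(60.0, 120, 1024) == (1024 - 120) * 0.5 == 452.0 seconds left.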
8880 def _WipeDisks(lu, instance):
8881 """Wipes instance disks.
8883 @type lu: L{LogicalUnit}
8884 @param lu: the logical unit on whose behalf we execute
8885 @type instance: L{objects.Instance}
8886 @param instance: the instance whose disks we should create
8887 @return: the success of the wipe
8890 node = instance.primary_node
8892 for device in instance.disks:
8893 lu.cfg.SetDiskID(device, node)
8895 logging.info("Pause sync of instance %s disks", instance.name)
8896 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8897 (instance.disks, instance),
8899 result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)
8901 for idx, success in enumerate(result.payload):
8903 logging.warn("pause-sync of instance %s for disks %d failed",
8907 for idx, device in enumerate(instance.disks):
8908 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8909 # MAX_WIPE_CHUNK at max
8910 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8911 constants.MIN_WIPE_CHUNK_PERCENT)
8912 # we _must_ make this an int, otherwise rounding errors will occur
8914 wipe_chunk_size = int(wipe_chunk_size)
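# Worked example (assuming the usual values MIN_WIPE_CHUNK_PERCENT = 10
# and MAX_WIPE_CHUNK = 1024 MiB): a 2048 MiB disk is wiped in
# min(1024, 204.8) -> 204 MiB chunks, while a 20480 MiB disk is capped at
# 1024 MiB chunks.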
8916 lu.LogInfo("* Wiping disk %d", idx)
8917 logging.info("Wiping disk %d for instance %s, node %s using"
8918 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8923 start_time = time.time()
8925 while offset < size:
8926 wipe_size = min(wipe_chunk_size, size - offset)
8927 logging.debug("Wiping disk %d, offset %s, chunk %s",
8928 idx, offset, wipe_size)
8929 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
8931 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8932 (idx, offset, wipe_size))
8935 if now - last_output >= 60:
8936 eta = _CalcEta(now - start_time, offset, size)
8937 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8938 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8941 logging.info("Resume sync of instance %s disks", instance.name)
8943 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8944 (instance.disks, instance),
8948 lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
8949 " please have a look at the status and troubleshoot"
8950 " the issue: %s", node, result.fail_msg)
8952 for idx, success in enumerate(result.payload):
8954 lu.LogWarning("Resume sync of disk %d failed, please have a"
8955 " look at the status and troubleshoot the issue", idx)
8956 logging.warn("resume-sync of instance %s for disks %d failed",
8960 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8961 """Create all disks for an instance.
8963 This abstracts away some work from AddInstance.
8965 @type lu: L{LogicalUnit}
8966 @param lu: the logical unit on whose behalf we execute
8967 @type instance: L{objects.Instance}
8968 @param instance: the instance whose disks we should create
8970 @param to_skip: list of indices to skip
8971 @type target_node: string
8972 @param target_node: if passed, overrides the target node for creation
8974 @return: the success of the creation
8977 info = _GetInstanceInfoText(instance)
8978 if target_node is None:
8979 pnode = instance.primary_node
8980 all_nodes = instance.all_nodes
8985 if instance.disk_template in constants.DTS_FILEBASED:
8986 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8987 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8989 result.Raise("Failed to create directory '%s' on"
8990 " node %s" % (file_storage_dir, pnode))
8992 # Note: this needs to be kept in sync with adding of disks in
8993 # LUInstanceSetParams
8994 for idx, device in enumerate(instance.disks):
8995 if to_skip and idx in to_skip:
8997 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
8999 for node in all_nodes:
9000 f_create = node == pnode
9001 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9004 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9005 """Remove all disks for an instance.
9007 This abstracts away some work from `AddInstance()` and
9008 `RemoveInstance()`. Note that in case some of the devices couldn't
9009 be removed, the removal will continue with the other ones (compare
9010 with `_CreateDisks()`).
9012 @type lu: L{LogicalUnit}
9013 @param lu: the logical unit on whose behalf we execute
9014 @type instance: L{objects.Instance}
9015 @param instance: the instance whose disks we should remove
9016 @type target_node: string
9017 @param target_node: used to override the node on which to remove the disks
9019 @return: the success of the removal
9022 logging.info("Removing block devices for instance %s", instance.name)
9025 ports_to_release = set()
9026 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9027 for (idx, device) in enumerate(anno_disks):
9029 edata = [(target_node, device)]
9031 edata = device.ComputeNodeTree(instance.primary_node)
9032 for node, disk in edata:
9033 lu.cfg.SetDiskID(disk, node)
9034 result = lu.rpc.call_blockdev_remove(node, disk)
9036 lu.LogWarning("Could not remove disk %s on node %s,"
9037 " continuing anyway: %s", idx, node, result.fail_msg)
9038 if not (result.offline and node != instance.primary_node):
9041 # if this is a DRBD disk, return its port to the pool
9042 if device.dev_type in constants.LDS_DRBD:
9043 ports_to_release.add(device.logical_id[2])
9045 if all_result or ignore_failures:
9046 for port in ports_to_release:
9047 lu.cfg.AddTcpUdpPort(port)
9049 if instance.disk_template == constants.DT_FILE:
9050 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9054 tgt = instance.primary_node
9055 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9057 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9058 file_storage_dir, instance.primary_node, result.fail_msg)
9064 def _ComputeDiskSizePerVG(disk_template, disks):
9065 """Compute disk size requirements in the volume group
9068 def _compute(disks, payload):
9069 """Universal algorithm.
9074 vgs[disk[constants.IDISK_VG]] = \
9075 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9079 # Required free disk space as a function of disk and swap space
9081 constants.DT_DISKLESS: {},
9082 constants.DT_PLAIN: _compute(disks, 0),
9083 # 128 MB are added for drbd metadata for each disk
9084 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9085 constants.DT_FILE: {},
9086 constants.DT_SHARED_FILE: {},
9089 if disk_template not in req_size_dict:
9090 raise errors.ProgrammerError("Disk template '%s' size requirement"
9091 " is unknown" % disk_template)
9093 return req_size_dict[disk_template]
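# Worked example (volume-group name invented): for a DRBD8 instance with
# two 512 MiB disks, both in volume group "xenvg", the result is
# {"xenvg": 1280}, i.e. each disk contributes its size plus DRBD_META_SIZE
# (128 MB) of metadata.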
9096 def _ComputeDiskSize(disk_template, disks):
9097 """Compute disk size requirements according to disk template
9100 # Required free disk space as a function of disk and swap space
9102 constants.DT_DISKLESS: None,
9103 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9104 # 128 MB are added for drbd metadata for each disk
9106 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9107 constants.DT_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9108 constants.DT_SHARED_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9109 constants.DT_BLOCK: 0,
9110 constants.DT_RBD: sum(d[constants.IDISK_SIZE] for d in disks),
9113 if disk_template not in req_size_dict:
9114 raise errors.ProgrammerError("Disk template '%s' size requirement"
9115 " is unknown" % disk_template)
9117 return req_size_dict[disk_template]
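# Worked example: the same two 512 MiB disks give
# 512 + 128 + 512 + 128 = 1280 MiB in total under DT_DRBD8, while under
# DT_PLAIN the metadata overhead is dropped and the result is 1024 MiB.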
9120 def _FilterVmNodes(lu, nodenames):
9121 """Filters out non-vm_capable nodes from a list.
9123 @type lu: L{LogicalUnit}
9124 @param lu: the logical unit for which we check
9125 @type nodenames: list
9126 @param nodenames: the list of nodes on which we should check
9128 @return: the list of vm-capable nodes
9131 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9132 return [name for name in nodenames if name not in vm_nodes]
9135 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9136 """Hypervisor parameter validation.
9138 This function abstracts the hypervisor parameter validation to be
9139 used in both instance create and instance modify.
9141 @type lu: L{LogicalUnit}
9142 @param lu: the logical unit for which we check
9143 @type nodenames: list
9144 @param nodenames: the list of nodes on which we should check
9145 @type hvname: string
9146 @param hvname: the name of the hypervisor we should use
9147 @type hvparams: dict
9148 @param hvparams: the parameters which we need to check
9149 @raise errors.OpPrereqError: if the parameters are not valid
9152 nodenames = _FilterVmNodes(lu, nodenames)
9154 cluster = lu.cfg.GetClusterInfo()
9155 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9157 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9158 for node in nodenames:
9162 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9165 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9166 """OS parameters validation.
9168 @type lu: L{LogicalUnit}
9169 @param lu: the logical unit for which we check
9170 @type required: boolean
9171 @param required: whether the validation should fail if the OS is not
9173 @type nodenames: list
9174 @param nodenames: the list of nodes on which we should check
9175 @type osname: string
9176 @param osname: the name of the OS we should validate
9177 @type osparams: dict
9178 @param osparams: the parameters which we need to check
9179 @raise errors.OpPrereqError: if the parameters are not valid
9182 nodenames = _FilterVmNodes(lu, nodenames)
9183 result = lu.rpc.call_os_validate(nodenames, required, osname,
9184 [constants.OS_VALIDATE_PARAMETERS],
9186 for node, nres in result.items():
9187 # we don't check for offline cases since this should be run only
9188 # against the master node and/or an instance's nodes
9189 nres.Raise("OS Parameters validation failed on node %s" % node)
9190 if not nres.payload:
9191 lu.LogInfo("OS %s not found on node %s, validation skipped",
9195 class LUInstanceCreate(LogicalUnit):
9196 """Create an instance.
9199 HPATH = "instance-add"
9200 HTYPE = constants.HTYPE_INSTANCE
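# Editor's sketch (hypothetical values, not from the original code): a
# minimal creation request handled by this LU could look roughly like
#   opcodes.OpInstanceCreate(instance_name="inst1.example.com",
#                            mode=constants.INSTANCE_CREATE,
#                            disk_template=constants.DT_PLAIN,
#                            disks=[{constants.IDISK_SIZE: 1024}],
#                            nics=[{}], os_type="debian-image",
#                            pnode="node1.example.com")
# CheckArguments below normalizes the instance name, validates the disk/NIC
# dictionaries and enforces the iallocator-or-pnode rule.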
9203 def CheckArguments(self):
9207 # do not require name_check to ease forward/backward compatibility
9209 if self.op.no_install and self.op.start:
9210 self.LogInfo("No-installation mode selected, disabling startup")
9211 self.op.start = False
9212 # validate/normalize the instance name
9213 self.op.instance_name = \
9214 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9216 if self.op.ip_check and not self.op.name_check:
9217 # TODO: make the ip check more flexible and not depend on the name check
9218 raise errors.OpPrereqError("Cannot do IP address check without a name"
9219 " check", errors.ECODE_INVAL)
9221 # check nics' parameter names
9222 for nic in self.op.nics:
9223 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9225 # check disks. parameter names and consistent adopt/no-adopt strategy
9226 has_adopt = has_no_adopt = False
9227 for disk in self.op.disks:
9228 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9229 if constants.IDISK_ADOPT in disk:
9233 if has_adopt and has_no_adopt:
9234 raise errors.OpPrereqError("Either all disks are adopted or none is",
9237 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9238 raise errors.OpPrereqError("Disk adoption is not supported for the"
9239 " '%s' disk template" %
9240 self.op.disk_template,
9242 if self.op.iallocator is not None:
9243 raise errors.OpPrereqError("Disk adoption not allowed with an"
9244 " iallocator script", errors.ECODE_INVAL)
9245 if self.op.mode == constants.INSTANCE_IMPORT:
9246 raise errors.OpPrereqError("Disk adoption not allowed for"
9247 " instance import", errors.ECODE_INVAL)
9249 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9250 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9251 " but no 'adopt' parameter given" %
9252 self.op.disk_template,
9255 self.adopt_disks = has_adopt
9257 # instance name verification
9258 if self.op.name_check:
9259 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9260 self.op.instance_name = self.hostname1.name
9261 # used in CheckPrereq for ip ping check
9262 self.check_ip = self.hostname1.ip
9264 self.check_ip = None
9266 # file storage checks
9267 if (self.op.file_driver and
9268 not self.op.file_driver in constants.FILE_DRIVER):
9269 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9270 self.op.file_driver, errors.ECODE_INVAL)
9272 if self.op.disk_template == constants.DT_FILE:
9273 opcodes.RequireFileStorage()
9274 elif self.op.disk_template == constants.DT_SHARED_FILE:
9275 opcodes.RequireSharedFileStorage()
9277 ### Node/iallocator related checks
9278 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9280 if self.op.pnode is not None:
9281 if self.op.disk_template in constants.DTS_INT_MIRROR:
9282 if self.op.snode is None:
9283 raise errors.OpPrereqError("The networked disk templates need"
9284 " a mirror node", errors.ECODE_INVAL)
9286 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9288 self.op.snode = None
9290 self._cds = _GetClusterDomainSecret()
9292 if self.op.mode == constants.INSTANCE_IMPORT:
9293 # On import force_variant must be True, because if we forced it at
9294 # initial install, our only chance when importing it back is that it
9296 self.op.force_variant = True
9298 if self.op.no_install:
9299 self.LogInfo("No-installation mode has no effect during import")
9301 elif self.op.mode == constants.INSTANCE_CREATE:
9302 if self.op.os_type is None:
9303 raise errors.OpPrereqError("No guest OS specified",
9305 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9306 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9307 " installation" % self.op.os_type,
9308 errors.ECODE_INVAL)
9309 if self.op.disk_template is None:
9310 raise errors.OpPrereqError("No disk template specified",
9311 errors.ECODE_INVAL)
9313 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9314 # Check handshake to ensure both clusters have the same domain secret
9315 src_handshake = self.op.source_handshake
9316 if not src_handshake:
9317 raise errors.OpPrereqError("Missing source handshake",
9318 errors.ECODE_INVAL)
9320 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9321 src_handshake)
9322 if errmsg:
9323 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9324 errors.ECODE_INVAL)
9326 # Load and check source CA
9327 self.source_x509_ca_pem = self.op.source_x509_ca
9328 if not self.source_x509_ca_pem:
9329 raise errors.OpPrereqError("Missing source X509 CA",
9330 errors.ECODE_INVAL)
9332 try:
9333 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9334 self._cds)
9335 except OpenSSL.crypto.Error, err:
9336 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9337 (err, ), errors.ECODE_INVAL)
9339 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9340 if errcode is not None:
9341 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9342 errors.ECODE_INVAL)
9344 self.source_x509_ca = cert
9346 src_instance_name = self.op.source_instance_name
9347 if not src_instance_name:
9348 raise errors.OpPrereqError("Missing source instance name",
9349 errors.ECODE_INVAL)
9351 self.source_instance_name = \
9352 netutils.GetHostname(name=src_instance_name).name
9354 else:
9355 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9356 self.op.mode, errors.ECODE_INVAL)
9358 def ExpandNames(self):
9359 """ExpandNames for CreateInstance.
9361 Figure out the right locks for instance creation.
9364 self.needed_locks = {}
9366 instance_name = self.op.instance_name
9367 # this is just a preventive check, but someone might still add this
9368 # instance in the meantime, and creation will fail at lock-add time
9369 if instance_name in self.cfg.GetInstanceList():
9370 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9371 instance_name, errors.ECODE_EXISTS)
9373 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9375 if self.op.iallocator:
9376 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9377 # specifying a group on instance creation and then selecting nodes from
9379 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9380 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9381 else:
9382 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9383 nodelist = [self.op.pnode]
9384 if self.op.snode is not None:
9385 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9386 nodelist.append(self.op.snode)
9387 self.needed_locks[locking.LEVEL_NODE] = nodelist
9388 # Lock resources of instance's primary and secondary nodes (copy to
9389 # prevent accidential modification)
9390 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9392 # in case of import lock the source node too
9393 if self.op.mode == constants.INSTANCE_IMPORT:
9394 src_node = self.op.src_node
9395 src_path = self.op.src_path
9397 if src_path is None:
9398 self.op.src_path = src_path = self.op.instance_name
9400 if src_node is None:
9401 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9402 self.op.src_node = None
9403 if os.path.isabs(src_path):
9404 raise errors.OpPrereqError("Importing an instance from a path"
9405 " requires a source node option",
9406 errors.ECODE_INVAL)
9407 else:
9408 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9409 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9410 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9411 if not os.path.isabs(src_path):
9412 self.op.src_path = src_path = \
9413 utils.PathJoin(constants.EXPORT_DIR, src_path)
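# Note that a relative src_path (typically just the exported instance's name)
# is resolved below constants.EXPORT_DIR on the source node, whereas an
# absolute path is only accepted together with an explicit source node.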
9415 def _RunAllocator(self):
9416 """Run the allocator based on input opcode.
9419 nics = [n.ToDict() for n in self.nics]
9420 ial = IAllocator(self.cfg, self.rpc,
9421 mode=constants.IALLOCATOR_MODE_ALLOC,
9422 name=self.op.instance_name,
9423 disk_template=self.op.disk_template,
9424 tags=self.op.tags,
9425 os=self.op.os_type,
9426 vcpus=self.be_full[constants.BE_VCPUS],
9427 memory=self.be_full[constants.BE_MAXMEM],
9428 spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9429 disks=self.disks,
9430 nics=nics,
9431 hypervisor=self.op.hypervisor,
9432 )
9434 ial.Run(self.op.iallocator)
9436 if not ial.success:
9437 raise errors.OpPrereqError("Can't compute nodes using"
9438 " iallocator '%s': %s" %
9439 (self.op.iallocator, ial.info),
9440 errors.ECODE_NORES)
9441 if len(ial.result) != ial.required_nodes:
9442 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9443 " of nodes (%s), required %s" %
9444 (self.op.iallocator, len(ial.result),
9445 ial.required_nodes), errors.ECODE_FAULT)
9446 self.op.pnode = ial.result[0]
9447 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9448 self.op.instance_name, self.op.iallocator,
9449 utils.CommaJoin(ial.result))
9450 if ial.required_nodes == 2:
9451 self.op.snode = ial.result[1]
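# The allocator is expected to return exactly ial.required_nodes names: one
# node for non-mirrored templates, two (primary plus secondary) for mirrored
# ones, which is why only result[0] and, conditionally, result[1] are used.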
9453 def BuildHooksEnv(self):
9454 """Build hooks env.
9456 This runs on master, primary and secondary nodes of the instance.
9458 """
9459 env = {
9460 "ADD_MODE": self.op.mode,
9461 }
9462 if self.op.mode == constants.INSTANCE_IMPORT:
9463 env["SRC_NODE"] = self.op.src_node
9464 env["SRC_PATH"] = self.op.src_path
9465 env["SRC_IMAGES"] = self.src_images
9467 env.update(_BuildInstanceHookEnv(
9468 name=self.op.instance_name,
9469 primary_node=self.op.pnode,
9470 secondary_nodes=self.secondaries,
9471 status=self.op.start,
9472 os_type=self.op.os_type,
9473 minmem=self.be_full[constants.BE_MINMEM],
9474 maxmem=self.be_full[constants.BE_MAXMEM],
9475 vcpus=self.be_full[constants.BE_VCPUS],
9476 nics=_NICListToTuple(self, self.nics),
9477 disk_template=self.op.disk_template,
9478 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9479 for d in self.disks],
9480 bep=self.be_full,
9481 hvp=self.hv_full,
9482 hypervisor_name=self.op.hypervisor,
9483 tags=self.op.tags,
9484 ))
9486 return env
9488 def BuildHooksNodes(self):
9489 """Build hooks nodes.
9491 """
9492 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9493 return (nl, nl)
9495 def _ReadExportInfo(self):
9496 """Reads the export information from disk.
9498 It will override the opcode source node and path with the actual
9499 information, if these two were not specified before.
9501 @return: the export information
9504 assert self.op.mode == constants.INSTANCE_IMPORT
9506 src_node = self.op.src_node
9507 src_path = self.op.src_path
9509 if src_node is None:
9510 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9511 exp_list = self.rpc.call_export_list(locked_nodes)
9512 found = False
9513 for node in exp_list:
9514 if exp_list[node].fail_msg:
9515 continue
9516 if src_path in exp_list[node].payload:
9517 found = True
9518 self.op.src_node = src_node = node
9519 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9520 src_path)
9521 break
9522 if not found:
9523 raise errors.OpPrereqError("No export found for relative path %s" %
9524 src_path, errors.ECODE_INVAL)
9526 _CheckNodeOnline(self, src_node)
9527 result = self.rpc.call_export_info(src_node, src_path)
9528 result.Raise("No export or invalid export found in dir %s" % src_path)
9530 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9531 if not export_info.has_section(constants.INISECT_EXP):
9532 raise errors.ProgrammerError("Corrupted export config",
9533 errors.ECODE_ENVIRON)
9535 ei_version = export_info.get(constants.INISECT_EXP, "version")
9536 if (int(ei_version) != constants.EXPORT_VERSION):
9537 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9538 (ei_version, constants.EXPORT_VERSION),
9539 errors.ECODE_ENVIRON)
9541 return export_info
9542 def _ReadExportParams(self, einfo):
9543 """Use export parameters as defaults.
9545 In case the opcode doesn't specify (as in override) some instance
9546 parameters, then try to use them from the export information, if
9550 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9552 if self.op.disk_template is None:
9553 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9554 self.op.disk_template = einfo.get(constants.INISECT_INS,
9555 "disk_template")
9556 if self.op.disk_template not in constants.DISK_TEMPLATES:
9557 raise errors.OpPrereqError("Disk template specified in configuration"
9558 " file is not one of the allowed values:"
9559 " %s" % " ".join(constants.DISK_TEMPLATES))
9560 else:
9561 raise errors.OpPrereqError("No disk template specified and the export"
9562 " is missing the disk_template information",
9563 errors.ECODE_INVAL)
9565 if not self.op.disks:
9566 disks = []
9567 # TODO: import the disk iv_name too
9568 for idx in range(constants.MAX_DISKS):
9569 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9570 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9571 disks.append({constants.IDISK_SIZE: disk_sz})
9572 self.op.disks = disks
9573 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9574 raise errors.OpPrereqError("No disk info specified and the export"
9575 " is missing the disk information",
9576 errors.ECODE_INVAL)
9578 if not self.op.nics:
9579 nics = []
9580 for idx in range(constants.MAX_NICS):
9581 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9582 ndict = {}
9583 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9584 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9585 ndict[name] = v
9586 nics.append(ndict)
9587 else:
9588 break
9589 self.op.nics = nics
9591 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9592 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9594 if (self.op.hypervisor is None and
9595 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9596 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9598 if einfo.has_section(constants.INISECT_HYP):
9599 # use the export parameters but do not override the ones
9600 # specified by the user
9601 for name, value in einfo.items(constants.INISECT_HYP):
9602 if name not in self.op.hvparams:
9603 self.op.hvparams[name] = value
9605 if einfo.has_section(constants.INISECT_BEP):
9606 # use the parameters, without overriding
9607 for name, value in einfo.items(constants.INISECT_BEP):
9608 if name not in self.op.beparams:
9609 self.op.beparams[name] = value
9610 # Compatibility for the old "memory" be param
9611 if name == constants.BE_MEMORY:
9612 if constants.BE_MAXMEM not in self.op.beparams:
9613 self.op.beparams[constants.BE_MAXMEM] = value
9614 if constants.BE_MINMEM not in self.op.beparams:
9615 self.op.beparams[constants.BE_MINMEM] = value
9616 else:
9617 # try to read the parameters old style, from the main section
9618 for name in constants.BES_PARAMETERS:
9619 if (name not in self.op.beparams and
9620 einfo.has_option(constants.INISECT_INS, name)):
9621 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9623 if einfo.has_section(constants.INISECT_OSP):
9624 # use the parameters, without overriding
9625 for name, value in einfo.items(constants.INISECT_OSP):
9626 if name not in self.op.osparams:
9627 self.op.osparams[name] = value
9629 def _RevertToDefaults(self, cluster):
9630 """Revert the instance parameters to the default values.
9634 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9635 for name in self.op.hvparams.keys():
9636 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9637 del self.op.hvparams[name]
9639 be_defs = cluster.SimpleFillBE({})
9640 for name in self.op.beparams.keys():
9641 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9642 del self.op.beparams[name]
9644 nic_defs = cluster.SimpleFillNIC({})
9645 for nic in self.op.nics:
9646 for name in constants.NICS_PARAMETERS:
9647 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9648 del nic[name]
9650 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9651 for name in self.op.osparams.keys():
9652 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9653 del self.op.osparams[name]
9655 def _CalculateFileStorageDir(self):
9656 """Calculate final instance file storage dir.
9659 # file storage dir calculation/check
9660 self.instance_file_storage_dir = None
9661 if self.op.disk_template in constants.DTS_FILEBASED:
9662 # build the full file storage dir path
9663 joinargs = []
9665 if self.op.disk_template == constants.DT_SHARED_FILE:
9666 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9667 else:
9668 get_fsd_fn = self.cfg.GetFileStorageDir
9670 cfg_storagedir = get_fsd_fn()
9671 if not cfg_storagedir:
9672 raise errors.OpPrereqError("Cluster file storage dir not defined")
9673 joinargs.append(cfg_storagedir)
9675 if self.op.file_storage_dir is not None:
9676 joinargs.append(self.op.file_storage_dir)
9678 joinargs.append(self.op.instance_name)
9680 # pylint: disable=W0142
9681 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
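# The resulting path is therefore roughly
#   <cluster file storage dir>[/<file_storage_dir from the opcode>]/<instance name>
# with the exact value produced by the PathJoin call above.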
9683 def CheckPrereq(self): # pylint: disable=R0914
9684 """Check prerequisites.
9687 self._CalculateFileStorageDir()
9689 if self.op.mode == constants.INSTANCE_IMPORT:
9690 export_info = self._ReadExportInfo()
9691 self._ReadExportParams(export_info)
9692 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9693 else:
9694 self._old_instance_name = None
9696 if (not self.cfg.GetVGName() and
9697 self.op.disk_template not in constants.DTS_NOT_LVM):
9698 raise errors.OpPrereqError("Cluster does not support lvm-based"
9699 " instances", errors.ECODE_STATE)
9701 if (self.op.hypervisor is None or
9702 self.op.hypervisor == constants.VALUE_AUTO):
9703 self.op.hypervisor = self.cfg.GetHypervisorType()
9705 cluster = self.cfg.GetClusterInfo()
9706 enabled_hvs = cluster.enabled_hypervisors
9707 if self.op.hypervisor not in enabled_hvs:
9708 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9709 " cluster (%s)" % (self.op.hypervisor,
9710 ",".join(enabled_hvs)),
9711 errors.ECODE_STATE)
9713 # Check tag validity
9714 for tag in self.op.tags:
9715 objects.TaggableObject.ValidateTag(tag)
9717 # check hypervisor parameter syntax (locally)
9718 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9719 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9720 self.op.hvparams)
9721 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9722 hv_type.CheckParameterSyntax(filled_hvp)
9723 self.hv_full = filled_hvp
9724 # check that we don't specify global parameters on an instance
9725 _CheckGlobalHvParams(self.op.hvparams)
9727 # fill and remember the beparams dict
9728 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9729 for param, value in self.op.beparams.iteritems():
9730 if value == constants.VALUE_AUTO:
9731 self.op.beparams[param] = default_beparams[param]
9732 objects.UpgradeBeParams(self.op.beparams)
9733 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9734 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9736 # build os parameters
9737 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9739 # now that hvp/bep are in final format, let's reset to defaults,
9740 # if told to do so
9741 if self.op.identify_defaults:
9742 self._RevertToDefaults(cluster)
9744 # NIC buildup
9745 self.nics = []
9746 for idx, nic in enumerate(self.op.nics):
9747 nic_mode_req = nic.get(constants.INIC_MODE, None)
9748 nic_mode = nic_mode_req
9749 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9750 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9752 # in routed mode, for the first nic, the default ip is 'auto'
9753 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9754 default_ip_mode = constants.VALUE_AUTO
9756 default_ip_mode = constants.VALUE_NONE
9758 # ip validity checks
9759 ip = nic.get(constants.INIC_IP, default_ip_mode)
9760 if ip is None or ip.lower() == constants.VALUE_NONE:
9761 nic_ip = None
9762 elif ip.lower() == constants.VALUE_AUTO:
9763 if not self.op.name_check:
9764 raise errors.OpPrereqError("IP address set to auto but name checks"
9765 " have been skipped",
9766 errors.ECODE_INVAL)
9767 nic_ip = self.hostname1.ip
9768 else:
9769 if not netutils.IPAddress.IsValid(ip):
9770 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9771 errors.ECODE_INVAL)
9772 nic_ip = ip
9774 # TODO: check the ip address for uniqueness
9775 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9776 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9777 errors.ECODE_INVAL)
9779 # MAC address verification
9780 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9781 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9782 mac = utils.NormalizeAndValidateMac(mac)
9784 try:
9785 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9786 except errors.ReservationError:
9787 raise errors.OpPrereqError("MAC address %s already in use"
9788 " in cluster" % mac,
9789 errors.ECODE_NOTUNIQUE)
9791 # Build nic parameters
9792 link = nic.get(constants.INIC_LINK, None)
9793 if link == constants.VALUE_AUTO:
9794 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9795 nicparams = {}
9796 if nic_mode_req:
9797 nicparams[constants.NIC_MODE] = nic_mode
9798 if link:
9799 nicparams[constants.NIC_LINK] = link
9801 check_params = cluster.SimpleFillNIC(nicparams)
9802 objects.NIC.CheckParameterSyntax(check_params)
9803 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9805 # disk checks/pre-build
9806 default_vg = self.cfg.GetVGName()
9807 self.disks = []
9808 for disk in self.op.disks:
9809 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9810 if mode not in constants.DISK_ACCESS_SET:
9811 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9812 mode, errors.ECODE_INVAL)
9813 size = disk.get(constants.IDISK_SIZE, None)
9814 if size is None:
9815 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9816 try:
9817 size = int(size)
9818 except (TypeError, ValueError):
9819 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9820 errors.ECODE_INVAL)
9822 data_vg = disk.get(constants.IDISK_VG, default_vg)
9823 new_disk = {
9824 constants.IDISK_SIZE: size,
9825 constants.IDISK_MODE: mode,
9826 constants.IDISK_VG: data_vg,
9827 }
9828 if constants.IDISK_METAVG in disk:
9829 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9830 if constants.IDISK_ADOPT in disk:
9831 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9832 self.disks.append(new_disk)
9834 if self.op.mode == constants.INSTANCE_IMPORT:
9835 disk_images = []
9836 for idx in range(len(self.disks)):
9837 option = "disk%d_dump" % idx
9838 if export_info.has_option(constants.INISECT_INS, option):
9839 # FIXME: are the old os-es, disk sizes, etc. useful?
9840 export_name = export_info.get(constants.INISECT_INS, option)
9841 image = utils.PathJoin(self.op.src_path, export_name)
9842 disk_images.append(image)
9843 else:
9844 disk_images.append(False)
9846 self.src_images = disk_images
9848 if self.op.instance_name == self._old_instance_name:
9849 for idx, nic in enumerate(self.nics):
9850 if nic.mac == constants.VALUE_AUTO:
9851 nic_mac_ini = "nic%d_mac" % idx
9852 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9854 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9856 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9857 if self.op.ip_check:
9858 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9859 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9860 (self.check_ip, self.op.instance_name),
9861 errors.ECODE_NOTUNIQUE)
9863 #### mac address generation
9864 # By generating here the mac address both the allocator and the hooks get
9865 # the real final mac address rather than the 'auto' or 'generate' value.
9866 # There is a race condition between the generation and the instance object
9867 # creation, which means that we know the mac is valid now, but we're not
9868 # sure it will be when we actually add the instance. If things go bad
9869 # adding the instance will abort because of a duplicate mac, and the
9870 # creation job will fail.
9871 for nic in self.nics:
9872 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9873 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
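# Both ReserveMAC and GenerateMAC are keyed on the execution-context id, so
# two creation jobs running in parallel should not be handed the same
# address; the remaining window is the one described in the comment above.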
9877 if self.op.iallocator is not None:
9878 self._RunAllocator()
9880 # Release all unneeded node locks
9881 _ReleaseLocks(self, locking.LEVEL_NODE,
9882 keep=filter(None, [self.op.pnode, self.op.snode,
9883 self.op.src_node]))
9884 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9885 keep=filter(None, [self.op.pnode, self.op.snode,
9886 self.op.src_node]))
9888 #### node related checks
9890 # check primary node
9891 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9892 assert self.pnode is not None, \
9893 "Cannot retrieve locked node %s" % self.op.pnode
9894 if pnode.offline:
9895 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9896 pnode.name, errors.ECODE_STATE)
9897 if pnode.drained:
9898 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9899 pnode.name, errors.ECODE_STATE)
9900 if not pnode.vm_capable:
9901 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9902 " '%s'" % pnode.name, errors.ECODE_STATE)
9904 self.secondaries = []
9906 # mirror node verification
9907 if self.op.disk_template in constants.DTS_INT_MIRROR:
9908 if self.op.snode == pnode.name:
9909 raise errors.OpPrereqError("The secondary node cannot be the"
9910 " primary node", errors.ECODE_INVAL)
9911 _CheckNodeOnline(self, self.op.snode)
9912 _CheckNodeNotDrained(self, self.op.snode)
9913 _CheckNodeVmCapable(self, self.op.snode)
9914 self.secondaries.append(self.op.snode)
9916 snode = self.cfg.GetNodeInfo(self.op.snode)
9917 if pnode.group != snode.group:
9918 self.LogWarning("The primary and secondary nodes are in two"
9919 " different node groups; the disk parameters"
9920 " from the first disk's node group will be"
9921 " used")
9923 nodenames = [pnode.name] + self.secondaries
9925 # Verify instance specs
9926 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
9927 ispec = {
9928 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9929 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9930 constants.ISPEC_DISK_COUNT: len(self.disks),
9931 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9932 constants.ISPEC_NIC_COUNT: len(self.nics),
9933 constants.ISPEC_SPINDLE_USE: spindle_use,
9934 }
9936 group_info = self.cfg.GetNodeGroup(pnode.group)
9937 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9938 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9939 if not self.op.ignore_ipolicy and res:
9940 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9941 " policy: %s") % (pnode.group,
9942 utils.CommaJoin(res)),
9943 errors.ECODE_INVAL)
9945 if not self.adopt_disks:
9946 if self.op.disk_template == constants.DT_RBD:
9947 # _CheckRADOSFreeSpace() is just a placeholder.
9948 # Any function that checks prerequisites can be placed here.
9949 # Check if there is enough space on the RADOS cluster.
9950 _CheckRADOSFreeSpace()
9952 # Check lv size requirements, if not adopting
9953 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9954 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9956 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9957 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9958 disk[constants.IDISK_ADOPT])
9959 for disk in self.disks])
9960 if len(all_lvs) != len(self.disks):
9961 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9962 errors.ECODE_INVAL)
9963 for lv_name in all_lvs:
9964 try:
9965 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9966 # to ReserveLV uses the same syntax
9967 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9968 except errors.ReservationError:
9969 raise errors.OpPrereqError("LV named %s used by another instance" %
9970 lv_name, errors.ECODE_NOTUNIQUE)
9972 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9973 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9975 node_lvs = self.rpc.call_lv_list([pnode.name],
9976 vg_names.payload.keys())[pnode.name]
9977 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9978 node_lvs = node_lvs.payload
9980 delta = all_lvs.difference(node_lvs.keys())
9981 if delta:
9982 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9983 utils.CommaJoin(delta),
9984 errors.ECODE_INVAL)
9985 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9986 if online_lvs:
9987 raise errors.OpPrereqError("Online logical volumes found, cannot"
9988 " adopt: %s" % utils.CommaJoin(online_lvs),
9989 errors.ECODE_STATE)
9990 # update the size of disk based on what is found
9991 for dsk in self.disks:
9992 dsk[constants.IDISK_SIZE] = \
9993 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9994 dsk[constants.IDISK_ADOPT])][0]))
9996 elif self.op.disk_template == constants.DT_BLOCK:
9997 # Normalize and de-duplicate device paths
9998 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9999 for disk in self.disks])
10000 if len(all_disks) != len(self.disks):
10001 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10002 errors.ECODE_INVAL)
10003 baddisks = [d for d in all_disks
10004 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10005 if baddisks:
10006 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10007 " cannot be adopted" %
10008 (", ".join(baddisks),
10009 constants.ADOPTABLE_BLOCKDEV_ROOT),
10010 errors.ECODE_INVAL)
10012 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10013 list(all_disks))[pnode.name]
10014 node_disks.Raise("Cannot get block device information from node %s" %
10015 pnode.name)
10016 node_disks = node_disks.payload
10017 delta = all_disks.difference(node_disks.keys())
10018 if delta:
10019 raise errors.OpPrereqError("Missing block device(s): %s" %
10020 utils.CommaJoin(delta),
10021 errors.ECODE_INVAL)
10022 for dsk in self.disks:
10023 dsk[constants.IDISK_SIZE] = \
10024 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10026 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10028 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10029 # check OS parameters (remotely)
10030 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10032 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10034 # memory check on primary node
10035 #TODO(dynmem): use MINMEM for checking
10036 if self.op.start:
10037 _CheckNodeFreeMemory(self, self.pnode.name,
10038 "creating instance %s" % self.op.instance_name,
10039 self.be_full[constants.BE_MAXMEM],
10040 self.op.hypervisor)
10042 self.dry_run_result = list(nodenames)
10044 def Exec(self, feedback_fn):
10045 """Create and add the instance to the cluster.
10048 instance = self.op.instance_name
10049 pnode_name = self.pnode.name
10051 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10052 self.owned_locks(locking.LEVEL_NODE)), \
10053 "Node locks differ from node resource locks"
10055 ht_kind = self.op.hypervisor
10056 if ht_kind in constants.HTS_REQ_PORT:
10057 network_port = self.cfg.AllocatePort()
10058 else:
10059 network_port = None
10061 # This is ugly but we got a chicken-egg problem here
10062 # We can only take the group disk parameters, as the instance
10063 # has no disks yet (we are generating them right here).
10064 node = self.cfg.GetNodeInfo(pnode_name)
10065 nodegroup = self.cfg.GetNodeGroup(node.group)
10066 disks = _GenerateDiskTemplate(self,
10067 self.op.disk_template,
10068 instance, pnode_name,
10069 self.secondaries,
10070 self.disks,
10071 self.instance_file_storage_dir,
10072 self.op.file_driver,
10073 0,
10074 feedback_fn,
10075 self.cfg.GetGroupDiskParams(nodegroup))
10077 iobj = objects.Instance(name=instance, os=self.op.os_type,
10078 primary_node=pnode_name,
10079 nics=self.nics, disks=disks,
10080 disk_template=self.op.disk_template,
10081 admin_state=constants.ADMINST_DOWN,
10082 network_port=network_port,
10083 beparams=self.op.beparams,
10084 hvparams=self.op.hvparams,
10085 hypervisor=self.op.hypervisor,
10086 osparams=self.op.osparams,
10087 )
10089 if self.op.tags:
10090 for tag in self.op.tags:
10091 iobj.AddTag(tag)
10093 if self.adopt_disks:
10094 if self.op.disk_template == constants.DT_PLAIN:
10095 # rename LVs to the newly-generated names; we need to construct
10096 # 'fake' LV disks with the old data, plus the new unique_id
10097 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10098 rename_to = []
10099 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10100 rename_to.append(t_dsk.logical_id)
10101 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10102 self.cfg.SetDiskID(t_dsk, pnode_name)
10103 result = self.rpc.call_blockdev_rename(pnode_name,
10104 zip(tmp_disks, rename_to))
10105 result.Raise("Failed to rename adopted LVs")
10107 feedback_fn("* creating instance disks...")
10108 try:
10109 _CreateDisks(self, iobj)
10110 except errors.OpExecError:
10111 self.LogWarning("Device creation failed, reverting...")
10112 try:
10113 _RemoveDisks(self, iobj)
10114 finally:
10115 self.cfg.ReleaseDRBDMinors(instance)
10116 raise
10118 feedback_fn("adding instance %s to cluster config" % instance)
10120 self.cfg.AddInstance(iobj, self.proc.GetECId())
10122 # Declare that we don't want to remove the instance lock anymore, as we've
10123 # added the instance to the config
10124 del self.remove_locks[locking.LEVEL_INSTANCE]
10126 if self.op.mode == constants.INSTANCE_IMPORT:
10127 # Release unused nodes
10128 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10129 else:
10130 # Release all nodes
10131 _ReleaseLocks(self, locking.LEVEL_NODE)
10133 disk_abort = False
10134 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10135 feedback_fn("* wiping instance disks...")
10136 try:
10137 _WipeDisks(self, iobj)
10138 except errors.OpExecError, err:
10139 logging.exception("Wiping disks failed")
10140 self.LogWarning("Wiping instance disks failed (%s)", err)
10141 disk_abort = True
10143 if disk_abort:
10144 # Something is already wrong with the disks, don't do anything else
10145 pass
10146 elif self.op.wait_for_sync:
10147 disk_abort = not _WaitForSync(self, iobj)
10148 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10149 # make sure the disks are not degraded (still sync-ing is ok)
10150 feedback_fn("* checking mirrors status")
10151 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10152 else:
10153 disk_abort = False
10155 if disk_abort:
10156 _RemoveDisks(self, iobj)
10157 self.cfg.RemoveInstance(iobj.name)
10158 # Make sure the instance lock gets removed
10159 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10160 raise errors.OpExecError("There are some degraded disks for"
10161 " this instance")
10163 # Release all node resource locks
10164 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10166 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10167 # we need to set the disks ID to the primary node, since the
10168 # preceding code might or might have not done it, depending on
10169 # disk template and other options
10170 for disk in iobj.disks:
10171 self.cfg.SetDiskID(disk, pnode_name)
10172 if self.op.mode == constants.INSTANCE_CREATE:
10173 if not self.op.no_install:
10174 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10175 not self.op.wait_for_sync)
10176 if pause_sync:
10177 feedback_fn("* pausing disk sync to install instance OS")
10178 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10179 (iobj.disks,
10180 iobj), True)
10181 for idx, success in enumerate(result.payload):
10182 if not success:
10183 logging.warn("pause-sync of instance %s for disk %d failed",
10184 instance, idx)
10186 feedback_fn("* running the instance OS create scripts...")
10187 # FIXME: pass debug option from opcode to backend
10188 os_add_result = \
10189 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10190 self.op.debug_level)
10191 if pause_sync:
10192 feedback_fn("* resuming disk sync")
10193 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10194 (iobj.disks,
10195 iobj), False)
10196 for idx, success in enumerate(result.payload):
10197 if not success:
10198 logging.warn("resume-sync of instance %s for disk %d failed",
10199 instance, idx)
10201 os_add_result.Raise("Could not add os for instance %s"
10202 " on node %s" % (instance, pnode_name))
10205 elif self.op.mode == constants.INSTANCE_IMPORT:
10206 feedback_fn("* running the instance OS import scripts...")
10208 transfers = []
10210 for idx, image in enumerate(self.src_images):
10211 if not image:
10212 continue
10214 # FIXME: pass debug option from opcode to backend
10215 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10216 constants.IEIO_FILE, (image, ),
10217 constants.IEIO_SCRIPT,
10218 (iobj.disks[idx], idx),
10219 None)
10220 transfers.append(dt)
10222 import_result = \
10223 masterd.instance.TransferInstanceData(self, feedback_fn,
10224 self.op.src_node, pnode_name,
10225 self.pnode.secondary_ip,
10226 iobj, transfers)
10227 if not compat.all(import_result):
10228 self.LogWarning("Some disks for instance %s on node %s were not"
10229 " imported successfully" % (instance, pnode_name))
10231 rename_from = self._old_instance_name
10233 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10234 feedback_fn("* preparing remote import...")
10235 # The source cluster will stop the instance before attempting to make
10236 # a connection. In some cases stopping an instance can take a long
10237 # time, hence the shutdown timeout is added to the connection
10239 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10240 self.op.source_shutdown_timeout)
10241 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10243 assert iobj.primary_node == self.pnode.name
10244 disk_results = \
10245 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10246 self.source_x509_ca,
10247 self._cds, timeouts)
10248 if not compat.all(disk_results):
10249 # TODO: Should the instance still be started, even if some disks
10250 # failed to import (valid for local imports, too)?
10251 self.LogWarning("Some disks for instance %s on node %s were not"
10252 " imported successfully" % (instance, pnode_name))
10254 rename_from = self.source_instance_name
10256 else:
10257 # also checked in the prereq part
10258 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10259 % self.op.mode)
10261 # Run rename script on newly imported instance
10262 assert iobj.name == instance
10263 feedback_fn("Running rename script for %s" % instance)
10264 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10265 rename_from,
10266 self.op.debug_level)
10267 if result.fail_msg:
10268 self.LogWarning("Failed to run rename script for %s on node"
10269 " %s: %s" % (instance, pnode_name, result.fail_msg))
10271 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10273 if self.op.start:
10274 iobj.admin_state = constants.ADMINST_UP
10275 self.cfg.Update(iobj, feedback_fn)
10276 logging.info("Starting instance %s on node %s", instance, pnode_name)
10277 feedback_fn("* starting instance...")
10278 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10279 False)
10280 result.Raise("Could not start instance")
10282 return list(iobj.all_nodes)
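# Usage sketch (an assumption for illustration, not taken from this module):
# this logical unit is normally reached by submitting an
# opcodes.OpInstanceCreate whose fields (instance_name, mode, disk_template,
# disks, nics, pnode or iallocator, ...) are the ones validated in
# CheckArguments and CheckPrereq above.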
10285 def _CheckRADOSFreeSpace():
10286 """Compute disk size requirements inside the RADOS cluster.
10288 """
10289 # For the RADOS cluster we assume there is always enough space.
10290 pass
10293 class LUInstanceConsole(NoHooksLU):
10294 """Connect to an instance's console.
10296 This is somewhat special in that it returns the command line that
10297 you need to run on the master node in order to connect to the
10298 console.
10300 """
10301 REQ_BGL = False
10303 def ExpandNames(self):
10304 self.share_locks = _ShareAll()
10305 self._ExpandAndLockInstance()
10307 def CheckPrereq(self):
10308 """Check prerequisites.
10310 This checks that the instance is in the cluster.
10313 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10314 assert self.instance is not None, \
10315 "Cannot retrieve locked instance %s" % self.op.instance_name
10316 _CheckNodeOnline(self, self.instance.primary_node)
10318 def Exec(self, feedback_fn):
10319 """Connect to the console of an instance
10322 instance = self.instance
10323 node = instance.primary_node
10325 node_insts = self.rpc.call_instance_list([node],
10326 [instance.hypervisor])[node]
10327 node_insts.Raise("Can't get node information from %s" % node)
10329 if instance.name not in node_insts.payload:
10330 if instance.admin_state == constants.ADMINST_UP:
10331 state = constants.INSTST_ERRORDOWN
10332 elif instance.admin_state == constants.ADMINST_DOWN:
10333 state = constants.INSTST_ADMINDOWN
10335 state = constants.INSTST_ADMINOFFLINE
10336 raise errors.OpExecError("Instance %s is not running (state %s)" %
10337 (instance.name, state))
10339 logging.debug("Connecting to console of %s on %s", instance.name, node)
10341 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10344 def _GetInstanceConsole(cluster, instance):
10345 """Returns console information for an instance.
10347 @type cluster: L{objects.Cluster}
10348 @type instance: L{objects.Instance}
10352 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10353 # beparams and hvparams are passed separately, to avoid editing the
10354 # instance and then saving the defaults in the instance itself.
10355 hvparams = cluster.FillHV(instance)
10356 beparams = cluster.FillBE(instance)
10357 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10359 assert console.instance == instance.name
10360 assert console.Validate()
10362 return console.ToDict()
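# The returned dictionary is the serialized console object produced by the
# hypervisor abstraction; callers are expected to use it to build the actual
# console command on the master node, as described in LUInstanceConsole.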
10365 class LUInstanceReplaceDisks(LogicalUnit):
10366 """Replace the disks of an instance.
10369 HPATH = "mirrors-replace"
10370 HTYPE = constants.HTYPE_INSTANCE
10371 REQ_BGL = False
10373 def CheckArguments(self):
10374 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10375 self.op.iallocator)
10377 def ExpandNames(self):
10378 self._ExpandAndLockInstance()
10380 assert locking.LEVEL_NODE not in self.needed_locks
10381 assert locking.LEVEL_NODE_RES not in self.needed_locks
10382 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10384 assert self.op.iallocator is None or self.op.remote_node is None, \
10385 "Conflicting options"
10387 if self.op.remote_node is not None:
10388 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10390 # Warning: do not remove the locking of the new secondary here
10391 # unless DRBD8.AddChildren is changed to work in parallel;
10392 # currently it doesn't since parallel invocations of
10393 # FindUnusedMinor will conflict
10394 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10395 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10396 else:
10397 self.needed_locks[locking.LEVEL_NODE] = []
10398 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10400 if self.op.iallocator is not None:
10401 # iallocator will select a new node in the same group
10402 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10404 self.needed_locks[locking.LEVEL_NODE_RES] = []
10406 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10407 self.op.iallocator, self.op.remote_node,
10408 self.op.disks, False, self.op.early_release,
10409 self.op.ignore_ipolicy)
10411 self.tasklets = [self.replacer]
10413 def DeclareLocks(self, level):
10414 if level == locking.LEVEL_NODEGROUP:
10415 assert self.op.remote_node is None
10416 assert self.op.iallocator is not None
10417 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10419 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10420 # Lock all groups used by instance optimistically; this requires going
10421 # via the node before it's locked, requiring verification later on
10422 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10423 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10425 elif level == locking.LEVEL_NODE:
10426 if self.op.iallocator is not None:
10427 assert self.op.remote_node is None
10428 assert not self.needed_locks[locking.LEVEL_NODE]
10430 # Lock member nodes of all locked groups
10431 self.needed_locks[locking.LEVEL_NODE] = [node_name
10432 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10433 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10434 else:
10435 self._LockInstancesNodes()
10436 elif level == locking.LEVEL_NODE_RES:
10437 # Reuse node locks
10438 self.needed_locks[locking.LEVEL_NODE_RES] = \
10439 self.needed_locks[locking.LEVEL_NODE]
10441 def BuildHooksEnv(self):
10442 """Build hooks env.
10444 This runs on the master, the primary and all the secondaries.
10447 instance = self.replacer.instance
10448 env = {
10449 "MODE": self.op.mode,
10450 "NEW_SECONDARY": self.op.remote_node,
10451 "OLD_SECONDARY": instance.secondary_nodes[0],
10452 }
10453 env.update(_BuildInstanceHookEnvByObject(self, instance))
10454 return env
10456 def BuildHooksNodes(self):
10457 """Build hooks nodes.
10460 instance = self.replacer.instance
10461 nl = [
10462 self.cfg.GetMasterNode(),
10463 instance.primary_node,
10464 ]
10465 if self.op.remote_node is not None:
10466 nl.append(self.op.remote_node)
10467 return nl, nl
10469 def CheckPrereq(self):
10470 """Check prerequisites.
10473 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10474 self.op.iallocator is None)
10476 # Verify if node group locks are still correct
10477 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10479 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10481 return LogicalUnit.CheckPrereq(self)
10484 class TLReplaceDisks(Tasklet):
10485 """Replaces disks for an instance.
10487 Note: Locking is not within the scope of this class.
10490 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10491 disks, delay_iallocator, early_release, ignore_ipolicy):
10492 """Initializes this class.
10495 Tasklet.__init__(self, lu)
10498 self.instance_name = instance_name
10499 self.mode = mode
10500 self.iallocator_name = iallocator_name
10501 self.remote_node = remote_node
10502 self.disks = disks
10503 self.delay_iallocator = delay_iallocator
10504 self.early_release = early_release
10505 self.ignore_ipolicy = ignore_ipolicy
10508 self.instance = None
10509 self.new_node = None
10510 self.target_node = None
10511 self.other_node = None
10512 self.remote_node_info = None
10513 self.node_secondary_ip = None
10515 @staticmethod
10516 def CheckArguments(mode, remote_node, iallocator):
10517 """Helper function for users of this class.
10520 # check for valid parameter combination
10521 if mode == constants.REPLACE_DISK_CHG:
10522 if remote_node is None and iallocator is None:
10523 raise errors.OpPrereqError("When changing the secondary either an"
10524 " iallocator script must be used or the"
10525 " new node given", errors.ECODE_INVAL)
10527 if remote_node is not None and iallocator is not None:
10528 raise errors.OpPrereqError("Give either the iallocator or the new"
10529 " secondary, not both", errors.ECODE_INVAL)
10531 elif remote_node is not None or iallocator is not None:
10532 # Not replacing the secondary
10533 raise errors.OpPrereqError("The iallocator and new node options can"
10534 " only be used when changing the"
10535 " secondary node", errors.ECODE_INVAL)
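# Illustrative combinations ("node2" and "hail" are placeholder names, not
# values from this module):
#   CheckArguments(constants.REPLACE_DISK_CHG, None, "hail")    -> accepted
#   CheckArguments(constants.REPLACE_DISK_CHG, "node2", None)   -> accepted
#   CheckArguments(constants.REPLACE_DISK_CHG, "node2", "hail") -> OpPrereqError
#   CheckArguments(constants.REPLACE_DISK_PRI, "node2", None)   -> OpPrereqError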
10537 @staticmethod
10538 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10539 """Compute a new secondary node using an IAllocator.
10542 ial = IAllocator(lu.cfg, lu.rpc,
10543 mode=constants.IALLOCATOR_MODE_RELOC,
10544 name=instance_name,
10545 relocate_from=list(relocate_from))
10547 ial.Run(iallocator_name)
10549 if not ial.success:
10550 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10551 " %s" % (iallocator_name, ial.info),
10552 errors.ECODE_NORES)
10554 if len(ial.result) != ial.required_nodes:
10555 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10556 " of nodes (%s), required %s" %
10558 len(ial.result), ial.required_nodes),
10559 errors.ECODE_FAULT)
10561 remote_node_name = ial.result[0]
10563 lu.LogInfo("Selected new secondary for instance '%s': %s",
10564 instance_name, remote_node_name)
10566 return remote_node_name
10568 def _FindFaultyDisks(self, node_name):
10569 """Wrapper for L{_FindFaultyInstanceDisks}.
10572 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10573 node_name, True)
10575 def _CheckDisksActivated(self, instance):
10576 """Checks if the instance disks are activated.
10578 @param instance: The instance to check disks
10579 @return: True if they are activated, False otherwise
10582 nodes = instance.all_nodes
10584 for idx, dev in enumerate(instance.disks):
10585 for node in nodes:
10586 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10587 self.cfg.SetDiskID(dev, node)
10589 result = _BlockdevFind(self, node, dev, instance)
10591 if result.offline:
10592 continue
10593 elif result.fail_msg or not result.payload:
10594 return False
10596 return True
10598 def CheckPrereq(self):
10599 """Check prerequisites.
10601 This checks that the instance is in the cluster.
10604 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10605 assert instance is not None, \
10606 "Cannot retrieve locked instance %s" % self.instance_name
10608 if instance.disk_template != constants.DT_DRBD8:
10609 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10610 " instances", errors.ECODE_INVAL)
10612 if len(instance.secondary_nodes) != 1:
10613 raise errors.OpPrereqError("The instance has a strange layout,"
10614 " expected one secondary but found %d" %
10615 len(instance.secondary_nodes),
10616 errors.ECODE_FAULT)
10618 if not self.delay_iallocator:
10619 self._CheckPrereq2()
10621 def _CheckPrereq2(self):
10622 """Check prerequisites, second part.
10624 This function should always be part of CheckPrereq. It was separated and is
10625 now called from Exec because during node evacuation iallocator was only
10626 called with an unmodified cluster model, not taking planned changes into
10627 account.
10629 """
10630 instance = self.instance
10631 secondary_node = instance.secondary_nodes[0]
10633 if self.iallocator_name is None:
10634 remote_node = self.remote_node
10635 else:
10636 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10637 instance.name, instance.secondary_nodes)
10639 if remote_node is None:
10640 self.remote_node_info = None
10641 else:
10642 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10643 "Remote node '%s' is not locked" % remote_node
10645 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10646 assert self.remote_node_info is not None, \
10647 "Cannot retrieve locked node %s" % remote_node
10649 if remote_node == self.instance.primary_node:
10650 raise errors.OpPrereqError("The specified node is the primary node of"
10651 " the instance", errors.ECODE_INVAL)
10653 if remote_node == secondary_node:
10654 raise errors.OpPrereqError("The specified node is already the"
10655 " secondary node of the instance",
10656 errors.ECODE_INVAL)
10658 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10659 constants.REPLACE_DISK_CHG):
10660 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10661 errors.ECODE_INVAL)
10663 if self.mode == constants.REPLACE_DISK_AUTO:
10664 if not self._CheckDisksActivated(instance):
10665 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10666 " first" % self.instance_name,
10667 errors.ECODE_STATE)
10668 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10669 faulty_secondary = self._FindFaultyDisks(secondary_node)
10671 if faulty_primary and faulty_secondary:
10672 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10673 " one node and can not be repaired"
10674 " automatically" % self.instance_name,
10675 errors.ECODE_STATE)
10677 if faulty_primary:
10678 self.disks = faulty_primary
10679 self.target_node = instance.primary_node
10680 self.other_node = secondary_node
10681 check_nodes = [self.target_node, self.other_node]
10682 elif faulty_secondary:
10683 self.disks = faulty_secondary
10684 self.target_node = secondary_node
10685 self.other_node = instance.primary_node
10686 check_nodes = [self.target_node, self.other_node]
10687 else:
10688 self.disks = []
10689 check_nodes = []
10691 else:
10692 # Non-automatic modes
10693 if self.mode == constants.REPLACE_DISK_PRI:
10694 self.target_node = instance.primary_node
10695 self.other_node = secondary_node
10696 check_nodes = [self.target_node, self.other_node]
10698 elif self.mode == constants.REPLACE_DISK_SEC:
10699 self.target_node = secondary_node
10700 self.other_node = instance.primary_node
10701 check_nodes = [self.target_node, self.other_node]
10703 elif self.mode == constants.REPLACE_DISK_CHG:
10704 self.new_node = remote_node
10705 self.other_node = instance.primary_node
10706 self.target_node = secondary_node
10707 check_nodes = [self.new_node, self.other_node]
10709 _CheckNodeNotDrained(self.lu, remote_node)
10710 _CheckNodeVmCapable(self.lu, remote_node)
10712 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10713 assert old_node_info is not None
10714 if old_node_info.offline and not self.early_release:
10715 # doesn't make sense to delay the release
10716 self.early_release = True
10717 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10718 " early-release mode", secondary_node)
10720 else:
10721 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10722 self.mode)
10724 # If not specified all disks should be replaced
10725 if not self.disks:
10726 self.disks = range(len(self.instance.disks))
10728 # TODO: This is ugly, but right now we can't distinguish between internal
10729 # submitted opcode and external one. We should fix that.
10730 if self.remote_node_info:
10731 # We change the node, lets verify it still meets instance policy
10732 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10733 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10734 new_group_info)
10735 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10736 ignore=self.ignore_ipolicy)
10738 for node in check_nodes:
10739 _CheckNodeOnline(self.lu, node)
10741 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10742 self.other_node,
10743 self.target_node]
10744 if node_name is not None)
10746 # Release unneeded node and node resource locks
10747 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10748 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10750 # Release any owned node group
10751 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10752 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10754 # Check whether disks are valid
10755 for disk_idx in self.disks:
10756 instance.FindDisk(disk_idx)
10758 # Get secondary node IP addresses
10759 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10760 in self.cfg.GetMultiNodeInfo(touched_nodes))
10762 def Exec(self, feedback_fn):
10763 """Execute disk replacement.
10765 This dispatches the disk replacement to the appropriate handler.
10768 if self.delay_iallocator:
10769 self._CheckPrereq2()
10772 # Verify owned locks before starting operation
10773 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10774 assert set(owned_nodes) == set(self.node_secondary_ip), \
10775 ("Incorrect node locks, owning %s, expected %s" %
10776 (owned_nodes, self.node_secondary_ip.keys()))
10777 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10778 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10780 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10781 assert list(owned_instances) == [self.instance_name], \
10782 "Instance '%s' not locked" % self.instance_name
10784 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10785 "Should not own any node group lock at this point"
10787 if not self.disks:
10788 feedback_fn("No disks need replacement")
10789 return
10791 feedback_fn("Replacing disk(s) %s for %s" %
10792 (utils.CommaJoin(self.disks), self.instance.name))
10794 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10796 # Activate the instance disks if we're replacing them on a down instance
10797 if activate_disks:
10798 _StartInstanceDisks(self.lu, self.instance, True)
10800 try:
10801 # Should we replace the secondary node?
10802 if self.new_node is not None:
10803 fn = self._ExecDrbd8Secondary
10804 else:
10805 fn = self._ExecDrbd8DiskOnly
10807 result = fn(feedback_fn)
10808 finally:
10809 # Deactivate the instance disks if we're replacing them on a
10810 # down instance
10811 if activate_disks:
10812 _SafeShutdownInstanceDisks(self.lu, self.instance)
10814 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10816 if __debug__:
10817 # Verify owned locks
10818 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10819 nodes = frozenset(self.node_secondary_ip)
10820 assert ((self.early_release and not owned_nodes) or
10821 (not self.early_release and not (set(owned_nodes) - nodes))), \
10822 ("Not owning the correct locks, early_release=%s, owned=%r,"
10823 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10825 return result
10827 def _CheckVolumeGroup(self, nodes):
10828 self.lu.LogInfo("Checking volume groups")
10830 vgname = self.cfg.GetVGName()
10832 # Make sure volume group exists on all involved nodes
10833 results = self.rpc.call_vg_list(nodes)
10834 if not results:
10835 raise errors.OpExecError("Can't list volume groups on the nodes")
10837 for node in nodes:
10838 res = results[node]
10839 res.Raise("Error checking node %s" % node)
10840 if vgname not in res.payload:
10841 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10842 (vgname, node))
10844 def _CheckDisksExistence(self, nodes):
10845 # Check disk existence
10846 for idx, dev in enumerate(self.instance.disks):
10847 if idx not in self.disks:
10848 continue
10850 for node in nodes:
10851 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10852 self.cfg.SetDiskID(dev, node)
10854 result = _BlockdevFind(self, node, dev, self.instance)
10856 msg = result.fail_msg
10857 if msg or not result.payload:
10858 if not msg:
10859 msg = "disk not found"
10860 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10861 (idx, node, msg))
10863 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10864 for idx, dev in enumerate(self.instance.disks):
10865 if idx not in self.disks:
10866 continue
10868 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10869 (idx, node_name))
10871 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10872 on_primary, ldisk=ldisk):
10873 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10874 " replace disks for instance %s" %
10875 (node_name, self.instance.name))
10877 def _CreateNewStorage(self, node_name):
10878 """Create new storage on the primary or secondary node.
10880 This is only used for same-node replaces, not for changing the
10881 secondary node, hence we don't want to modify the existing disk.
10883 """
10884 iv_names = {}
10886 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10887 for idx, dev in enumerate(disks):
10888 if idx not in self.disks:
10889 continue
10891 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10893 self.cfg.SetDiskID(dev, node_name)
10895 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10896 names = _GenerateUniqueNames(self.lu, lv_names)
10898 (data_disk, meta_disk) = dev.children
10899 vg_data = data_disk.logical_id[0]
10900 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10901 logical_id=(vg_data, names[0]),
10902 params=data_disk.params)
10903 vg_meta = meta_disk.logical_id[0]
10904 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10905 logical_id=(vg_meta, names[1]),
10906 params=meta_disk.params)
10908 new_lvs = [lv_data, lv_meta]
10909 old_lvs = [child.Copy() for child in dev.children]
10910 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10912 # we pass force_create=True to force the LVM creation
10913 for new_lv in new_lvs:
10914 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10915 _GetInstanceInfoText(self.instance), False)
10917 return iv_names
10919 def _CheckDevices(self, node_name, iv_names):
10920 for name, (dev, _, _) in iv_names.iteritems():
10921 self.cfg.SetDiskID(dev, node_name)
10923 result = _BlockdevFind(self, node_name, dev, self.instance)
10925 msg = result.fail_msg
10926 if msg or not result.payload:
10927 if not msg:
10928 msg = "disk not found"
10929 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10930 (name, msg))
10932 if result.payload.is_degraded:
10933 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10935 def _RemoveOldStorage(self, node_name, iv_names):
10936 for name, (_, old_lvs, _) in iv_names.iteritems():
10937 self.lu.LogInfo("Remove logical volumes for %s" % name)
10939 for lv in old_lvs:
10940 self.cfg.SetDiskID(lv, node_name)
10942 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10943 if msg:
10944 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10945 hint="remove unused LVs manually")
10947 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10948 """Replace a disk on the primary or secondary for DRBD 8.
10950 The algorithm for replace is quite complicated:
10952 1. for each disk to be replaced:
10954 1. create new LVs on the target node with unique names
10955 1. detach old LVs from the drbd device
10956 1. rename old LVs to name_replaced.<time_t>
10957 1. rename new LVs to old LVs
10958 1. attach the new LVs (with the old names now) to the drbd device
10960 1. wait for sync across all devices
10962 1. for each modified disk:
10964 1. remove old LVs (which have the name name_replaces.<time_t>)
10966 Failures are not very well handled.
10968 """
10969 steps_total = 6
10971 # Step: check device activation
10972 self.lu.LogStep(1, steps_total, "Check device existence")
10973 self._CheckDisksExistence([self.other_node, self.target_node])
10974 self._CheckVolumeGroup([self.target_node, self.other_node])
10976 # Step: check other node consistency
10977 self.lu.LogStep(2, steps_total, "Check peer consistency")
10978 self._CheckDisksConsistency(self.other_node,
10979 self.other_node == self.instance.primary_node,
10982 # Step: create new storage
10983 self.lu.LogStep(3, steps_total, "Allocate new storage")
10984 iv_names = self._CreateNewStorage(self.target_node)
10986 # Step: for each lv, detach+rename*2+attach
10987 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10988 for dev, old_lvs, new_lvs in iv_names.itervalues():
10989 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10991 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10992 old_lvs)
10993 result.Raise("Can't detach drbd from local storage on node"
10994 " %s for device %s" % (self.target_node, dev.iv_name))
10996 #cfg.Update(instance)
10998 # ok, we created the new LVs, so now we know we have the needed
10999 # storage; as such, we proceed on the target node to rename
11000 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11001 # using the assumption that logical_id == physical_id (which in
11002 # turn is the unique_id on that node)
11004 # FIXME(iustin): use a better name for the replaced LVs
11005 temp_suffix = int(time.time())
11006 ren_fn = lambda d, suff: (d.physical_id[0],
11007 d.physical_id[1] + "_replaced-%s" % suff)
11009 # Build the rename list based on what LVs exist on the node
11010 rename_old_to_new = []
11011 for to_ren in old_lvs:
11012 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11013 if not result.fail_msg and result.payload:
11015 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11017 self.lu.LogInfo("Renaming the old LVs on the target node")
11018 result = self.rpc.call_blockdev_rename(self.target_node,
11019 rename_old_to_new)
11020 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11022 # Now we rename the new LVs to the old LVs
11023 self.lu.LogInfo("Renaming the new LVs on the target node")
11024 rename_new_to_old = [(new, old.physical_id)
11025 for old, new in zip(old_lvs, new_lvs)]
11026 result = self.rpc.call_blockdev_rename(self.target_node,
11027 rename_new_to_old)
11028 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11030 # Intermediate steps of in memory modifications
11031 for old, new in zip(old_lvs, new_lvs):
11032 new.logical_id = old.logical_id
11033 self.cfg.SetDiskID(new, self.target_node)
11035 # We need to modify old_lvs so that removal later removes the
11036 # right LVs, not the newly added ones; note that old_lvs is a
11037 # copy here
11038 for disk in old_lvs:
11039 disk.logical_id = ren_fn(disk, temp_suffix)
11040 self.cfg.SetDiskID(disk, self.target_node)
11042 # Now that the new lvs have the old name, we can add them to the device
11043 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11044 result = self.rpc.call_blockdev_addchildren(self.target_node,
11045 (dev, self.instance), new_lvs)
11046 msg = result.fail_msg
11047 if msg:
11048 for new_lv in new_lvs:
11049 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11050 new_lv).fail_msg
11051 if msg2:
11052 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11053 hint=("cleanup manually the unused logical"
11054 " volumes"))
11055 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11057 cstep = itertools.count(5)
11059 if self.early_release:
11060 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11061 self._RemoveOldStorage(self.target_node, iv_names)
11062 # TODO: Check if releasing locks early still makes sense
11063 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11065 # Release all resource locks except those used by the instance
11066 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11067 keep=self.node_secondary_ip.keys())
11069 # Release all node locks while waiting for sync
11070 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11072 # TODO: Can the instance lock be downgraded here? Take the optional disk
11073 # shutdown in the caller into consideration.
11076 # This can fail as the old devices are degraded and _WaitForSync
11077 # does a combined result over all disks, so we don't check its return value
11078 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11079 _WaitForSync(self.lu, self.instance)
11081 # Check all devices manually
11082 self._CheckDevices(self.instance.primary_node, iv_names)
11084 # Step: remove old storage
11085 if not self.early_release:
11086 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11087 self._RemoveOldStorage(self.target_node, iv_names)
11089 def _ExecDrbd8Secondary(self, feedback_fn):
11090 """Replace the secondary node for DRBD 8.
11092 The algorithm for replace is quite complicated:
11093 - for all disks of the instance:
11094 - create new LVs on the new node with same names
11095 - shutdown the drbd device on the old secondary
11096 - disconnect the drbd network on the primary
11097 - create the drbd device on the new secondary
11098 - network attach the drbd on the primary, using an artifice:
11099 the drbd code for Attach() will connect to the network if it
11100 finds a device which is connected to the good local disks but
11101 not network enabled
11102 - wait for sync across all devices
11103 - remove all disks from the old secondary
11105 Failures are not very well handled.
11107 """
11108 steps_total = 6
11110 pnode = self.instance.primary_node
11112 # Step: check device activation
11113 self.lu.LogStep(1, steps_total, "Check device existence")
11114 self._CheckDisksExistence([self.instance.primary_node])
11115 self._CheckVolumeGroup([self.instance.primary_node])
11117 # Step: check other node consistency
11118 self.lu.LogStep(2, steps_total, "Check peer consistency")
11119 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11121 # Step: create new storage
11122 self.lu.LogStep(3, steps_total, "Allocate new storage")
11123 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11124 for idx, dev in enumerate(disks):
11125 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11126 (self.new_node, idx))
11127 # we pass force_create=True to force LVM creation
11128 for new_lv in dev.children:
11129 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11130 True, _GetInstanceInfoText(self.instance), False)
11132 # Step 4: drbd minors and drbd setup changes
11133 # after this, we must manually remove the drbd minors on both the
11134 # error and the success paths
11135 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11136 minors = self.cfg.AllocateDRBDMinor([self.new_node
11137 for dev in self.instance.disks],
11138 self.instance.name)
11139 logging.debug("Allocated minors %r", minors)
11141 iv_names = {}
11142 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11143 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11144 (self.new_node, idx))
11145 # create new devices on new_node; note that we create two IDs:
11146 # one without port, so the drbd will be activated without
11147 # networking information on the new node at this stage, and one
11148 # with network, for the latter activation in step 4
11149 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11150 if self.instance.primary_node == o_node1:
11151 p_minor = o_minor1
11152 else:
11153 assert self.instance.primary_node == o_node2, "Three-node instance?"
11154 p_minor = o_minor2
11156 new_alone_id = (self.instance.primary_node, self.new_node, None,
11157 p_minor, new_minor, o_secret)
11158 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11159 p_minor, new_minor, o_secret)
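# Sketch of the two IDs built above (values assumed for illustration): with
# primary minor 0, new minor 3, port 11000 and secret "s", new_alone_id is
# (pnode, new_node, None, 0, 3, "s") -- no port, so the device is brought up
# without networking -- while new_net_id carries port 11000 for the later
# attach in step 4.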
11161 iv_names[idx] = (dev, dev.children, new_net_id)
11162 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11163 new_net_id)
11164 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11165 logical_id=new_alone_id,
11166 children=dev.children,
11167 size=dev.size,
11168 params={})
11169 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11170 self.cfg)
11171 try:
11172 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11173 anno_new_drbd,
11174 _GetInstanceInfoText(self.instance), False)
11175 except errors.GenericError:
11176 self.cfg.ReleaseDRBDMinors(self.instance.name)
11177 raise
11179 # We have new devices, shutdown the drbd on the old secondary
11180 for idx, dev in enumerate(self.instance.disks):
11181 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11182 self.cfg.SetDiskID(dev, self.target_node)
11183 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11184 (dev, self.instance)).fail_msg
11185 if msg:
11186 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11187 " node: %s" % (idx, msg),
11188 hint=("Please cleanup this device manually as"
11189 " soon as possible"))
11191 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11192 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11193 self.instance.disks)[pnode]
11195 msg = result.fail_msg
11196 if msg:
11197 # detaches didn't succeed (unlikely)
11198 self.cfg.ReleaseDRBDMinors(self.instance.name)
11199 raise errors.OpExecError("Can't detach the disks from the network on"
11200 " old node: %s" % (msg,))
11202 # if we managed to detach at least one, we update all the disks of
11203 # the instance to point to the new secondary
11204 self.lu.LogInfo("Updating instance configuration")
11205 for dev, _, new_logical_id in iv_names.itervalues():
11206 dev.logical_id = new_logical_id
11207 self.cfg.SetDiskID(dev, self.instance.primary_node)
11209 self.cfg.Update(self.instance, feedback_fn)
11211 # Release all node locks (the configuration has been updated)
11212 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11214 # and now perform the drbd attach
11215 self.lu.LogInfo("Attaching primary drbds to new secondary"
11216 " (standalone => connected)")
11217 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11218 self.new_node],
11219 self.node_secondary_ip,
11220 (self.instance.disks, self.instance),
11221 self.instance.name,
11222 False)
11223 for to_node, to_result in result.items():
11224 msg = to_result.fail_msg
11225 if msg:
11226 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11227 to_node, msg,
11228 hint=("please do a gnt-instance info to see the"
11229 " status of disks"))
11231 cstep = itertools.count(5)
11233 if self.early_release:
11234 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11235 self._RemoveOldStorage(self.target_node, iv_names)
11236 # TODO: Check if releasing locks early still makes sense
11237 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11239 # Release all resource locks except those used by the instance
11240 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11241 keep=self.node_secondary_ip.keys())
11243 # TODO: Can the instance lock be downgraded here? Take the optional disk
11244 # shutdown in the caller into consideration.
11247 # This can fail as the old devices are degraded and _WaitForSync
11248 # does a combined result over all disks, so we don't check its return value
11249 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11250 _WaitForSync(self.lu, self.instance)
11252 # Check all devices manually
11253 self._CheckDevices(self.instance.primary_node, iv_names)
11255 # Step: remove old storage
11256 if not self.early_release:
11257 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11258 self._RemoveOldStorage(self.target_node, iv_names)
11261 class LURepairNodeStorage(NoHooksLU):
11262 """Repairs the volume group on a node.
11267 def CheckArguments(self):
11268 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11270 storage_type = self.op.storage_type
11272 if (constants.SO_FIX_CONSISTENCY not in
11273 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11274 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11275 " repaired" % storage_type,
11276 errors.ECODE_INVAL)
11278 def ExpandNames(self):
11279 self.needed_locks = {
11280 locking.LEVEL_NODE: [self.op.node_name],
11283 def _CheckFaultyDisks(self, instance, node_name):
11284 """Ensure faulty disks abort the opcode or at least warn."""
11285 try:
11286 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11287 node_name, True):
11288 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11289 " node '%s'" % (instance.name, node_name),
11290 errors.ECODE_STATE)
11291 except errors.OpPrereqError, err:
11292 if self.op.ignore_consistency:
11293 self.proc.LogWarning(str(err.args[0]))
11294 else:
11295 raise
11297 def CheckPrereq(self):
11298 """Check prerequisites.
11301 # Check whether any instance on this node has faulty disks
11302 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11303 if inst.admin_state != constants.ADMINST_UP:
11304 continue
11305 check_nodes = set(inst.all_nodes)
11306 check_nodes.discard(self.op.node_name)
11307 for inst_node_name in check_nodes:
11308 self._CheckFaultyDisks(inst, inst_node_name)
11310 def Exec(self, feedback_fn):
11311 feedback_fn("Repairing storage unit '%s' on %s ..." %
11312 (self.op.name, self.op.node_name))
11314 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11315 result = self.rpc.call_storage_execute(self.op.node_name,
11316 self.op.storage_type, st_args,
11317 self.op.name,
11318 constants.SO_FIX_CONSISTENCY)
11319 result.Raise("Failed to repair storage unit '%s' on %s" %
11320 (self.op.name, self.op.node_name))
11323 class LUNodeEvacuate(NoHooksLU):
11324 """Evacuates instances off a list of nodes.
11329 _MODE2IALLOCATOR = {
11330 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11331 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11332 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11334 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11335 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11336 constants.IALLOCATOR_NEVAC_MODES)
11338 def CheckArguments(self):
11339 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11341 def ExpandNames(self):
11342 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11344 if self.op.remote_node is not None:
11345 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11346 assert self.op.remote_node
11348 if self.op.remote_node == self.op.node_name:
11349 raise errors.OpPrereqError("Can not use evacuated node as a new"
11350 " secondary node", errors.ECODE_INVAL)
11352 if self.op.mode != constants.NODE_EVAC_SEC:
11353 raise errors.OpPrereqError("Without the use of an iallocator only"
11354 " secondary instances can be evacuated",
11355 errors.ECODE_INVAL)
11358 self.share_locks = _ShareAll()
11359 self.needed_locks = {
11360 locking.LEVEL_INSTANCE: [],
11361 locking.LEVEL_NODEGROUP: [],
11362 locking.LEVEL_NODE: [],
11365 # Determine nodes (via group) optimistically, needs verification once locks
11366 # have been acquired
11367 self.lock_nodes = self._DetermineNodes()
11369 def _DetermineNodes(self):
11370 """Gets the list of nodes to operate on.
11373 if self.op.remote_node is None:
11374 # Iallocator will choose any node(s) in the same group
11375 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11377 group_nodes = frozenset([self.op.remote_node])
11379 # Determine nodes to be locked
11380 return set([self.op.node_name]) | group_nodes
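# E.g. (assumed names): evacuating node1 with remote_node=node2 locks
# {node1, node2}; without remote_node, every member of node1's group(s) is
# locked so the iallocator may pick any of them.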
11382 def _DetermineInstances(self):
11383 """Builds list of instances to operate on.
11386 assert self.op.mode in constants.NODE_EVAC_MODES
11388 if self.op.mode == constants.NODE_EVAC_PRI:
11389 # Primary instances only
11390 inst_fn = _GetNodePrimaryInstances
11391 assert self.op.remote_node is None, \
11392 "Evacuating primary instances requires iallocator"
11393 elif self.op.mode == constants.NODE_EVAC_SEC:
11394 # Secondary instances only
11395 inst_fn = _GetNodeSecondaryInstances
11398 assert self.op.mode == constants.NODE_EVAC_ALL
11399 inst_fn = _GetNodeInstances
11400 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11402 raise errors.OpPrereqError("Due to an issue with the iallocator"
11403 " interface it is not possible to evacuate"
11404 " all instances at once; specify explicitly"
11405 " whether to evacuate primary or secondary"
11407 errors.ECODE_INVAL)
11409 return inst_fn(self.cfg, self.op.node_name)
11411 def DeclareLocks(self, level):
11412 if level == locking.LEVEL_INSTANCE:
11413 # Lock instances optimistically, needs verification once node and group
11414 # locks have been acquired
11415 self.needed_locks[locking.LEVEL_INSTANCE] = \
11416 set(i.name for i in self._DetermineInstances())
11418 elif level == locking.LEVEL_NODEGROUP:
11419 # Lock node groups for all potential target nodes optimistically, needs
11420 # verification once nodes have been acquired
11421 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11422 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11424 elif level == locking.LEVEL_NODE:
11425 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11427 def CheckPrereq(self):
11429 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11430 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11431 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11433 need_nodes = self._DetermineNodes()
11435 if not owned_nodes.issuperset(need_nodes):
11436 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11437 " locks were acquired, current nodes are"
11438 " '%s', used to be '%s'; retry the"
11439 " operation" %
11440 (self.op.node_name,
11441 utils.CommaJoin(need_nodes),
11442 utils.CommaJoin(owned_nodes)),
11443 errors.ECODE_STATE)
11445 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11446 if owned_groups != wanted_groups:
11447 raise errors.OpExecError("Node groups changed since locks were acquired,"
11448 " current groups are '%s', used to be '%s';"
11449 " retry the operation" %
11450 (utils.CommaJoin(wanted_groups),
11451 utils.CommaJoin(owned_groups)))
11453 # Determine affected instances
11454 self.instances = self._DetermineInstances()
11455 self.instance_names = [i.name for i in self.instances]
11457 if set(self.instance_names) != owned_instances:
11458 raise errors.OpExecError("Instances on node '%s' changed since locks"
11459 " were acquired, current instances are '%s',"
11460 " used to be '%s'; retry the operation" %
11461 (self.op.node_name,
11462 utils.CommaJoin(self.instance_names),
11463 utils.CommaJoin(owned_instances)))
11465 if self.instance_names:
11466 self.LogInfo("Evacuating instances from node '%s': %s",
11467 self.op.node_name,
11468 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11469 else:
11470 self.LogInfo("No instances to evacuate from node '%s'",
11471 self.op.node_name)
11473 if self.op.remote_node is not None:
11474 for i in self.instances:
11475 if i.primary_node == self.op.remote_node:
11476 raise errors.OpPrereqError("Node %s is the primary node of"
11477 " instance %s, cannot use it as"
11478 " secondary node" %
11479 (self.op.remote_node, i.name),
11480 errors.ECODE_INVAL)
11482 def Exec(self, feedback_fn):
11483 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11485 if not self.instance_names:
11486 # No instances to evacuate
11487 jobs = []
11489 elif self.op.iallocator is not None:
11490 # TODO: Implement relocation to other group
11491 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11492 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11493 instances=list(self.instance_names))
11495 ial.Run(self.op.iallocator)
11497 if not ial.success:
11498 raise errors.OpPrereqError("Can't compute node evacuation using"
11499 " iallocator '%s': %s" %
11500 (self.op.iallocator, ial.info),
11501 errors.ECODE_NORES)
11503 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11505 elif self.op.remote_node is not None:
11506 assert self.op.mode == constants.NODE_EVAC_SEC
11507 jobs = [
11508 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11509 remote_node=self.op.remote_node,
11510 disks=[],
11511 mode=constants.REPLACE_DISK_CHG,
11512 early_release=self.op.early_release)]
11513 for instance_name in self.instance_names
11514 ]
11516 else:
11517 raise errors.ProgrammerError("No iallocator or remote node")
11519 return ResultWithJobs(jobs)
11522 def _SetOpEarlyRelease(early_release, op):
11523 """Sets C{early_release} flag on opcodes if available.
11525 """
11526 try:
11527 op.early_release = early_release
11528 except AttributeError:
11529 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11531 return op
11534 def _NodeEvacDest(use_nodes, group, nodes):
11535 """Returns group or nodes depending on caller's choice.
11537 """
11538 if use_nodes:
11539 return utils.CommaJoin(nodes)
11540 else:
11541 return group
11544 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11545 """Unpacks the result of change-group and node-evacuate iallocator requests.
11547 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11548 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11550 @type lu: L{LogicalUnit}
11551 @param lu: Logical unit instance
11552 @type alloc_result: tuple/list
11553 @param alloc_result: Result from iallocator
11554 @type early_release: bool
11555 @param early_release: Whether to release locks early if possible
11556 @type use_nodes: bool
11557 @param use_nodes: Whether to display node names instead of groups
11560 (moved, failed, jobs) = alloc_result
11562 if failed:
11563 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11564 for (name, reason) in failed)
11565 lu.LogWarning("Unable to evacuate instances %s", failreason)
11566 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11568 if moved:
11569 lu.LogInfo("Instances to be moved: %s",
11570 utils.CommaJoin("%s (to %s)" %
11571 (name, _NodeEvacDest(use_nodes, group, nodes))
11572 for (name, group, nodes) in moved))
11574 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11575 map(opcodes.OpCode.LoadOpCode, ops))
11576 for ops in jobs]
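# Shape of an iallocator node-evacuate result consumed above (values assumed
# for illustration):
#   ([("inst1", "group1", ["node3"])],      # moved: (name, group, nodes)
#    [("inst2", "disk is degraded")],       # failed: (name, reason)
#    [[<serialized OpCode dict>, ...]])     # jobs: one opcode list per job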
11579 class LUInstanceGrowDisk(LogicalUnit):
11580 """Grow a disk of an instance.
11583 HPATH = "disk-grow"
11584 HTYPE = constants.HTYPE_INSTANCE
11587 def ExpandNames(self):
11588 self._ExpandAndLockInstance()
11589 self.needed_locks[locking.LEVEL_NODE] = []
11590 self.needed_locks[locking.LEVEL_NODE_RES] = []
11591 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11592 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11594 def DeclareLocks(self, level):
11595 if level == locking.LEVEL_NODE:
11596 self._LockInstancesNodes()
11597 elif level == locking.LEVEL_NODE_RES:
11599 self.needed_locks[locking.LEVEL_NODE_RES] = \
11600 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
11602 def BuildHooksEnv(self):
11603 """Build hooks env.
11605 This runs on the master, the primary and all the secondaries.
11607 """
11608 env = {
11609 "DISK": self.op.disk,
11610 "AMOUNT": self.op.amount,
11611 "ABSOLUTE": self.op.absolute,
11612 }
11613 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11614 return env
11616 def BuildHooksNodes(self):
11617 """Build hooks nodes.
11620 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11621 return (nl, nl)
11623 def CheckPrereq(self):
11624 """Check prerequisites.
11626 This checks that the instance is in the cluster.
11629 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11630 assert instance is not None, \
11631 "Cannot retrieve locked instance %s" % self.op.instance_name
11632 nodenames = list(instance.all_nodes)
11633 for node in nodenames:
11634 _CheckNodeOnline(self, node)
11636 self.instance = instance
11638 if instance.disk_template not in constants.DTS_GROWABLE:
11639 raise errors.OpPrereqError("Instance's disk layout does not support"
11640 " growing", errors.ECODE_INVAL)
11642 self.disk = instance.FindDisk(self.op.disk)
11644 if self.op.absolute:
11645 self.target = self.op.amount
11646 self.delta = self.target - self.disk.size
11647 if self.delta < 0:
11648 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11649 "current disk size (%s)" %
11650 (utils.FormatUnit(self.target, "h"),
11651 utils.FormatUnit(self.disk.size, "h")),
11652 errors.ECODE_STATE)
11653 else:
11654 self.delta = self.op.amount
11655 self.target = self.disk.size + self.delta
11656 if self.delta < 0:
11657 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11658 utils.FormatUnit(self.delta, "h"),
11659 errors.ECODE_INVAL)
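# Worked example (assumed sizes): growing a 10240 MB disk with absolute=False
# and amount=2048 yields delta=2048 and target=12288; with absolute=True and
# amount=20480 the delta becomes 10240 instead.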
11661 if instance.disk_template not in (constants.DT_FILE,
11662 constants.DT_SHARED_FILE,
11663 constants.DT_RBD):
11664 # TODO: check the free disk space for file, when that feature will be
11665 # supported
11666 _CheckNodesFreeDiskPerVG(self, nodenames,
11667 self.disk.ComputeGrowth(self.delta))
11669 def Exec(self, feedback_fn):
11670 """Execute disk grow.
11673 instance = self.instance
11674 disk = self.disk
11676 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11677 assert (self.owned_locks(locking.LEVEL_NODE) ==
11678 self.owned_locks(locking.LEVEL_NODE_RES))
11680 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11681 if not disks_ok:
11682 raise errors.OpExecError("Cannot activate block device to grow")
11684 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11685 (self.op.disk, instance.name,
11686 utils.FormatUnit(self.delta, "h"),
11687 utils.FormatUnit(self.target, "h")))
11689 # First run all grow ops in dry-run mode
11690 for node in instance.all_nodes:
11691 self.cfg.SetDiskID(disk, node)
11692 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11693 True)
11694 result.Raise("Grow request failed to node %s" % node)
11696 # We know that (as far as we can test) operations across different
11697 # nodes will succeed, time to run it for real
11698 for node in instance.all_nodes:
11699 self.cfg.SetDiskID(disk, node)
11700 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11701 False)
11702 result.Raise("Grow request failed to node %s" % node)
11704 # TODO: Rewrite code to work properly
11705 # DRBD goes into sync mode for a short amount of time after executing the
11706 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11707 # calling "resize" in sync mode fails. Sleeping for a short amount of
11708 # time is a work-around.
11709 time.sleep(5)
11711 disk.RecordGrow(self.delta)
11712 self.cfg.Update(instance, feedback_fn)
11714 # Changes have been recorded, release node lock
11715 _ReleaseLocks(self, locking.LEVEL_NODE)
11717 # Downgrade lock while waiting for sync
11718 self.glm.downgrade(locking.LEVEL_INSTANCE)
11720 if self.op.wait_for_sync:
11721 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11722 if disk_abort:
11723 self.proc.LogWarning("Disk sync-ing has not returned a good"
11724 " status; please check the instance")
11725 if instance.admin_state != constants.ADMINST_UP:
11726 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11727 elif instance.admin_state != constants.ADMINST_UP:
11728 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11729 " not supposed to be running because no wait for"
11730 " sync mode was requested")
11732 assert self.owned_locks(locking.LEVEL_NODE_RES)
11733 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11736 class LUInstanceQueryData(NoHooksLU):
11737 """Query runtime instance data.
11742 def ExpandNames(self):
11743 self.needed_locks = {}
11745 # Use locking if requested or when non-static information is wanted
11746 if not (self.op.static or self.op.use_locking):
11747 self.LogWarning("Non-static data requested, locks need to be acquired")
11748 self.op.use_locking = True
11750 if self.op.instances or not self.op.use_locking:
11751 # Expand instance names right here
11752 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11753 else:
11754 # Will use acquired locks
11755 self.wanted_names = None
11757 if self.op.use_locking:
11758 self.share_locks = _ShareAll()
11760 if self.wanted_names is None:
11761 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11762 else:
11763 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11765 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11766 self.needed_locks[locking.LEVEL_NODE] = []
11767 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11769 def DeclareLocks(self, level):
11770 if self.op.use_locking:
11771 if level == locking.LEVEL_NODEGROUP:
11772 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11774 # Lock all groups used by instances optimistically; this requires going
11775 # via the node before it's locked, requiring verification later on
11776 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11777 frozenset(group_uuid
11778 for instance_name in owned_instances
11779 for group_uuid in
11780 self.cfg.GetInstanceNodeGroups(instance_name))
11782 elif level == locking.LEVEL_NODE:
11783 self._LockInstancesNodes()
11785 def CheckPrereq(self):
11786 """Check prerequisites.
11788 This only checks the optional instance list against the existing names.
11791 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11792 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11793 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11795 if self.wanted_names is None:
11796 assert self.op.use_locking, "Locking was not used"
11797 self.wanted_names = owned_instances
11799 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11801 if self.op.use_locking:
11802 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11803 None)
11804 else:
11805 assert not (owned_instances or owned_groups or owned_nodes)
11807 self.wanted_instances = instances.values()
11809 def _ComputeBlockdevStatus(self, node, instance, dev):
11810 """Returns the status of a block device
11813 if self.op.static or not node:
11814 return None
11816 self.cfg.SetDiskID(dev, node)
11818 result = self.rpc.call_blockdev_find(node, dev)
11819 if result.offline:
11820 return None
11822 result.Raise("Can't compute disk status for %s" % instance.name)
11824 status = result.payload
11825 if status is None:
11826 return None
11828 return (status.dev_path, status.major, status.minor,
11829 status.sync_percent, status.estimated_time,
11830 status.is_degraded, status.ldisk_status)
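# The tuple above follows the field order of objects.BlockDevStatus,
# e.g. (values assumed) ("/dev/drbd0", 147, 0, 95.2, 120, False,
# constants.LDS_OKAY).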
11832 def _ComputeDiskStatus(self, instance, snode, dev):
11833 """Compute block device status.
11836 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11838 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11840 def _ComputeDiskStatusInner(self, instance, snode, dev):
11841 """Compute block device status.
11843 @attention: The device has to be annotated already.
11846 if dev.dev_type in constants.LDS_DRBD:
11847 # we change the snode then (otherwise we use the one passed in)
11848 if dev.logical_id[0] == instance.primary_node:
11849 snode = dev.logical_id[1]
11851 snode = dev.logical_id[0]
11853 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11854 instance, dev)
11855 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11858 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11859 instance, snode),
11860 dev.children)
11864 return {
11865 "iv_name": dev.iv_name,
11866 "dev_type": dev.dev_type,
11867 "logical_id": dev.logical_id,
11868 "physical_id": dev.physical_id,
11869 "pstatus": dev_pstatus,
11870 "sstatus": dev_sstatus,
11871 "children": dev_children,
11872 "mode": dev.mode,
11873 "size": dev.size,
11874 }
11876 def Exec(self, feedback_fn):
11877 """Gather and return data"""
11878 result = {}
11880 cluster = self.cfg.GetClusterInfo()
11882 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11883 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
11885 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11886 for node in nodes.values()))
11888 group2name_fn = lambda uuid: groups[uuid].name
11890 for instance in self.wanted_instances:
11891 pnode = nodes[instance.primary_node]
11893 if self.op.static or pnode.offline:
11894 remote_state = None
11895 if pnode.offline:
11896 self.LogWarning("Primary node %s is marked offline, returning static"
11897 " information only for instance %s" %
11898 (pnode.name, instance.name))
11899 else:
11900 remote_info = self.rpc.call_instance_info(instance.primary_node,
11901 instance.name,
11902 instance.hypervisor)
11903 remote_info.Raise("Error checking node %s" % instance.primary_node)
11904 remote_info = remote_info.payload
11905 if remote_info and "state" in remote_info:
11906 remote_state = "up"
11907 else:
11908 if instance.admin_state == constants.ADMINST_UP:
11909 remote_state = "down"
11910 else:
11911 remote_state = instance.admin_state
11913 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11914 instance.disks)
11916 snodes_group_uuids = [nodes[snode_name].group
11917 for snode_name in instance.secondary_nodes]
11919 result[instance.name] = {
11920 "name": instance.name,
11921 "config_state": instance.admin_state,
11922 "run_state": remote_state,
11923 "pnode": instance.primary_node,
11924 "pnode_group_uuid": pnode.group,
11925 "pnode_group_name": group2name_fn(pnode.group),
11926 "snodes": instance.secondary_nodes,
11927 "snodes_group_uuids": snodes_group_uuids,
11928 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
11930 # this happens to be the same format used for hooks
11931 "nics": _NICListToTuple(self, instance.nics),
11932 "disk_template": instance.disk_template,
11933 "disks": disks,
11934 "hypervisor": instance.hypervisor,
11935 "network_port": instance.network_port,
11936 "hv_instance": instance.hvparams,
11937 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11938 "be_instance": instance.beparams,
11939 "be_actual": cluster.FillBE(instance),
11940 "os_instance": instance.osparams,
11941 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11942 "serial_no": instance.serial_no,
11943 "mtime": instance.mtime,
11944 "ctime": instance.ctime,
11945 "uuid": instance.uuid,
11946 }
11948 return result
11951 def PrepareContainerMods(mods, private_fn):
11952 """Prepares a list of container modifications by adding a private data field.
11954 @type mods: list of tuples; (operation, index, parameters)
11955 @param mods: List of modifications
11956 @type private_fn: callable or None
11957 @param private_fn: Callable for constructing a private data field for a
11962 if private_fn is None:
11963 fn = lambda: None
11964 else:
11965 fn = private_fn
11967 return [(op, idx, params, fn()) for (op, idx, params) in mods]
11970 #: Type description for changes as returned by L{ApplyContainerMods}'s
11972 _TApplyContModsCbChanges = \
11973 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
11974 ht.TNonEmptyString,
11975 ht.TAny,
11976 ])))
11979 def ApplyContainerMods(kind, container, chgdesc, mods,
11980 create_fn, modify_fn, remove_fn):
11981 """Applies descriptions in C{mods} to C{container}.
11984 @param kind: One-word item description
11985 @type container: list
11986 @param container: Container to modify
11987 @type chgdesc: None or list
11988 @param chgdesc: List of applied changes
11990 @param mods: Modifications as returned by L{PrepareContainerMods}
11991 @type create_fn: callable
11992 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
11993 receives absolute item index, parameters and private data object as added
11994 by L{PrepareContainerMods}, returns tuple containing new item and changes
11996 @type modify_fn: callable
11997 @param modify_fn: Callback for modifying an existing item
11998 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
11999 and private data object as added by L{PrepareContainerMods}, returns
12001 @type remove_fn: callable
12002 @param remove_fn: Callback on removing item; receives absolute item index,
12003 item and private data object as added by L{PrepareContainerMods}
12006 for (op, idx, params, private) in mods:
12007 if idx == -1:
12008 # Append
12009 absidx = len(container) - 1
12010 elif idx < 0:
12011 raise IndexError("Not accepting negative indices other than -1")
12012 elif idx > len(container):
12013 raise IndexError("Got %s index %s, but there are only %s" %
12014 (kind, idx, len(container)))
12020 if op == constants.DDM_ADD:
12021 # Calculate where item will be added
12023 addidx = len(container)
12027 if create_fn is None:
12030 (item, changes) = create_fn(addidx, params, private)
12033 container.append(item)
12036 assert idx <= len(container)
12037 # list.insert does so before the specified index
12038 container.insert(idx, item)
12039 else:
12040 # Retrieve existing item
12041 try:
12042 item = container[absidx]
12043 except IndexError:
12044 raise IndexError("Invalid %s index %s" % (kind, idx))
12046 if op == constants.DDM_REMOVE:
12049 if remove_fn is not None:
12050 remove_fn(absidx, item, private)
12052 changes = [("%s/%s" % (kind, absidx), "remove")]
12054 assert container[absidx] == item
12055 del container[absidx]
12056 elif op == constants.DDM_MODIFY:
12057 if modify_fn is not None:
12058 changes = modify_fn(absidx, item, params, private)
12059 else:
12060 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12062 assert _TApplyContModsCbChanges(changes)
12064 if not (chgdesc is None or changes is None):
12065 chgdesc.extend(changes)
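# Minimal usage sketch (hypothetical values): with
#   mods = PrepareContainerMods([(constants.DDM_MODIFY, 0, {"mode": "ro"})], None)
#   ApplyContainerMods("disk", disks, chgdesc, mods, None, modify_fn, None)
# modify_fn(0, disks[0], {"mode": "ro"}, None) is invoked and the change pairs
# it returns are appended to chgdesc.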
12068 def _UpdateIvNames(base_index, disks):
12069 """Updates the C{iv_name} attribute of disks.
12071 @type disks: list of L{objects.Disk}
12074 for (idx, disk) in enumerate(disks):
12075 disk.iv_name = "disk/%s" % (base_index + idx, )
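# E.g. (assumed): _UpdateIvNames(2, [d1, d2]) relabels the disks "disk/2" and
# "disk/3".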
12078 class _InstNicModPrivate:
12079 """Data structure for network interface modifications.
12081 Used by L{LUInstanceSetParams}.
12084 def __init__(self):
12089 class LUInstanceSetParams(LogicalUnit):
12090 """Modifies an instance's parameters.
12093 HPATH = "instance-modify"
12094 HTYPE = constants.HTYPE_INSTANCE
12098 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12099 assert ht.TList(mods)
12100 assert not mods or len(mods[0]) in (2, 3)
12102 if mods and len(mods[0]) == 2:
12103 result = []
12104 addremove = 0
12106 for op, params in mods:
12107 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12108 result.append((op, -1, params))
12109 addremove += 1
12111 if addremove > 1:
12112 raise errors.OpPrereqError("Only one %s add or remove operation is"
12113 " supported at a time" % kind,
12114 errors.ECODE_INVAL)
12115 else:
12116 result.append((constants.DDM_MODIFY, op, params))
12118 assert verify_fn(result)
12119 else:
12120 result = mods
12122 return result
12125 def _CheckMods(kind, mods, key_types, item_fn):
12126 """Ensures requested disk/NIC modifications are valid.
12129 for (op, _, params) in mods:
12130 assert ht.TDict(params)
12132 utils.ForceDictType(params, key_types)
12134 if op == constants.DDM_REMOVE:
12135 if params:
12136 raise errors.OpPrereqError("No settings should be passed when"
12137 " removing a %s" % kind,
12138 errors.ECODE_INVAL)
12139 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12140 item_fn(op, params)
12142 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12145 def _VerifyDiskModification(op, params):
12146 """Verifies a disk modification.
12149 if op == constants.DDM_ADD:
12150 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12151 if mode not in constants.DISK_ACCESS_SET:
12152 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12153 errors.ECODE_INVAL)
12155 size = params.get(constants.IDISK_SIZE, None)
12156 if size is None:
12157 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12158 constants.IDISK_SIZE, errors.ECODE_INVAL)
12160 try:
12161 size = int(size)
12162 except (TypeError, ValueError), err:
12163 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12164 errors.ECODE_INVAL)
12166 params[constants.IDISK_SIZE] = size
12168 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12169 raise errors.OpPrereqError("Disk size change not possible, use"
12170 " grow-disk", errors.ECODE_INVAL)
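# Example (assumed values): an add operation with params
# {constants.IDISK_SIZE: 10240, constants.IDISK_MODE: constants.DISK_RDWR}
# passes this check, while a modify operation carrying IDISK_SIZE is rejected
# in favour of the dedicated grow-disk opcode.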
12173 def _VerifyNicModification(op, params):
12174 """Verifies a network interface modification.
12177 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12178 ip = params.get(constants.INIC_IP, None)
12181 elif ip.lower() == constants.VALUE_NONE:
12182 params[constants.INIC_IP] = None
12183 elif not netutils.IPAddress.IsValid(ip):
12184 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12185 errors.ECODE_INVAL)
12187 bridge = params.get("bridge", None)
12188 link = params.get(constants.INIC_LINK, None)
12189 if bridge and link:
12190 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12191 " at the same time", errors.ECODE_INVAL)
12192 elif bridge and bridge.lower() == constants.VALUE_NONE:
12193 params["bridge"] = None
12194 elif link and link.lower() == constants.VALUE_NONE:
12195 params[constants.INIC_LINK] = None
12197 if op == constants.DDM_ADD:
12198 macaddr = params.get(constants.INIC_MAC, None)
12199 if macaddr is None:
12200 params[constants.INIC_MAC] = constants.VALUE_AUTO
12202 if constants.INIC_MAC in params:
12203 macaddr = params[constants.INIC_MAC]
12204 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12205 macaddr = utils.NormalizeAndValidateMac(macaddr)
12207 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12208 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12209 " modifying an existing NIC",
12210 errors.ECODE_INVAL)
12212 def CheckArguments(self):
12213 if not (self.op.nics or self.op.disks or self.op.disk_template or
12214 self.op.hvparams or self.op.beparams or self.op.os_name or
12215 self.op.offline is not None or self.op.runtime_mem):
12216 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12218 if self.op.hvparams:
12219 _CheckGlobalHvParams(self.op.hvparams)
12221 self.op.disks = \
12222 self._UpgradeDiskNicMods("disk", self.op.disks,
12223 opcodes.OpInstanceSetParams.TestDiskModifications)
12224 self.op.nics = \
12225 self._UpgradeDiskNicMods("NIC", self.op.nics,
12226 opcodes.OpInstanceSetParams.TestNicModifications)
12228 # Check disk modifications
12229 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12230 self._VerifyDiskModification)
12232 if self.op.disks and self.op.disk_template is not None:
12233 raise errors.OpPrereqError("Disk template conversion and other disk"
12234 " changes not supported at the same time",
12235 errors.ECODE_INVAL)
12237 if (self.op.disk_template and
12238 self.op.disk_template in constants.DTS_INT_MIRROR and
12239 self.op.remote_node is None):
12240 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12241 " one requires specifying a secondary node",
12242 errors.ECODE_INVAL)
12244 # Check NIC modifications
12245 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12246 self._VerifyNicModification)
12248 def ExpandNames(self):
12249 self._ExpandAndLockInstance()
12250 # Can't even acquire node locks in shared mode as upcoming changes in
12251 # Ganeti 2.6 will start to modify the node object on disk conversion
12252 self.needed_locks[locking.LEVEL_NODE] = []
12253 self.needed_locks[locking.LEVEL_NODE_RES] = []
12254 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12256 def DeclareLocks(self, level):
12257 # TODO: Acquire group lock in shared mode (disk parameters)
12258 if level == locking.LEVEL_NODE:
12259 self._LockInstancesNodes()
12260 if self.op.disk_template and self.op.remote_node:
12261 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12262 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12263 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12265 self.needed_locks[locking.LEVEL_NODE_RES] = \
12266 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12268 def BuildHooksEnv(self):
12269 """Build hooks env.
12271 This runs on the master, primary and secondaries.
12273 """
12274 args = {}
12275 if constants.BE_MINMEM in self.be_new:
12276 args["minmem"] = self.be_new[constants.BE_MINMEM]
12277 if constants.BE_MAXMEM in self.be_new:
12278 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12279 if constants.BE_VCPUS in self.be_new:
12280 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12281 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12282 # information at all.
12284 if self._new_nics is not None:
12285 nics = []
12287 for nic in self._new_nics:
12288 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12289 mode = nicparams[constants.NIC_MODE]
12290 link = nicparams[constants.NIC_LINK]
12291 nics.append((nic.ip, nic.mac, mode, link))
12293 args["nics"] = nics
12295 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12296 if self.op.disk_template:
12297 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12298 if self.op.runtime_mem:
12299 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12301 return env
12303 def BuildHooksNodes(self):
12304 """Build hooks nodes.
12307 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12308 return (nl, nl)
12310 def _PrepareNicModification(self, params, private, old_ip, old_params,
12311 cluster, pnode):
12312 update_params_dict = dict([(key, params[key])
12313 for key in constants.NICS_PARAMETERS
12314 if key in params])
12316 if "bridge" in params:
12317 update_params_dict[constants.NIC_LINK] = params["bridge"]
12319 new_params = _GetUpdatedParams(old_params, update_params_dict)
12320 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12322 new_filled_params = cluster.SimpleFillNIC(new_params)
12323 objects.NIC.CheckParameterSyntax(new_filled_params)
12325 new_mode = new_filled_params[constants.NIC_MODE]
12326 if new_mode == constants.NIC_MODE_BRIDGED:
12327 bridge = new_filled_params[constants.NIC_LINK]
12328 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12329 if msg:
12330 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12331 if self.op.force:
12332 self.warn.append(msg)
12333 else:
12334 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12336 elif new_mode == constants.NIC_MODE_ROUTED:
12337 ip = params.get(constants.INIC_IP, old_ip)
12338 if ip is None:
12339 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12340 " on a routed NIC", errors.ECODE_INVAL)
12342 if constants.INIC_MAC in params:
12343 mac = params[constants.INIC_MAC]
12345 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12346 errors.ECODE_INVAL)
12347 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12348 # otherwise generate the MAC address
12349 params[constants.INIC_MAC] = \
12350 self.cfg.GenerateMAC(self.proc.GetECId())
12351 else:
12352 # or validate/reserve the current one
12353 try:
12354 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12355 except errors.ReservationError:
12356 raise errors.OpPrereqError("MAC address '%s' already in use"
12357 " in cluster" % mac,
12358 errors.ECODE_NOTUNIQUE)
12360 private.params = new_params
12361 private.filled = new_filled_params
12363 def CheckPrereq(self):
12364 """Check prerequisites.
12366 This only checks the instance list against the existing names.
12369 # checking the new params on the primary/secondary nodes
12371 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12372 cluster = self.cluster = self.cfg.GetClusterInfo()
12373 assert self.instance is not None, \
12374 "Cannot retrieve locked instance %s" % self.op.instance_name
12375 pnode = instance.primary_node
12376 nodelist = list(instance.all_nodes)
12377 pnode_info = self.cfg.GetNodeInfo(pnode)
12378 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12380 # Prepare disk/NIC modifications
12381 self.diskmod = PrepareContainerMods(self.op.disks, None)
12382 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12385 if self.op.os_name and not self.op.force:
12386 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12387 self.op.force_variant)
12388 instance_os = self.op.os_name
12390 instance_os = instance.os
12392 assert not (self.op.disk_template and self.op.disks), \
12393 "Can't modify disk template and apply disk changes at the same time"
12395 if self.op.disk_template:
12396 if instance.disk_template == self.op.disk_template:
12397 raise errors.OpPrereqError("Instance already has disk template %s" %
12398 instance.disk_template, errors.ECODE_INVAL)
12400 if (instance.disk_template,
12401 self.op.disk_template) not in self._DISK_CONVERSIONS:
12402 raise errors.OpPrereqError("Unsupported disk template conversion from"
12403 " %s to %s" % (instance.disk_template,
12404 self.op.disk_template),
12405 errors.ECODE_INVAL)
12406 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12407 msg="cannot change disk template")
12408 if self.op.disk_template in constants.DTS_INT_MIRROR:
12409 if self.op.remote_node == pnode:
12410 raise errors.OpPrereqError("Given new secondary node %s is the same"
12411 " as the primary node of the instance" %
12412 self.op.remote_node, errors.ECODE_STATE)
12413 _CheckNodeOnline(self, self.op.remote_node)
12414 _CheckNodeNotDrained(self, self.op.remote_node)
12415 # FIXME: here we assume that the old instance type is DT_PLAIN
12416 assert instance.disk_template == constants.DT_PLAIN
12417 disks = [{constants.IDISK_SIZE: d.size,
12418 constants.IDISK_VG: d.logical_id[0]}
12419 for d in instance.disks]
12420 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12421 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12423 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12424 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12425 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12426 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12427 ignore=self.op.ignore_ipolicy)
12428 if pnode_info.group != snode_info.group:
12429 self.LogWarning("The primary and secondary nodes are in two"
12430 " different node groups; the disk parameters"
12431 " from the first disk's node group will be"
12434 # hvparams processing
12435 if self.op.hvparams:
12436 hv_type = instance.hypervisor
12437 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12438 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12439 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12442 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12443 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12444 self.hv_proposed = self.hv_new = hv_new # the new actual values
12445 self.hv_inst = i_hvdict # the new dict (without defaults)
12446 else:
12447 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12448 instance.hvparams)
12449 self.hv_new = self.hv_inst = {}
12451 # beparams processing
12452 if self.op.beparams:
12453 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12454 use_none=True)
12455 objects.UpgradeBeParams(i_bedict)
12456 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12457 be_new = cluster.SimpleFillBE(i_bedict)
12458 self.be_proposed = self.be_new = be_new # the new actual values
12459 self.be_inst = i_bedict # the new dict (without defaults)
12460 else:
12461 self.be_new = self.be_inst = {}
12462 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12463 be_old = cluster.FillBE(instance)
12465 # CPU param validation -- checking every time a parameter is
12466 # changed to cover all cases where either CPU mask or vcpus have
12468 if (constants.BE_VCPUS in self.be_proposed and
12469 constants.HV_CPU_MASK in self.hv_proposed):
12470 cpu_list = \
12471 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12472 # Verify mask is consistent with number of vCPUs. Can skip this
12473 # test if only 1 entry in the CPU mask, which means same mask
12474 # is applied to all vCPUs.
12475 if (len(cpu_list) > 1 and
12476 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12477 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12479 (self.be_proposed[constants.BE_VCPUS],
12480 self.hv_proposed[constants.HV_CPU_MASK]),
12481 errors.ECODE_INVAL)
12483 # Only perform this test if a new CPU mask is given
12484 if constants.HV_CPU_MASK in self.hv_new:
12485 # Calculate the largest CPU number requested
12486 max_requested_cpu = max(map(max, cpu_list))
12487 # Check that all of the instance's nodes have enough physical CPUs to
12488 # satisfy the requested CPU mask
12489 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12490 max_requested_cpu + 1, instance.hypervisor)
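# Example (assumed mask): with BE_VCPUS=2 a multi-entry mask such as "0-1:2-3"
# parses into one list per vCPU and passes the length check; its highest CPU
# number (3) then requires every node to expose at least 4 physical CPUs.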
12492 # osparams processing
12493 if self.op.osparams:
12494 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12495 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12496 self.os_inst = i_osdict # the new dict (without defaults)
12497 else:
12498 self.os_inst = {}
12500 self.warn = []
12502 #TODO(dynmem): do the appropriate check involving MINMEM
12503 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12504 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12505 mem_check_list = [pnode]
12506 if be_new[constants.BE_AUTO_BALANCE]:
12507 # either we changed auto_balance to yes or it was from before
12508 mem_check_list.extend(instance.secondary_nodes)
12509 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12510 instance.hypervisor)
12511 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12512 [instance.hypervisor])
12513 pninfo = nodeinfo[pnode]
12514 msg = pninfo.fail_msg
12515 if msg:
12516 # Assume the primary node is unreachable and go ahead
12517 self.warn.append("Can't get info from primary node %s: %s" %
12518 (pnode, msg))
12519 else:
12520 (_, _, (pnhvinfo, )) = pninfo.payload
12521 if not isinstance(pnhvinfo.get("memory_free", None), int):
12522 self.warn.append("Node data from primary node %s doesn't contain"
12523 " free memory information" % pnode)
12524 elif instance_info.fail_msg:
12525 self.warn.append("Can't get instance runtime information: %s" %
12526 instance_info.fail_msg)
12527 else:
12528 if instance_info.payload:
12529 current_mem = int(instance_info.payload["memory"])
12530 else:
12531 # Assume instance not running
12532 # (there is a slight race condition here, but it's not very
12533 # probable, and we have no other way to check)
12534 # TODO: Describe race condition
12535 current_mem = 0
12536 #TODO(dynmem): do the appropriate check involving MINMEM
12537 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12538 pnhvinfo["memory_free"])
12539 if miss_mem > 0:
12540 raise errors.OpPrereqError("This change will prevent the instance"
12541 " from starting, due to %d MB of memory"
12542 " missing on its primary node" %
12543 miss_mem,
12544 errors.ECODE_NORES)
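# Worked example (assumed numbers): raising BE_MAXMEM to 4096 MB while the
# instance currently uses 1024 MB on a primary node with 2048 MB free gives
# miss_mem = 4096 - 1024 - 2048 = 1024 > 0, so the change is refused.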
12546 if be_new[constants.BE_AUTO_BALANCE]:
12547 for node, nres in nodeinfo.items():
12548 if node not in instance.secondary_nodes:
12549 continue
12550 nres.Raise("Can't get info from secondary node %s" % node,
12551 prereq=True, ecode=errors.ECODE_STATE)
12552 (_, _, (nhvinfo, )) = nres.payload
12553 if not isinstance(nhvinfo.get("memory_free", None), int):
12554 raise errors.OpPrereqError("Secondary node %s didn't return free"
12555 " memory information" % node,
12556 errors.ECODE_STATE)
12557 #TODO(dynmem): do the appropriate check involving MINMEM
12558 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12559 raise errors.OpPrereqError("This change will prevent the instance"
12560 " from failover to its secondary node"
12561 " %s, due to not enough memory" % node,
12562 errors.ECODE_STATE)
12564 if self.op.runtime_mem:
12565 remote_info = self.rpc.call_instance_info(instance.primary_node,
12566 instance.name,
12567 instance.hypervisor)
12568 remote_info.Raise("Error checking node %s" % instance.primary_node)
12569 if not remote_info.payload: # not running already
12570 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12571 errors.ECODE_STATE)
12573 current_memory = remote_info.payload["memory"]
12574 if (not self.op.force and
12575 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12576 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12577 raise errors.OpPrereqError("Instance %s must have memory between %d"
12578 " and %d MB of memory unless --force is"
12579 " given" % (instance.name,
12580 self.be_proposed[constants.BE_MINMEM],
12581 self.be_proposed[constants.BE_MAXMEM]),
12582 errors.ECODE_INVAL)
12584 if self.op.runtime_mem > current_memory:
12585 _CheckNodeFreeMemory(self, instance.primary_node,
12586 "ballooning memory for instance %s" %
12587 instance.name,
12588 self.op.runtime_mem - current_memory,
12589 instance.hypervisor)
12591 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12592 raise errors.OpPrereqError("Disk operations not supported for"
12593 " diskless instances",
12594 errors.ECODE_INVAL)
12596 def _PrepareNicCreate(_, params, private):
12597 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12598 return (None, None)
12600 def _PrepareNicMod(_, nic, params, private):
12601 self._PrepareNicModification(params, private, nic.ip,
12602 nic.nicparams, cluster, pnode)
12605 # Verify NIC changes (operating on copy)
12606 nics = instance.nics[:]
12607 ApplyContainerMods("NIC", nics, None, self.nicmod,
12608 _PrepareNicCreate, _PrepareNicMod, None)
12609 if len(nics) > constants.MAX_NICS:
12610 raise errors.OpPrereqError("Instance has too many network interfaces"
12611 " (%d), cannot add more" % constants.MAX_NICS,
12612 errors.ECODE_STATE)
12614 # Verify disk changes (operating on a copy)
12615 disks = instance.disks[:]
12616 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12617 if len(disks) > constants.MAX_DISKS:
12618 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12619 " more" % constants.MAX_DISKS,
12620 errors.ECODE_STATE)
12622 if self.op.offline is not None:
12623 if self.op.offline:
12624 msg = "can't change to offline"
12625 else:
12626 msg = "can't change to online"
12627 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12629 # Pre-compute NIC changes (necessary to use result in hooks)
12630 self._nic_chgdesc = []
12631 if self.nicmod:
12632 # Operate on copies as this is still in prereq
12633 nics = [nic.Copy() for nic in instance.nics]
12634 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12635 self._CreateNewNic, self._ApplyNicMods, None)
12636 self._new_nics = nics
12637 else:
12638 self._new_nics = None
12640 def _ConvertPlainToDrbd(self, feedback_fn):
12641 """Converts an instance from plain to drbd.
12644 feedback_fn("Converting template to drbd")
12645 instance = self.instance
12646 pnode = instance.primary_node
12647 snode = self.op.remote_node
12649 assert instance.disk_template == constants.DT_PLAIN
12651 # create a fake disk info for _GenerateDiskTemplate
12652 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12653 constants.IDISK_VG: d.logical_id[0]}
12654 for d in instance.disks]
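# Example of the synthesized disk_info (values assumed): a single 10 GiB plain
# disk on volume group "xenvg" becomes
# [{constants.IDISK_SIZE: 10240, constants.IDISK_MODE: "rw",
#   constants.IDISK_VG: "xenvg"}].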
12655 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12656 instance.name, pnode, [snode],
12657 disk_info, None, None, 0, feedback_fn,
12659 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12661 info = _GetInstanceInfoText(instance)
12662 feedback_fn("Creating additional volumes...")
12663 # first, create the missing data and meta devices
12664 for disk in anno_disks:
12665 # unfortunately this is... not too nice
12666 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12668 for child in disk.children:
12669 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12670 # at this stage, all new LVs have been created, we can rename the
12672 feedback_fn("Renaming original volumes...")
12673 rename_list = [(o, n.children[0].logical_id)
12674 for (o, n) in zip(instance.disks, new_disks)]
12675 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12676 result.Raise("Failed to rename original LVs")
12678 feedback_fn("Initializing DRBD devices...")
12679 # all child devices are in place, we can now create the DRBD devices
12680 for disk in anno_disks:
12681 for node in [pnode, snode]:
12682 f_create = node == pnode
12683 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12685 # at this point, the instance has been modified
12686 instance.disk_template = constants.DT_DRBD8
12687 instance.disks = new_disks
12688 self.cfg.Update(instance, feedback_fn)
12690 # Release node locks while waiting for sync
12691 _ReleaseLocks(self, locking.LEVEL_NODE)
12693 # disks are created, waiting for sync
12694 disk_abort = not _WaitForSync(self, instance,
12695 oneshot=not self.op.wait_for_sync)
12696 if disk_abort:
12697 raise errors.OpExecError("There are some degraded disks for"
12698 " this instance, please cleanup manually")
12700 # Node resource locks will be released by caller
12702 def _ConvertDrbdToPlain(self, feedback_fn):
12703 """Converts an instance from drbd to plain.
12706 instance = self.instance
12708 assert len(instance.secondary_nodes) == 1
12709 assert instance.disk_template == constants.DT_DRBD8
12711 pnode = instance.primary_node
12712 snode = instance.secondary_nodes[0]
12713 feedback_fn("Converting template to plain")
12715 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12716 new_disks = [d.children[0] for d in instance.disks]
12718 # copy over size and mode
12719 for parent, child in zip(old_disks, new_disks):
12720 child.size = parent.size
12721 child.mode = parent.mode
12723 # this is a DRBD disk, return its port to the pool
12724 # NOTE: this must be done right before the call to cfg.Update!
12725 for disk in old_disks:
12726 tcp_port = disk.logical_id[2]
12727 self.cfg.AddTcpUdpPort(tcp_port)
12729 # update instance structure
12730 instance.disks = new_disks
12731 instance.disk_template = constants.DT_PLAIN
12732 self.cfg.Update(instance, feedback_fn)
12734 # Release locks in case removing disks takes a while
12735 _ReleaseLocks(self, locking.LEVEL_NODE)
12737 feedback_fn("Removing volumes on the secondary node...")
12738 for disk in old_disks:
12739 self.cfg.SetDiskID(disk, snode)
12740 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12741 if msg:
12742 self.LogWarning("Could not remove block device %s on node %s,"
12743 " continuing anyway: %s", disk.iv_name, snode, msg)
12745 feedback_fn("Removing unneeded volumes on the primary node...")
12746 for idx, disk in enumerate(old_disks):
12747 meta = disk.children[1]
12748 self.cfg.SetDiskID(meta, pnode)
12749 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12750 if msg:
12751 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12752 " continuing anyway: %s", idx, pnode, msg)
12754 def _CreateNewDisk(self, idx, params, _):
12755 """Creates a new disk.
12758 instance = self.instance
12761 if instance.disk_template in constants.DTS_FILEBASED:
12762 (file_driver, file_path) = instance.disks[0].logical_id
12763 file_path = os.path.dirname(file_path)
12764 else:
12765 file_driver = file_path = None
12768 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12769 instance.primary_node, instance.secondary_nodes,
12770 [params], file_path, file_driver, idx,
12771 self.Log, self.diskparams)[0]
12773 info = _GetInstanceInfoText(instance)
12775 logging.info("Creating volume %s for instance %s",
12776 disk.iv_name, instance.name)
12777 # Note: this needs to be kept in sync with _CreateDisks
12779 for node in instance.all_nodes:
12780 f_create = (node == instance.primary_node)
12781 try:
12782 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12783 except errors.OpExecError, err:
12784 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12785 disk.iv_name, disk, node, err)
12788 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12792 def _ModifyDisk(idx, disk, params, _):
12793 """Modifies a disk.
12796 disk.mode = params[constants.IDISK_MODE]
12799 ("disk.mode/%d" % idx, disk.mode),
12802 def _RemoveDisk(self, idx, root, _):
12806 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
12807 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
12808 self.cfg.SetDiskID(disk, node)
12809 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12810 if msg:
12811 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12812 " continuing anyway", idx, node, msg)
12814 # if this is a DRBD disk, return its port to the pool
12815 if root.dev_type in constants.LDS_DRBD:
12816 self.cfg.AddTcpUdpPort(root.logical_id[2])
12819 def _CreateNewNic(idx, params, private):
12820 """Creates data structure for a new network interface.
12823 mac = params[constants.INIC_MAC]
12824 ip = params.get(constants.INIC_IP, None)
12825 nicparams = private.params
12827 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12829 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12830 (mac, ip, private.filled[constants.NIC_MODE],
12831 private.filled[constants.NIC_LINK])),
12835 def _ApplyNicMods(idx, nic, params, private):
12836 """Modifies a network interface.
12841 for key in [constants.INIC_MAC, constants.INIC_IP]:
12842 if key in params:
12843 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12844 setattr(nic, key, params[key])
12847 nic.nicparams = private.params
12849 for (key, val) in params.items():
12850 changes.append(("nic.%s/%d" % (key, idx), val))
12854 def Exec(self, feedback_fn):
12855 """Modifies an instance.
12857 All parameters take effect only at the next restart of the instance.
12860 # Process here the warnings from CheckPrereq, as we don't have a
12861 # feedback_fn there.
12862 # TODO: Replace with self.LogWarning
12863 for warn in self.warn:
12864 feedback_fn("WARNING: %s" % warn)
12866 assert ((self.op.disk_template is None) ^
12867 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12868 "Not owning any node resource locks"
12870 result = []
12871 instance = self.instance
12874 if self.op.runtime_mem:
12875 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12877 self.op.runtime_mem)
12878 rpcres.Raise("Cannot modify instance runtime memory")
12879 result.append(("runtime_memory", self.op.runtime_mem))
12881 # Apply disk changes
12882 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12883 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12884 _UpdateIvNames(0, instance.disks)
12886 if self.op.disk_template:
12888 check_nodes = set(instance.all_nodes)
12889 if self.op.remote_node:
12890 check_nodes.add(self.op.remote_node)
12891 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12892 owned = self.owned_locks(level)
12893 assert not (check_nodes - owned), \
12894 ("Not owning the correct locks, owning %r, expected at least %r" %
12895 (owned, check_nodes))
12897 r_shut = _ShutdownInstanceDisks(self, instance)
12898 if not r_shut:
12899 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12900 " proceed with disk template conversion")
12901 mode = (instance.disk_template, self.op.disk_template)
12903 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12905 self.cfg.ReleaseDRBDMinors(instance.name)
12907 result.append(("disk_template", self.op.disk_template))
12909 assert instance.disk_template == self.op.disk_template, \
12910 ("Expected disk template '%s', found '%s'" %
12911 (self.op.disk_template, instance.disk_template))
12913 # Release node and resource locks if there are any (they might already have
12914 # been released during disk conversion)
12915 _ReleaseLocks(self, locking.LEVEL_NODE)
12916 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12918 # Apply NIC changes
12919 if self._new_nics is not None:
12920 instance.nics = self._new_nics
12921 result.extend(self._nic_chgdesc)
12924 if self.op.hvparams:
12925 instance.hvparams = self.hv_inst
12926 for key, val in self.op.hvparams.iteritems():
12927 result.append(("hv/%s" % key, val))
12930 if self.op.beparams:
12931 instance.beparams = self.be_inst
12932 for key, val in self.op.beparams.iteritems():
12933 result.append(("be/%s" % key, val))
12936 if self.op.os_name:
12937 instance.os = self.op.os_name
12940 if self.op.osparams:
12941 instance.osparams = self.os_inst
12942 for key, val in self.op.osparams.iteritems():
12943 result.append(("os/%s" % key, val))
12945 if self.op.offline is None:
12948 elif self.op.offline:
12949 # Mark instance as offline
12950 self.cfg.MarkInstanceOffline(instance.name)
12951 result.append(("admin_state", constants.ADMINST_OFFLINE))
12952 else:
12953 # Mark instance as online, but stopped
12954 self.cfg.MarkInstanceDown(instance.name)
12955 result.append(("admin_state", constants.ADMINST_DOWN))
12957 self.cfg.Update(instance, feedback_fn)
12959 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12960 self.owned_locks(locking.LEVEL_NODE)), \
12961 "All node locks should have been released by now"
12965 _DISK_CONVERSIONS = {
12966 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12967 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12968 }
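# The table above dispatches on the (current, requested) disk template pair.
# A tiny standalone sketch of the same lookup pattern, using plain strings
# instead of the DT_* constants:
def _SketchPickConversion(table, current, requested):
  """Returns the conversion handler for a template change, if supported."""
  try:
    return table[(current, requested)]
  except KeyError:
    raise ValueError("Unsupported conversion %s -> %s" % (current, requested))

# _SketchPickConversion({("plain", "drbd8"): "to_drbd"}, "plain", "drbd8")
# == "to_drbd"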
12971 class LUInstanceChangeGroup(LogicalUnit):
12972 HPATH = "instance-change-group"
12973 HTYPE = constants.HTYPE_INSTANCE
12976 def ExpandNames(self):
12977 self.share_locks = _ShareAll()
12978 self.needed_locks = {
12979 locking.LEVEL_NODEGROUP: [],
12980 locking.LEVEL_NODE: [],
12983 self._ExpandAndLockInstance()
12985 if self.op.target_groups:
12986 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12987 self.op.target_groups)
12988 else:
12989 self.req_target_uuids = None
12991 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12993 def DeclareLocks(self, level):
12994 if level == locking.LEVEL_NODEGROUP:
12995 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12997 if self.req_target_uuids:
12998 lock_groups = set(self.req_target_uuids)
13000 # Lock all groups used by instance optimistically; this requires going
13001 # via the node before it's locked, requiring verification later on
13002 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13003 lock_groups.update(instance_groups)
13004 else:
13005 # No target groups, need to lock all of them
13006 lock_groups = locking.ALL_SET
13008 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13010 elif level == locking.LEVEL_NODE:
13011 if self.req_target_uuids:
13012 # Lock all nodes used by instances
13013 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13014 self._LockInstancesNodes()
13016 # Lock all nodes in all potential target groups
13017 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13018 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13019 member_nodes = [node_name
13020 for group in lock_groups
13021 for node_name in self.cfg.GetNodeGroup(group).members]
13022 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13023 else:
13024 # Lock all nodes as all groups are potential targets
13025 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13027 def CheckPrereq(self):
13028 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13029 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13030 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13032 assert (self.req_target_uuids is None or
13033 owned_groups.issuperset(self.req_target_uuids))
13034 assert owned_instances == set([self.op.instance_name])
13036 # Get instance information
13037 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13039 # Check if node groups for locked instance are still correct
13040 assert owned_nodes.issuperset(self.instance.all_nodes), \
13041 ("Instance %s's nodes changed while we kept the lock" %
13042 self.op.instance_name)
13044 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13047 if self.req_target_uuids:
13048 # User requested specific target groups
13049 self.target_uuids = frozenset(self.req_target_uuids)
13050 else:
13051 # All groups except those used by the instance are potential targets
13052 self.target_uuids = owned_groups - inst_groups
13054 conflicting_groups = self.target_uuids & inst_groups
13055 if conflicting_groups:
13056 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13057 " used by the instance '%s'" %
13058 (utils.CommaJoin(conflicting_groups),
13059 self.op.instance_name),
13060 errors.ECODE_INVAL)
13062 if not self.target_uuids:
13063 raise errors.OpPrereqError("There are no possible target groups",
13064 errors.ECODE_INVAL)
13066 def BuildHooksEnv(self):
13067 """Build hooks env.
13070 assert self.target_uuids
13073 "TARGET_GROUPS": " ".join(self.target_uuids),
13076 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13080 def BuildHooksNodes(self):
13081 """Build hooks nodes.
13084 mn = self.cfg.GetMasterNode()
13085 return ([mn], [mn])
13087 def Exec(self, feedback_fn):
13088 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13090 assert instances == [self.op.instance_name], "Instance not locked"
13092 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13093 instances=instances, target_groups=list(self.target_uuids))
13095 ial.Run(self.op.iallocator)
13097 if not ial.success:
13098 raise errors.OpPrereqError("Can't compute solution for changing group of"
13099 " instance '%s' using iallocator '%s': %s" %
13100 (self.op.instance_name, self.op.iallocator,
13102 errors.ECODE_NORES)
13104 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13106 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13107 " instance '%s'", len(jobs), self.op.instance_name)
13109 return ResultWithJobs(jobs)
13112 class LUBackupQuery(NoHooksLU):
13113 """Query the exports list
13118 def CheckArguments(self):
13119 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13120 ["node", "export"], self.op.use_locking)
13122 def ExpandNames(self):
13123 self.expq.ExpandNames(self)
13125 def DeclareLocks(self, level):
13126 self.expq.DeclareLocks(self, level)
13128 def Exec(self, feedback_fn):
13129 result = {}
13131 for (node, expname) in self.expq.OldStyleQuery(self):
13132 if expname is None:
13133 result[node] = False
13134 else:
13135 result.setdefault(node, []).append(expname)
13137 return result
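# Shape of the old-style result assembled above (values are made up): a node
# that failed to answer maps to False, while a reachable node maps to the list
# of export names found on it.
_EXAMPLE_EXPORT_QUERY_RESULT = {
  "node1.example.com": False,
  "node2.example.com": ["instance1.example.com", "instance2.example.com"],
}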
13140 class _ExportQuery(_QueryBase):
13141 FIELDS = query.EXPORT_FIELDS
13143 #: The node name is not a unique key for this query
13144 SORT_FIELD = "node"
13146 def ExpandNames(self, lu):
13147 lu.needed_locks = {}
13149 # The following variables interact with _QueryBase._GetNames
13150 if self.names:
13151 self.wanted = _GetWantedNodes(lu, self.names)
13152 else:
13153 self.wanted = locking.ALL_SET
13155 self.do_locking = self.use_locking
13157 if self.do_locking:
13158 lu.share_locks = _ShareAll()
13159 lu.needed_locks = {
13160 locking.LEVEL_NODE: self.wanted,
13163 def DeclareLocks(self, lu, level):
13164 pass
13166 def _GetQueryData(self, lu):
13167 """Computes the list of nodes and their attributes.
13170 # Locking is not used
13172 assert not (compat.any(lu.glm.is_owned(level)
13173 for level in locking.LEVELS
13174 if level != locking.LEVEL_CLUSTER) or
13175 self.do_locking or self.use_locking)
13177 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13179 result = []
13181 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13182 if nres.fail_msg:
13183 result.append((node, None))
13184 else:
13185 result.extend((node, expname) for expname in nres.payload)
13187 return result
13190 class LUBackupPrepare(NoHooksLU):
13191 """Prepares an instance for an export and returns useful information.
13196 def ExpandNames(self):
13197 self._ExpandAndLockInstance()
13199 def CheckPrereq(self):
13200 """Check prerequisites.
13203 instance_name = self.op.instance_name
13205 self.instance = self.cfg.GetInstanceInfo(instance_name)
13206 assert self.instance is not None, \
13207 "Cannot retrieve locked instance %s" % self.op.instance_name
13208 _CheckNodeOnline(self, self.instance.primary_node)
13210 self._cds = _GetClusterDomainSecret()
13212 def Exec(self, feedback_fn):
13213 """Prepares an instance for an export.
13216 instance = self.instance
13218 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13219 salt = utils.GenerateSecret(8)
13221 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13222 result = self.rpc.call_x509_cert_create(instance.primary_node,
13223 constants.RIE_CERT_VALIDITY)
13224 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13226 (name, cert_pem) = result.payload
13228 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13232 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13233 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13235 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13241 class LUBackupExport(LogicalUnit):
13242 """Export an instance to an image in the cluster.
13245 HPATH = "instance-export"
13246 HTYPE = constants.HTYPE_INSTANCE
13249 def CheckArguments(self):
13250 """Check the arguments.
13253 self.x509_key_name = self.op.x509_key_name
13254 self.dest_x509_ca_pem = self.op.destination_x509_ca
13256 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13257 if not self.x509_key_name:
13258 raise errors.OpPrereqError("Missing X509 key name for encryption",
13259 errors.ECODE_INVAL)
13261 if not self.dest_x509_ca_pem:
13262 raise errors.OpPrereqError("Missing destination X509 CA",
13263 errors.ECODE_INVAL)
13265 def ExpandNames(self):
13266 self._ExpandAndLockInstance()
13268 # Lock all nodes for local exports
13269 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13270 # FIXME: lock only instance primary and destination node
13272 # Sad but true, for now we have to lock all nodes, as we don't know where
13273 # the previous export might be, and in this LU we search for it and
13274 # remove it from its current node. In the future we could fix this by:
13275 # - making a tasklet to search (share-lock all), then create the
13276 # new one, then one to remove, after
13277 # - removing the removal operation altogether
13278 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13280 def DeclareLocks(self, level):
13281 """Last minute lock declaration."""
13282 # All nodes are locked anyway, so nothing to do here.
13284 def BuildHooksEnv(self):
13285 """Build hooks env.
13287 This will run on the master, primary node and target node.
13291 "EXPORT_MODE": self.op.mode,
13292 "EXPORT_NODE": self.op.target_node,
13293 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13294 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13295 # TODO: Generic function for boolean env variables
13296 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13299 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13303 def BuildHooksNodes(self):
13304 """Build hooks nodes.
13307 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13309 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13310 nl.append(self.op.target_node)
13314 def CheckPrereq(self):
13315 """Check prerequisites.
13317 This checks that the instance and node names are valid.
13320 instance_name = self.op.instance_name
13322 self.instance = self.cfg.GetInstanceInfo(instance_name)
13323 assert self.instance is not None, \
13324 "Cannot retrieve locked instance %s" % self.op.instance_name
13325 _CheckNodeOnline(self, self.instance.primary_node)
13327 if (self.op.remove_instance and
13328 self.instance.admin_state == constants.ADMINST_UP and
13329 not self.op.shutdown):
13330 raise errors.OpPrereqError("Can not remove instance without shutting it"
13333 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13334 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13335 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13336 assert self.dst_node is not None
13338 _CheckNodeOnline(self, self.dst_node.name)
13339 _CheckNodeNotDrained(self, self.dst_node.name)
13342 self.dest_disk_info = None
13343 self.dest_x509_ca = None
13345 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13346 self.dst_node = None
13348 if len(self.op.target_node) != len(self.instance.disks):
13349 raise errors.OpPrereqError(("Received destination information for %s"
13350 " disks, but instance %s has %s disks") %
13351 (len(self.op.target_node), instance_name,
13352 len(self.instance.disks)),
13353 errors.ECODE_INVAL)
13355 cds = _GetClusterDomainSecret()
13357 # Check X509 key name
13358 try:
13359 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13360 except (TypeError, ValueError), err:
13361 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13363 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13364 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13365 errors.ECODE_INVAL)
13367 # Load and verify CA
13368 try:
13369 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13370 except OpenSSL.crypto.Error, err:
13371 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13372 (err, ), errors.ECODE_INVAL)
13374 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13375 if errcode is not None:
13376 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13377 (msg, ), errors.ECODE_INVAL)
13379 self.dest_x509_ca = cert
13381 # Verify target information
13382 disk_info = []
13383 for idx, disk_data in enumerate(self.op.target_node):
13384 try:
13385 (host, port, magic) = \
13386 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13387 except errors.GenericError, err:
13388 raise errors.OpPrereqError("Target info for disk %s: %s" %
13389 (idx, err), errors.ECODE_INVAL)
13391 disk_info.append((host, port, magic))
13393 assert len(disk_info) == len(self.op.target_node)
13394 self.dest_disk_info = disk_info
13396 else:
13397 raise errors.ProgrammerError("Unhandled export mode %r" %
13400 # instance disk type verification
13401 # TODO: Implement export support for file-based disks
13402 for disk in self.instance.disks:
13403 if disk.dev_type == constants.LD_FILE:
13404 raise errors.OpPrereqError("Export not supported for instances with"
13405 " file-based disks", errors.ECODE_INVAL)
13407 def _CleanupExports(self, feedback_fn):
13408 """Removes exports of current instance from all other nodes.
13410 If an instance in a cluster with nodes A..D was exported to node C, its
13411 exports will be removed from the nodes A, B and D.
13414 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13416 nodelist = self.cfg.GetNodeList()
13417 nodelist.remove(self.dst_node.name)
13419 # on one-node clusters nodelist will be empty after the removal
13420 # if we proceed the backup would be removed because OpBackupQuery
13421 # substitutes an empty list with the full cluster node list.
13422 iname = self.instance.name
13424 feedback_fn("Removing old exports for instance %s" % iname)
13425 exportlist = self.rpc.call_export_list(nodelist)
13426 for node in exportlist:
13427 if exportlist[node].fail_msg:
13428 continue
13429 if iname in exportlist[node].payload:
13430 msg = self.rpc.call_export_remove(node, iname).fail_msg
13431 if msg:
13432 self.LogWarning("Could not remove older export for instance %s"
13433 " on node %s: %s", iname, node, msg)
13435 def Exec(self, feedback_fn):
13436 """Export an instance to an image in the cluster.
13439 assert self.op.mode in constants.EXPORT_MODES
13441 instance = self.instance
13442 src_node = instance.primary_node
13444 if self.op.shutdown:
13445 # shutdown the instance, but not the disks
13446 feedback_fn("Shutting down instance %s" % instance.name)
13447 result = self.rpc.call_instance_shutdown(src_node, instance,
13448 self.op.shutdown_timeout)
13449 # TODO: Maybe ignore failures if ignore_remove_failures is set
13450 result.Raise("Could not shutdown instance %s on"
13451 " node %s" % (instance.name, src_node))
13453 # set the disks ID correctly since call_instance_start needs the
13454 # correct drbd minor to create the symlinks
13455 for disk in instance.disks:
13456 self.cfg.SetDiskID(disk, src_node)
13458 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13460 if activate_disks:
13461 # Activate the instance disks if we're exporting a stopped instance
13462 feedback_fn("Activating disks for %s" % instance.name)
13463 _StartInstanceDisks(self, instance, None)
13466 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13469 helper.CreateSnapshots()
13471 if (self.op.shutdown and
13472 instance.admin_state == constants.ADMINST_UP and
13473 not self.op.remove_instance):
13474 assert not activate_disks
13475 feedback_fn("Starting instance %s" % instance.name)
13476 result = self.rpc.call_instance_start(src_node,
13477 (instance, None, None), False)
13478 msg = result.fail_msg
13480 feedback_fn("Failed to start instance: %s" % msg)
13481 _ShutdownInstanceDisks(self, instance)
13482 raise errors.OpExecError("Could not start instance: %s" % msg)
13484 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13485 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13486 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13487 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13488 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13490 (key_name, _, _) = self.x509_key_name
13493 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13496 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13497 key_name, dest_ca_pem,
13502 # Check for backwards compatibility
13503 assert len(dresults) == len(instance.disks)
13504 assert compat.all(isinstance(i, bool) for i in dresults), \
13505 "Not all results are boolean: %r" % dresults
13509 feedback_fn("Deactivating disks for %s" % instance.name)
13510 _ShutdownInstanceDisks(self, instance)
13512 if not (compat.all(dresults) and fin_resu):
13513 failures = []
13514 if not fin_resu:
13515 failures.append("export finalization")
13516 if not compat.all(dresults):
13517 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13518 if not dsk)
13519 failures.append("disk export: disk(s) %s" % fdsk)
13521 raise errors.OpExecError("Export failed, errors in %s" %
13522 utils.CommaJoin(failures))
13524 # At this point, the export was successful, we can cleanup/finish
13526 # Remove instance if requested
13527 if self.op.remove_instance:
13528 feedback_fn("Removing instance %s" % instance.name)
13529 _RemoveInstance(self, feedback_fn, instance,
13530 self.op.ignore_remove_failures)
13532 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13533 self._CleanupExports(feedback_fn)
13535 return fin_resu, dresults
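# The (fin_resu, dresults) pair returned above carries the finalization status
# and one boolean per instance disk.  A small sketch of how a caller might
# summarise it, mirroring the failure aggregation in Exec (illustrative only):
def _SketchExportFailures(fin_resu, dresults):
  """Returns (failed, failed_disk_indices) for an export result pair."""
  failed_disks = [idx for (idx, ok) in enumerate(dresults) if not ok]
  return ((not fin_resu) or bool(failed_disks), failed_disks)

# _SketchExportFailures(True, [True, False]) == (True, [1])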
13538 class LUBackupRemove(NoHooksLU):
13539 """Remove exports related to the named instance.
13544 def ExpandNames(self):
13545 self.needed_locks = {}
13546 # We need all nodes to be locked in order for RemoveExport to work, but we
13547 # don't need to lock the instance itself, as nothing will happen to it (and
13548 # we can remove exports also for a removed instance)
13549 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13551 def Exec(self, feedback_fn):
13552 """Remove any export.
13555 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13556 # If the instance was not found we'll try with the name that was passed in.
13557 # This will only work if it was an FQDN, though.
13558 fqdn_warn = False
13559 if not instance_name:
13560 fqdn_warn = True
13561 instance_name = self.op.instance_name
13563 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13564 exportlist = self.rpc.call_export_list(locked_nodes)
13565 found = False
13566 for node in exportlist:
13567 msg = exportlist[node].fail_msg
13568 if msg:
13569 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13570 continue
13571 if instance_name in exportlist[node].payload:
13572 found = True
13573 result = self.rpc.call_export_remove(node, instance_name)
13574 msg = result.fail_msg
13575 if msg:
13576 logging.error("Could not remove export for instance %s"
13577 " on node %s: %s", instance_name, node, msg)
13579 if fqdn_warn and not found:
13580 feedback_fn("Export not found. If trying to remove an export belonging"
13581 " to a deleted instance please use its Fully Qualified"
13585 class LUGroupAdd(LogicalUnit):
13586 """Logical unit for creating node groups.
13589 HPATH = "group-add"
13590 HTYPE = constants.HTYPE_GROUP
13593 def ExpandNames(self):
13594 # We need the new group's UUID here so that we can create and acquire the
13595 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13596 # that it should not check whether the UUID exists in the configuration.
13597 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13598 self.needed_locks = {}
13599 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13601 def CheckPrereq(self):
13602 """Check prerequisites.
13604 This checks that the given group name is not an existing node group
13605 already.
13608 try:
13609 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13610 except errors.OpPrereqError:
13611 pass
13612 else:
13613 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13614 " node group (UUID: %s)" %
13615 (self.op.group_name, existing_uuid),
13616 errors.ECODE_EXISTS)
13618 if self.op.ndparams:
13619 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13621 if self.op.hv_state:
13622 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13623 else:
13624 self.new_hv_state = None
13626 if self.op.disk_state:
13627 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13628 else:
13629 self.new_disk_state = None
13631 if self.op.diskparams:
13632 for templ in constants.DISK_TEMPLATES:
13633 if templ in self.op.diskparams:
13634 utils.ForceDictType(self.op.diskparams[templ],
13635 constants.DISK_DT_TYPES)
13636 self.new_diskparams = self.op.diskparams
13637 try:
13638 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13639 except errors.OpPrereqError, err:
13640 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
13641 errors.ECODE_INVAL)
13643 self.new_diskparams = {}
13645 if self.op.ipolicy:
13646 cluster = self.cfg.GetClusterInfo()
13647 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13648 try:
13649 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13650 except errors.ConfigurationError, err:
13651 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13652 errors.ECODE_INVAL)
13654 def BuildHooksEnv(self):
13655 """Build hooks env.
13659 "GROUP_NAME": self.op.group_name,
13662 def BuildHooksNodes(self):
13663 """Build hooks nodes.
13666 mn = self.cfg.GetMasterNode()
13667 return ([mn], [mn])
13669 def Exec(self, feedback_fn):
13670 """Add the node group to the cluster.
13673 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13674 uuid=self.group_uuid,
13675 alloc_policy=self.op.alloc_policy,
13676 ndparams=self.op.ndparams,
13677 diskparams=self.new_diskparams,
13678 ipolicy=self.op.ipolicy,
13679 hv_state_static=self.new_hv_state,
13680 disk_state_static=self.new_disk_state)
13682 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13683 del self.remove_locks[locking.LEVEL_NODEGROUP]
13686 class LUGroupAssignNodes(NoHooksLU):
13687 """Logical unit for assigning nodes to groups.
13692 def ExpandNames(self):
13693 # These raise errors.OpPrereqError on their own:
13694 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13695 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13697 # We want to lock all the affected nodes and groups. We have readily
13698 # available the list of nodes, and the *destination* group. To gather the
13699 # list of "source" groups, we need to fetch node information later on.
13700 self.needed_locks = {
13701 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13702 locking.LEVEL_NODE: self.op.nodes,
13705 def DeclareLocks(self, level):
13706 if level == locking.LEVEL_NODEGROUP:
13707 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13709 # Try to get all affected nodes' groups without having the group or node
13710 # lock yet. Needs verification later in the code flow.
13711 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13713 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13715 def CheckPrereq(self):
13716 """Check prerequisites.
13719 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13720 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13721 frozenset(self.op.nodes))
13723 expected_locks = (set([self.group_uuid]) |
13724 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13725 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13726 if actual_locks != expected_locks:
13727 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13728 " current groups are '%s', used to be '%s'" %
13729 (utils.CommaJoin(expected_locks),
13730 utils.CommaJoin(actual_locks)))
13732 self.node_data = self.cfg.GetAllNodesInfo()
13733 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13734 instance_data = self.cfg.GetAllInstancesInfo()
13736 if self.group is None:
13737 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13738 (self.op.group_name, self.group_uuid))
13740 (new_splits, previous_splits) = \
13741 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13742 for node in self.op.nodes],
13743 self.node_data, instance_data)
13745 if new_splits:
13746 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13748 if not self.op.force:
13749 raise errors.OpExecError("The following instances get split by this"
13750 " change and --force was not given: %s" %
13753 self.LogWarning("This operation will split the following instances: %s",
13756 if previous_splits:
13757 self.LogWarning("In addition, these already-split instances continue"
13758 " to be split across groups: %s",
13759 utils.CommaJoin(utils.NiceSort(previous_splits)))
13761 def Exec(self, feedback_fn):
13762 """Assign nodes to a new group.
13765 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13767 self.cfg.AssignGroupNodes(mods)
13770 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13771 """Check for split instances after a node assignment.
13773 This method considers a series of node assignments as an atomic operation,
13774 and returns information about split instances after applying the set of
13777 In particular, it returns information about newly split instances, and
13778 instances that were already split, and remain so after the change.
13780 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13781 considered.
13783 @type changes: list of (node_name, new_group_uuid) pairs.
13784 @param changes: list of node assignments to consider.
13785 @param node_data: a dict with data for all nodes
13786 @param instance_data: a dict with all instances to consider
13787 @rtype: a two-tuple
13788 @return: a list of instances that were previously okay and become split as a
13789 consequence of this change, and a list of instances that were previously
13790 split and that this change does not fix.
13793 changed_nodes = dict((node, group) for node, group in changes
13794 if node_data[node].group != group)
13796 all_split_instances = set()
13797 previously_split_instances = set()
13799 def InstanceNodes(instance):
13800 return [instance.primary_node] + list(instance.secondary_nodes)
13802 for inst in instance_data.values():
13803 if inst.disk_template not in constants.DTS_INT_MIRROR:
13804 continue
13806 instance_nodes = InstanceNodes(inst)
13808 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13809 previously_split_instances.add(inst.name)
13811 if len(set(changed_nodes.get(node, node_data[node].group)
13812 for node in instance_nodes)) > 1:
13813 all_split_instances.add(inst.name)
13815 return (list(all_split_instances - previously_split_instances),
13816 list(previously_split_instances & all_split_instances))
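# Worked example for CheckAssignmentForSplitInstances, using plain dicts in
# place of node/instance objects (all names are made up).  Moving "node2" to
# group "g2" leaves the instance spanning two groups, so it counts as newly
# split; the check mirrors changed_nodes.get(node, node_data[node].group).
def _SketchBecomesSplit():
  node_group = {"node1": "g1", "node2": "g1", "node3": "g2"}
  changes = {"node2": "g2"}              # proposed node -> new group
  instance_nodes = ["node1", "node2"]    # primary + secondaries
  groups_after = set(changes.get(n, node_group[n]) for n in instance_nodes)
  return len(groups_after) > 1

# _SketchBecomesSplit() == True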
13819 class _GroupQuery(_QueryBase):
13820 FIELDS = query.GROUP_FIELDS
13822 def ExpandNames(self, lu):
13823 lu.needed_locks = {}
13825 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13826 self._cluster = lu.cfg.GetClusterInfo()
13827 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13829 if not self.names:
13830 self.wanted = [name_to_uuid[name]
13831 for name in utils.NiceSort(name_to_uuid.keys())]
13832 else:
13833 # Accept names to be either names or UUIDs.
13834 missing = []
13835 self.wanted = []
13836 all_uuid = frozenset(self._all_groups.keys())
13838 for name in self.names:
13839 if name in all_uuid:
13840 self.wanted.append(name)
13841 elif name in name_to_uuid:
13842 self.wanted.append(name_to_uuid[name])
13844 missing.append(name)
13847 raise errors.OpPrereqError("Some groups do not exist: %s" %
13848 utils.CommaJoin(missing),
13849 errors.ECODE_NOENT)
13851 def DeclareLocks(self, lu, level):
13852 pass
13854 def _GetQueryData(self, lu):
13855 """Computes the list of node groups and their attributes.
13858 do_nodes = query.GQ_NODE in self.requested_data
13859 do_instances = query.GQ_INST in self.requested_data
13861 group_to_nodes = None
13862 group_to_instances = None
13864 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13865 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13866 # latter GetAllInstancesInfo() is not enough, for we have to go through
13867 # instance->node. Hence, we will need to process nodes even if we only need
13868 # instance information.
13869 if do_nodes or do_instances:
13870 all_nodes = lu.cfg.GetAllNodesInfo()
13871 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13872 node_to_group = {}
13874 for node in all_nodes.values():
13875 if node.group in group_to_nodes:
13876 group_to_nodes[node.group].append(node.name)
13877 node_to_group[node.name] = node.group
13879 if do_instances:
13880 all_instances = lu.cfg.GetAllInstancesInfo()
13881 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13883 for instance in all_instances.values():
13884 node = instance.primary_node
13885 if node in node_to_group:
13886 group_to_instances[node_to_group[node]].append(instance.name)
13888 if not do_nodes:
13889 # Do not pass on node information if it was not requested.
13890 group_to_nodes = None
13892 return query.GroupQueryData(self._cluster,
13893 [self._all_groups[uuid]
13894 for uuid in self.wanted],
13895 group_to_nodes, group_to_instances,
13896 query.GQ_DISKPARAMS in self.requested_data)
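# Illustrative shapes of the mappings computed above (names invented):
# group_to_nodes maps each requested group UUID to its node names, and
# group_to_instances maps it to the instances whose primary node is in it.
_EXAMPLE_GROUP_TO_NODES = {
  "uuid-group1": ["node1.example.com", "node2.example.com"],
  "uuid-group2": ["node3.example.com"],
}
_EXAMPLE_GROUP_TO_INSTANCES = {
  "uuid-group1": ["instance1.example.com"],
  "uuid-group2": [],
}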
13899 class LUGroupQuery(NoHooksLU):
13900 """Logical unit for querying node groups.
13905 def CheckArguments(self):
13906 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13907 self.op.output_fields, False)
13909 def ExpandNames(self):
13910 self.gq.ExpandNames(self)
13912 def DeclareLocks(self, level):
13913 self.gq.DeclareLocks(self, level)
13915 def Exec(self, feedback_fn):
13916 return self.gq.OldStyleQuery(self)
13919 class LUGroupSetParams(LogicalUnit):
13920 """Modifies the parameters of a node group.
13923 HPATH = "group-modify"
13924 HTYPE = constants.HTYPE_GROUP
13927 def CheckArguments(self):
13930 self.op.diskparams,
13931 self.op.alloc_policy,
13933 self.op.disk_state,
13937 if all_changes.count(None) == len(all_changes):
13938 raise errors.OpPrereqError("Please pass at least one modification",
13939 errors.ECODE_INVAL)
13941 def ExpandNames(self):
13942 # This raises errors.OpPrereqError on its own:
13943 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13945 self.needed_locks = {
13946 locking.LEVEL_INSTANCE: [],
13947 locking.LEVEL_NODEGROUP: [self.group_uuid],
13950 self.share_locks[locking.LEVEL_INSTANCE] = 1
13952 def DeclareLocks(self, level):
13953 if level == locking.LEVEL_INSTANCE:
13954 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13956 # Lock instances optimistically, needs verification once group lock has
13957 # been acquired
13958 self.needed_locks[locking.LEVEL_INSTANCE] = \
13959 self.cfg.GetNodeGroupInstances(self.group_uuid)
13962 def _UpdateAndVerifyDiskParams(old, new):
13963 """Updates and verifies disk parameters.
13966 new_params = _GetUpdatedParams(old, new)
13967 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
13968 return new_params
13970 def CheckPrereq(self):
13971 """Check prerequisites.
13974 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13976 # Check if locked instances are still correct
13977 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13979 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13980 cluster = self.cfg.GetClusterInfo()
13982 if self.group is None:
13983 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13984 (self.op.group_name, self.group_uuid))
13986 if self.op.ndparams:
13987 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13988 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
13989 self.new_ndparams = new_ndparams
13991 if self.op.diskparams:
13992 diskparams = self.group.diskparams
13993 uavdp = self._UpdateAndVerifyDiskParams
13994 # For each disktemplate subdict update and verify the values
13995 new_diskparams = dict((dt,
13996 uavdp(diskparams.get(dt, {}),
13997 self.op.diskparams[dt]))
13998 for dt in constants.DISK_TEMPLATES
13999 if dt in self.op.diskparams)
14000 # Now that we have all the subdicts of diskparams ready, let's merge the
14001 # actual dict with all updated subdicts
14002 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14003 try:
14004 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14005 except errors.OpPrereqError, err:
14006 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14007 errors.ECODE_INVAL)
14009 if self.op.hv_state:
14010 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14011 self.group.hv_state_static)
14013 if self.op.disk_state:
14014 self.new_disk_state = \
14015 _MergeAndVerifyDiskState(self.op.disk_state,
14016 self.group.disk_state_static)
14018 if self.op.ipolicy:
14019 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14023 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14024 inst_filter = lambda inst: inst.name in owned_instances
14025 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14027 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
14029 new_ipolicy, instances)
14032 self.LogWarning("After the ipolicy change the following instances"
14033 " violate them: %s",
14034 utils.CommaJoin(violations))
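# The diskparams handling above updates each template's subdict and then
# layers the result over the existing group diskparams.  The sketch below
# approximates that two-step merge with plain dict.update calls; the real
# _GetUpdatedParams/objects.FillDict helpers also handle value-removal
# markers, so treat this only as an illustration.
def _SketchDiskParamsMerge(group_diskparams, op_diskparams):
  merged = dict((dt, dict(params)) for (dt, params) in group_diskparams.items())
  for (dt, new_params) in op_diskparams.items():
    merged.setdefault(dt, {}).update(new_params)
  return merged

# _SketchDiskParamsMerge({"drbd": {"resync-rate": 1024}},
#                        {"drbd": {"resync-rate": 2048}})
# == {"drbd": {"resync-rate": 2048}}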
14036 def BuildHooksEnv(self):
14037 """Build hooks env.
14041 "GROUP_NAME": self.op.group_name,
14042 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14045 def BuildHooksNodes(self):
14046 """Build hooks nodes.
14049 mn = self.cfg.GetMasterNode()
14050 return ([mn], [mn])
14052 def Exec(self, feedback_fn):
14053 """Modifies the node group.
14058 if self.op.ndparams:
14059 self.group.ndparams = self.new_ndparams
14060 result.append(("ndparams", str(self.group.ndparams)))
14062 if self.op.diskparams:
14063 self.group.diskparams = self.new_diskparams
14064 result.append(("diskparams", str(self.group.diskparams)))
14066 if self.op.alloc_policy:
14067 self.group.alloc_policy = self.op.alloc_policy
14069 if self.op.hv_state:
14070 self.group.hv_state_static = self.new_hv_state
14072 if self.op.disk_state:
14073 self.group.disk_state_static = self.new_disk_state
14075 if self.op.ipolicy:
14076 self.group.ipolicy = self.new_ipolicy
14078 self.cfg.Update(self.group, feedback_fn)
14082 class LUGroupRemove(LogicalUnit):
14083 HPATH = "group-remove"
14084 HTYPE = constants.HTYPE_GROUP
14087 def ExpandNames(self):
14088 # This raises errors.OpPrereqError on its own:
14089 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14090 self.needed_locks = {
14091 locking.LEVEL_NODEGROUP: [self.group_uuid],
14094 def CheckPrereq(self):
14095 """Check prerequisites.
14097 This checks that the given group name exists as a node group, that it is
14098 empty (i.e., contains no nodes), and that it is not the last group of the
14099 cluster.
14102 # Verify that the group is empty.
14103 group_nodes = [node.name
14104 for node in self.cfg.GetAllNodesInfo().values()
14105 if node.group == self.group_uuid]
14108 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14110 (self.op.group_name,
14111 utils.CommaJoin(utils.NiceSort(group_nodes))),
14112 errors.ECODE_STATE)
14114 # Verify the cluster would not be left group-less.
14115 if len(self.cfg.GetNodeGroupList()) == 1:
14116 raise errors.OpPrereqError("Group '%s' is the only group,"
14117 " cannot be removed" %
14118 self.op.group_name,
14119 errors.ECODE_STATE)
14121 def BuildHooksEnv(self):
14122 """Build hooks env.
14126 "GROUP_NAME": self.op.group_name,
14129 def BuildHooksNodes(self):
14130 """Build hooks nodes.
14133 mn = self.cfg.GetMasterNode()
14134 return ([mn], [mn])
14136 def Exec(self, feedback_fn):
14137 """Remove the node group.
14140 try:
14141 self.cfg.RemoveNodeGroup(self.group_uuid)
14142 except errors.ConfigurationError:
14143 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14144 (self.op.group_name, self.group_uuid))
14146 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14149 class LUGroupRename(LogicalUnit):
14150 HPATH = "group-rename"
14151 HTYPE = constants.HTYPE_GROUP
14154 def ExpandNames(self):
14155 # This raises errors.OpPrereqError on its own:
14156 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14158 self.needed_locks = {
14159 locking.LEVEL_NODEGROUP: [self.group_uuid],
14162 def CheckPrereq(self):
14163 """Check prerequisites.
14165 Ensures requested new name is not yet used.
14168 try:
14169 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14170 except errors.OpPrereqError:
14171 pass
14172 else:
14173 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14174 " node group (UUID: %s)" %
14175 (self.op.new_name, new_name_uuid),
14176 errors.ECODE_EXISTS)
14178 def BuildHooksEnv(self):
14179 """Build hooks env.
14183 "OLD_NAME": self.op.group_name,
14184 "NEW_NAME": self.op.new_name,
14187 def BuildHooksNodes(self):
14188 """Build hooks nodes.
14191 mn = self.cfg.GetMasterNode()
14193 all_nodes = self.cfg.GetAllNodesInfo()
14194 all_nodes.pop(mn, None)
14196 run_nodes = [mn]
14197 run_nodes.extend(node.name for node in all_nodes.values()
14198 if node.group == self.group_uuid)
14200 return (run_nodes, run_nodes)
14202 def Exec(self, feedback_fn):
14203 """Rename the node group.
14206 group = self.cfg.GetNodeGroup(self.group_uuid)
14208 if group is None:
14209 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14210 (self.op.group_name, self.group_uuid))
14212 group.name = self.op.new_name
14213 self.cfg.Update(group, feedback_fn)
14215 return self.op.new_name
14218 class LUGroupEvacuate(LogicalUnit):
14219 HPATH = "group-evacuate"
14220 HTYPE = constants.HTYPE_GROUP
14223 def ExpandNames(self):
14224 # This raises errors.OpPrereqError on its own:
14225 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14227 if self.op.target_groups:
14228 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14229 self.op.target_groups)
14230 else:
14231 self.req_target_uuids = []
14233 if self.group_uuid in self.req_target_uuids:
14234 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14235 " as a target group (targets are %s)" %
14237 utils.CommaJoin(self.req_target_uuids)),
14238 errors.ECODE_INVAL)
14240 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14242 self.share_locks = _ShareAll()
14243 self.needed_locks = {
14244 locking.LEVEL_INSTANCE: [],
14245 locking.LEVEL_NODEGROUP: [],
14246 locking.LEVEL_NODE: [],
14249 def DeclareLocks(self, level):
14250 if level == locking.LEVEL_INSTANCE:
14251 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14253 # Lock instances optimistically, needs verification once node and group
14254 # locks have been acquired
14255 self.needed_locks[locking.LEVEL_INSTANCE] = \
14256 self.cfg.GetNodeGroupInstances(self.group_uuid)
14258 elif level == locking.LEVEL_NODEGROUP:
14259 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14261 if self.req_target_uuids:
14262 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14264 # Lock all groups used by instances optimistically; this requires going
14265 # via the node before it's locked, requiring verification later on
14266 lock_groups.update(group_uuid
14267 for instance_name in
14268 self.owned_locks(locking.LEVEL_INSTANCE)
14270 self.cfg.GetInstanceNodeGroups(instance_name))
14271 else:
14272 # No target groups, need to lock all of them
14273 lock_groups = locking.ALL_SET
14275 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14277 elif level == locking.LEVEL_NODE:
14278 # This will only lock the nodes in the group to be evacuated which
14279 # contain actual instances
14280 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14281 self._LockInstancesNodes()
14283 # Lock all nodes in group to be evacuated and target groups
14284 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14285 assert self.group_uuid in owned_groups
14286 member_nodes = [node_name
14287 for group in owned_groups
14288 for node_name in self.cfg.GetNodeGroup(group).members]
14289 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14291 def CheckPrereq(self):
14292 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14293 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14294 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14296 assert owned_groups.issuperset(self.req_target_uuids)
14297 assert self.group_uuid in owned_groups
14299 # Check if locked instances are still correct
14300 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14302 # Get instance information
14303 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14305 # Check if node groups for locked instances are still correct
14306 _CheckInstancesNodeGroups(self.cfg, self.instances,
14307 owned_groups, owned_nodes, self.group_uuid)
14309 if self.req_target_uuids:
14310 # User requested specific target groups
14311 self.target_uuids = self.req_target_uuids
14312 else:
14313 # All groups except the one to be evacuated are potential targets
14314 self.target_uuids = [group_uuid for group_uuid in owned_groups
14315 if group_uuid != self.group_uuid]
14317 if not self.target_uuids:
14318 raise errors.OpPrereqError("There are no possible target groups",
14319 errors.ECODE_INVAL)
14321 def BuildHooksEnv(self):
14322 """Build hooks env.
14326 "GROUP_NAME": self.op.group_name,
14327 "TARGET_GROUPS": " ".join(self.target_uuids),
14330 def BuildHooksNodes(self):
14331 """Build hooks nodes.
14334 mn = self.cfg.GetMasterNode()
14336 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14338 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14340 return (run_nodes, run_nodes)
14342 def Exec(self, feedback_fn):
14343 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14345 assert self.group_uuid not in self.target_uuids
14347 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14348 instances=instances, target_groups=self.target_uuids)
14350 ial.Run(self.op.iallocator)
14352 if not ial.success:
14353 raise errors.OpPrereqError("Can't compute group evacuation using"
14354 " iallocator '%s': %s" %
14355 (self.op.iallocator, ial.info),
14356 errors.ECODE_NORES)
14358 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14360 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14361 len(jobs), self.op.group_name)
14363 return ResultWithJobs(jobs)
14366 class TagsLU(NoHooksLU): # pylint: disable=W0223
14367 """Generic tags LU.
14369 This is an abstract class which is the parent of all the other tags LUs.
14372 def ExpandNames(self):
14373 self.group_uuid = None
14374 self.needed_locks = {}
14376 if self.op.kind == constants.TAG_NODE:
14377 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14378 lock_level = locking.LEVEL_NODE
14379 lock_name = self.op.name
14380 elif self.op.kind == constants.TAG_INSTANCE:
14381 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14382 lock_level = locking.LEVEL_INSTANCE
14383 lock_name = self.op.name
14384 elif self.op.kind == constants.TAG_NODEGROUP:
14385 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14386 lock_level = locking.LEVEL_NODEGROUP
14387 lock_name = self.group_uuid
14388 else:
14389 lock_level = None
14390 lock_name = None
14392 if lock_level and getattr(self.op, "use_locking", True):
14393 self.needed_locks[lock_level] = lock_name
14395 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14396 # not possible to acquire the BGL based on opcode parameters)
14398 def CheckPrereq(self):
14399 """Check prerequisites.
14402 if self.op.kind == constants.TAG_CLUSTER:
14403 self.target = self.cfg.GetClusterInfo()
14404 elif self.op.kind == constants.TAG_NODE:
14405 self.target = self.cfg.GetNodeInfo(self.op.name)
14406 elif self.op.kind == constants.TAG_INSTANCE:
14407 self.target = self.cfg.GetInstanceInfo(self.op.name)
14408 elif self.op.kind == constants.TAG_NODEGROUP:
14409 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14411 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14412 str(self.op.kind), errors.ECODE_INVAL)
14415 class LUTagsGet(TagsLU):
14416 """Returns the tags of a given object.
14421 def ExpandNames(self):
14422 TagsLU.ExpandNames(self)
14424 # Share locks as this is only a read operation
14425 self.share_locks = _ShareAll()
14427 def Exec(self, feedback_fn):
14428 """Returns the tag list.
14431 return list(self.target.GetTags())
14434 class LUTagsSearch(NoHooksLU):
14435 """Searches the tags for a given pattern.
14440 def ExpandNames(self):
14441 self.needed_locks = {}
14443 def CheckPrereq(self):
14444 """Check prerequisites.
14446 This checks the pattern passed for validity by compiling it.
14449 try:
14450 self.re = re.compile(self.op.pattern)
14451 except re.error, err:
14452 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14453 (self.op.pattern, err), errors.ECODE_INVAL)
14455 def Exec(self, feedback_fn):
14456 """Returns the tag list.
14459 cfg = self.cfg
14460 tgts = [("/cluster", cfg.GetClusterInfo())]
14461 ilist = cfg.GetAllInstancesInfo().values()
14462 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14463 nlist = cfg.GetAllNodesInfo().values()
14464 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14465 tgts.extend(("/nodegroup/%s" % n.name, n)
14466 for n in cfg.GetAllNodeGroupsInfo().values())
14467 results = []
14468 for path, target in tgts:
14469 for tag in target.GetTags():
14470 if self.re.search(tag):
14471 results.append((path, tag))
14473 return results
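# The tag search above simply collects (path, tag) pairs whose tag matches the
# compiled pattern.  A self-contained sketch with invented data:
def _SketchTagSearch(pattern, tags_by_path):
  """Returns sorted (path, tag) pairs whose tag matches the given regexp."""
  rex = re.compile(pattern)
  return [(path, tag)
          for (path, tags) in sorted(tags_by_path.items())
          for tag in sorted(tags)
          if rex.search(tag)]

# _SketchTagSearch("^env:", {"/instances/instance1": ["env:prod"],
#                            "/nodes/node1": ["rack:r1"]})
# == [("/instances/instance1", "env:prod")]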
14475 class LUTagsSet(TagsLU):
14476 """Sets a tag on a given object.
14481 def CheckPrereq(self):
14482 """Check prerequisites.
14484 This checks the type and length of the tag name and value.
14487 TagsLU.CheckPrereq(self)
14488 for tag in self.op.tags:
14489 objects.TaggableObject.ValidateTag(tag)
14491 def Exec(self, feedback_fn):
14495 try:
14496 for tag in self.op.tags:
14497 self.target.AddTag(tag)
14498 except errors.TagError, err:
14499 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14500 self.cfg.Update(self.target, feedback_fn)
14503 class LUTagsDel(TagsLU):
14504 """Delete a list of tags from a given object.
14509 def CheckPrereq(self):
14510 """Check prerequisites.
14512 This checks that we have the given tag.
14515 TagsLU.CheckPrereq(self)
14516 for tag in self.op.tags:
14517 objects.TaggableObject.ValidateTag(tag)
14518 del_tags = frozenset(self.op.tags)
14519 cur_tags = self.target.GetTags()
14521 diff_tags = del_tags - cur_tags
14522 if diff_tags:
14523 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14524 raise errors.OpPrereqError("Tag(s) %s not found" %
14525 (utils.CommaJoin(diff_names), ),
14526 errors.ECODE_NOENT)
14528 def Exec(self, feedback_fn):
14529 """Remove the tag from the object.
14532 for tag in self.op.tags:
14533 self.target.RemoveTag(tag)
14534 self.cfg.Update(self.target, feedback_fn)
14537 class LUTestDelay(NoHooksLU):
14538 """Sleep for a specified amount of time.
14540 This LU sleeps on the master and/or nodes for a specified amount of
14541 time.
14546 def ExpandNames(self):
14547 """Expand names and set required locks.
14549 This expands the node list, if any.
14552 self.needed_locks = {}
14553 if self.op.on_nodes:
14554 # _GetWantedNodes can be used here, but is not always appropriate to use
14555 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14556 # more information.
14557 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14558 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14560 def _TestDelay(self):
14561 """Do the actual sleep.
14564 if self.op.on_master:
14565 if not utils.TestDelay(self.op.duration):
14566 raise errors.OpExecError("Error during master delay test")
14567 if self.op.on_nodes:
14568 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14569 for node, node_result in result.items():
14570 node_result.Raise("Failure during rpc call to node %s" % node)
14572 def Exec(self, feedback_fn):
14573 """Execute the test delay opcode, with the wanted repetitions.
14576 if self.op.repeat == 0:
14577 self._TestDelay()
14578 else:
14579 top_value = self.op.repeat - 1
14580 for i in range(self.op.repeat):
14581 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14582 self._TestDelay()
14585 class LUTestJqueue(NoHooksLU):
14586 """Utility LU to test some aspects of the job queue.
14591 # Must be lower than default timeout for WaitForJobChange to see whether it
14592 # notices changed jobs
14593 _CLIENT_CONNECT_TIMEOUT = 20.0
14594 _CLIENT_CONFIRM_TIMEOUT = 60.0
14597 def _NotifyUsingSocket(cls, cb, errcls):
14598 """Opens a Unix socket and waits for another program to connect.
14601 @param cb: Callback to send socket name to client
14602 @type errcls: class
14603 @param errcls: Exception class to use for errors
14606 # Using a temporary directory as there's no easy way to create temporary
14607 # sockets without writing a custom loop around tempfile.mktemp and
14609 tmpdir = tempfile.mkdtemp()
14611 tmpsock = utils.PathJoin(tmpdir, "sock")
14613 logging.debug("Creating temporary socket at %s", tmpsock)
14614 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14619 # Send details to client
14622 # Wait for client to connect before continuing
14623 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14624 try:
14625 (conn, _) = sock.accept()
14626 except socket.error, err:
14627 raise errcls("Client didn't connect in time (%s)" % err)
14631 # Remove as soon as client is connected
14632 shutil.rmtree(tmpdir)
14634 # Wait for client to close
14637 # pylint: disable=E1101
14638 # Instance of '_socketobject' has no ... member
14639 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14641 except socket.error, err:
14642 raise errcls("Client failed to confirm notification (%s)" % err)
14646 def _SendNotification(self, test, arg, sockname):
14647 """Sends a notification to the client.
14650 @param test: Test name
14651 @param arg: Test argument (depends on test)
14652 @type sockname: string
14653 @param sockname: Socket path
14656 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14658 def _Notify(self, prereq, test, arg):
14659 """Notifies the client of a test.
14662 @param prereq: Whether this is a prereq-phase test
14664 @param test: Test name
14665 @param arg: Test argument (depends on test)
14669 errcls = errors.OpPrereqError
14671 errcls = errors.OpExecError
14673 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14677 def CheckArguments(self):
14678 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14679 self.expandnames_calls = 0
14681 def ExpandNames(self):
14682 checkargs_calls = getattr(self, "checkargs_calls", 0)
14683 if checkargs_calls < 1:
14684 raise errors.ProgrammerError("CheckArguments was not called")
14686 self.expandnames_calls += 1
14688 if self.op.notify_waitlock:
14689 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14691 self.LogInfo("Expanding names")
14693 # Get lock on master node (just to get a lock, not for a particular reason)
14694 self.needed_locks = {
14695 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14698 def Exec(self, feedback_fn):
14699 if self.expandnames_calls < 1:
14700 raise errors.ProgrammerError("ExpandNames was not called")
14702 if self.op.notify_exec:
14703 self._Notify(False, constants.JQT_EXEC, None)
14705 self.LogInfo("Executing")
14707 if self.op.log_messages:
14708 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14709 for idx, msg in enumerate(self.op.log_messages):
14710 self.LogInfo("Sending log message %s", idx + 1)
14711 feedback_fn(constants.JQT_MSGPREFIX + msg)
14712 # Report how many test messages have been sent
14713 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14716 raise errors.OpExecError("Opcode failure was requested")
14721 class IAllocator(object):
14722 """IAllocator framework.
14724 An IAllocator instance has four sets of attributes:
14725 - cfg that is needed to query the cluster
14726 - input data (all members of the _KEYS class attribute are required)
14727 - four buffer attributes (in_data, in_text, out_data, out_text), that
14728 represent the input (to the external script) in text and data structure
14729 format, and the output from it, again in both formats
14730 - the result variables from the script (success, info, result) for
14734 # pylint: disable=R0902
14735 # lots of instance attributes
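# Rough life cycle of an IAllocator instance (a sketch; field lists are
# abridged and values illustrative, not exhaustive):
#   1. __init__ stores the per-mode keyword arguments and calls
#      _BuildInputData, which in turn calls _ComputeClusterData.
#   2. in_data becomes a dict along the lines of
#        {"version": ..., "cluster_name": ..., "nodegroups": {...},
#         "nodes": {...}, "instances": {...},
#         "request": {"type": <mode>, ...per-mode keys...}}
#      and in_text is its serialized form (serializer.Dump).
#   3. Run() ships in_text to the iallocator runner on the master node via
#      RPC and stores the raw reply in out_text.
#   4. _ValidateResult() parses out_text into out_data and exposes the
#      "success", "info" and "result" keys as attributes.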
14737 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14739 self.rpc = rpc_runner
14740 # init buffer variables
14741 self.in_text = self.out_text = self.in_data = self.out_data = None
14742 # init all input fields so that pylint is happy
14744 self.memory = self.disks = self.disk_template = self.spindle_use = None
14745 self.os = self.tags = self.nics = self.vcpus = None
14746 self.hypervisor = None
14747 self.relocate_from = None
14749 self.instances = None
14750 self.evac_mode = None
14751 self.target_groups = []
14753 self.required_nodes = None
14754 # init result fields
14755 self.success = self.info = self.result = None
14758 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14760 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14761 " IAllocator" % self.mode)
14763 keyset = [n for (n, _) in keydata]
14766 if key not in keyset:
14767 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14768 " IAllocator" % key)
14769 setattr(self, key, kwargs[key])
14772 if key not in kwargs:
14773 raise errors.ProgrammerError("Missing input parameter '%s' to"
14774 " IAllocator" % key)
14775 self._BuildInputData(compat.partial(fn, self), keydata)
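# A minimal usage sketch (cf. LUTestAllocator.Exec further below); the
# instance name, node name and allocator script name are placeholders:
#   ial = IAllocator(cfg, rpc_runner, constants.IALLOCATOR_MODE_RELOC,
#                    name="inst1.example.com",
#                    relocate_from=["node2.example.com"])
#   ial.Run("hail")          # name of the iallocator script to execute
#   if ial.success:
#     new_nodes = ial.result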
14777 def _ComputeClusterData(self):
14778 """Compute the generic allocator input data.
14780 This is the data that is independent of the actual operation.
14784 cluster_info = cfg.GetClusterInfo()
14787 "version": constants.IALLOCATOR_VERSION,
14788 "cluster_name": cfg.GetClusterName(),
14789 "cluster_tags": list(cluster_info.GetTags()),
14790 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14791 "ipolicy": cluster_info.ipolicy,
14793 ninfo = cfg.GetAllNodesInfo()
14794 iinfo = cfg.GetAllInstancesInfo().values()
14795 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14798 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14800 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14801 hypervisor_name = self.hypervisor
14802 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14803 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14805 hypervisor_name = cluster_info.primary_hypervisor
14807 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14810 self.rpc.call_all_instances_info(node_list,
14811 cluster_info.enabled_hypervisors)
14813 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14815 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14816 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14817 i_list, config_ndata)
14818 assert len(data["nodes"]) == len(ninfo), \
14819 "Incomplete node data computed"
14821 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14823 self.in_data = data
14826 def _ComputeNodeGroupData(cfg):
14827 """Compute node groups data.
14830 cluster = cfg.GetClusterInfo()
14831 ng = dict((guuid, {
14832 "name": gdata.name,
14833 "alloc_policy": gdata.alloc_policy,
14834 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14836 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14841 def _ComputeBasicNodeData(cfg, node_cfg):
14842 """Compute global node data.
14845 @returns: a dict of name: (node dict, node config)
14848 # fill in static (config-based) values
14849 node_results = dict((ninfo.name, {
14850 "tags": list(ninfo.GetTags()),
14851 "primary_ip": ninfo.primary_ip,
14852 "secondary_ip": ninfo.secondary_ip,
14853 "offline": ninfo.offline,
14854 "drained": ninfo.drained,
14855 "master_candidate": ninfo.master_candidate,
14856 "group": ninfo.group,
14857 "master_capable": ninfo.master_capable,
14858 "vm_capable": ninfo.vm_capable,
14859 "ndparams": cfg.GetNdParams(ninfo),
14861 for ninfo in node_cfg.values())
14863 return node_results
14866 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14868 """Compute global node data.
14870 @param node_results: the basic node structures as filled from the config
14873 # TODO(dynmem): compute the right data for MAX and MIN memory
14874 # make a copy of the current dict
14875 node_results = dict(node_results)
14876 for nname, nresult in node_data.items():
14877 assert nname in node_results, "Missing basic data for node %s" % nname
14878 ninfo = node_cfg[nname]
14880 if not (ninfo.offline or ninfo.drained):
14881 nresult.Raise("Can't get data for node %s" % nname)
14882 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14884 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14886 for attr in ["memory_total", "memory_free", "memory_dom0",
14887 "vg_size", "vg_free", "cpu_total"]:
14888 if attr not in remote_info:
14889 raise errors.OpExecError("Node '%s' didn't return attribute"
14890 " '%s'" % (nname, attr))
14891 if not isinstance(remote_info[attr], int):
14892 raise errors.OpExecError("Node '%s' returned invalid value"
14894 (nname, attr, remote_info[attr]))
14895 # compute memory used by primary instances
14896 i_p_mem = i_p_up_mem = 0
14897 for iinfo, beinfo in i_list:
14898 if iinfo.primary_node == nname:
14899 i_p_mem += beinfo[constants.BE_MAXMEM]
14900 if iinfo.name not in node_iinfo[nname].payload:
14903 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14904 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14905 remote_info["memory_free"] -= max(0, i_mem_diff)
14907 if iinfo.admin_state == constants.ADMINST_UP:
14908 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14910 # compute memory used by instances
14912 "total_memory": remote_info["memory_total"],
14913 "reserved_memory": remote_info["memory_dom0"],
14914 "free_memory": remote_info["memory_free"],
14915 "total_disk": remote_info["vg_size"],
14916 "free_disk": remote_info["vg_free"],
14917 "total_cpus": remote_info["cpu_total"],
14918 "i_pri_memory": i_p_mem,
14919 "i_pri_up_memory": i_p_up_mem,
14921 pnr_dyn.update(node_results[nname])
14922 node_results[nname] = pnr_dyn
14924 return node_results
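# Worked example for the free-memory adjustment above (figures are purely
# illustrative): a node reports memory_free=4096 MiB and hosts a primary
# instance with BE_MAXMEM=2048 MiB that is currently using 1536 MiB; the
# difference of 512 MiB is subtracted, leaving free_memory=3584 MiB, i.e.
# the node keeps headroom for the instance to grow to its maximum.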
14927 def _ComputeInstanceData(cluster_info, i_list):
14928 """Compute global instance data.
14932 for iinfo, beinfo in i_list:
14934 for nic in iinfo.nics:
14935 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14939 "mode": filled_params[constants.NIC_MODE],
14940 "link": filled_params[constants.NIC_LINK],
14942 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14943 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14944 nic_data.append(nic_dict)
14946 "tags": list(iinfo.GetTags()),
14947 "admin_state": iinfo.admin_state,
14948 "vcpus": beinfo[constants.BE_VCPUS],
14949 "memory": beinfo[constants.BE_MAXMEM],
14950 "spindle_use": beinfo[constants.BE_SPINDLE_USE],
14952 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14954 "disks": [{constants.IDISK_SIZE: dsk.size,
14955 constants.IDISK_MODE: dsk.mode}
14956 for dsk in iinfo.disks],
14957 "disk_template": iinfo.disk_template,
14958 "hypervisor": iinfo.hypervisor,
14960 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14962 instance_data[iinfo.name] = pir
14964 return instance_data
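# An abridged, illustrative entry of the resulting instance dict (all
# values are made up):
#   "inst1.example.com": {
#     "admin_state": "up", "vcpus": 2, "memory": 2048, "spindle_use": 1,
#     "nodes": ["node1.example.com", "node2.example.com"],
#     "disks": [{"size": 10240, "mode": "rw"}],
#     "disk_template": "drbd", "hypervisor": "xen-pvm", ...
#   }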
14966 def _AddNewInstance(self):
14967 """Add new instance data to allocator structure.
14969 This, in combination with _ComputeClusterData, will create the
14970 correct structure needed as input for the allocator.
14972 The checks for the completeness of the opcode must have already been
14976 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14978 if self.disk_template in constants.DTS_INT_MIRROR:
14979 self.required_nodes = 2
14981 self.required_nodes = 1
14985 "disk_template": self.disk_template,
14988 "vcpus": self.vcpus,
14989 "memory": self.memory,
14990 "spindle_use": self.spindle_use,
14991 "disks": self.disks,
14992 "disk_space_total": disk_space,
14994 "required_nodes": self.required_nodes,
14995 "hypervisor": self.hypervisor,
15000 def _AddRelocateInstance(self):
15001 """Add relocate instance data to allocator structure.
15003 This, in combination with _ComputeClusterData, will create the
15004 correct structure needed as input for the allocator.
15006 The checks for the completeness of the opcode must have already been
15010 instance = self.cfg.GetInstanceInfo(self.name)
15011 if instance is None:
15012 raise errors.ProgrammerError("Unknown instance '%s' passed to"
15013 " IAllocator" % self.name)
15015 if instance.disk_template not in constants.DTS_MIRRORED:
15016 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
15017 errors.ECODE_INVAL)
15019 if instance.disk_template in constants.DTS_INT_MIRROR and \
15020 len(instance.secondary_nodes) != 1:
15021 raise errors.OpPrereqError("Instance has not exactly one secondary node",
15022 errors.ECODE_STATE)
15024 self.required_nodes = 1
15025 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
15026 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
15030 "disk_space_total": disk_space,
15031 "required_nodes": self.required_nodes,
15032 "relocate_from": self.relocate_from,
15036 def _AddNodeEvacuate(self):
15037 """Get data for node-evacuate requests.
15041 "instances": self.instances,
15042 "evac_mode": self.evac_mode,
15045 def _AddChangeGroup(self):
15046 """Get data for node-evacuate requests.
15050 "instances": self.instances,
15051 "target_groups": self.target_groups,
15054 def _BuildInputData(self, fn, keydata):
15055 """Build input data structures.
15058 self._ComputeClusterData()
15061 request["type"] = self.mode
15062 for keyname, keytype in keydata:
15063 if keyname not in request:
15064 raise errors.ProgrammerError("Request parameter %s is missing" %
15066 val = request[keyname]
15067 if not keytype(val):
15068 raise errors.ProgrammerError("Request parameter %s doesn't pass"
15069 " validation, value %s, expected"
15070 " type %s" % (keyname, val, keytype))
15071 self.in_data["request"] = request
15073 self.in_text = serializer.Dump(self.in_data)
15075 _STRING_LIST = ht.TListOf(ht.TString)
15076 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
15077 # pylint: disable=E1101
15078 # Class '...' has no 'OP_ID' member
15079 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
15080 opcodes.OpInstanceMigrate.OP_ID,
15081 opcodes.OpInstanceReplaceDisks.OP_ID])
15085 ht.TListOf(ht.TAnd(ht.TIsLength(3),
15086 ht.TItems([ht.TNonEmptyString,
15087 ht.TNonEmptyString,
15088 ht.TListOf(ht.TNonEmptyString),
15091 ht.TListOf(ht.TAnd(ht.TIsLength(2),
15092 ht.TItems([ht.TNonEmptyString,
15095 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
15096 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
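# An illustrative node-evacuation result matching _NEVAC_RESULT (names and
# opcode fields are placeholders):
#   ([("inst1.example.com", "group1", ["node3.example.com"])],  # moved
#    [("inst2.example.com", "not enough memory on target nodes")],  # failed
#    [[{"OP_ID": "OP_INSTANCE_MIGRATE", ...}]])  # jobs, one opcode list per job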
15099 constants.IALLOCATOR_MODE_ALLOC:
15102 ("name", ht.TString),
15103 ("memory", ht.TInt),
15104 ("spindle_use", ht.TInt),
15105 ("disks", ht.TListOf(ht.TDict)),
15106 ("disk_template", ht.TString),
15107 ("os", ht.TString),
15108 ("tags", _STRING_LIST),
15109 ("nics", ht.TListOf(ht.TDict)),
15110 ("vcpus", ht.TInt),
15111 ("hypervisor", ht.TString),
15113 constants.IALLOCATOR_MODE_RELOC:
15114 (_AddRelocateInstance,
15115 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
15117 constants.IALLOCATOR_MODE_NODE_EVAC:
15118 (_AddNodeEvacuate, [
15119 ("instances", _STRING_LIST),
15120 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
15122 constants.IALLOCATOR_MODE_CHG_GROUP:
15123 (_AddChangeGroup, [
15124 ("instances", _STRING_LIST),
15125 ("target_groups", _STRING_LIST),
15129 def Run(self, name, validate=True, call_fn=None):
15130 """Run an instance allocator and return the results.
15133 if call_fn is None:
15134 call_fn = self.rpc.call_iallocator_runner
15136 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
15137 result.Raise("Failure while running the iallocator script")
15139 self.out_text = result.payload
15141 self._ValidateResult()
15143 def _ValidateResult(self):
15144 """Process the allocator results.
15146 This will process the result and, if successful, save it in
15147 self.out_data and the other result attributes.
15151 rdict = serializer.Load(self.out_text)
15152 except Exception, err:
15153 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
15155 if not isinstance(rdict, dict):
15156 raise errors.OpExecError("Can't parse iallocator results: not a dict")
15158 # TODO: remove backwards compatibility in later versions
15159 if "nodes" in rdict and "result" not in rdict:
15160 rdict["result"] = rdict["nodes"]
15163 for key in "success", "info", "result":
15164 if key not in rdict:
15165 raise errors.OpExecError("Can't parse iallocator results:"
15166 " missing key '%s'" % key)
15167 setattr(self, key, rdict[key])
15169 if not self._result_check(self.result):
15170 raise errors.OpExecError("Iallocator returned invalid result,"
15171 " expected %s, got %s" %
15172 (self._result_check, self.result),
15173 errors.ECODE_INVAL)
15175 if self.mode == constants.IALLOCATOR_MODE_RELOC:
15176 assert self.relocate_from is not None
15177 assert self.required_nodes == 1
15179 node2group = dict((name, ndata["group"])
15180 for (name, ndata) in self.in_data["nodes"].items())
15182 fn = compat.partial(self._NodesToGroups, node2group,
15183 self.in_data["nodegroups"])
15185 instance = self.cfg.GetInstanceInfo(self.name)
15186 request_groups = fn(self.relocate_from + [instance.primary_node])
15187 result_groups = fn(rdict["result"] + [instance.primary_node])
15189 if self.success and not set(result_groups).issubset(request_groups):
15190 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15191 " differ from original groups (%s)" %
15192 (utils.CommaJoin(result_groups),
15193 utils.CommaJoin(request_groups)))
15195 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15196 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15198 self.out_data = rdict
15201 def _NodesToGroups(node2group, groups, nodes):
15202 """Returns a list of unique group names for a list of nodes.
15204 @type node2group: dict
15205 @param node2group: Map from node name to group UUID
15207 @param groups: Group information
15209 @param nodes: Node names
15216 group_uuid = node2group[node]
15218 # Ignore unknown node
15222 group = groups[group_uuid]
15224 # Can't find group, let's use UUID
15225 group_name = group_uuid
15227 group_name = group["name"]
15229 result.add(group_name)
15231 return sorted(result)
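# For instance (illustrative data): with node2group = {"node1": "uuid-a",
# "node2": "uuid-b"} and groups = {"uuid-a": {"name": "default"}}, the call
# _NodesToGroups(node2group, groups, ["node1", "node2", "nodeX"]) returns
# ["default", "uuid-b"]: known groups are reported by name, a missing group
# entry falls back to the UUID, and unknown nodes are silently skipped.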
15234 class LUTestAllocator(NoHooksLU):
15235 """Run allocator tests.
15237 This LU runs the allocator tests.
15240 def CheckPrereq(self):
15241 """Check prerequisites.
15243 This checks the opcode parameters depending on the direction and mode of the test.
15246 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15247 for attr in ["memory", "disks", "disk_template",
15248 "os", "tags", "nics", "vcpus"]:
15249 if not hasattr(self.op, attr):
15250 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15251 attr, errors.ECODE_INVAL)
15252 iname = self.cfg.ExpandInstanceName(self.op.name)
15253 if iname is not None:
15254 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15255 iname, errors.ECODE_EXISTS)
15256 if not isinstance(self.op.nics, list):
15257 raise errors.OpPrereqError("Invalid parameter 'nics'",
15258 errors.ECODE_INVAL)
15259 if not isinstance(self.op.disks, list):
15260 raise errors.OpPrereqError("Invalid parameter 'disks'",
15261 errors.ECODE_INVAL)
15262 for row in self.op.disks:
15263 if (not isinstance(row, dict) or
15264 constants.IDISK_SIZE not in row or
15265 not isinstance(row[constants.IDISK_SIZE], int) or
15266 constants.IDISK_MODE not in row or
15267 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15268 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15269 " parameter", errors.ECODE_INVAL)
15270 if self.op.hypervisor is None:
15271 self.op.hypervisor = self.cfg.GetHypervisorType()
15272 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15273 fname = _ExpandInstanceName(self.cfg, self.op.name)
15274 self.op.name = fname
15275 self.relocate_from = \
15276 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15277 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15278 constants.IALLOCATOR_MODE_NODE_EVAC):
15279 if not self.op.instances:
15280 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15281 self.op.instances = _GetWantedInstances(self, self.op.instances)
15283 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15284 self.op.mode, errors.ECODE_INVAL)
15286 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15287 if self.op.allocator is None:
15288 raise errors.OpPrereqError("Missing allocator name",
15289 errors.ECODE_INVAL)
15290 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15291 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15292 self.op.direction, errors.ECODE_INVAL)
15294 def Exec(self, feedback_fn):
15295 """Run the allocator test.
15298 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15299 ial = IAllocator(self.cfg, self.rpc,
15302 memory=self.op.memory,
15303 disks=self.op.disks,
15304 disk_template=self.op.disk_template,
15308 vcpus=self.op.vcpus,
15309 hypervisor=self.op.hypervisor,
15310 spindle_use=self.op.spindle_use,
15312 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15313 ial = IAllocator(self.cfg, self.rpc,
15316 relocate_from=list(self.relocate_from),
15318 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15319 ial = IAllocator(self.cfg, self.rpc,
15321 instances=self.op.instances,
15322 target_groups=self.op.target_groups)
15323 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15324 ial = IAllocator(self.cfg, self.rpc,
15326 instances=self.op.instances,
15327 evac_mode=self.op.evac_mode)
15329 raise errors.ProgrammerError("Uncatched mode %s in"
15330 " LUTestAllocator.Exec", self.op.mode)
15332 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15333 result = ial.in_text
15335 ial.Run(self.op.allocator, validate=False)
15336 result = ial.out_text
15340 #: Query type implementations
15342 constants.QR_CLUSTER: _ClusterQuery,
15343 constants.QR_INSTANCE: _InstanceQuery,
15344 constants.QR_NODE: _NodeQuery,
15345 constants.QR_GROUP: _GroupQuery,
15346 constants.QR_OS: _OsQuery,
15347 constants.QR_EXPORT: _ExportQuery,
15350 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15353 def _GetQueryImplementation(name):
15354 """Returns the implemtnation for a query type.
15356 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15360 return _QUERY_IMPL[name]
15362 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15363 errors.ECODE_INVAL)