4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
61 from ganeti import rpc
62 from ganeti import runtime
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
#: Instance administrative states in which the instance is considered down
INSTANCE_DOWN = [constants.ADMINST_DOWN]
#: Instance administrative states in which the instance is managed
#: (i.e. not marked offline) by the cluster
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
#: Instance administrative states in which the instance is not expected
#: to be running
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcode.OpCode}
96 @param jobs: A list of lists of opcode objects
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  # Hooks path/type; C{HPATH} of C{None} means "no hooks for this LU" (see
  # the notes on BuildHooksEnv/BuildHooksNodes below). Subclasses redefine
  # these as documented in the class docstring.
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    @param processor: the mcpu processor driving this LU
    @param op: the opcode this LU will execute
    @param context: the cluster context (configuration, lock manager)
    @param rpc_runner: RPC runner used to talk to nodes

    """
    self.proc = processor
    # The opcode must be kept: the debug-level handling and validation
    # below, and most LU methods, read self.op.
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging shortcuts
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets: None means the LU implements CheckPrereq/Exec itself;
    # CheckPrereq and Exec below iterate over this attribute when set.
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left as as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possible
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        # Each tasklet checks its own prerequisites
        tl.CheckPrereq()

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      # Only LUs without tasklets may leave Exec unimplemented
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and could
    # be a function warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      # The assert only makes sense for a pre-populated dict; a fresh dict
      # trivially has no instance-level locks.
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    If should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we're really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  # A C{None} HPATH tells the hooks machinery to skip hooks entirely for
  # these LUs (see the BuildHooksEnv note in L{LogicalUnit}); both methods
  # below must therefore never be reached.
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU,
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
  def CheckPrereq(self):
    """Check prerequisites for this tasklets.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    @param feedback_fn: function used to send feedback back to the caller

    """
    raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
499 def __init__(self, qfilter, fields, use_locking):
500 """Initializes this class.
503 self.use_locking = use_locking
505 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
506 namefield=self.SORT_FIELD)
507 self.requested_data = self.query.RequestedData()
508 self.names = self.query.RequestedNames()
510 # Sort only if no names were requested
511 self.sort_by_name = not self.names
513 self.do_locking = None
516 def _GetNames(self, lu, all_names, lock_level):
517 """Helper function to determine names asked for in the query.
521 names = lu.owned_locks(lock_level)
525 if self.wanted == locking.ALL_SET:
526 assert not self.names
527 # caller didn't specify names, so ordering is not important
528 return utils.NiceSort(names)
530 # caller specified names and we must keep the same order
532 assert not self.do_locking or lu.glm.is_owned(lock_level)
534 missing = set(self.wanted).difference(names)
536 raise errors.OpExecError("Some items were removed before retrieving"
537 " their data: %s" % missing)
539 # Return expanded names
  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()
  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()
  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()
  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    @return: query response as built by L{query.GetQueryResponse}

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)
  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    @return: query result in the old-style (list of rows) format

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)
582 """Returns a dict declaring all lock levels shared.
585 return dict.fromkeys(locking.LEVELS, 1)
def _MakeLegacyNodeInfo(data):
  """Formats the data returned by L{rpc.RpcRunner.call_node_info}.

  Converts the data into a single dictionary. This is fine for most use cases,
  but some require information from more than one volume group or hypervisor.

  """
  # data is (bootid, [vg_info], [hv_info]); only the first volume group and
  # hypervisor are kept in the legacy format
  (bootid, (vg_info, ), (hv_info, )) = data

  return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
    "bootid": bootid,
    })
def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of its children!)
  @param cfg: The config object
  @returns The annotated disk copies
  @see L{rpc.AnnotateDiskParams}

  """
  # Look up the disk parameters for this instance once, then delegate the
  # actual annotation to the rpc helper.
  disk_params = cfg.GetInstanceDiskParams(instance)
  return rpc.AnnotateDiskParams(instance.disk_template, devs, disk_params)
def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
                              cur_group_uuid):
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  """
  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)
643 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
644 """Checks if the owned node groups are still correct for an instance.
646 @type cfg: L{config.ConfigWriter}
647 @param cfg: The cluster configuration
648 @type instance_name: string
649 @param instance_name: Instance name
650 @type owned_groups: set or frozenset
651 @param owned_groups: List of currently owned node groups
654 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
656 if not owned_groups.issuperset(inst_groups):
657 raise errors.OpPrereqError("Instance %s's node groups changed since"
658 " locks were acquired, current groups are"
659 " are '%s', owning groups '%s'; retry the"
662 utils.CommaJoin(inst_groups),
663 utils.CommaJoin(owned_groups)),
669 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
670 """Checks if the instances in a node group are still correct.
672 @type cfg: L{config.ConfigWriter}
673 @param cfg: The cluster configuration
674 @type group_uuid: string
675 @param group_uuid: Node group UUID
676 @type owned_instances: set or frozenset
677 @param owned_instances: List of currently owned instances
680 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
681 if owned_instances != wanted_instances:
682 raise errors.OpPrereqError("Instances in node group '%s' changed since"
683 " locks were acquired, wanted '%s', have '%s';"
684 " retry the operation" %
686 utils.CommaJoin(wanted_instances),
687 utils.CommaJoin(owned_instances)),
690 return wanted_instances
def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  # The ndparams of the node carry the (possibly empty) OOB program path
  ndparams = cfg.GetNdParams(node)
  return ndparams[constants.ND_OOB_PROGRAM]
def _CopyLockList(names):
  """Makes a copy of a list of lock names.

  Handles L{locking.ALL_SET} correctly.

  """
  if names == locking.ALL_SET:
    # ALL_SET is a sentinel, not a list; return it unchanged
    return locking.ALL_SET
  else:
    # Plain lists must be copied so callers can't mutate our input
    return names[:]
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  # None/empty means "all nodes", in a stable sorted order
  return utils.NiceSort(lu.cfg.GetNodeList())
def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    # None/empty means "all instances", in a stable sorted order
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted
756 def _GetUpdatedParams(old_params, update_dict,
757 use_default=True, use_none=False):
758 """Return the new version of a parameter dictionary.
760 @type old_params: dict
761 @param old_params: old parameters
762 @type update_dict: dict
763 @param update_dict: dict containing new parameter values, or
764 constants.VALUE_DEFAULT to reset the parameter to its default
766 @param use_default: boolean
767 @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
768 values as 'to be deleted' values
769 @param use_none: boolean
770 @type use_none: whether to recognise C{None} values as 'to be
773 @return: the new parameter dictionary
776 params_copy = copy.deepcopy(old_params)
777 for key, val in update_dict.iteritems():
778 if ((use_default and val == constants.VALUE_DEFAULT) or
779 (use_none and val is None)):
785 params_copy[key] = val
789 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
790 """Return the new version of a instance policy.
792 @param group_policy: whether this policy applies to a group and thus
793 we should support removal of policy entries
796 use_none = use_default = group_policy
797 ipolicy = copy.deepcopy(old_ipolicy)
798 for key, value in new_ipolicy.items():
799 if key not in constants.IPOLICY_ALL_KEYS:
800 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
802 if key in constants.IPOLICY_ISPECS:
803 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
804 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
806 use_default=use_default)
808 if (not value or value == [constants.VALUE_DEFAULT] or
809 value == constants.VALUE_DEFAULT):
813 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
814 " on the cluster'" % key,
817 if key in constants.IPOLICY_PARAMETERS:
818 # FIXME: we assume all such values are float
820 ipolicy[key] = float(value)
821 except (TypeError, ValueError), err:
822 raise errors.OpPrereqError("Invalid value for attribute"
823 " '%s': '%s', error: %s" %
824 (key, value, err), errors.ECODE_INVAL)
826 # FIXME: we assume all others are lists; this should be redone
828 ipolicy[key] = list(value)
830 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
831 except errors.ConfigurationError, err:
832 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
837 def _UpdateAndVerifySubDict(base, updates, type_check):
838 """Updates and verifies a dict with sub dicts of the same type.
840 @param base: The dict with the old data
841 @param updates: The dict with the new data
842 @param type_check: Dict suitable to ForceDictType to verify correct types
843 @returns: A new dict with updated and verified values
847 new = _GetUpdatedParams(old, value)
848 utils.ForceDictType(new, type_check)
851 ret = copy.deepcopy(base)
852 ret.update(dict((key, fn(base.get(key, {}), value))
853 for key, value in updates.items()))
857 def _MergeAndVerifyHvState(op_input, obj_input):
858 """Combines the hv state from an opcode with the one of the object
860 @param op_input: The input dict from the opcode
861 @param obj_input: The input dict from the objects
862 @return: The verified and updated dict
866 invalid_hvs = set(op_input) - constants.HYPER_TYPES
868 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
869 " %s" % utils.CommaJoin(invalid_hvs),
871 if obj_input is None:
873 type_check = constants.HVSTS_PARAMETER_TYPES
874 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
879 def _MergeAndVerifyDiskState(op_input, obj_input):
880 """Combines the disk state from an opcode with the one of the object
882 @param op_input: The input dict from the opcode
883 @param obj_input: The input dict from the objects
884 @return: The verified and updated dict
887 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
889 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
890 utils.CommaJoin(invalid_dst),
892 type_check = constants.DSS_PARAMETER_TYPES
893 if obj_input is None:
895 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
897 for key, value in op_input.items())
902 def _ReleaseLocks(lu, level, names=None, keep=None):
903 """Releases locks owned by an LU.
905 @type lu: L{LogicalUnit}
906 @param level: Lock level
907 @type names: list or None
908 @param names: Names of locks to release
909 @type keep: list or None
910 @param keep: Names of locks to retain
913 assert not (keep is not None and names is not None), \
914 "Only one of the 'names' and the 'keep' parameters can be given"
916 if names is not None:
917 should_release = names.__contains__
919 should_release = lambda name: name not in keep
921 should_release = None
923 owned = lu.owned_locks(level)
925 # Not owning any lock at this level, do nothing
932 # Determine which locks to release
934 if should_release(name):
939 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
941 # Release just some locks
942 lu.glm.release(level, names=release)
944 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
947 lu.glm.release(level)
949 assert not lu.glm.is_owned(level), "No locks should be owned"
952 def _MapInstanceDisksToNodes(instances):
953 """Creates a map from (node, volume) to instance name.
955 @type instances: list of L{objects.Instance}
956 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
959 return dict(((node, vol), inst.name)
960 for inst in instances
961 for (node, vols) in inst.MapLVsByNode().items()
965 def _RunPostHook(lu, node_name):
966 """Runs the post-hook for an opcode on a single node.
969 hm = lu.proc.BuildHooksManager(lu)
971 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
972 except Exception, err: # pylint: disable=W0703
973 lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set
  @param selected: the fields requested by the caller
  @raise errors.OpPrereqError: if any selected field is unknown

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  @raise errors.OpPrereqError: if any global hypervisor parameter was given

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1010 def _CheckNodeOnline(lu, node, msg=None):
1011 """Ensure that a given node is online.
1013 @param lu: the LU on behalf of which we make the check
1014 @param node: the node to check
1015 @param msg: if passed, should be a message to replace the default one
1016 @raise errors.OpPrereqError: if the node is offline
1020 msg = "Can't use offline node"
1021 if lu.cfg.GetNodeInfo(node).offline:
1022 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1025 def _CheckNodeNotDrained(lu, node):
1026 """Ensure that a given node is not drained.
1028 @param lu: the LU on behalf of which we make the check
1029 @param node: the node to check
1030 @raise errors.OpPrereqError: if the node is drained
1033 if lu.cfg.GetNodeInfo(node).drained:
1034 raise errors.OpPrereqError("Can't use drained node %s" % node,
1038 def _CheckNodeVmCapable(lu, node):
1039 """Ensure that a given node is vm capable.
1041 @param lu: the LU on behalf of which we make the check
1042 @param node: the node to check
1043 @raise errors.OpPrereqError: if the node is not vm capable
1046 if not lu.cfg.GetNodeInfo(node).vm_capable:
1047 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    # The prereq flag selects the exception type, as documented above
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  # strict=True: fail if the file has more than one line
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)
def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    # The instance must not be running either; ask its primary node
    pnode = instance.primary_node
    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
    ins_l.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)

    if instance.name in ins_l.payload:
      raise errors.OpPrereqError("Instance %s is running, %s" %
                                 (instance.name, msg), errors.ECODE_STATE)
def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
      not just 'disk')
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or an error string describing why the value fails the check

  """
  if value in [None, constants.VALUE_AUTO]:
    # unset / auto-computed values are never a violation
    return None
  # missing policy entries default to the value itself, i.e. no constraint
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    if qualifier:
      fqn = "%s/%s" % (name, qualifier)
    else:
      fqn = name
    return ("%s value %s is not in range [%s, %s]" %
            (fqn, value, min_v, max_v))
  return None
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list of no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]

  # drop the None results (checks that passed), keep the error strings
  return filter(None,
                (_compute_fn(name, qualifier, ipolicy, value)
                 for (name, qualifier, value) in test_settings))
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)
def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
    _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)
def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    # moving within the same group cannot introduce new violations
    return []
  else:
    return _compute_fn(ipolicy, instance)
def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      # demote to a warning when the caller requested it
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violates the new ipolicy but
      did not violate the old one

  """
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))
def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name
def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }
  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    # mode/link come from the cluster-filled nicparams
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))
def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
def _CalculateGroupIPolicy(cluster, group):
  """Calculate instance policy for group.

  """
  return cluster.SimpleFillIPolicy(group.ipolicy)
def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances who violates given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: object.Instance
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the brigdes needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  # only bridged NICs reference a bridge (the link)
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the brigdes needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
def _GetNodeInstancesInner(cfg, fn):
  """Return all instances from the config matching predicate C{fn}."""
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  # all other storage types take no extra arguments
  return []
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  """Return the list of indices of the instance's faulty disks on a node.

  @return: list of disk indices whose LV status is C{LDS_FAULTY}

  """
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
                                                                instance))
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)
def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      # best-effort: log instead of aborting the destroy
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name
def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file
  @return: tuple of (error type or None, message or None)

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception as err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @param instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data
class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors: # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if cond:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      # verify only the requested group
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend(
      [opcodes.OpClusterVerifyGroup(group_name=group,
                                    ignore_errors=self.op.ignore_errors,
                                    depends=depends_fn())]
      for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        # only OpClusterVerifyGroup has a skip_checks slot
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = False

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError as err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
    self.share_locks = _ShareAll()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Retrieve all information
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad
2043 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2044 """Verifies the status of a node group.
2047 HPATH = "cluster-verify"
2048 HTYPE = constants.HTYPE_CLUSTER
2051 _HOOKS_INDENT_RE = re.compile("^", re.M)
2053 class NodeImage(object):
2054 """A class representing the logical and physical status of a node.
2057 @ivar name: the node name to which this object refers
2058 @ivar volumes: a structure as returned from
2059 L{ganeti.backend.GetVolumeList} (runtime)
2060 @ivar instances: a list of running instances (runtime)
2061 @ivar pinst: list of configured primary instances (config)
2062 @ivar sinst: list of configured secondary instances (config)
2063 @ivar sbp: dictionary of {primary-node: list of instances} for all
2064 instances for which this node is secondary (config)
2065 @ivar mfree: free memory, as reported by hypervisor (runtime)
2066 @ivar dfree: free disk, as reported by the node (runtime)
2067 @ivar offline: the offline status (config)
2068 @type rpc_fail: boolean
2069 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2070 not whether the individual keys were correct) (runtime)
2071 @type lvm_fail: boolean
2072 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2073 @type hyp_fail: boolean
2074 @ivar hyp_fail: whether the RPC call didn't return the instance list
2075 @type ghost: boolean
2076 @ivar ghost: whether this is a known node or not (config)
2077 @type os_fail: boolean
2078 @ivar os_fail: whether the RPC call didn't return valid OS data
2080 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2081 @type vm_capable: boolean
2082 @ivar vm_capable: whether the node can host instances
2085 def __init__(self, offline=False, name=None, vm_capable=True):
2094 self.offline = offline
2095 self.vm_capable = vm_capable
2096 self.rpc_fail = False
2097 self.lvm_fail = False
2098 self.hyp_fail = False
2100 self.os_fail = False
2103 def ExpandNames(self):
2104 # This raises errors.OpPrereqError on its own:
2105 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2107 # Get instances in node group; this is unsafe and needs verification later
2109 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2111 self.needed_locks = {
2112 locking.LEVEL_INSTANCE: inst_names,
2113 locking.LEVEL_NODEGROUP: [self.group_uuid],
2114 locking.LEVEL_NODE: [],
2117 self.share_locks = _ShareAll()
2119 def DeclareLocks(self, level):
2120 if level == locking.LEVEL_NODE:
2121 # Get members of node group; this is unsafe and needs verification later
2122 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2124 all_inst_info = self.cfg.GetAllInstancesInfo()
2126 # In Exec(), we warn about mirrored instances that have primary and
2127 # secondary living in separate node groups. To fully verify that
2128 # volumes for these instances are healthy, we will need to do an
2129 # extra call to their secondaries. We ensure here those nodes will
2131 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2132 # Important: access only the instances whose lock is owned
2133 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2134 nodes.update(all_inst_info[inst].secondary_nodes)
2136 self.needed_locks[locking.LEVEL_NODE] = nodes
# Check prerequisites for group verification: confirm the node-group lock is
# owned, that every node and instance of the group is locked, and cache the
# node/instance info dicts used by the rest of the LU.
# NOTE(review): this extract is line-number-mangled and skips original lines
# (e.g. the `group_instances = \` and `unlocked_nodes = \` assignments, the
# `if unlocked_nodes:` guard and the OpPrereqError errcls arguments) — compare
# against upstream ganeti cmdlib.py before editing code.
2138 def CheckPrereq(self):
2139 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2140 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2142 group_nodes = set(self.group_info.members)
2144 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2147 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2149 unlocked_instances = \
2150 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
# Refuse to run if any node or instance of the group is not locked by us.
2153 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2154 utils.CommaJoin(unlocked_nodes),
2157 if unlocked_instances:
2158 raise errors.OpPrereqError("Missing lock for instances: %s" %
2159 utils.CommaJoin(unlocked_instances),
# Cache cluster-wide and group-restricted views of nodes and instances.
2162 self.all_node_info = self.cfg.GetAllNodesInfo()
2163 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2165 self.my_node_names = utils.NiceSort(group_nodes)
2166 self.my_inst_names = utils.NiceSort(group_instances)
2168 self.my_node_info = dict((name, self.all_node_info[name])
2169 for name in self.my_node_names)
2171 self.my_inst_info = dict((name, self.all_inst_info[name])
2172 for name in self.my_inst_names)
2174 # We detect here the nodes that will need the extra RPC calls for verifying
2175 # split LV volumes; they should be locked.
2176 extra_lv_nodes = set()
2178 for inst in self.my_inst_info.values():
2179 if inst.disk_template in constants.DTS_INT_MIRROR:
2180 for nname in inst.all_nodes:
2181 if self.all_node_info[nname].group != self.group_uuid:
2182 extra_lv_nodes.add(nname)
2184 unlocked_lv_nodes = \
2185 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2187 if unlocked_lv_nodes:
2188 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2189 utils.CommaJoin(unlocked_lv_nodes),
2191 self.extra_lv_nodes = list(extra_lv_nodes)
# Basic per-node sanity checks: RPC result well-formedness, protocol and
# release version compatibility, hypervisor/hvparams verification results,
# and node-setup errors.
# NOTE(review): extract has gaps (e.g. `node = ninfo.name`, early `return`s
# after failed tests, and the final `_ErrorIf` argument list) — verify against
# upstream before changing code.
2193 def _VerifyNode(self, ninfo, nresult):
2194 """Perform some basic validation on data returned from a node.
2196 - check the result data structure is well formed and has all the
2198 - check ganeti version
2200 @type ninfo: L{objects.Node}
2201 @param ninfo: the node to check
2202 @param nresult: the results from the node
2204 @return: whether overall this call was successful (and we can expect
2205 reasonable values in the respose)
2209 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2211 # main result, nresult should be a non-empty dict
2212 test = not nresult or not isinstance(nresult, dict)
2213 _ErrorIf(test, constants.CV_ENODERPC, node,
2214 "unable to verify node: no data returned")
2218 # compares ganeti version
2219 local_version = constants.PROTOCOL_VERSION
2220 remote_version = nresult.get("version", None)
# Remote version must be a (protocol, release) pair.
2221 test = not (remote_version and
2222 isinstance(remote_version, (list, tuple)) and
2223 len(remote_version) == 2)
2224 _ErrorIf(test, constants.CV_ENODERPC, node,
2225 "connection to node returned invalid data")
# Protocol mismatch is fatal for further checks; release mismatch below is
# only a warning.
2229 test = local_version != remote_version[0]
2230 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2231 "incompatible protocol versions: master %s,"
2232 " node %s", local_version, remote_version[0])
2236 # node seems compatible, we can actually try to look into its results
2238 # full package version
2239 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2240 constants.CV_ENODEVERSION, node,
2241 "software version mismatch: master %s, node %s",
2242 constants.RELEASE_VERSION, remote_version[1],
2243 code=self.ETYPE_WARNING)
# Per-hypervisor verify results: a non-None entry is a failure message.
2245 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2246 if ninfo.vm_capable and isinstance(hyp_result, dict):
2247 for hv_name, hv_result in hyp_result.iteritems():
2248 test = hv_result is not None
2249 _ErrorIf(test, constants.CV_ENODEHV, node,
2250 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
# Hypervisor-parameter verification failures are always reported.
2252 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2253 if ninfo.vm_capable and isinstance(hvp_result, list):
2254 for item, hv_name, hv_result in hvp_result:
2255 _ErrorIf(True, constants.CV_ENODEHV, node,
2256 "hypervisor %s parameter verify failure (source %s): %s",
2257 hv_name, item, hv_result)
2259 test = nresult.get(constants.NV_NODESETUP,
2260 ["Missing NODESETUP results"])
2261 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
# Verify that the node's clock is within NODE_MAX_CLOCK_SKEW of the master,
# using the RPC start/end timestamps as the tolerance window.
# NOTE(review): extract skips lines (the `try:` before MergeTime, the early
# `return`, and the `else: ntime_diff = None` branch) — verify upstream.
2266 def _VerifyNodeTime(self, ninfo, nresult,
2267 nvinfo_starttime, nvinfo_endtime):
2268 """Check the node time.
2270 @type ninfo: L{objects.Node}
2271 @param ninfo: the node to check
2272 @param nresult: the remote results for the node
2273 @param nvinfo_starttime: the start time of the RPC call
2274 @param nvinfo_endtime: the end time of the RPC call
2278 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2280 ntime = nresult.get(constants.NV_TIME, None)
2282 ntime_merged = utils.MergeTime(ntime)
2283 except (ValueError, TypeError):
2284 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
# Compute divergence only when outside the [start - skew, end + skew] window.
2287 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2288 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2289 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2290 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2294 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2295 "Node time diverges by at least %s from master node time",
# Verify LVM state on a node: VG presence and minimum size, PV list
# availability, and absence of ':' in PV names (reserved by lvcreate).
# NOTE(review): extract skips lines (e.g. `test = vglist is None`, early
# `return`s, and the trailing format args of the last _ErrorIf) — verify
# upstream before editing.
2298 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2299 """Check the node LVM results.
2301 @type ninfo: L{objects.Node}
2302 @param ninfo: the node to check
2303 @param nresult: the remote results for the node
2304 @param vg_name: the configured VG name
2311 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2313 # checks vg existence and size > 20G
2314 vglist = nresult.get(constants.NV_VGLIST, None)
2316 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
# CheckVolumeGroupSize returns an error string (truthy) on failure.
2318 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2319 constants.MIN_VG_SIZE)
2320 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2323 pvlist = nresult.get(constants.NV_PVLIST, None)
2324 test = pvlist is None
2325 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2327 # check that ':' is not present in PV names, since it's a
2328 # special character for lvcreate (denotes the range of PEs to
2330 for _, pvname, owner_vg in pvlist:
2331 test = ":" in pvname
2332 _ErrorIf(test, constants.CV_ENODELVM, node,
2333 "Invalid character ':' in PV '%s' of VG '%s'",
# Verify that every bridge required by instances on this node exists; the
# node returns the list of *missing* bridges.
# NOTE(review): extract skips lines (e.g. the `if not bridges: return` guard
# and `node = ninfo.name`) — verify upstream.
2336 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2337 """Check the node bridges.
2339 @type ninfo: L{objects.Node}
2340 @param ninfo: the node to check
2341 @param nresult: the remote results for the node
2342 @param bridges: the expected list of bridges
2349 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2351 missing = nresult.get(constants.NV_BRIDGES, None)
2352 test = not isinstance(missing, list)
2353 _ErrorIf(test, constants.CV_ENODENET, node,
2354 "did not return valid bridge information")
# Any entry in `missing` is a bridge the node lacks.
2356 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2357 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
# Verify that user-provided scripts on the node are present and executable;
# the node returns the list of broken scripts.
# NOTE(review): extract skips `node = ninfo.name` and likely a blank-check —
# verify upstream.
2359 def _VerifyNodeUserScripts(self, ninfo, nresult):
2360 """Check the results of user scripts presence and executability on the node
2362 @type ninfo: L{objects.Node}
2363 @param ninfo: the node to check
2364 @param nresult: the remote results for the node
2369 test = not constants.NV_USERSCRIPTS in nresult
2370 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2371 "did not return user scripts information")
2373 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
# A non-empty list means at least one script is missing or not executable.
2375 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2376 "user scripts not present or not executable: %s" %
2377 utils.CommaJoin(sorted(broken_scripts)))
# Verify network connectivity results from a node: SSH reachability of other
# nodes, TCP reachability (primary/secondary IPs), and master-IP reachability.
# NOTE(review): extract has gaps (e.g. `node = ninfo.name`, the inner
# `for anode in nlist:` loop header, and the `else:` before the non-master
# message) — verify upstream.
2379 def _VerifyNodeNetwork(self, ninfo, nresult):
2380 """Check the node network connectivity results.
2382 @type ninfo: L{objects.Node}
2383 @param ninfo: the node to check
2384 @param nresult: the remote results for the node
2388 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2390 test = constants.NV_NODELIST not in nresult
2391 _ErrorIf(test, constants.CV_ENODESSH, node,
2392 "node hasn't returned node ssh connectivity data")
# NV_NODELIST maps peer-node name -> error message; any entry is a failure.
2394 if nresult[constants.NV_NODELIST]:
2395 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2396 _ErrorIf(True, constants.CV_ENODESSH, node,
2397 "ssh communication with node '%s': %s", a_node, a_msg)
2399 test = constants.NV_NODENETTEST not in nresult
2400 _ErrorIf(test, constants.CV_ENODENET, node,
2401 "node hasn't returned node tcp connectivity data")
2403 if nresult[constants.NV_NODENETTEST]:
2404 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2406 _ErrorIf(True, constants.CV_ENODENET, node,
2407 "tcp communication with node '%s': %s",
2408 anode, nresult[constants.NV_NODENETTEST][anode])
2410 test = constants.NV_MASTERIP not in nresult
2411 _ErrorIf(test, constants.CV_ENODENET, node,
2412 "node hasn't returned node master IP reachability data")
# Master node failing to reach the master IP usually means it is not
# configured; for other nodes it is a plain reachability error.
2414 if not nresult[constants.NV_MASTERIP]:
2415 if node == self.master_node:
2416 msg = "the master node cannot reach the master IP (not configured?)"
2418 msg = "cannot reach the master IP"
2419 _ErrorIf(True, constants.CV_ENODENET, node, msg)
# Verify a single instance: instance-policy compliance, presence of its LVs
# on each node, running state on the primary, and per-disk status (including
# faulty-disk detection) from the collected diskstatus map.
# NOTE(review): extract skips lines (the `diskstatus` parameter on the def
# continuation line, `continue` in the volume loop, and the primary-node
# format argument) — verify upstream before editing.
2421 def _VerifyInstance(self, instance, instanceconfig, node_image,
2423 """Verify an instance.
2425 This function checks to see if the required block devices are
2426 available on the instance's node.
2429 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2430 node_current = instanceconfig.primary_node
2432 node_vol_should = {}
2433 instanceconfig.MapLVsByNode(node_vol_should)
# Check the instance against the (group-specific) instance policy.
2435 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2436 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2437 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2439 for node in node_vol_should:
2440 n_img = node_image[node]
2441 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2442 # ignore missing volumes on offline or broken nodes
2444 for volume in node_vol_should[node]:
2445 test = volume not in n_img.volumes
2446 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2447 "volume %s missing on node %s", volume, node)
# An admin-up instance must be listed as running on its primary node
# (unless the primary is offline, which is reported elsewhere).
2449 if instanceconfig.admin_state == constants.ADMINST_UP:
2450 pri_img = node_image[node_current]
2451 test = instance not in pri_img.instances and not pri_img.offline
2452 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2453 "instance not running on its primary node %s",
# Flatten diskstatus into (node, success, status, disk-index) tuples.
2456 diskdata = [(nname, success, status, idx)
2457 for (nname, disks) in diskstatus.items()
2458 for idx, (success, status) in enumerate(disks)]
2460 for nname, success, bdev_status, idx in diskdata:
2461 # the 'ghost node' construction in Exec() ensures that we have a
2463 snode = node_image[nname]
2464 bad_snode = snode.ghost or snode.offline
2465 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2466 not success and not bad_snode,
2467 constants.CV_EINSTANCEFAULTYDISK, instance,
2468 "couldn't retrieve status for disk/%s on %s: %s",
2469 idx, nname, bdev_status)
2470 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2471 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2472 constants.CV_EINSTANCEFAULTYDISK, instance,
2473 "disk/%s on %s is faulty", idx, nname)
# Report LVs present on healthy in-group nodes that no instance should own
# and that are not covered by the reserved-volume patterns.
# NOTE(review): extract skips the `continue` after the skip-unhealthy check —
# verify upstream.
2475 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2476 """Verify if there are any unknown volumes in the cluster.
2478 The .os, .swap and backup volumes are ignored. All other volumes are
2479 reported as unknown.
2481 @type reserved: L{ganeti.utils.FieldSet}
2482 @param reserved: a FieldSet of reserved volume names
2485 for node, n_img in node_image.items():
2486 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2487 self.all_node_info[node].group != self.group_uuid):
2488 # skip non-healthy nodes
2490 for volume in n_img.volumes:
# Orphan = not expected on this node and not matching a reserved pattern.
2491 test = ((node not in node_vol_should or
2492 volume not in node_vol_should[node]) and
2493 not reserved.Matches(volume))
2494 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2495 "volume %s is unknown", volume)
# N+1 memory check: for every node, verify it has enough free memory to host
# all auto-balanced instances whose primary is some other single node.
# NOTE(review): extract skips lines (the `continue` after the skip block and
# `needed_mem = 0` inside the prinode loop) — verify upstream.
2497 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2498 """Verify N+1 Memory Resilience.
2500 Check that if one single node dies we can still start all the
2501 instances it was primary for.
2504 cluster_info = self.cfg.GetClusterInfo()
2505 for node, n_img in node_image.items():
2506 # This code checks that every node which is now listed as
2507 # secondary has enough memory to host all instances it is
2508 # supposed to should a single other node in the cluster fail.
2509 # FIXME: not ready for failover to an arbitrary node
2510 # FIXME: does not support file-backed instances
2511 # WARNING: we currently take into account down instances as well
2512 # as up ones, considering that even if they're down someone
2513 # might want to start them even in the event of a node failure.
2514 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2515 # we're skipping nodes marked offline and nodes in other groups from
2516 # the N+1 warning, since most likely we don't have good memory
2517 # infromation from them; we already list instances living on such
2518 # nodes, and that's enough warning
2520 #TODO(dynmem): also consider ballooning out other instances
2521 for prinode, instances in n_img.sbp.items():
2523 for instance in instances:
2524 bep = cluster_info.FillBE(instance_cfg[instance])
# Only auto-balanced instances count towards the N+1 requirement.
2525 if bep[constants.BE_AUTO_BALANCE]:
2526 needed_mem += bep[constants.BE_MINMEM]
2527 test = n_img.mfree < needed_mem
2528 self._ErrorIf(test, constants.CV_ENODEN1, node,
2529 "not enough memory to accomodate instance failovers"
2530 " should node %s fail (%dMiB needed, %dMiB available)",
2531 prinode, needed_mem, n_img.mfree)
# Classmethod verifying file checksums collected from all nodes: builds the
# expected file->nodes map, collates per-checksum ownership, then reports
# missing, unexpected, and multi-version files.
# NOTE(review): extract is gappy (the @classmethod decorator, the
# `files2nodefn = [` opener, `if node.offline:`/`continue` lines, and the
# `errorif(unexpected, ...)` call head are among the missing lines) — verify
# upstream before editing.
2534 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2535 (files_all, files_opt, files_mc, files_vm)):
2536 """Verifies file checksums collected from all nodes.
2538 @param errorif: Callback for reporting errors
2539 @param nodeinfo: List of L{objects.Node} objects
2540 @param master_node: Name of master node
2541 @param all_nvinfo: RPC results
2544 # Define functions determining which nodes to consider for a file
2547 (files_mc, lambda node: (node.master_candidate or
2548 node.name == master_node)),
2549 (files_vm, lambda node: node.vm_capable),
2552 # Build mapping from filename to list of nodes which should have the file
2554 for (files, fn) in files2nodefn:
2556 filenodes = nodeinfo
2558 filenodes = filter(fn, nodeinfo)
2559 nodefiles.update((filename,
2560 frozenset(map(operator.attrgetter("name"), filenodes)))
2561 for filename in files)
2563 assert set(nodefiles) == (files_all | files_mc | files_vm)
2565 fileinfo = dict((filename, {}) for filename in nodefiles)
2566 ignore_nodes = set()
2568 for node in nodeinfo:
2570 ignore_nodes.add(node.name)
2573 nresult = all_nvinfo[node.name]
# Nodes whose RPC failed are excluded from the comparison below.
2575 if nresult.fail_msg or not nresult.payload:
2578 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2580 test = not (node_files and isinstance(node_files, dict))
2581 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2582 "Node did not return file checksum data")
2584 ignore_nodes.add(node.name)
2587 # Build per-checksum mapping from filename to nodes having it
2588 for (filename, checksum) in node_files.items():
2589 assert filename in nodefiles
2590 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2592 for (filename, checksums) in fileinfo.items():
2593 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2595 # Nodes having the file
2596 with_file = frozenset(node_name
2597 for nodes in fileinfo[filename].values()
2598 for node_name in nodes) - ignore_nodes
2600 expected_nodes = nodefiles[filename] - ignore_nodes
2602 # Nodes missing file
2603 missing_file = expected_nodes - with_file
# Optional files must exist on all expected nodes or on none of them.
2605 if filename in files_opt:
2607 errorif(missing_file and missing_file != expected_nodes,
2608 constants.CV_ECLUSTERFILECHECK, None,
2609 "File %s is optional, but it must exist on all or no"
2610 " nodes (not found on %s)",
2611 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2613 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2614 "File %s is missing from node(s) %s", filename,
2615 utils.CommaJoin(utils.NiceSort(missing_file)))
2617 # Warn if a node has a file it shouldn't
2618 unexpected = with_file - expected_nodes
2620 constants.CV_ECLUSTERFILECHECK, None,
2621 "File %s should not exist on node(s) %s",
2622 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2624 # See if there are multiple versions of the file
2625 test = len(checksums) > 1
2627 variants = ["variant %s on %s" %
2628 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2629 for (idx, (checksum, nodes)) in
2630 enumerate(sorted(checksums.items()))]
2634 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2635 "File %s found with %s different checksums (%s)",
2636 filename, len(checksums), "; ".join(variants))
# Verify the node's DRBD state: usermode-helper configuration, expected
# minors from the cluster DRBD map vs. minors actually in use on the node.
# NOTE(review): extract skips lines (the `drbd_map` parameter continuation,
# `node = ninfo.name`, `if drbd_helper:` guard, `node_drbd = {}`, and the
# `else:` branches) — verify upstream before editing.
2638 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2640 """Verifies and the node DRBD status.
2642 @type ninfo: L{objects.Node}
2643 @param ninfo: the node to check
2644 @param nresult: the remote results for the node
2645 @param instanceinfo: the dict of instances
2646 @param drbd_helper: the configured DRBD usermode helper
2647 @param drbd_map: the DRBD map as returned by
2648 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2652 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2655 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2656 test = (helper_result == None)
2657 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2658 "no drbd usermode helper returned")
2660 status, payload = helper_result
2662 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2663 "drbd usermode helper check unsuccessful: %s", payload)
2664 test = status and (payload != drbd_helper)
2665 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2666 "wrong drbd usermode helper: %s", payload)
2668 # compute the DRBD minors
2670 for minor, instance in drbd_map[node].items():
2671 test = instance not in instanceinfo
2672 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2673 "ghost instance '%s' in temporary DRBD map", instance)
2674 # ghost instance should not be running, but otherwise we
2675 # don't give double warnings (both ghost instance and
2676 # unallocated minor in use)
2678 node_drbd[minor] = (instance, False)
2680 instance = instanceinfo[instance]
2681 node_drbd[minor] = (instance.name,
2682 instance.admin_state == constants.ADMINST_UP)
2684 # and now check them
2685 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2686 test = not isinstance(used_minors, (tuple, list))
2687 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2688 "cannot parse drbd status file: %s", str(used_minors))
2690 # we cannot check drbd status
# Cross-check: every must-exist minor is in use, and every used minor is
# known to the cluster map.
2693 for minor, (iname, must_exist) in node_drbd.items():
2694 test = minor not in used_minors and must_exist
2695 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2696 "drbd minor %d of instance %s is not active", minor, iname)
2697 for minor in used_minors:
2698 test = minor not in node_drbd
2699 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2700 "unallocated drbd minor %d is in use", minor)
# Parse the node's NV_OSLIST RPC payload into nimg.oslist: a dict mapping OS
# name to a list of (path, status, diagnose, variants, parameters, api_ver)
# tuples.
# NOTE(review): extract skips lines (`node = ninfo.name`, `nimg.os_fail = test`,
# `os_dict = {}`, and the `os_dict[name] = []` initialization) — verify
# upstream before editing.
2702 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2703 """Builds the node OS structures.
2705 @type ninfo: L{objects.Node}
2706 @param ninfo: the node to check
2707 @param nresult: the remote results for the node
2708 @param nimg: the node image object
2712 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2714 remote_os = nresult.get(constants.NV_OSLIST, None)
# Each OS entry must be a 7-element list.
2715 test = (not isinstance(remote_os, list) or
2716 not compat.all(isinstance(v, list) and len(v) == 7
2717 for v in remote_os))
2719 _ErrorIf(test, constants.CV_ENODEOS, node,
2720 "node hasn't returned valid OS data")
2729 for (name, os_path, status, diagnose,
2730 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2732 if name not in os_dict:
2735 # parameters is a list of lists instead of list of tuples due to
2736 # JSON lacking a real tuple type, fix it:
2737 parameters = [tuple(v) for v in parameters]
2738 os_dict[name].append((os_path, status, diagnose,
2739 set(variants), set(parameters), set(api_ver)))
2741 nimg.oslist = os_dict
# Verify a node's OS list against a reference ("base") node image: validity,
# duplicate entries, extra OSes, per-OS API/variant/parameter differences,
# and OSes missing compared to the reference.
# NOTE(review): extract skips lines (`node = ninfo.name`, the `continue`
# after the extra-OS test, `if not b_status:`/`continue`, and the extra-OS
# format argument) — verify upstream.
2743 def _VerifyNodeOS(self, ninfo, nimg, base):
2744 """Verifies the node OS list.
2746 @type ninfo: L{objects.Node}
2747 @param ninfo: the node to check
2748 @param nimg: the node image object
2749 @param base: the 'template' node we match against (e.g. from the master)
2753 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2755 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2757 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2758 for os_name, os_data in nimg.oslist.items():
2759 assert os_data, "Empty OS status for OS %s?!" % os_name
2760 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2761 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2762 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2763 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2764 "OS '%s' has multiple entries (first one shadows the rest): %s",
2765 os_name, utils.CommaJoin([v[0] for v in os_data]))
2766 # comparisons with the 'base' image
2767 test = os_name not in base.oslist
2768 _ErrorIf(test, constants.CV_ENODEOS, node,
2769 "Extra OS %s not present on reference node (%s)",
2773 assert base.oslist[os_name], "Base node has empty OS status?"
2774 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2776 # base OS is invalid, skipping
# Compare API versions, variants, and parameters against the reference.
2778 for kind, a, b in [("API version", f_api, b_api),
2779 ("variants list", f_var, b_var),
2780 ("parameters", beautify_params(f_param),
2781 beautify_params(b_param))]:
2782 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2783 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2784 kind, os_name, base.name,
2785 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2787 # check any missing OSes
2788 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2789 _ErrorIf(missing, constants.CV_ENODEOS, node,
2790 "OSes present on reference node %s but missing on this node: %s",
2791 base.name, utils.CommaJoin(missing))
# Verify out-of-band helper paths: only meaningful on master / master
# candidates, since the OOB helper runs on the master. A truthy path_result
# is an error string.
# NOTE(review): extract skips `node = ninfo.name` — verify upstream.
2793 def _VerifyOob(self, ninfo, nresult):
2794 """Verifies out of band functionality of a node.
2796 @type ninfo: L{objects.Node}
2797 @param ninfo: the node to check
2798 @param nresult: the remote results for the node
2802 # We just have to verify the paths on master and/or master candidates
2803 # as the oob helper is invoked on the master
2804 if ((ninfo.master_candidate or ninfo.master_capable) and
2805 constants.NV_OOB_PATHS in nresult):
2806 for path_result in nresult[constants.NV_OOB_PATHS]:
2807 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
# Parse the node's LV list into nimg.volumes; lvm_fail is set pessimistically
# first and cleared only when a valid dict payload is received.
# NOTE(review): extract skips lines (`node = ninfo.name`, the vg_name guard
# branch, and the `else:` before the success assignment) — verify upstream.
2809 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2810 """Verifies and updates the node volume data.
2812 This function will update a L{NodeImage}'s internal structures
2813 with data from the remote call.
2815 @type ninfo: L{objects.Node}
2816 @param ninfo: the node to check
2817 @param nresult: the remote results for the node
2818 @param nimg: the node image object
2819 @param vg_name: the configured VG name
2823 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2825 nimg.lvm_fail = True
2826 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
# A string payload is an error message from the node; a dict is the real
# LV name -> info mapping.
2829 elif isinstance(lvdata, basestring):
2830 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2831 utils.SafeEncode(lvdata))
2832 elif not isinstance(lvdata, dict):
2833 _ErrorIf(True, constants.CV_ENODELVM, node,
2834 "rpc call to node failed (lvlist)")
2836 nimg.volumes = lvdata
2837 nimg.lvm_fail = False
# Parse the node's running-instance list into nimg.instances, or mark the
# hypervisor query as failed (nimg.hyp_fail) on bad payloads.
# NOTE(review): extract skips the `if test:`/`else:` lines around the two
# assignments — verify upstream.
2839 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2840 """Verifies and updates the node instance list.
2842 If the listing was successful, then updates this node's instance
2843 list. Otherwise, it marks the RPC call as failed for the instance
2846 @type ninfo: L{objects.Node}
2847 @param ninfo: the node to check
2848 @param nresult: the remote results for the node
2849 @param nimg: the node image object
2852 idata = nresult.get(constants.NV_INSTANCELIST, None)
2853 test = not isinstance(idata, list)
2854 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2855 "rpc call to node failed (instancelist): %s",
2856 utils.SafeEncode(str(idata)))
2858 nimg.hyp_fail = True
2860 nimg.instances = idata
# Extract free-memory (from the hypervisor info) and free-disk (from the VG
# list) figures into the node image, reporting malformed payloads.
# NOTE(review): extract skips lines (`node = ninfo.name`, the `try:` openers
# before both int() conversions, and the `if not test:` guards) — verify
# upstream before editing.
2862 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2863 """Verifies and computes a node information map
2865 @type ninfo: L{objects.Node}
2866 @param ninfo: the node to check
2867 @param nresult: the remote results for the node
2868 @param nimg: the node image object
2869 @param vg_name: the configured VG name
2873 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2875 # try to read free memory (from the hypervisor)
2876 hv_info = nresult.get(constants.NV_HVINFO, None)
2877 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2878 _ErrorIf(test, constants.CV_ENODEHV, node,
2879 "rpc call to node failed (hvinfo)")
2882 nimg.mfree = int(hv_info["memory_free"])
2883 except (ValueError, TypeError):
2884 _ErrorIf(True, constants.CV_ENODERPC, node,
2885 "node returned invalid nodeinfo, check hypervisor")
2887 # FIXME: devise a free space model for file based instances as well
2888 if vg_name is not None:
2889 test = (constants.NV_VGLIST not in nresult or
2890 vg_name not in nresult[constants.NV_VGLIST])
2891 _ErrorIf(test, constants.CV_ENODELVM, node,
2892 "node didn't return data for the volume group '%s'"
2893 " - it is either missing or broken", vg_name)
2896 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2897 except (ValueError, TypeError):
2898 _ErrorIf(True, constants.CV_ENODERPC, node,
2899 "node returned invalid LVM info, check LVM status")
# Collect per-disk mirror status for all instances on the given nodes via a
# single multi-node RPC, producing {instance: {node: [(success, payload)]}}
# and empty entries for diskless instances.
# NOTE(review): extract skips lines (`node_disks = {}`, the `if not disks:`
# `continue`, `devonly = []`, the RPC's second argument, `instdisk = {}`,
# the `if nres.offline:`/`continue` branches, `msg = nres.fail_msg`, the
# success `data.append(i)` branch, and `instdisk[inst] = {}` for diskless
# instances) — verify upstream before editing.
2901 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2902 """Gets per-disk status information for all instances.
2904 @type nodelist: list of strings
2905 @param nodelist: Node names
2906 @type node_image: dict of (name, L{objects.Node})
2907 @param node_image: Node objects
2908 @type instanceinfo: dict of (name, L{objects.Instance})
2909 @param instanceinfo: Instance objects
2910 @rtype: {instance: {node: [(succes, payload)]}}
2911 @return: a dictionary of per-instance dictionaries with nodes as
2912 keys and disk information as values; the disk information is a
2913 list of tuples (success, payload)
2916 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2919 node_disks_devonly = {}
2920 diskless_instances = set()
2921 diskless = constants.DT_DISKLESS
2923 for nname in nodelist:
# Consider both primary and secondary instances of the node.
2924 node_instances = list(itertools.chain(node_image[nname].pinst,
2925 node_image[nname].sinst))
2926 diskless_instances.update(inst for inst in node_instances
2927 if instanceinfo[inst].disk_template == diskless)
2928 disks = [(inst, disk)
2929 for inst in node_instances
2930 for disk in instanceinfo[inst].disks]
2933 # No need to collect data
2936 node_disks[nname] = disks
2938 # _AnnotateDiskParams makes already copies of the disks
2940 for (inst, dev) in disks:
2941 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2942 self.cfg.SetDiskID(anno_disk, nname)
2943 devonly.append(anno_disk)
2945 node_disks_devonly[nname] = devonly
2947 assert len(node_disks) == len(node_disks_devonly)
2949 # Collect data from all nodes with disks
2950 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2953 assert len(result) == len(node_disks)
2957 for (nname, nres) in result.items():
2958 disks = node_disks[nname]
2961 # No data from this node
2962 data = len(disks) * [(False, "node offline")]
2965 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2966 "while getting disk information: %s", msg)
2968 # No data from this node
2969 data = len(disks) * [(False, msg)]
# Sanitize per-disk payload entries: only 2-tuples are accepted as-is.
2972 for idx, i in enumerate(nres.payload):
2973 if isinstance(i, (tuple, list)) and len(i) == 2:
2976 logging.warning("Invalid result from node %s, entry %d: %s",
2978 data.append((False, "Invalid result from the remote node"))
2980 for ((inst, _), status) in zip(disks, data):
2981 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2983 # Add empty entries for diskless instances.
2984 for inst in diskless_instances:
2985 assert inst not in instdisk
# Consistency checks on the assembled structure before returning it.
2988 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2989 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2990 compat.all(isinstance(s, (tuple, list)) and
2991 len(s) == 2 for s in statuses)
2992 for inst, nnames in instdisk.items()
2993 for nname, statuses in nnames.items())
2994 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
# Staticmethod: for SSH reachability checks, build one endless (cycled)
# iterator of candidate node names per *other* node group.
# NOTE(review): extract skips lines (the @staticmethod decorator, the
# offline filter on the node list comprehension, and the groupby key
# argument) — verify upstream.
2999 def _SshNodeSelector(group_uuid, all_nodes):
3000 """Create endless iterators for all potential SSH check hosts.
3003 nodes = [node for node in all_nodes
3004 if (node.group != group_uuid and
3006 keyfunc = operator.attrgetter("group")
3008 return map(itertools.cycle,
3009 [sorted(map(operator.attrgetter("name"), names))
3010 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
# Classmethod: decide which nodes each group node should SSH-check — all
# in-group peers plus one node drawn round-robin from every other group.
# NOTE(review): extract skips the @classmethod decorator — verify upstream.
3014 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3015 """Choose which nodes should talk to which other nodes.
3017 We will make nodes contact all nodes in their group, and one node from
3020 @warning: This algorithm has a known issue if one node group is much
3021 smaller than others (e.g. just one node). In such a case all other
3022 nodes will talk to the single node.
3025 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3026 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3028 return (online_nodes,
3029 dict((name, sorted([i.next() for i in sel]))
3030 for name in online_nodes))
# Build the hooks environment: cluster tags plus per-node tag variables.
# NOTE(review): extract skips lines (the docstring body, the `env = {`
# opener, and the `return env`) — verify upstream.
3032 def BuildHooksEnv(self):
3035 Cluster-Verify hooks just ran in the post phase and their failure makes
3036 the output be logged in the verify output and the verification to fail.
3040 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3043 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3044 for node in self.my_node_info.values())
# Hooks run on the group's nodes only in the post phase (empty pre list).
3048 def BuildHooksNodes(self):
3049 """Build hooks nodes.
3052 return ([], self.my_node_names)
3054 def Exec(self, feedback_fn):
3055 """Verify integrity of the node group, performing various test on nodes.
3058 # This method has too many local variables. pylint: disable=R0914
3059 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3061 if not self.my_node_names:
3063 feedback_fn("* Empty node group, skipping verification")
3067 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3068 verbose = self.op.verbose
3069 self._feedback_fn = feedback_fn
3071 vg_name = self.cfg.GetVGName()
3072 drbd_helper = self.cfg.GetDRBDHelper()
3073 cluster = self.cfg.GetClusterInfo()
3074 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3075 hypervisors = cluster.enabled_hypervisors
3076 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3078 i_non_redundant = [] # Non redundant instances
3079 i_non_a_balanced = [] # Non auto-balanced instances
3080 i_offline = 0 # Count of offline instances
3081 n_offline = 0 # Count of offline nodes
3082 n_drained = 0 # Count of nodes being drained
3083 node_vol_should = {}
3085 # FIXME: verify OS list
3088 filemap = _ComputeAncillaryFiles(cluster, False)
3090 # do local checksums
3091 master_node = self.master_node = self.cfg.GetMasterNode()
3092 master_ip = self.cfg.GetMasterIP()
3094 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3097 if self.cfg.GetUseExternalMipScript():
3098 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3100 node_verify_param = {
3101 constants.NV_FILELIST:
3102 utils.UniqueSequence(filename
3103 for files in filemap
3104 for filename in files),
3105 constants.NV_NODELIST:
3106 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3107 self.all_node_info.values()),
3108 constants.NV_HYPERVISOR: hypervisors,
3109 constants.NV_HVPARAMS:
3110 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3111 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3112 for node in node_data_list
3113 if not node.offline],
3114 constants.NV_INSTANCELIST: hypervisors,
3115 constants.NV_VERSION: None,
3116 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3117 constants.NV_NODESETUP: None,
3118 constants.NV_TIME: None,
3119 constants.NV_MASTERIP: (master_node, master_ip),
3120 constants.NV_OSLIST: None,
3121 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3122 constants.NV_USERSCRIPTS: user_scripts,
3125 if vg_name is not None:
3126 node_verify_param[constants.NV_VGLIST] = None
3127 node_verify_param[constants.NV_LVLIST] = vg_name
3128 node_verify_param[constants.NV_PVLIST] = [vg_name]
3129 node_verify_param[constants.NV_DRBDLIST] = None
3132 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3135 # FIXME: this needs to be changed per node-group, not cluster-wide
3137 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3138 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3139 bridges.add(default_nicpp[constants.NIC_LINK])
3140 for instance in self.my_inst_info.values():
3141 for nic in instance.nics:
3142 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3143 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3144 bridges.add(full_nic[constants.NIC_LINK])
3147 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3149 # Build our expected cluster state
3150 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3152 vm_capable=node.vm_capable))
3153 for node in node_data_list)
3157 for node in self.all_node_info.values():
3158 path = _SupportsOob(self.cfg, node)
3159 if path and path not in oob_paths:
3160 oob_paths.append(path)
3163 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3165 for instance in self.my_inst_names:
3166 inst_config = self.my_inst_info[instance]
3167 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3170 for nname in inst_config.all_nodes:
3171 if nname not in node_image:
3172 gnode = self.NodeImage(name=nname)
3173 gnode.ghost = (nname not in self.all_node_info)
3174 node_image[nname] = gnode
3176 inst_config.MapLVsByNode(node_vol_should)
3178 pnode = inst_config.primary_node
3179 node_image[pnode].pinst.append(instance)
3181 for snode in inst_config.secondary_nodes:
3182 nimg = node_image[snode]
3183 nimg.sinst.append(instance)
3184 if pnode not in nimg.sbp:
3185 nimg.sbp[pnode] = []
3186 nimg.sbp[pnode].append(instance)
3188 # At this point, we have the in-memory data structures complete,
3189 # except for the runtime information, which we'll gather next
3191 # Due to the way our RPC system works, exact response times cannot be
3192 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3193 # time before and after executing the request, we can at least have a time
3195 nvinfo_starttime = time.time()
3196 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3198 self.cfg.GetClusterName())
3199 nvinfo_endtime = time.time()
3201 if self.extra_lv_nodes and vg_name is not None:
3203 self.rpc.call_node_verify(self.extra_lv_nodes,
3204 {constants.NV_LVLIST: vg_name},
3205 self.cfg.GetClusterName())
3207 extra_lv_nvinfo = {}
3209 all_drbd_map = self.cfg.ComputeDRBDMap()
3211 feedback_fn("* Gathering disk information (%s nodes)" %
3212 len(self.my_node_names))
3213 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3216 feedback_fn("* Verifying configuration file consistency")
3218 # If not all nodes are being checked, we need to make sure the master node
3219 # and a non-checked vm_capable node are in the list.
3220 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3222 vf_nvinfo = all_nvinfo.copy()
3223 vf_node_info = list(self.my_node_info.values())
3224 additional_nodes = []
3225 if master_node not in self.my_node_info:
3226 additional_nodes.append(master_node)
3227 vf_node_info.append(self.all_node_info[master_node])
3228 # Add the first vm_capable node we find which is not included,
3229 # excluding the master node (which we already have)
3230 for node in absent_nodes:
3231 nodeinfo = self.all_node_info[node]
3232 if (nodeinfo.vm_capable and not nodeinfo.offline and
3233 node != master_node):
3234 additional_nodes.append(node)
3235 vf_node_info.append(self.all_node_info[node])
3237 key = constants.NV_FILELIST
3238 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3239 {key: node_verify_param[key]},
3240 self.cfg.GetClusterName()))
3242 vf_nvinfo = all_nvinfo
3243 vf_node_info = self.my_node_info.values()
3245 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3247 feedback_fn("* Verifying node status")
3251 for node_i in node_data_list:
3253 nimg = node_image[node]
3257 feedback_fn("* Skipping offline node %s" % (node,))
3261 if node == master_node:
3263 elif node_i.master_candidate:
3264 ntype = "master candidate"
3265 elif node_i.drained:
3271 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3273 msg = all_nvinfo[node].fail_msg
3274 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3277 nimg.rpc_fail = True
3280 nresult = all_nvinfo[node].payload
3282 nimg.call_ok = self._VerifyNode(node_i, nresult)
3283 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3284 self._VerifyNodeNetwork(node_i, nresult)
3285 self._VerifyNodeUserScripts(node_i, nresult)
3286 self._VerifyOob(node_i, nresult)
3289 self._VerifyNodeLVM(node_i, nresult, vg_name)
3290 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3293 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3294 self._UpdateNodeInstances(node_i, nresult, nimg)
3295 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3296 self._UpdateNodeOS(node_i, nresult, nimg)
3298 if not nimg.os_fail:
3299 if refos_img is None:
3301 self._VerifyNodeOS(node_i, nimg, refos_img)
3302 self._VerifyNodeBridges(node_i, nresult, bridges)
3304 # Check whether all running instancies are primary for the node. (This
3305 # can no longer be done from _VerifyInstance below, since some of the
3306 # wrong instances could be from other node groups.)
3307 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3309 for inst in non_primary_inst:
3310 test = inst in self.all_inst_info
3311 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3312 "instance should not run on node %s", node_i.name)
3313 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3314 "node is running unknown instance %s", inst)
3316 for node, result in extra_lv_nvinfo.items():
3317 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3318 node_image[node], vg_name)
3320 feedback_fn("* Verifying instance status")
3321 for instance in self.my_inst_names:
3323 feedback_fn("* Verifying instance %s" % instance)
3324 inst_config = self.my_inst_info[instance]
3325 self._VerifyInstance(instance, inst_config, node_image,
3327 inst_nodes_offline = []
3329 pnode = inst_config.primary_node
3330 pnode_img = node_image[pnode]
3331 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3332 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3333 " primary node failed", instance)
3335 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3337 constants.CV_EINSTANCEBADNODE, instance,
3338 "instance is marked as running and lives on offline node %s",
3339 inst_config.primary_node)
3341 # If the instance is non-redundant we cannot survive losing its primary
3342 # node, so we are not N+1 compliant. On the other hand we have no disk
3343 # templates with more than one secondary so that situation is not well
3345 # FIXME: does not support file-backed instances
3346 if not inst_config.secondary_nodes:
3347 i_non_redundant.append(instance)
3349 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3350 constants.CV_EINSTANCELAYOUT,
3351 instance, "instance has multiple secondary nodes: %s",
3352 utils.CommaJoin(inst_config.secondary_nodes),
3353 code=self.ETYPE_WARNING)
3355 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3356 pnode = inst_config.primary_node
3357 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3358 instance_groups = {}
3360 for node in instance_nodes:
3361 instance_groups.setdefault(self.all_node_info[node].group,
3365 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3366 # Sort so that we always list the primary node first.
3367 for group, nodes in sorted(instance_groups.items(),
3368 key=lambda (_, nodes): pnode in nodes,
3371 self._ErrorIf(len(instance_groups) > 1,
3372 constants.CV_EINSTANCESPLITGROUPS,
3373 instance, "instance has primary and secondary nodes in"
3374 " different groups: %s", utils.CommaJoin(pretty_list),
3375 code=self.ETYPE_WARNING)
3377 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3378 i_non_a_balanced.append(instance)
3380 for snode in inst_config.secondary_nodes:
3381 s_img = node_image[snode]
3382 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3383 snode, "instance %s, connection to secondary node failed",
3387 inst_nodes_offline.append(snode)
3389 # warn that the instance lives on offline nodes
3390 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3391 "instance has offline secondary node(s) %s",
3392 utils.CommaJoin(inst_nodes_offline))
3393 # ... or ghost/non-vm_capable nodes
3394 for node in inst_config.all_nodes:
3395 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3396 instance, "instance lives on ghost node %s", node)
3397 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3398 instance, "instance lives on non-vm_capable node %s", node)
3400 feedback_fn("* Verifying orphan volumes")
3401 reserved = utils.FieldSet(*cluster.reserved_lvs)
3403 # We will get spurious "unknown volume" warnings if any node of this group
3404 # is secondary for an instance whose primary is in another group. To avoid
3405 # them, we find these instances and add their volumes to node_vol_should.
3406 for inst in self.all_inst_info.values():
3407 for secondary in inst.secondary_nodes:
3408 if (secondary in self.my_node_info
3409 and inst.name not in self.my_inst_info):
3410 inst.MapLVsByNode(node_vol_should)
3413 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3415 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3416 feedback_fn("* Verifying N+1 Memory redundancy")
3417 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3419 feedback_fn("* Other Notes")
3421 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3422 % len(i_non_redundant))
3424 if i_non_a_balanced:
3425 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3426 % len(i_non_a_balanced))
3429 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3432 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3435 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3439 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3440 """Analyze the post-hooks' result
3442 This method analyses the hook result, handles it, and sends some
3443 nicely-formatted feedback back to the user.
3445 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3446 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3447 @param hooks_results: the results of the multi-node hooks rpc call
3448 @param feedback_fn: function used send feedback back to the caller
3449 @param lu_result: previous Exec result
3450 @return: the new Exec result, based on the previous result
3454 # We only really run POST phase hooks, only for non-empty groups,
3455 # and are only interested in their results
3456 if not self.my_node_names:
3459 elif phase == constants.HOOKS_PHASE_POST:
3460 # Used to change hooks' output to proper indentation
3461 feedback_fn("* Hooks Results")
3462 assert hooks_results, "invalid result from hooks"
3464 for node_name in hooks_results:
3465 res = hooks_results[node_name]
3467 test = msg and not res.offline
3468 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3469 "Communication failure in hooks execution: %s", msg)
3470 if res.offline or msg:
3471 # No need to investigate payload if node is offline or gave
3474 for script, hkr, output in res.payload:
3475 test = hkr == constants.HKR_FAIL
3476 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3477 "Script %s failed, output:", script)
3479 output = self._HOOKS_INDENT_RE.sub(" ", output)
3480 feedback_fn("%s" % output)
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  This LU does no verification work itself; it submits one
  L{opcodes.OpGroupVerifyDisks} job per node group, which perform the
  actual checks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Only the list of node groups is read, so shared locks suffice
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])
class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              owned_groups, owned_nodes, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}

    # Map (node, lv_name) to owning instance for all LVs of instances
    # that are administratively up
    nv_dict = _MapInstanceDisksToNodes([inst
      for inst in self.instances.values()
      if inst.admin_state == constants.ADMINST_UP])

    if nv_dict:
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

        # Each payload entry is (size, allocation_info, lv_online); any
        # LV reported but not online means its instance needs
        # activate-disks
        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
          if not (lv_online or inst is None):
            res_instances.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.iteritems():
        res_missing.setdefault(inst, []).append(list(key))

    return (res_nodes, list(res_instances), res_missing)
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      # Only the requested instances and (later) their primary nodes
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE_RES: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = {
      locking.LEVEL_NODE_RES: 1,
      locking.LEVEL_INSTANCE: 0,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True, level=level)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    assert not (frozenset(per_node_disks.keys()) -
                self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)

    changed = []
    for node, dskl in per_node_disks.items():
      # Query on copies so the configuration objects are not modified
      # by SetDiskID
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # Node reports bytes, configuration stores mebibytes
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      # The new master IP must not be in use, otherwise activating it
      # later would clash with the existing host
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master_params.name)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      # Always try to bring the master IP back up, even if the rename
      # failed halfway through
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername
def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    # The valid netmask range depends on whether the cluster uses IPv4
    # or IPv6 primary addresses
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family, errors.ECODE_INVAL)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                               (netmask), errors.ECODE_INVAL)
3846 class LUClusterSetParams(LogicalUnit):
3847 """Change the parameters of the cluster.
3850 HPATH = "cluster-modify"
3851 HTYPE = constants.HTYPE_CLUSTER
3854 def CheckArguments(self):
3858 if self.op.uid_pool:
3859 uidpool.CheckUidPool(self.op.uid_pool)
3861 if self.op.add_uids:
3862 uidpool.CheckUidPool(self.op.add_uids)
3864 if self.op.remove_uids:
3865 uidpool.CheckUidPool(self.op.remove_uids)
3867 if self.op.master_netmask is not None:
3868 _ValidateNetmask(self.cfg, self.op.master_netmask)
3870 if self.op.diskparams:
3871 for dt_params in self.op.diskparams.values():
3872 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3874 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3875 except errors.OpPrereqError, err:
3876 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
3879 def ExpandNames(self):
3880 # FIXME: in the future maybe other cluster params won't require checking on
3881 # all nodes to be modified.
3882 self.needed_locks = {
3883 locking.LEVEL_NODE: locking.ALL_SET,
3884 locking.LEVEL_INSTANCE: locking.ALL_SET,
3885 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3887 self.share_locks = {
3888 locking.LEVEL_NODE: 1,
3889 locking.LEVEL_INSTANCE: 1,
3890 locking.LEVEL_NODEGROUP: 1,
3893 def BuildHooksEnv(self):
3898 "OP_TARGET": self.cfg.GetClusterName(),
3899 "NEW_VG_NAME": self.op.vg_name,
3902 def BuildHooksNodes(self):
3903 """Build hooks nodes.
3906 mn = self.cfg.GetMasterNode()
  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    # Disabling LVM (empty vg_name) is refused while LV-backed disks exist
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)
    # Likewise for clearing the DRBD usermode helper while DRBD disks exist
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   # NOTE(review): the errors.ECODE_* argument
                                   # and closing paren appear to be missing
                                   # here -- verify against upstream
    node_list = self.owned_locks(locking.LEVEL_NODE)
    # if vg_name not None, checks given volume group on all nodes
    # NOTE(review): an "if self.op.vg_name:" guard appears to be missing here
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        # NOTE(review): an "if msg:" guard appears to be missing here
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              constants.MIN_VG_SIZE)
        # NOTE(review): an "if vgstatus:" guard appears to be missing here
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)
    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        # NOTE(review): an "if ninfo.offline:" guard appears to be missing here
          self.LogInfo("Not checking drbd helper on offline node %s", node)
        msg = helpers[node].fail_msg
        # NOTE(review): an "if msg:" guard appears to be missing here
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)
    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      objects.UpgradeBeParams(self.op.beparams)
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
    if self.op.hv_state:
      # Merge the requested static hypervisor state on top of the
      # currently stored one, then fill per-hypervisor defaults
      new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                            self.cluster.hv_state_static)
      self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
                               for hv, values in new_hv_state.items())
    if self.op.disk_state:
      new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
                                                self.cluster.disk_state_static)
      self.new_disk_state = \
        dict((storage, dict((name, cluster.SimpleFillDiskState(values))
                            for name, values in svalues.items()))
             for storage, svalues in new_disk_state.items())
      # NOTE(review): an "if self.op.ipolicy:" guard appears to be missing
      # before the next statement
      self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
      # Warn (but do not fail) about instances that would violate the
      # new instance policy, evaluated per node group
      all_instances = self.cfg.GetAllInstancesInfo().values()
      for group in self.cfg.GetAllNodeGroupsInfo().values():
        instances = frozenset([inst for inst in all_instances
                               if compat.any(node in group.members
                                             for node in inst.all_nodes)])
        new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
        new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
                                            new_ipolicy, instances)
          violations.update(new)
        self.LogWarning("After the ipolicy change the following instances"
                        " violate them: %s",
                        utils.CommaJoin(utils.NiceSort(violations)))
    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          # Deep-copy so filling defaults does not touch the stored NIC params
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
          # check parameter syntax
          # NOTE(review): a "try:" line appears to be missing here
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))
          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      # NOTE(review): an "if nic_errors:" guard appears to be missing here
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))
    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        # NOTE(review): an "else:" appears to be missing here
          self.new_hvparams[hv_name].update(hv_dict)
    # disk template parameters
    self.new_diskparams = objects.FillDict(cluster.diskparams, {})
    if self.op.diskparams:
      for dt_name, dt_params in self.op.diskparams.items():
        # NOTE(review): this condition tests self.op.diskparams, the very
        # dict being iterated, so it is always false; self.new_diskparams
        # was probably intended -- confirm against upstream
        if dt_name not in self.op.diskparams:
          self.new_diskparams[dt_name] = dt_params
        # NOTE(review): an "else:" appears to be missing here
          self.new_diskparams[dt_name].update(dt_params)
    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    # NOTE(review): an "if self.op.os_hvp:" guard appears to be missing here
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        # NOTE(review): an "else:" appears to be missing here
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            # NOTE(review): an "else:" appears to be missing here
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
    # OS parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
        # NOTE(review): the continuation/closing paren of the call above
        # appears to be missing
        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        # NOTE(review): an "else:" appears to be missing here
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])
    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        if hv not in new_hvp:
        # NOTE(review): the body of the "if" above (initializing
        # new_hvp[hv] to an empty dict) appears to be missing
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    # NOTE(review): an "else:" appears to be missing here
      self.hv_list = cluster.enabled_hypervisors
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)
    if self.op.default_iallocator:
      # The default iallocator must be an existing, executable script
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    Applies every parameter set on the opcode to the in-memory cluster
    object, then saves it with a single C{self.cfg.Update} call.  The
    master-netdev change is special-cased: the master IP is shut down on
    the old netdev before the config update and re-activated on the new
    netdev afterwards.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        # an empty string means "disable LVM"; store None in the config
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        # empty string disables the DRBD usermode helper
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      # hvparams were re-validated for the new hypervisor list in
      # CheckPrereq, so both are written together
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.ipolicy:
      self.cluster.ipolicy = self.new_ipolicy
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams
    if self.op.diskparams:
      self.cluster.diskparams = self.new_diskparams
    if self.op.hv_state:
      self.cluster.hv_state_static = self.new_hv_state
    if self.op.disk_state:
      self.cluster.disk_state_static = self.new_disk_state

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      if self.op.maintain_node_health and not constants.ENABLE_CONFD:
        feedback_fn("Note: CONFD was disabled at build time, node health"
                    " maintenance is not useful (still enabling it)")
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    if self.op.use_external_mip_script is not None:
      self.cluster.use_external_mip_script = self.op.use_external_mip_script

    def helper_os(aname, mods, desc):
      # Apply add/remove modifications to one of the cluster OS lists
      # (hidden_os/blacklisted_os), warning on no-op changes.
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      # take the master IP down on the old netdev before switching
      master_params = self.cfg.GetMasterNetworkParameters()
      ems = self.cfg.GetUseExternalMipScript()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params, ems)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (master_params.netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    if self.op.master_netmask:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
      result = self.rpc.call_node_change_master_netmask(master_params.name,
                                                        master_params.netmask,
                                                        self.op.master_netmask,
                                                        master_params.ip,
                                                        master_params.netdev)
      if result.fail_msg:
        # best-effort: report but continue with the config change
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
        feedback_fn(msg)

      self.cluster.master_netmask = self.op.master_netmask

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      # ... and bring the master IP back up on the new netdev
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      ems = self.cfg.GetUseExternalMipScript()
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)
4285 def _UploadHelper(lu, nodes, fname):
4286 """Helper for uploading a file and showing warnings.
4289 if os.path.exists(fname):
4290 result = lu.rpc.call_upload_file(nodes, fname)
4291 for to_node, to_result in result.items():
4292 msg = to_result.fail_msg
4294 msg = ("Copy of file %s to node %s failed: %s" %
4295 (fname, to_node, msg))
4296 lu.proc.LogWarning(msg)
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object, used for the enabled hypervisors and
      the modify_etc_hosts/use_external_mip_script flags
  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  if cluster.use_external_mip_script:
    files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()

  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          sum(map(len, [files_all, files_mc, files_vm]))), \
    "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
    "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  online_set = frozenset(online_nodes)
  vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Touches every node, but only in shared mode (read-only on nodes)
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    # cfg.Update with an unchanged object still pushes config/ssconf out;
    # ancillary files are pushed separately below
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)
class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    # whether an external script manages the master IP
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                   master_params, ems)
    result.Raise("Could not activate the master IP")
class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    # whether an external script manages the master IP
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not deactivate the master IP")
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  Polls the primary node via blockdev_getmirrorstatus until all requested
  disks report no sync in progress (or, with C{oneshot}, polls once).

  @param lu: the calling logical unit
  @param instance: the instance whose disks to wait for
  @param disks: subset of the instance's disks to check, or None for all
  @param oneshot: if True, do not loop until sync is finished
  @return: True if the disks are not degraded at the end

  """
  if not instance.disks or disks is not None and not disks:
    # nothing to wait for
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      # degraded with no sync_percent means degraded but not resyncing
      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
def _BlockdevFind(lu, node, dev, instance):
  """Wrapper around call_blockdev_find to annotate diskparams.

  @param lu: A reference to the lu object
  @param node: The node to call out
  @param dev: The device to find
  @param instance: The instance object the device belongs to
  @returns The result of the rpc call

  """
  [annotated_disk] = _AnnotateDiskParams(instance, [dev], lu.cfg)
  return lu.rpc.call_blockdev_find(node, annotated_disk)
def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
  """Wrapper around L{_CheckDiskConsistencyInner}.

  Annotates the disk with the instance's disk parameters before
  delegating the actual check.

  """
  (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
  return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
                                    ldisk=ldisk)
def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
                               ldisk=False):
  """Check that mirrors are not degraded.

  @attention: The device has to be annotated already.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  @return: True if the device (and recursively its children) is consistent

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    # recurse into child devices (e.g. DRBD over LVM)
    for child in dev.children:
      result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
                                                     on_primary)

  return result
class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  # NOTE(review): the original attribute here is hidden by extraction; it is
  # presumably REG_BGL/REQ_BGL = False -- confirm against upstream
  REG_BGL = False
  # Commands which must not be run against the master node
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      # Power-off/power-cycle of the master would kill this very process;
      # refuse it, with a hint if the master does support OOB
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      # no nodes given: operate on the whole cluster, minus the master for
      # the dangerous commands
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          # stagger power-ons to avoid inrush surges on shared PDUs
          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))
class _OsQuery(_QueryBase):
  # Query implementation for OS objects; fields come from the query module
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    # no per-level locks needed while locking is disabled above
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into an a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    # only online, vm-capable nodes can report OS status
    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    Combines an optional name filter with the legacy status filter; either
    part may be absent.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      pass
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      # best-effort: the node may already be unreachable
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)
class _NodeQuery(_QueryBase):
  # Query implementation for node objects
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    # only live data (RPC to the nodes) requires locking
    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
                                        [lu.cfg.GetHypervisorType()])
      # keep only nodes which answered the RPC with a payload
      live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  All the query logic is delegated to the L{_NodeQuery} helper.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            # map (node, vg/lv) back to the owning instance, "-" if none
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result
# Query implementation for instances; plugs into the generic _QueryBase
# machinery (locking/expansion hooks plus data gathering).
5288 class _InstanceQuery(_QueryBase):
5289 FIELDS = query.INSTANCE_FIELDS
5291 def ExpandNames(self, lu):
# All locks are taken in shared mode; queries are read-only.
5292 lu.needed_locks = {}
5293 lu.share_locks = _ShareAll()
# If explicit names were given, expand them; otherwise (elided 'else')
# query all instances.
5296 self.wanted = _GetWantedInstances(lu, self.names)
5298 self.wanted = locking.ALL_SET
# Locking is only needed for live data (IQ_LIVE), and only if the
# caller asked for locking at all.
5300 self.do_locking = (self.use_locking and
5301 query.IQ_LIVE in self.requested_data)
5303 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5304 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5305 lu.needed_locks[locking.LEVEL_NODE] = []
5306 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
# Group locks are only needed when node information is requested.
5308 self.do_grouplocks = (self.do_locking and
5309 query.IQ_NODES in self.requested_data)
5311 def DeclareLocks(self, lu, level):
5313 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5314 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5316 # Lock all groups used by instances optimistically; this requires going
5317 # via the node before it's locked, requiring verification later on
5318 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
# (set/frozenset constructor line appears elided here)
5320 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5321 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5322 elif level == locking.LEVEL_NODE:
5323 lu._LockInstancesNodes() # pylint: disable=W0212
# Re-verify the optimistic group locking from DeclareLocks: instance/group
# membership may have changed between lookup and lock acquisition.
5326 def _CheckGroupLocks(lu):
5327 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5328 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5330 # Check if node groups for locked instances are still correct
5331 for instance_name in owned_instances:
5332 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5334 def _GetQueryData(self, lu):
5335 """Computes the list of instances and their attributes.
5338 if self.do_grouplocks:
5339 self._CheckGroupLocks(lu)
5341 cluster = lu.cfg.GetClusterInfo()
5342 all_info = lu.cfg.GetAllInstancesInfo()
5344 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5346 instance_list = [all_info[name] for name in instance_names]
# Collect all nodes and hypervisors touched by the selected instances,
# used for the bulk all_instances_info RPC below.
5347 nodes = frozenset(itertools.chain(*(inst.all_nodes
5348 for inst in instance_list)))
5349 hv_list = list(set([inst.hypervisor for inst in instance_list]))
# Instances reported as running on a node other than their configured
# primary are collected here.
5352 wrongnode_inst = set()
5354 # Gather data as requested
5355 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5357 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
# NOTE(review): the per-node loop header ('for name in nodes:') and the
# offline/failure condition lines appear elided in this listing.
5359 result = node_data[name]
5361 # offline nodes will be in both lists
5362 assert result.fail_msg
5363 offline_nodes.append(name)
5365 bad_nodes.append(name)
5366 elif result.payload:
5367 for inst in result.payload:
5368 if inst in all_info:
5369 if all_info[inst].primary_node == name:
5370 live_data.update(result.payload)
5372 wrongnode_inst.add(inst)
5374 # orphan instance; we don't list it here as we don't
5375 # handle this case yet in the output of instance listing
5376 logging.warning("Orphan instance '%s' found on node %s",
5378 # else no instance is alive
5382 if query.IQ_DISKUSAGE in self.requested_data:
# Compute per-instance disk usage from template and disk sizes.
5383 disk_usage = dict((inst.name,
5384 _ComputeDiskSize(inst.disk_template,
5385 [{constants.IDISK_SIZE: disk.size}
5386 for disk in inst.disks]))
5387 for inst in instance_list)
5391 if query.IQ_CONSOLE in self.requested_data:
5393 for inst in instance_list:
5394 if inst.name in live_data:
5395 # Instance is running
5396 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
# (elided 'else:') stopped instances get no console info
5398 consinfo[inst.name] = None
5399 assert set(consinfo.keys()) == set(instance_names)
5403 if query.IQ_NODES in self.requested_data:
5404 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5406 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5407 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5408 for uuid in set(map(operator.attrgetter("group"),
# Bundle everything into the query data container; NotImplemented is
# presumably used for the un-requested pieces (elided initializers).
5414 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5415 disk_usage, offline_nodes, bad_nodes,
5416 live_data, wrongnode_inst, consinfo,
# Generic resource query LU: delegates everything to the query
# implementation class selected from the opcode's 'what' field.
5420 class LUQuery(NoHooksLU):
5421 """Query for resources/items of a certain kind.
5424 # pylint: disable=W0142
5427 def CheckArguments(self):
# Resolve the concrete _QueryBase subclass for the requested kind.
5428 qcls = _GetQueryImplementation(self.op.what)
5430 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5432 def ExpandNames(self):
5433 self.impl.ExpandNames(self)
5435 def DeclareLocks(self, level):
5436 self.impl.DeclareLocks(self, level)
5438 def Exec(self, feedback_fn):
5439 return self.impl.NewStyleQuery(self)
# Returns the available query fields for a resource kind; no locks needed
# since only static field definitions are consulted.
5442 class LUQueryFields(NoHooksLU):
5443 """Query for resources/items of a certain kind.
5446 # pylint: disable=W0142
5449 def CheckArguments(self):
5450 self.qcls = _GetQueryImplementation(self.op.what)
5452 def ExpandNames(self):
# No locking: field metadata is static.
5453 self.needed_locks = {}
5455 def Exec(self, feedback_fn):
5456 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5459 class LUNodeModifyStorage(NoHooksLU):
5460 """Logical unit for modifying a storage volume on a node.
5465 def CheckArguments(self):
5466 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5468 storage_type = self.op.storage_type
# Look up which fields are modifiable for this storage type; a KeyError
# handler appears elided (the OpPrereqError below presumably fires when
# the type has no modifiable fields).
5471 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5473 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5474 " modified" % storage_type,
# Reject any requested change that is not in the modifiable set.
5477 diff = set(self.op.changes.keys()) - modifiable
5479 raise errors.OpPrereqError("The following fields can not be modified for"
5480 " storage units of type '%s': %r" %
5481 (storage_type, list(diff)),
5484 def ExpandNames(self):
# Only the target node needs to be locked.
5485 self.needed_locks = {
5486 locking.LEVEL_NODE: self.op.node_name,
5489 def Exec(self, feedback_fn):
5490 """Computes the list of nodes and their attributes.
5493 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5494 result = self.rpc.call_storage_modify(self.op.node_name,
5495 self.op.storage_type, st_args,
5496 self.op.name, self.op.changes)
5497 result.Raise("Failed to modify storage unit '%s' on %s" %
5498 (self.op.name, self.op.node_name))
5501 class LUNodeAdd(LogicalUnit):
5502 """Logical unit for adding node to the cluster.
5506 HTYPE = constants.HTYPE_NODE
# Node flags copied between opcode and node object in several places below.
5507 _NFLAGS = ["master_capable", "vm_capable"]
5509 def CheckArguments(self):
5510 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5511 # validate/normalize the node name
5512 self.hostname = netutils.GetHostname(name=self.op.node_name,
5513 family=self.primary_ip_family)
5514 self.op.node_name = self.hostname.name
# The master node cannot be re-added, and a group may not be given on
# re-add (the node keeps its existing group).
5516 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5517 raise errors.OpPrereqError("Cannot readd the master node",
5520 if self.op.readd and self.op.group:
5521 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5522 " being readded", errors.ECODE_INVAL)
5524 def BuildHooksEnv(self):
5527 This will run on all nodes before, and on all nodes + the new node after.
5531 "OP_TARGET": self.op.node_name,
5532 "NODE_NAME": self.op.node_name,
5533 "NODE_PIP": self.op.primary_ip,
5534 "NODE_SIP": self.op.secondary_ip,
5535 "MASTER_CAPABLE": str(self.op.master_capable),
5536 "VM_CAPABLE": str(self.op.vm_capable),
5539 def BuildHooksNodes(self):
5540 """Build hooks nodes.
5543 # Exclude added node
5544 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5545 post_nodes = pre_nodes + [self.op.node_name, ]
5547 return (pre_nodes, post_nodes)
5549 def CheckPrereq(self):
5550 """Check prerequisites.
5553 - the new node is not already in the config
5555 - its parameters (single/dual homed) matches the cluster
5557 Any errors are signaled by raising errors.OpPrereqError.
5561 hostname = self.hostname
5562 node = hostname.name
5563 primary_ip = self.op.primary_ip = hostname.ip
5564 if self.op.secondary_ip is None:
# On IPv6 clusters an explicit IPv4 secondary is mandatory because
# the secondary (replication) network is IPv4-only.
5565 if self.primary_ip_family == netutils.IP6Address.family:
5566 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5567 " IPv4 address must be given as secondary",
5569 self.op.secondary_ip = primary_ip
5571 secondary_ip = self.op.secondary_ip
5572 if not netutils.IP4Address.IsValid(secondary_ip):
5573 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5574 " address" % secondary_ip, errors.ECODE_INVAL)
# 'cfg' is presumably a local alias for self.cfg (assignment elided).
5576 node_list = cfg.GetNodeList()
5577 if not self.op.readd and node in node_list:
5578 raise errors.OpPrereqError("Node %s is already in the configuration" %
5579 node, errors.ECODE_EXISTS)
5580 elif self.op.readd and node not in node_list:
5581 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5584 self.changed_primary_ip = False
# Verify IP uniqueness against every existing node; on readd the node
# itself is allowed to keep its IPs (secondary must not change).
5586 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5587 if self.op.readd and node == existing_node_name:
5588 if existing_node.secondary_ip != secondary_ip:
5589 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5590 " address configuration as before",
5592 if existing_node.primary_ip != primary_ip:
5593 self.changed_primary_ip = True
5597 if (existing_node.primary_ip == primary_ip or
5598 existing_node.secondary_ip == primary_ip or
5599 existing_node.primary_ip == secondary_ip or
5600 existing_node.secondary_ip == secondary_ip):
5601 raise errors.OpPrereqError("New node ip address(es) conflict with"
5602 " existing node %s" % existing_node.name,
5603 errors.ECODE_NOTUNIQUE)
5605 # After this 'if' block, None is no longer a valid value for the
5606 # _capable op attributes
# On readd (condition line appears elided), default unspecified flags
# from the existing node object; on fresh add, default them to True.
5608 old_node = self.cfg.GetNodeInfo(node)
5609 assert old_node is not None, "Can't retrieve locked node %s" % node
5610 for attr in self._NFLAGS:
5611 if getattr(self.op, attr) is None:
5612 setattr(self.op, attr, getattr(old_node, attr))
5614 for attr in self._NFLAGS:
5615 if getattr(self.op, attr) is None:
5616 setattr(self.op, attr, True)
5618 if self.op.readd and not self.op.vm_capable:
5619 pri, sec = cfg.GetNodeInstances(node)
# (elided condition: presumably 'if pri or sec:')
5621 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5622 " flag set to false, but it already holds"
5623 " instances" % node,
5626 # check that the type of the node (single versus dual homed) is the
5627 # same as for the master
5628 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5629 master_singlehomed = myself.secondary_ip == myself.primary_ip
5630 newbie_singlehomed = secondary_ip == primary_ip
5631 if master_singlehomed != newbie_singlehomed:
5632 if master_singlehomed:
5633 raise errors.OpPrereqError("The master has no secondary ip but the"
5634 " new node has one",
5637 raise errors.OpPrereqError("The master has a secondary ip but the"
5638 " new node doesn't have one",
5641 # checks reachability
5642 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5643 raise errors.OpPrereqError("Node not reachable by ping",
5644 errors.ECODE_ENVIRON)
5646 if not newbie_singlehomed:
5647 # check reachability from my secondary ip to newbie's secondary ip
5648 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5649 source=myself.secondary_ip):
5650 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5651 " based ping to node daemon port",
5652 errors.ECODE_ENVIRON)
# Decide whether the new node should become a master candidate
# ('exceptions' is presumably built in elided lines above).
5659 if self.op.master_capable:
5660 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5662 self.master_candidate = False
# On readd reuse the existing node object; otherwise build a new one in
# the requested (or default) node group.
5665 self.new_node = old_node
5667 node_group = cfg.LookupNodeGroup(self.op.group)
5668 self.new_node = objects.Node(name=node,
5669 primary_ip=primary_ip,
5670 secondary_ip=secondary_ip,
5671 master_candidate=self.master_candidate,
5672 offline=False, drained=False,
5675 if self.op.ndparams:
5676 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5678 if self.op.hv_state:
5679 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5681 if self.op.disk_state:
5682 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5684 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5685 # it a property on the base class.
# Verify protocol-version compatibility with the node before accepting it.
5686 result = rpc.DnsOnlyRunner().call_version([node])[node]
5687 result.Raise("Can't get version information from node %s" % node)
5688 if constants.PROTOCOL_VERSION == result.payload:
5689 logging.info("Communication to node %s fine, sw version %s match",
5690 node, result.payload)
5692 raise errors.OpPrereqError("Version mismatch master version %s,"
5693 " node version %s" %
5694 (constants.PROTOCOL_VERSION, result.payload),
5695 errors.ECODE_ENVIRON)
5697 def Exec(self, feedback_fn):
5698 """Adds the new node to the cluster.
5701 new_node = self.new_node
5702 node = new_node.name
5704 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5707 # We adding a new node so we assume it's powered
5708 new_node.powered = True
5710 # for re-adds, reset the offline/drained/master-candidate flags;
5711 # we need to reset here, otherwise offline would prevent RPC calls
5712 # later in the procedure; this also means that if the re-add
5713 # fails, we are left with a non-offlined, broken node
# (the 'if self.op.readd:' guard for this branch appears elided)
5715 new_node.drained = new_node.offline = False # pylint: disable=W0201
5716 self.LogInfo("Readding a node, the offline/drained flags were reset")
5717 # if we demote the node, we do cleanup later in the procedure
5718 new_node.master_candidate = self.master_candidate
5719 if self.changed_primary_ip:
5720 new_node.primary_ip = self.op.primary_ip
5722 # copy the master/vm_capable flags
5723 for attr in self._NFLAGS:
5724 setattr(new_node, attr, getattr(self.op, attr))
5726 # notify the user about any possible mc promotion
5727 if new_node.master_candidate:
5728 self.LogInfo("Node will be a master candidate")
5730 if self.op.ndparams:
5731 new_node.ndparams = self.op.ndparams
5733 new_node.ndparams = {}
5735 if self.op.hv_state:
5736 new_node.hv_state_static = self.new_hv_state
5738 if self.op.disk_state:
5739 new_node.disk_state_static = self.new_disk_state
5741 # Add node to our /etc/hosts, and add key to known_hosts
5742 if self.cfg.GetClusterInfo().modify_etc_hosts:
5743 master_node = self.cfg.GetMasterNode()
5744 result = self.rpc.call_etc_hosts_modify(master_node,
5745 constants.ETC_HOSTS_ADD,
5748 result.Raise("Can't update hosts file with new host data")
5750 if new_node.secondary_ip != new_node.primary_ip:
5751 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
# Run node verification from the master to confirm ssh/hostname setup.
5754 node_verify_list = [self.cfg.GetMasterNode()]
5755 node_verify_param = {
5756 constants.NV_NODELIST: ([node], {}),
5757 # TODO: do a node-net-test as well?
5760 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5761 self.cfg.GetClusterName())
5762 for verifier in node_verify_list:
5763 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5764 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5766 for failed in nl_payload:
5767 feedback_fn("ssh/hostname verification failed"
5768 " (checking from %s): %s" %
5769 (verifier, nl_payload[failed]))
5770 raise errors.OpExecError("ssh/hostname verification failed")
# On readd, re-register the existing node and redistribute config; on
# fresh add (elided 'else'), register the new node object.
5773 _RedistributeAncillaryFiles(self)
5774 self.context.ReaddNode(new_node)
5775 # make sure we redistribute the config
5776 self.cfg.Update(new_node, feedback_fn)
5777 # and make sure the new node will not have old files around
5778 if not new_node.master_candidate:
5779 result = self.rpc.call_node_demote_from_mc(new_node.name)
5780 msg = result.fail_msg
5782 self.LogWarning("Node failed to demote itself from master"
5783 " candidate status: %s" % msg)
5785 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5786 additional_vm=self.op.vm_capable)
5787 self.context.AddNode(new_node, self.proc.GetECId())
5790 class LUNodeSetParams(LogicalUnit):
5791 """Modifies the parameters of a node.
5793 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5794 to the node role (as _ROLE_*)
5795 @cvar _R2F: a dictionary from node role to tuples of flags
5796 @cvar _FLAGS: a list of attribute names corresponding to the flags
5799 HPATH = "node-modify"
5800 HTYPE = constants.HTYPE_NODE
# The four possible node roles; exactly one flag combination maps to each.
5802 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5804 (True, False, False): _ROLE_CANDIDATE,
5805 (False, True, False): _ROLE_DRAINED,
5806 (False, False, True): _ROLE_OFFLINE,
5807 (False, False, False): _ROLE_REGULAR,
5809 _R2F = dict((v, k) for k, v in _F2R.items())
5810 _FLAGS = ["master_candidate", "drained", "offline"]
5812 def CheckArguments(self):
5813 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
# Collect all modifiable opcode attributes to validate the request.
5814 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5815 self.op.master_capable, self.op.vm_capable,
5816 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5818 if all_mods.count(None) == len(all_mods):
5819 raise errors.OpPrereqError("Please pass at least one modification",
# A node can be in only one of candidate/drained/offline at a time.
5821 if all_mods.count(True) > 1:
5822 raise errors.OpPrereqError("Can't set the node into more than one"
5823 " state at the same time",
5826 # Boolean value that tells us whether we might be demoting from MC
5827 self.might_demote = (self.op.master_candidate == False or
5828 self.op.offline == True or
5829 self.op.drained == True or
5830 self.op.master_capable == False)
5832 if self.op.secondary_ip:
5833 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5834 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5835 " address" % self.op.secondary_ip,
# auto-promote requires locking all nodes so candidates can be adjusted.
5838 self.lock_all = self.op.auto_promote and self.might_demote
5839 self.lock_instances = self.op.secondary_ip is not None
5841 def _InstanceFilter(self, instance):
5842 """Filter for getting affected instances.
# Only internally-mirrored (e.g. DRBD) instances touching this node are
# affected by a secondary-IP change.
5845 return (instance.disk_template in constants.DTS_INT_MIRROR and
5846 self.op.node_name in instance.all_nodes)
5848 def ExpandNames(self):
# Lock all nodes when auto-promotion may run; otherwise only the target
# (the 'if self.lock_all:' / 'else:' guards appear elided here).
5850 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5852 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5854 # Since modifying a node can have severe effects on currently running
5855 # operations the resource lock is at least acquired in shared mode
5856 self.needed_locks[locking.LEVEL_NODE_RES] = \
5857 self.needed_locks[locking.LEVEL_NODE]
5859 # Get node resource and instance locks in shared mode; they are not used
5860 # for anything but read-only access
5861 self.share_locks[locking.LEVEL_NODE_RES] = 1
5862 self.share_locks[locking.LEVEL_INSTANCE] = 1
5864 if self.lock_instances:
5865 self.needed_locks[locking.LEVEL_INSTANCE] = \
5866 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5868 def BuildHooksEnv(self):
5871 This runs on the master node.
5875 "OP_TARGET": self.op.node_name,
5876 "MASTER_CANDIDATE": str(self.op.master_candidate),
5877 "OFFLINE": str(self.op.offline),
5878 "DRAINED": str(self.op.drained),
5879 "MASTER_CAPABLE": str(self.op.master_capable),
5880 "VM_CAPABLE": str(self.op.vm_capable),
5883 def BuildHooksNodes(self):
5884 """Build hooks nodes.
5887 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5890 def CheckPrereq(self):
5891 """Check prerequisites.
5893 This only checks the instance list against the existing names.
5896 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5898 if self.lock_instances:
5899 affected_instances = \
5900 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5902 # Verify instance locks
# The instance set may have changed between ExpandNames and here;
# bail out so the caller can retry with fresh locks.
5903 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5904 wanted_instances = frozenset(affected_instances.keys())
5905 if wanted_instances - owned_instances:
5906 raise errors.OpPrereqError("Instances affected by changing node %s's"
5907 " secondary IP address have changed since"
5908 " locks were acquired, wanted '%s', have"
5909 " '%s'; retry the operation" %
5911 utils.CommaJoin(wanted_instances),
5912 utils.CommaJoin(owned_instances)),
5915 affected_instances = None
5917 if (self.op.master_candidate is not None or
5918 self.op.drained is not None or
5919 self.op.offline is not None):
5920 # we can't change the master's node flags
5921 if self.op.node_name == self.cfg.GetMasterNode():
5922 raise errors.OpPrereqError("The master role can be changed"
5923 " only via master-failover",
5926 if self.op.master_candidate and not node.master_capable:
5927 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5928 " it a master candidate" % node.name,
5931 if self.op.vm_capable == False:
5932 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
# (elided condition: presumably 'if ipri or isec:')
5934 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5935 " the vm_capable flag" % node.name,
5938 if node.master_candidate and self.might_demote and not self.lock_all:
5939 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5940 # check if after removing the current node, we're missing master
5942 (mc_remaining, mc_should, _) = \
5943 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5944 if mc_remaining < mc_should:
5945 raise errors.OpPrereqError("Not enough master candidates, please"
5946 " pass auto promote option to allow"
5947 " promotion (--auto-promote or RAPI"
5948 " auto_promote=True)", errors.ECODE_STATE)
5950 self.old_flags = old_flags = (node.master_candidate,
5951 node.drained, node.offline)
5952 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5953 self.old_role = old_role = self._F2R[old_flags]
5955 # Check for ineffective changes
5956 for attr in self._FLAGS:
5957 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5958 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5959 setattr(self.op, attr, None)
5961 # Past this point, any flag change to False means a transition
5962 # away from the respective state, as only real changes are kept
5964 # TODO: We might query the real power state if it supports OOB
5965 if _SupportsOob(self.cfg, node):
5966 if self.op.offline is False and not (node.powered or
5967 self.op.powered == True):
5968 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5969 " offline status can be reset") %
5971 elif self.op.powered is not None:
5972 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5973 " as it does not support out-of-band"
5974 " handling") % self.op.node_name)
5976 # If we're being deofflined/drained, we'll MC ourself if needed
5977 if (self.op.drained == False or self.op.offline == False or
5978 (self.op.master_capable and not node.master_capable)):
5979 if _DecideSelfPromotion(self):
5980 self.op.master_candidate = True
5981 self.LogInfo("Auto-promoting node to master candidate")
5983 # If we're no longer master capable, we'll demote ourselves from MC
5984 if self.op.master_capable == False and node.master_candidate:
5985 self.LogInfo("Demoting from master candidate")
5986 self.op.master_candidate = False
# Derive the new role from the requested flags; at most one can be True.
5989 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5990 if self.op.master_candidate:
5991 new_role = self._ROLE_CANDIDATE
5992 elif self.op.drained:
5993 new_role = self._ROLE_DRAINED
5994 elif self.op.offline:
5995 new_role = self._ROLE_OFFLINE
5996 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5997 # False is still in new flags, which means we're un-setting (the
5999 new_role = self._ROLE_REGULAR
6000 else: # no new flags, nothing, keep old role
# (elided: presumably 'new_role = old_role')
6003 self.new_role = new_role
6005 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6006 # Trying to transition out of offline status
6007 result = self.rpc.call_version([node.name])[node.name]
# (elided condition: presumably 'if result.fail_msg:')
6009 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6010 " to report its version: %s" %
6011 (node.name, result.fail_msg),
6014 self.LogWarning("Transitioning node from offline to online state"
6015 " without using re-add. Please make sure the node"
6018 if self.op.secondary_ip:
6019 # Ok even without locking, because this can't be changed by any LU
6020 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6021 master_singlehomed = master.secondary_ip == master.primary_ip
6022 if master_singlehomed and self.op.secondary_ip:
6023 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
6024 " homed cluster", errors.ECODE_INVAL)
6026 assert not (frozenset(affected_instances) -
6027 self.owned_locks(locking.LEVEL_INSTANCE))
# On an offline node we cannot verify anything, so refuse the change
# if instances still reference the node (offline guard line elided).
6030 if affected_instances:
6031 raise errors.OpPrereqError("Cannot change secondary IP address:"
6032 " offline node has instances (%s)"
6033 " configured to use it" %
6034 utils.CommaJoin(affected_instances.keys()))
6036 # On online nodes, check that no instances are running, and that
6037 # the node has the new ip and we can reach it.
6038 for instance in affected_instances.values():
6039 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6040 msg="cannot change secondary ip")
6042 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6043 if master.name != node.name:
6044 # check reachability from master secondary ip to new secondary ip
6045 if not netutils.TcpPing(self.op.secondary_ip,
6046 constants.DEFAULT_NODED_PORT,
6047 source=master.secondary_ip):
6048 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6049 " based ping to node daemon port",
6050 errors.ECODE_ENVIRON)
6052 if self.op.ndparams:
6053 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6054 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6055 self.new_ndparams = new_ndparams
6057 if self.op.hv_state:
6058 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6059 self.node.hv_state_static)
6061 if self.op.disk_state:
6062 self.new_disk_state = \
6063 _MergeAndVerifyDiskState(self.op.disk_state,
6064 self.node.disk_state_static)
6066 def Exec(self, feedback_fn):
6071 old_role = self.old_role
6072 new_role = self.new_role
# 'result' collects (name, value) pairs of applied changes for the caller
# (its initialization appears elided from this listing).
6076 if self.op.ndparams:
6077 node.ndparams = self.new_ndparams
6079 if self.op.powered is not None:
6080 node.powered = self.op.powered
6082 if self.op.hv_state:
6083 node.hv_state_static = self.new_hv_state
6085 if self.op.disk_state:
6086 node.disk_state_static = self.new_disk_state
6088 for attr in ["master_capable", "vm_capable"]:
6089 val = getattr(self.op, attr)
# (elided condition: presumably 'if val is not None:')
6091 setattr(node, attr, val)
6092 result.append((attr, str(val)))
6094 if new_role != old_role:
6095 # Tell the node to demote itself, if no longer MC and not offline
6096 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6097 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
# Demotion failure is non-fatal; cleanup happens best-effort.
6099 self.LogWarning("Node failed to demote itself: %s", msg)
6101 new_flags = self._R2F[new_role]
6102 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
# (elided condition: only record flags that actually changed)
6104 result.append((desc, str(nf)))
6105 (node.master_candidate, node.drained, node.offline) = new_flags
6107 # we locked all nodes, we adjust the CP before updating this node
6109 _AdjustCandidatePool(self, [node.name])
6111 if self.op.secondary_ip:
6112 node.secondary_ip = self.op.secondary_ip
6113 result.append(("secondary_ip", self.op.secondary_ip))
6115 # this will trigger configuration file update, if needed
6116 self.cfg.Update(node, feedback_fn)
6118 # this will trigger job queue propagation or cleanup if the mc
6120 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6121 self.context.ReaddNode(node)
6126 class LUNodePowercycle(NoHooksLU):
6127 """Powercycles a node.
6132 def CheckArguments(self):
6133 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
# Powercycling the master is destructive; require an explicit force flag.
6134 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6135 raise errors.OpPrereqError("The node is the master and the force"
6136 " parameter was not set",
6139 def ExpandNames(self):
6140 """Locking for PowercycleNode.
6142 This is a last-resort option and shouldn't block on other
6143 jobs. Therefore, we grab no locks.
6146 self.needed_locks = {}
6148 def Exec(self, feedback_fn):
6152 result = self.rpc.call_node_powercycle(self.op.node_name,
6153 self.cfg.GetHypervisorType())
6154 result.Raise("Failed to schedule the reboot")
6155 return result.payload
6158 class LUClusterQuery(NoHooksLU):
6159 """Query cluster configuration.
6164 def ExpandNames(self):
# Read-only view of the configuration; no locks needed.
6165 self.needed_locks = {}
6167 def Exec(self, feedback_fn):
6168 """Return cluster config.
6171 cluster = self.cfg.GetClusterInfo()
6174 # Filter just for enabled hypervisors
# ('os_hvp' initialization appears elided from this listing)
6175 for os_name, hv_dict in cluster.os_hvp.items():
6176 os_hvp[os_name] = {}
6177 for hv_name, hv_params in hv_dict.items():
6178 if hv_name in cluster.enabled_hypervisors:
6179 os_hvp[os_name][hv_name] = hv_params
6181 # Convert ip_family to ip_version
6182 primary_ip_version = constants.IP4_VERSION
6183 if cluster.primary_ip_family == netutils.IP6Address.family:
6184 primary_ip_version = constants.IP6_VERSION
# Assemble the flat result dictionary returned to the client.
6187 "software_version": constants.RELEASE_VERSION,
6188 "protocol_version": constants.PROTOCOL_VERSION,
6189 "config_version": constants.CONFIG_VERSION,
6190 "os_api_version": max(constants.OS_API_VERSIONS),
6191 "export_version": constants.EXPORT_VERSION,
6192 "architecture": runtime.GetArchInfo(),
6193 "name": cluster.cluster_name,
6194 "master": cluster.master_node,
6195 "default_hypervisor": cluster.primary_hypervisor,
6196 "enabled_hypervisors": cluster.enabled_hypervisors,
# Only expose hvparams of enabled hypervisors.
6197 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6198 for hypervisor_name in cluster.enabled_hypervisors]),
6200 "beparams": cluster.beparams,
6201 "osparams": cluster.osparams,
6202 "ipolicy": cluster.ipolicy,
6203 "nicparams": cluster.nicparams,
6204 "ndparams": cluster.ndparams,
6205 "diskparams": cluster.diskparams,
6206 "candidate_pool_size": cluster.candidate_pool_size,
6207 "master_netdev": cluster.master_netdev,
6208 "master_netmask": cluster.master_netmask,
6209 "use_external_mip_script": cluster.use_external_mip_script,
6210 "volume_group_name": cluster.volume_group_name,
6211 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6212 "file_storage_dir": cluster.file_storage_dir,
6213 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6214 "maintain_node_health": cluster.maintain_node_health,
6215 "ctime": cluster.ctime,
6216 "mtime": cluster.mtime,
6217 "uuid": cluster.uuid,
6218 "tags": list(cluster.GetTags()),
6219 "uid_pool": cluster.uid_pool,
6220 "default_iallocator": cluster.default_iallocator,
6221 "reserved_lvs": cluster.reserved_lvs,
6222 "primary_ip_version": primary_ip_version,
6223 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6224 "hidden_os": cluster.hidden_os,
6225 "blacklisted_os": cluster.blacklisted_os,
# Thin wrapper around _ClusterQuery that exposes the old-style
# (list-of-values) query interface for configuration values.
6231 class LUClusterConfigQuery(NoHooksLU):
6232 """Return configuration values.
6237 def CheckArguments(self):
# No filter, no locking: cluster queries always run unlocked.
6238 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6240 def ExpandNames(self):
6241 self.cq.ExpandNames(self)
6243 def DeclareLocks(self, level):
6244 self.cq.DeclareLocks(self, level)
6246 def Exec(self, feedback_fn):
6247 result = self.cq.OldStyleQuery(self)
# There is exactly one "row" for the cluster (its return appears elided).
6249 assert len(result) == 1
6254 class _ClusterQuery(_QueryBase):
6255 FIELDS = query.CLUSTER_FIELDS
6257 #: Do not sort (there is only one item)
6260 def ExpandNames(self, lu):
6261 lu.needed_locks = {}
6263 # The following variables interact with _QueryBase._GetNames
6264 self.wanted = locking.ALL_SET
6265 self.do_locking = self.use_locking
# (elided condition: presumably 'if self.do_locking:') — locking is
# meaningless for the single cluster object.
6268 raise errors.OpPrereqError("Can not use locking for cluster queries",
6271 def DeclareLocks(self, lu, level):
6274 def _GetQueryData(self, lu):
6275 """Computes the list of nodes and their attributes.
6278 # Locking is not used
6279 assert not (compat.any(lu.glm.is_owned(level)
6280 for level in locking.LEVELS
6281 if level != locking.LEVEL_CLUSTER) or
6282 self.do_locking or self.use_locking)
# Each requested piece is fetched on demand; un-requested pieces are
# marked NotImplemented so the query layer knows they were not gathered.
6284 if query.CQ_CONFIG in self.requested_data:
6285 cluster = lu.cfg.GetClusterInfo()
6287 cluster = NotImplemented
6289 if query.CQ_QUEUE_DRAINED in self.requested_data:
6290 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6292 drain_flag = NotImplemented
6294 if query.CQ_WATCHER_PAUSE in self.requested_data:
6295 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6297 watcher_pause = NotImplemented
6299 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6302 class LUInstanceActivateDisks(NoHooksLU):
6303 """Bring up an instance's disks.
6308 def ExpandNames(self):
6309 self._ExpandAndLockInstance()
# Node locks are computed later (in DeclareLocks) from the instance.
6310 self.needed_locks[locking.LEVEL_NODE] = []
6311 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6313 def DeclareLocks(self, level):
6314 if level == locking.LEVEL_NODE:
6315 self._LockInstancesNodes()
6317 def CheckPrereq(self):
6318 """Check prerequisites.
6320 This checks that the instance is in the cluster.
6323 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6324 assert self.instance is not None, \
6325 "Cannot retrieve locked instance %s" % self.op.instance_name
6326 _CheckNodeOnline(self, self.instance.primary_node)
6328 def Exec(self, feedback_fn):
6329 """Activate the disks.
6332 disks_ok, disks_info = \
6333 _AssembleInstanceDisks(self, self.instance,
6334 ignore_size=self.op.ignore_size)
# (elided condition: presumably 'if not disks_ok:'); on success the
# elided tail presumably returns disks_info.
6336 raise errors.OpExecError("Cannot activate block devices")
# Assembles an instance's block devices on all involved nodes in two passes:
# pass 1 assembles everywhere in secondary mode, pass 2 promotes only on the
# primary node and records the resulting device paths. ignore_size strips the
# recorded size before assembly (used when the stored size may be stale).
# NOTE(review): incomplete listing — initialization of device_info/disks_ok,
# the `if msg:` guards after each RPC, and the per-disk `to_skip`-style
# bookkeeping lines are missing from this excerpt.
6341 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6343   """Prepare the block devices for an instance.
6345   This sets up the block devices on all nodes.
6347   @type lu: L{LogicalUnit}
6348   @param lu: the logical unit on whose behalf we execute
6349   @type instance: L{objects.Instance}
6350   @param instance: the instance for whose disks we assemble
6351   @type disks: list of L{objects.Disk} or None
6352   @param disks: which disks to assemble (or all, if None)
6353   @type ignore_secondaries: boolean
6354   @param ignore_secondaries: if true, errors on secondary nodes
6355       won't result in an error return from the function
6356   @type ignore_size: boolean
6357   @param ignore_size: if true, the current known size of the disk
6358       will not be used during the disk activation, useful for cases
6359       when the size is wrong
6360   @return: False if the operation failed, otherwise a list of
6361       (host, instance_visible_name, node_visible_name)
6362       with the mapping from node devices to instance devices
6367   iname = instance.name
6368   disks = _ExpandCheckDisks(instance, disks)
6370   # With the two passes mechanism we try to reduce the window of
6371   # opportunity for the race condition of switching DRBD to primary
6372   # before handshaking occured, but we do not eliminate it
6374   # The proper fix would be to wait (with some limits) until the
6375   # connection has been made and drbd transitions from WFConnection
6376   # into any other network-connected state (Connected, SyncTarget,
6379   # 1st pass, assemble on all nodes in secondary mode
6380   for idx, inst_disk in enumerate(disks):
6381     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6383         node_disk = node_disk.Copy()
6384         node_disk.UnsetSize()
6385       lu.cfg.SetDiskID(node_disk, node)
6386       result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6388       msg = result.fail_msg
6390         is_offline_secondary = (node in instance.secondary_nodes and
6392         lu.proc.LogWarning("Could not prepare block device %s on node %s"
6393                            " (is_primary=False, pass=1): %s",
6394                            inst_disk.iv_name, node, msg)
6395         if not (ignore_secondaries or is_offline_secondary):
6398   # FIXME: race condition on drbd migration to primary
6400   # 2nd pass, do only the primary node
6401   for idx, inst_disk in enumerate(disks):
6404     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6405       if node != instance.primary_node:
6408         node_disk = node_disk.Copy()
6409         node_disk.UnsetSize()
6410       lu.cfg.SetDiskID(node_disk, node)
6411       result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6413       msg = result.fail_msg
6415         lu.proc.LogWarning("Could not prepare block device %s on node %s"
6416                            " (is_primary=True, pass=2): %s",
6417                            inst_disk.iv_name, node, msg)
6420         dev_path = result.payload
6422     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6424   # leave the disks configured for the primary node
6425   # this is a workaround that would be fixed better by
6426   # improving the logical/physical id handling
6428     lu.cfg.SetDiskID(disk, instance.primary_node)
6430   return disks_ok, device_info
# Convenience wrapper: assemble an instance's disks and, on failure, shut
# them back down and raise OpExecError. `force` doubles as
# ignore_secondaries for the assembly; when it is explicitly False a hint
# about retrying with '--force' is emitted before raising.
# NOTE(review): incomplete listing — the `if not disks_ok:` guard that
# encloses the cleanup/raise branch is missing from this excerpt.
6433 def _StartInstanceDisks(lu, instance, force):
6434   """Start the disks of an instance.
6437   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6438                                        ignore_secondaries=force)
6440     _ShutdownInstanceDisks(lu, instance)
6441     if force is not None and not force:
6442       lu.proc.LogWarning("", hint="If the message above refers to a"
6444                          " you can retry the operation using '--force'.")
6445     raise errors.OpExecError("Disk consistency error")
# Logical unit that shuts down an instance's block devices. Exec chooses
# between a forced shutdown (_ShutdownInstanceDisks directly) and the safe
# variant that first verifies the instance is down.
# NOTE(review): incomplete listing — the `if self.op.force:`/`else:` pair
# selecting between the two shutdown calls in Exec is missing here.
6448 class LUInstanceDeactivateDisks(NoHooksLU):
6449   """Shutdown an instance's disks.
6454   def ExpandNames(self):
6455     self._ExpandAndLockInstance()
6456     self.needed_locks[locking.LEVEL_NODE] = []
6457     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6459   def DeclareLocks(self, level):
6460     if level == locking.LEVEL_NODE:
6461       self._LockInstancesNodes()
6463   def CheckPrereq(self):
6464     """Check prerequisites.
6466     This checks that the instance is in the cluster.
6469     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6470     assert self.instance is not None, \
6471       "Cannot retrieve locked instance %s" % self.op.instance_name
6473   def Exec(self, feedback_fn):
6474     """Deactivate the disks
6477     instance = self.instance
6479       _ShutdownInstanceDisks(self, instance)
6481       _SafeShutdownInstanceDisks(self, instance)
# Guarded disk shutdown: refuses (via _CheckInstanceState raising) to shut
# down the block devices of an instance that is not administratively down,
# then delegates to _ShutdownInstanceDisks for the given disk subset.
6484 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6485   """Shutdown block devices of an instance.
6487   This function checks if an instance is running, before calling
6488   _ShutdownInstanceDisks.
6491   _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6492   _ShutdownInstanceDisks(lu, instance, disks=disks)
# Normalizes a disk selection: None means "all instance disks"; otherwise the
# given disks must all belong to the instance, else ProgrammerError (caller
# bug, not user error).
# NOTE(review): incomplete listing — the `if disks is None:`/`else:` pair,
# the `return disks` of the else branch, and the tail of the raised message
# are missing from this excerpt.
6495 def _ExpandCheckDisks(instance, disks):
6496   """Return the instance disks selected by the disks list
6498   @type disks: list of L{objects.Disk} or None
6499   @param disks: selected disks
6500   @rtype: list of L{objects.Disk}
6501   @return: selected instance disks to act on
6505     return instance.disks
6507     if not set(disks).issubset(instance.disks):
6508       raise errors.ProgrammerError("Can only act on disks belonging to the"
# Shuts down the block devices of an instance on every node in each disk's
# node tree. Failures are logged as warnings; a failure only counts against
# the overall result when it happens on the primary (unless ignore_primary)
# or on a non-offline secondary.
# NOTE(review): incomplete listing — the success-accumulator initialization,
# the `if msg:` guard, the line flipping the accumulator, and the final
# return are missing from this excerpt.
6513 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6514   """Shutdown block devices of an instance.
6516   This does the shutdown on all nodes of the instance.
6518   If the ignore_primary is false, errors on the primary node are
6523   disks = _ExpandCheckDisks(instance, disks)
6526     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6527       lu.cfg.SetDiskID(top_disk, node)
6528       result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6529       msg = result.fail_msg
6531         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6532                       disk.iv_name, node, msg)
6533         if ((node == instance.primary_node and not ignore_primary) or
6534             (node != instance.primary_node and not result.offline)):
# Prerequisite check: queries one node's hypervisor for free memory and
# raises OpPrereqError (ECODE_ENVIRON for unobtainable/garbage data) if the
# requested amount is not available. `reason` is interpolated into the
# error message for the user.
# NOTE(review): incomplete listing — the final error-code argument of the
# "Not enough memory" raise and the `return free_mem` are missing here.
6539 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6540   """Checks if a node has enough free memory.
6542   This function check if a given node has the needed amount of free
6543   memory. In case the node has less memory or we cannot get the
6544   information from the node, this function raise an OpPrereqError
6547   @type lu: C{LogicalUnit}
6548   @param lu: a logical unit from which we get configuration data
6550   @param node: the node to check
6551   @type reason: C{str}
6552   @param reason: string to use in the error message
6553   @type requested: C{int}
6554   @param requested: the amount of memory in MiB to check for
6555   @type hypervisor_name: C{str}
6556   @param hypervisor_name: the hypervisor to ask for memory stats
6558   @return: node current free memory
6559   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6560       we cannot check the node
6563   nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6564   nodeinfo[node].Raise("Can't get data from node %s" % node,
6565                        prereq=True, ecode=errors.ECODE_ENVIRON)
6566   (_, _, (hv_info, )) = nodeinfo[node].payload
6568   free_mem = hv_info.get("memory_free", None)
6569   if not isinstance(free_mem, int):
6570     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6571                                " was '%s'" % (node, free_mem),
6572                                errors.ECODE_ENVIRON)
6573   if requested > free_mem:
6574     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6575                                " needed %s MiB, available %s MiB" %
6576                                (node, reason, requested, free_mem),
# Fan-out wrapper: for each (volume group -> required MiB) entry in
# req_sizes, runs the per-VG free-space check on all given nodes. Raising
# is delegated entirely to _CheckNodesFreeDiskOnVG.
6581 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6582   """Checks if nodes have enough free disk space in the all VGs.
6584   This function check if all given nodes have the needed amount of
6585   free disk. In case any node has less disk or we cannot get the
6586   information from the node, this function raise an OpPrereqError
6589   @type lu: C{LogicalUnit}
6590   @param lu: a logical unit from which we get configuration data
6591   @type nodenames: C{list}
6592   @param nodenames: the list of node names to check
6593   @type req_sizes: C{dict}
6594   @param req_sizes: the hash of vg and corresponding amount of disk in
6596   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6597       or we cannot check the node
6600   for vg, req_size in req_sizes.items():
6601     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
# Prerequisite check: one RPC fetches VG info for all nodes, then each node
# must report an integer "vg_free" of at least `requested` MiB; otherwise
# OpPrereqError is raised (ECODE_ENVIRON when the data is unusable).
# NOTE(review): incomplete listing — the error-code argument closing the
# final "Not enough disk space" raise is missing from this excerpt.
6604 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6605   """Checks if nodes have enough free disk space in the specified VG.
6607   This function check if all given nodes have the needed amount of
6608   free disk. In case any node has less disk or we cannot get the
6609   information from the node, this function raise an OpPrereqError
6612   @type lu: C{LogicalUnit}
6613   @param lu: a logical unit from which we get configuration data
6614   @type nodenames: C{list}
6615   @param nodenames: the list of node names to check
6617   @param vg: the volume group to check
6618   @type requested: C{int}
6619   @param requested: the amount of disk in MiB to check for
6620   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6621       or we cannot check the node
6624   nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6625   for node in nodenames:
6626     info = nodeinfo[node]
6627     info.Raise("Cannot get current information from node %s" % node,
6628                prereq=True, ecode=errors.ECODE_ENVIRON)
6629     (_, (vg_info, ), _) = info.payload
6630     vg_free = vg_info.get("vg_free", None)
6631     if not isinstance(vg_free, int):
6632       raise errors.OpPrereqError("Can't compute free disk space on node"
6633                                  " %s for vg %s, result was '%s'" %
6634                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
6635     if requested > vg_free:
6636       raise errors.OpPrereqError("Not enough disk space on target node %s"
6637                                  " vg %s: required %d MiB, available %d MiB" %
6638                                  (node, vg, requested, vg_free),
# Prerequisite check: asks the hypervisor on each node for "cpu_total" and
# raises OpPrereqError if any node reports fewer physical CPUs than
# `requested` (ECODE_ENVIRON when the value cannot be obtained/parsed).
# NOTE(review): incomplete listing — the error-code argument closing the
# final raise, and the @type line for hypervisor_name, are missing here.
6642 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6643   """Checks if nodes have enough physical CPUs
6645   This function checks if all given nodes have the needed number of
6646   physical CPUs. In case any node has less CPUs or we cannot get the
6647   information from the node, this function raises an OpPrereqError
6650   @type lu: C{LogicalUnit}
6651   @param lu: a logical unit from which we get configuration data
6652   @type nodenames: C{list}
6653   @param nodenames: the list of node names to check
6654   @type requested: C{int}
6655   @param requested: the minimum acceptable number of physical CPUs
6656   @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6657       or we cannot check the node
6660   nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6661   for node in nodenames:
6662     info = nodeinfo[node]
6663     info.Raise("Cannot get current information from node %s" % node,
6664                prereq=True, ecode=errors.ECODE_ENVIRON)
6665     (_, _, (hv_info, )) = info.payload
6666     num_cpus = hv_info.get("cpu_total", None)
6667     if not isinstance(num_cpus, int):
6668       raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6669                                  " on node %s, result was '%s'" %
6670                                  (node, num_cpus), errors.ECODE_ENVIRON)
6671     if requested > num_cpus:
6672       raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6673                                  "required" % (node, num_cpus, requested),
# Logical unit that starts an instance: validates optional hv/be parameter
# overrides locally and on the nodes, checks node liveness and free memory
# (skipping runtime checks when the primary is offline and the opcode says
# to ignore that), then assembles disks and issues the start RPC. On a
# failed start the disks are shut back down before raising.
# NOTE(review): incomplete listing — several guard lines (`if not msg:`
# equivalents, `else:` branches, REQ_BGL, early returns for the
# offline-primary path) are missing from this excerpt.
6677 class LUInstanceStartup(LogicalUnit):
6678   """Starts an instance.
6681   HPATH = "instance-start"
6682   HTYPE = constants.HTYPE_INSTANCE
6685   def CheckArguments(self):
6687     if self.op.beparams:
6688       # fill the beparams dict
6689       objects.UpgradeBeParams(self.op.beparams)
6690       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6692   def ExpandNames(self):
6693     self._ExpandAndLockInstance()
6694     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6696   def DeclareLocks(self, level):
6697     if level == locking.LEVEL_NODE_RES:
6698       self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6700   def BuildHooksEnv(self):
6703     This runs on master, primary and secondary nodes of the instance.
6707       "FORCE": self.op.force,
6710     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6714   def BuildHooksNodes(self):
6715     """Build hooks nodes.
6718     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6721   def CheckPrereq(self):
6722     """Check prerequisites.
6724     This checks that the instance is in the cluster.
6727     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6728     assert self.instance is not None, \
6729       "Cannot retrieve locked instance %s" % self.op.instance_name
6732     if self.op.hvparams:
6733       # check hypervisor parameter syntax (locally)
6734       cluster = self.cfg.GetClusterInfo()
6735       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6736       filled_hvp = cluster.FillHV(instance)
6737       filled_hvp.update(self.op.hvparams)
6738       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6739       hv_type.CheckParameterSyntax(filled_hvp)
6740       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6742     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6744     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6746     if self.primary_offline and self.op.ignore_offline_nodes:
6747       self.proc.LogWarning("Ignoring offline primary node")
6749       if self.op.hvparams or self.op.beparams:
6750         self.proc.LogWarning("Overridden parameters are ignored")
6752       _CheckNodeOnline(self, instance.primary_node)
6754       bep = self.cfg.GetClusterInfo().FillBE(instance)
6755       bep.update(self.op.beparams)
6757       # check bridges existence
6758       _CheckInstanceBridgesExist(self, instance)
6760       remote_info = self.rpc.call_instance_info(instance.primary_node,
6762                                                 instance.hypervisor)
6763       remote_info.Raise("Error checking node %s" % instance.primary_node,
6764                         prereq=True, ecode=errors.ECODE_ENVIRON)
6765       if not remote_info.payload: # not running already
6766         _CheckNodeFreeMemory(self, instance.primary_node,
6767                              "starting instance %s" % instance.name,
6768                              bep[constants.BE_MINMEM], instance.hypervisor)
6770   def Exec(self, feedback_fn):
6771     """Start the instance.
6774     instance = self.instance
6775     force = self.op.force
6777     if not self.op.no_remember:
6778       self.cfg.MarkInstanceUp(instance.name)
6780     if self.primary_offline:
6781       assert self.op.ignore_offline_nodes
6782       self.proc.LogInfo("Primary node offline, marked instance as started")
6784       node_current = instance.primary_node
6786       _StartInstanceDisks(self, instance, force)
6789       self.rpc.call_instance_start(node_current,
6790                                    (instance, self.op.hvparams,
6792                                    self.op.startup_paused)
6793       msg = result.fail_msg
6795         _ShutdownInstanceDisks(self, instance)
6796         raise errors.OpExecError("Could not start instance: %s" % msg)
# Logical unit that reboots an instance. Soft/hard reboots of a running
# instance go through a single reboot RPC; otherwise (full reboot, or the
# instance was found stopped) the instance is shut down (if running), its
# disks cycled, and it is started again. Always marks the instance up in
# the config at the end of Exec.
# NOTE(review): incomplete listing — the `else:` lines separating the
# soft/hard path from the full-reboot path, and some guard lines, are
# missing from this excerpt.
6799 class LUInstanceReboot(LogicalUnit):
6800   """Reboot an instance.
6803   HPATH = "instance-reboot"
6804   HTYPE = constants.HTYPE_INSTANCE
6807   def ExpandNames(self):
6808     self._ExpandAndLockInstance()
6810   def BuildHooksEnv(self):
6813     This runs on master, primary and secondary nodes of the instance.
6817       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6818       "REBOOT_TYPE": self.op.reboot_type,
6819       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6822     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6826   def BuildHooksNodes(self):
6827     """Build hooks nodes.
6830     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6833   def CheckPrereq(self):
6834     """Check prerequisites.
6836     This checks that the instance is in the cluster.
6839     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6840     assert self.instance is not None, \
6841       "Cannot retrieve locked instance %s" % self.op.instance_name
6842     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6843     _CheckNodeOnline(self, instance.primary_node)
6845     # check bridges existence
6846     _CheckInstanceBridgesExist(self, instance)
6848   def Exec(self, feedback_fn):
6849     """Reboot the instance.
6852     instance = self.instance
6853     ignore_secondaries = self.op.ignore_secondaries
6854     reboot_type = self.op.reboot_type
6856     remote_info = self.rpc.call_instance_info(instance.primary_node,
6858                                               instance.hypervisor)
6859     remote_info.Raise("Error checking node %s" % instance.primary_node)
6860     instance_running = bool(remote_info.payload)
6862     node_current = instance.primary_node
6864     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6865                                             constants.INSTANCE_REBOOT_HARD]:
6866       for disk in instance.disks:
6867         self.cfg.SetDiskID(disk, node_current)
6868       result = self.rpc.call_instance_reboot(node_current, instance,
6870                                              self.op.shutdown_timeout)
6871       result.Raise("Could not reboot instance")
6873       if instance_running:
6874         result = self.rpc.call_instance_shutdown(node_current, instance,
6875                                                  self.op.shutdown_timeout)
6876         result.Raise("Could not shutdown instance for full reboot")
6877         _ShutdownInstanceDisks(self, instance)
6879         self.LogInfo("Instance %s was already stopped, starting now",
6881       _StartInstanceDisks(self, instance, ignore_secondaries)
6882       result = self.rpc.call_instance_start(node_current,
6883                                             (instance, None, None), False)
6884       msg = result.fail_msg
6886         _ShutdownInstanceDisks(self, instance)
6887         raise errors.OpExecError("Could not start instance for"
6888                                  " full reboot: %s" % msg)
6890     self.cfg.MarkInstanceUp(instance.name)
# Logical unit that stops an instance: marks it down in the config (unless
# no_remember), then either just logs (offline primary being ignored) or
# issues the shutdown RPC with the opcode timeout and deactivates the
# disks. A failed shutdown RPC is only warned about, not fatal.
# NOTE(review): incomplete listing — the `else:` lines pairing the
# offline-primary branch and the `if msg:` guard are missing here.
6893 class LUInstanceShutdown(LogicalUnit):
6894   """Shutdown an instance.
6897   HPATH = "instance-stop"
6898   HTYPE = constants.HTYPE_INSTANCE
6901   def ExpandNames(self):
6902     self._ExpandAndLockInstance()
6904   def BuildHooksEnv(self):
6907     This runs on master, primary and secondary nodes of the instance.
6910     env = _BuildInstanceHookEnvByObject(self, self.instance)
6911     env["TIMEOUT"] = self.op.timeout
6914   def BuildHooksNodes(self):
6915     """Build hooks nodes.
6918     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6921   def CheckPrereq(self):
6922     """Check prerequisites.
6924     This checks that the instance is in the cluster.
6927     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6928     assert self.instance is not None, \
6929       "Cannot retrieve locked instance %s" % self.op.instance_name
6931     _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6933     self.primary_offline = \
6934       self.cfg.GetNodeInfo(self.instance.primary_node).offline
6936     if self.primary_offline and self.op.ignore_offline_nodes:
6937       self.proc.LogWarning("Ignoring offline primary node")
6939       _CheckNodeOnline(self, self.instance.primary_node)
6941   def Exec(self, feedback_fn):
6942     """Shutdown the instance.
6945     instance = self.instance
6946     node_current = instance.primary_node
6947     timeout = self.op.timeout
6949     if not self.op.no_remember:
6950       self.cfg.MarkInstanceDown(instance.name)
6952     if self.primary_offline:
6953       assert self.op.ignore_offline_nodes
6954       self.proc.LogInfo("Primary node offline, marked instance as stopped")
6956       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6957       msg = result.fail_msg
6959         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6961       _ShutdownInstanceDisks(self, instance)
# Logical unit that reinstalls an instance's OS: the instance must be down,
# have disks, and its primary node online. Optionally changes the OS type
# (validated against the primary node) and applies per-reinstall OS
# parameters. Exec starts the disks, runs the OS create scripts via RPC,
# and shuts the disks down again.
# NOTE(review): incomplete listing — `else:` branches (e.g. keeping the
# current OS, empty os_inst) and some error-code arguments are missing.
6964 class LUInstanceReinstall(LogicalUnit):
6965   """Reinstall an instance.
6968   HPATH = "instance-reinstall"
6969   HTYPE = constants.HTYPE_INSTANCE
6972   def ExpandNames(self):
6973     self._ExpandAndLockInstance()
6975   def BuildHooksEnv(self):
6978     This runs on master, primary and secondary nodes of the instance.
6981     return _BuildInstanceHookEnvByObject(self, self.instance)
6983   def BuildHooksNodes(self):
6984     """Build hooks nodes.
6987     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6990   def CheckPrereq(self):
6991     """Check prerequisites.
6993     This checks that the instance is in the cluster and is not running.
6996     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6997     assert instance is not None, \
6998       "Cannot retrieve locked instance %s" % self.op.instance_name
6999     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7000                      " offline, cannot reinstall")
7002     if instance.disk_template == constants.DT_DISKLESS:
7003       raise errors.OpPrereqError("Instance '%s' has no disks" %
7004                                  self.op.instance_name,
7006     _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7008     if self.op.os_type is not None:
7010       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7011       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7012       instance_os = self.op.os_type
7014       instance_os = instance.os
7016     nodelist = list(instance.all_nodes)
7018     if self.op.osparams:
7019       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7020       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7021       self.os_inst = i_osdict # the new dict (without defaults)
7025     self.instance = instance
7027   def Exec(self, feedback_fn):
7028     """Reinstall the instance.
7031     inst = self.instance
7033     if self.op.os_type is not None:
7034       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7035       inst.os = self.op.os_type
7036       # Write to configuration
7037       self.cfg.Update(inst, feedback_fn)
7039     _StartInstanceDisks(self, inst, None)
7041       feedback_fn("Running the instance OS create scripts...")
7042       # FIXME: pass debug option from opcode to backend
7043       result = self.rpc.call_instance_os_add(inst.primary_node,
7044                                              (inst, self.os_inst), True,
7045                                              self.op.debug_level)
7046       result.Raise("Could not install OS for instance %s on node %s" %
7047                    (inst.name, inst.primary_node))
7049       _ShutdownInstanceDisks(self, inst)
# Logical unit that recreates some or all of an instance's disks, optionally
# on a new set of nodes (which also changes the primary). Disk selection
# supports per-disk size/mode overrides (_MODIFYABLE); node replacement
# requires recreating all disks. Exec validates everything with asserts,
# then applies logical-id/node changes in one pass before calling
# _CreateDisks so a failure cannot leave a half-updated config.
# NOTE(review): incomplete listing — several `if`/`else`/`continue` guard
# lines, the to_skip bookkeeping, and error-code arguments are missing.
7052 class LUInstanceRecreateDisks(LogicalUnit):
7053   """Recreate an instance's missing disks.
7056   HPATH = "instance-recreate-disks"
7057   HTYPE = constants.HTYPE_INSTANCE
7060   _MODIFYABLE = frozenset([
7061     constants.IDISK_SIZE,
7062     constants.IDISK_MODE,
7065   # New or changed disk parameters may have different semantics
7066   assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7067     constants.IDISK_ADOPT,
7069     # TODO: Implement support changing VG while recreating
7071     constants.IDISK_METAVG,
7074   def CheckArguments(self):
7075     if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7076       # Normalize and convert deprecated list of disk indices
7077       self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7079     duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7081       raise errors.OpPrereqError("Some disks have been specified more than"
7082                                  " once: %s" % utils.CommaJoin(duplicates),
7085     for (idx, params) in self.op.disks:
7086       utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7087       unsupported = frozenset(params.keys()) - self._MODIFYABLE
7089         raise errors.OpPrereqError("Parameters for disk %s try to change"
7090                                    " unmodifyable parameter(s): %s" %
7091                                    (idx, utils.CommaJoin(unsupported)),
7094   def ExpandNames(self):
7095     self._ExpandAndLockInstance()
7096     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7098       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7099       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7101       self.needed_locks[locking.LEVEL_NODE] = []
7102     self.needed_locks[locking.LEVEL_NODE_RES] = []
7104   def DeclareLocks(self, level):
7105     if level == locking.LEVEL_NODE:
7106       # if we replace the nodes, we only need to lock the old primary,
7107       # otherwise we need to lock all nodes for disk re-creation
7108       primary_only = bool(self.op.nodes)
7109       self._LockInstancesNodes(primary_only=primary_only)
7110     elif level == locking.LEVEL_NODE_RES:
7112       self.needed_locks[locking.LEVEL_NODE_RES] = \
7113         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7115   def BuildHooksEnv(self):
7118     This runs on master, primary and secondary nodes of the instance.
7121     return _BuildInstanceHookEnvByObject(self, self.instance)
7123   def BuildHooksNodes(self):
7124     """Build hooks nodes.
7127     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7130   def CheckPrereq(self):
7131     """Check prerequisites.
7133     This checks that the instance is in the cluster and is not running.
7136     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7137     assert instance is not None, \
7138       "Cannot retrieve locked instance %s" % self.op.instance_name
7140       if len(self.op.nodes) != len(instance.all_nodes):
7141         raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7142                                    " %d replacement nodes were specified" %
7143                                    (instance.name, len(instance.all_nodes),
7144                                     len(self.op.nodes)),
7146       assert instance.disk_template != constants.DT_DRBD8 or \
7147           len(self.op.nodes) == 2
7148       assert instance.disk_template != constants.DT_PLAIN or \
7149           len(self.op.nodes) == 1
7150       primary_node = self.op.nodes[0]
7152       primary_node = instance.primary_node
7153     _CheckNodeOnline(self, primary_node)
7155     if instance.disk_template == constants.DT_DISKLESS:
7156       raise errors.OpPrereqError("Instance '%s' has no disks" %
7157                                  self.op.instance_name, errors.ECODE_INVAL)
7159     # if we replace nodes *and* the old primary is offline, we don't
7161     assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7162     assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7163     old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7164     if not (self.op.nodes and old_pnode.offline):
7165       _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7166                           msg="cannot recreate disks")
7169       self.disks = dict(self.op.disks)
7171       self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7173       maxidx = max(self.disks.keys())
7174       if maxidx >= len(instance.disks):
7175         raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7178     if (self.op.nodes and
7179         sorted(self.disks.keys()) != range(len(instance.disks))):
7180       raise errors.OpPrereqError("Can't recreate disks partially and"
7181                                  " change the nodes at the same time",
7184     self.instance = instance
7186   def Exec(self, feedback_fn):
7187     """Recreate the disks.
7190     instance = self.instance
7192     assert (self.owned_locks(locking.LEVEL_NODE) ==
7193             self.owned_locks(locking.LEVEL_NODE_RES))
7196     mods = [] # keeps track of needed changes
7198     for idx, disk in enumerate(instance.disks):
7200         changes = self.disks[idx]
7202         # Disk should not be recreated
7206       # update secondaries for disks, if needed
7207       if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7208         # need to update the nodes and minors
7209         assert len(self.op.nodes) == 2
7210         assert len(disk.logical_id) == 6 # otherwise disk internals
7212         (_, _, old_port, _, _, old_secret) = disk.logical_id
7213         new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7214         new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7215                   new_minors[0], new_minors[1], old_secret)
7216         assert len(disk.logical_id) == len(new_id)
7220       mods.append((idx, new_id, changes))
7222     # now that we have passed all asserts above, we can apply the mods
7223     # in a single run (to avoid partial changes)
7224     for idx, new_id, changes in mods:
7225       disk = instance.disks[idx]
7226       if new_id is not None:
7227         assert disk.dev_type == constants.LD_DRBD8
7228         disk.logical_id = new_id
7230         disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7231                     mode=changes.get(constants.IDISK_MODE, None))
7233     # change primary node, if needed
7235       instance.primary_node = self.op.nodes[0]
7236       self.LogWarning("Changing the instance's nodes, you will have to"
7237                       " remove any disks left on the older nodes manually")
7240     self.cfg.Update(instance, feedback_fn)
7242     _CreateDisks(self, instance, to_skip=to_skip)
# Logical unit that renames an instance. Requires the instance to be down;
# optionally resolves/validates the new name (name_check) and pings the new
# IP to catch collisions (ip_check requires name_check). Exec renames in
# the config, swaps the instance lock, renames file-storage directories if
# applicable, and runs the OS rename script with the disks temporarily
# started (script failure is only a warning — the rename has already
# happened in Ganeti).
# NOTE(review): incomplete listing — error-code arguments, the final
# `return inst.name`, and some guard lines are missing from this excerpt.
7245 class LUInstanceRename(LogicalUnit):
7246   """Rename an instance.
7249   HPATH = "instance-rename"
7250   HTYPE = constants.HTYPE_INSTANCE
7252   def CheckArguments(self):
7256     if self.op.ip_check and not self.op.name_check:
7257       # TODO: make the ip check more flexible and not depend on the name check
7258       raise errors.OpPrereqError("IP address check requires a name check",
7261   def BuildHooksEnv(self):
7264     This runs on master, primary and secondary nodes of the instance.
7267     env = _BuildInstanceHookEnvByObject(self, self.instance)
7268     env["INSTANCE_NEW_NAME"] = self.op.new_name
7271   def BuildHooksNodes(self):
7272     """Build hooks nodes.
7275     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7278   def CheckPrereq(self):
7279     """Check prerequisites.
7281     This checks that the instance is in the cluster and is not running.
7284     self.op.instance_name = _ExpandInstanceName(self.cfg,
7285                                                 self.op.instance_name)
7286     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7287     assert instance is not None
7288     _CheckNodeOnline(self, instance.primary_node)
7289     _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7290                         msg="cannot rename")
7291     self.instance = instance
7293     new_name = self.op.new_name
7294     if self.op.name_check:
7295       hostname = netutils.GetHostname(name=new_name)
7296       if hostname.name != new_name:
7297         self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7299       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7300         raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7301                                     " same as given hostname '%s'") %
7302                                    (hostname.name, self.op.new_name),
7304       new_name = self.op.new_name = hostname.name
7305       if (self.op.ip_check and
7306           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7307         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7308                                    (hostname.ip, new_name),
7309                                    errors.ECODE_NOTUNIQUE)
7311     instance_list = self.cfg.GetInstanceList()
7312     if new_name in instance_list and new_name != instance.name:
7313       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7314                                  new_name, errors.ECODE_EXISTS)
7316   def Exec(self, feedback_fn):
7317     """Rename the instance.
7320     inst = self.instance
7321     old_name = inst.name
7323     rename_file_storage = False
7324     if (inst.disk_template in constants.DTS_FILEBASED and
7325         self.op.new_name != inst.name):
7326       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7327       rename_file_storage = True
7329     self.cfg.RenameInstance(inst.name, self.op.new_name)
7330     # Change the instance lock. This is definitely safe while we hold the BGL.
7331     # Otherwise the new lock would have to be added in acquired mode.
7333     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7334     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7336     # re-read the instance from the configuration after rename
7337     inst = self.cfg.GetInstanceInfo(self.op.new_name)
7339     if rename_file_storage:
7340       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7341       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7342                                                      old_file_storage_dir,
7343                                                      new_file_storage_dir)
7344       result.Raise("Could not rename on node %s directory '%s' to '%s'"
7345                    " (but the instance has been renamed in Ganeti)" %
7346                    (inst.primary_node, old_file_storage_dir,
7347                     new_file_storage_dir))
7349     _StartInstanceDisks(self, inst, None)
7351       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7352                                                  old_name, self.op.debug_level)
7353       msg = result.fail_msg
7355         msg = ("Could not run OS rename script for instance %s on node %s"
7356                " (but the instance has been renamed in Ganeti): %s" %
7357                (inst.name, inst.primary_node, msg))
7358         self.proc.LogWarning(msg)
7360       _ShutdownInstanceDisks(self, inst)
# Logical unit that removes an instance from the cluster: shuts it down on
# the primary (failure fatal unless ignore_failures), then asserts it holds
# the correct node locks and delegates disk and config removal to
# _RemoveInstance.
# NOTE(review): incomplete listing — the `if msg:`/`else:` guard around the
# shutdown-failure handling is missing from this excerpt.
7365 class LUInstanceRemove(LogicalUnit):
7366   """Remove an instance.
7369   HPATH = "instance-remove"
7370   HTYPE = constants.HTYPE_INSTANCE
7373   def ExpandNames(self):
7374     self._ExpandAndLockInstance()
7375     self.needed_locks[locking.LEVEL_NODE] = []
7376     self.needed_locks[locking.LEVEL_NODE_RES] = []
7377     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7379   def DeclareLocks(self, level):
7380     if level == locking.LEVEL_NODE:
7381       self._LockInstancesNodes()
7382     elif level == locking.LEVEL_NODE_RES:
7384       self.needed_locks[locking.LEVEL_NODE_RES] = \
7385         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7387   def BuildHooksEnv(self):
7390     This runs on master, primary and secondary nodes of the instance.
7393     env = _BuildInstanceHookEnvByObject(self, self.instance)
7394     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7397   def BuildHooksNodes(self):
7398     """Build hooks nodes.
7401     nl = [self.cfg.GetMasterNode()]
7402     nl_post = list(self.instance.all_nodes) + nl
7403     return (nl, nl_post)
7405   def CheckPrereq(self):
7406     """Check prerequisites.
7408     This checks that the instance is in the cluster.
7411     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7412     assert self.instance is not None, \
7413       "Cannot retrieve locked instance %s" % self.op.instance_name
7415   def Exec(self, feedback_fn):
7416     """Remove the instance.
7419     instance = self.instance
7420     logging.info("Shutting down instance %s on node %s",
7421                  instance.name, instance.primary_node)
7423     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7424                                              self.op.shutdown_timeout)
7425     msg = result.fail_msg
7427       if self.op.ignore_failures:
7428         feedback_fn("Warning: can't shutdown instance: %s" % msg)
7430         raise errors.OpExecError("Could not shutdown instance %s on"
7432                                  (instance.name, instance.primary_node, msg))
7434     assert (self.owned_locks(locking.LEVEL_NODE) ==
7435             self.owned_locks(locking.LEVEL_NODE_RES))
7436     assert not (set(instance.all_nodes) -
7437                 self.owned_locks(locking.LEVEL_NODE)), \
7438       "Not owning correct locks"
7440     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
# Shared helper: delete an instance's block devices, drop it from the
# cluster configuration, and schedule removal of its instance-level lock.
# @param lu: the calling LogicalUnit (provides cfg, remove_locks)
# @param feedback_fn: callable used to report warnings to the client
# @param instance: the instance object to remove
# @param ignore_failures: if True, disk-removal failures only warn
7443 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7444   """Utility function to remove an instance.
7447   logging.info("Removing block devices for instance %s", instance.name)
# Disk removal failure is fatal unless the caller opted to ignore it.
7449   if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7450     if not ignore_failures:
7451       raise errors.OpExecError("Can't remove instance's disks")
7452     feedback_fn("Warning: can't remove instance's disks")
7454   logging.info("Removing instance %s out of cluster config", instance.name)
7456   lu.cfg.RemoveInstance(instance.name)
# Guard against a lock-removal request already being queued for this LU.
7458   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7459     "Instance lock removal conflict"
7461   # Remove lock for the instance
7462   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
# Thin LU wrapper around _InstanceQuery: all locking and execution logic
# is delegated to the query helper built in CheckArguments.
7465 class LUInstanceQuery(NoHooksLU):
7466   """Logical unit for querying instances.
7469   # pylint: disable=W0142
# Build the query object from the requested names/fields/locking mode.
7472   def CheckArguments(self):
7473     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7474                              self.op.output_fields, self.op.use_locking)
7476   def ExpandNames(self):
7477     self.iq.ExpandNames(self)
7479   def DeclareLocks(self, level):
7480     self.iq.DeclareLocks(self, level)
# Returns the legacy (pre-query2) result format.
7482   def Exec(self, feedback_fn):
7483     return self.iq.OldStyleQuery(self)
# LU that fails an instance over to its secondary (or a chosen/allocated
# target node for externally-mirrored templates). The real work is done
# by a TLMigrateInstance tasklet created in ExpandNames.
7486 class LUInstanceFailover(LogicalUnit):
7487   """Failover an instance.
7490   HPATH = "instance-failover"
7491   HTYPE = constants.HTYPE_INSTANCE
7494   def CheckArguments(self):
7495     """Check the arguments.
# getattr with default: these opcode fields may be absent.
7498     self.iallocator = getattr(self.op, "iallocator", None)
7499     self.target_node = getattr(self.op, "target_node", None)
7501   def ExpandNames(self):
7502     self._ExpandAndLockInstance()
7504     if self.op.target_node is not None:
7505       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7507     self.needed_locks[locking.LEVEL_NODE] = []
7508     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7510     self.needed_locks[locking.LEVEL_NODE_RES] = []
7511     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
# The tasklet carries all failover parameters; elided lines between 7515
# and 7518 presumably pass cleanup/failover flags -- confirm in full file.
7513     ignore_consistency = self.op.ignore_consistency
7514     shutdown_timeout = self.op.shutdown_timeout
7515     self._migrater = TLMigrateInstance(self, self.op.instance_name,
7518                                        ignore_consistency=ignore_consistency,
7519                                        shutdown_timeout=shutdown_timeout,
7520                                        ignore_ipolicy=self.op.ignore_ipolicy)
7521     self.tasklets = [self._migrater]
# For externally-mirrored disks the target may be any node (ALL_SET when
# the iallocator will pick it); otherwise lock the instance's own nodes.
7523   def DeclareLocks(self, level):
7524     if level == locking.LEVEL_NODE:
7525       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7526       if instance.disk_template in constants.DTS_EXT_MIRROR:
7527         if self.op.target_node is None:
7528           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7530           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7531                                                    self.op.target_node]
7532         del self.recalculate_locks[locking.LEVEL_NODE]
7534         self._LockInstancesNodes()
7535     elif level == locking.LEVEL_NODE_RES:
7537       self.needed_locks[locking.LEVEL_NODE_RES] = \
7538         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7540   def BuildHooksEnv(self):
7543     This runs on master, primary and secondary nodes of the instance.
7546     instance = self._migrater.instance
7547     source_node = instance.primary_node
7548     target_node = self.op.target_node
# env dict literal opening is on an elided line before 7550.
7550       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7551       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7552       "OLD_PRIMARY": source_node,
7553       "NEW_PRIMARY": target_node,
# Internally-mirrored (DRBD) instances have exactly one secondary, which
# becomes the new primary; the old primary becomes the new secondary.
7556     if instance.disk_template in constants.DTS_INT_MIRROR:
7557       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7558       env["NEW_SECONDARY"] = source_node
7560       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7562     env.update(_BuildInstanceHookEnvByObject(self, instance))
7566   def BuildHooksNodes(self):
7567     """Build hooks nodes.
7570     instance = self._migrater.instance
7571     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7572     return (nl, nl + [instance.primary_node])
# LU that live-migrates an instance (no shutdown, unlike failover).
# Mirrors LUInstanceFailover's structure and also delegates to a
# TLMigrateInstance tasklet.
7575 class LUInstanceMigrate(LogicalUnit):
7576   """Migrate an instance.
7578   This is migration without shutting down, compared to the failover,
7579   which is done with shutdown.
7582   HPATH = "instance-migrate"
7583   HTYPE = constants.HTYPE_INSTANCE
7586   def ExpandNames(self):
7587     self._ExpandAndLockInstance()
7589     if self.op.target_node is not None:
7590       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7592     self.needed_locks[locking.LEVEL_NODE] = []
7593     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
# NOTE(review): the next two lines repeat LEVEL_NODE, whereas the
# parallel code in LUInstanceFailover (lines 7510-7511) sets
# LEVEL_NODE_RES here. Likely a transcription artifact of this elided
# chunk or a genuine bug -- verify against the full file.
7595     self.needed_locks[locking.LEVEL_NODE] = []
7596     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
# assignment target (self._migrater =) appears to be on an elided line.
7599       TLMigrateInstance(self, self.op.instance_name,
7600                         cleanup=self.op.cleanup,
7602                         fallback=self.op.allow_failover,
7603                         allow_runtime_changes=self.op.allow_runtime_changes,
7604                         ignore_ipolicy=self.op.ignore_ipolicy)
7605     self.tasklets = [self._migrater]
# Same node-lock strategy as LUInstanceFailover.DeclareLocks.
7607   def DeclareLocks(self, level):
7608     if level == locking.LEVEL_NODE:
7609       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7610       if instance.disk_template in constants.DTS_EXT_MIRROR:
7611         if self.op.target_node is None:
7612           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7614           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7615                                                    self.op.target_node]
7616         del self.recalculate_locks[locking.LEVEL_NODE]
7618         self._LockInstancesNodes()
7619     elif level == locking.LEVEL_NODE_RES:
7621       self.needed_locks[locking.LEVEL_NODE_RES] = \
7622         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7624   def BuildHooksEnv(self):
7627     This runs on master, primary and secondary nodes of the instance.
7630     instance = self._migrater.instance
7631     source_node = instance.primary_node
7632     target_node = self.op.target_node
7633     env = _BuildInstanceHookEnvByObject(self, instance)
7635       "MIGRATE_LIVE": self._migrater.live,
7636       "MIGRATE_CLEANUP": self.op.cleanup,
7637       "OLD_PRIMARY": source_node,
7638       "NEW_PRIMARY": target_node,
7639       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
# DRBD-style templates: primary/secondary roles swap on migration.
7642     if instance.disk_template in constants.DTS_INT_MIRROR:
7643       env["OLD_SECONDARY"] = target_node
7644       env["NEW_SECONDARY"] = source_node
7646       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7650   def BuildHooksNodes(self):
7651     """Build hooks nodes.
7654     instance = self._migrater.instance
7655     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7656     return (nl, nl + [instance.primary_node])
# LU that moves an instance to another node by full data copy: shut down
# on the source, create disks on the target, blockdev-export each disk,
# update the config, remove source disks, and restart if it was up.
# Only plain-LVM and file-backed disks can be moved this way.
7659 class LUInstanceMove(LogicalUnit):
7660   """Move an instance by data-copying.
7663   HPATH = "instance-move"
7664   HTYPE = constants.HTYPE_INSTANCE
7667   def ExpandNames(self):
7668     self._ExpandAndLockInstance()
7669     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7670     self.op.target_node = target_node
# The target node is locked immediately; the primary node is appended
# later (LOCKS_APPEND) once the instance's nodes are known.
7671     self.needed_locks[locking.LEVEL_NODE] = [target_node]
7672     self.needed_locks[locking.LEVEL_NODE_RES] = []
7673     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7675   def DeclareLocks(self, level):
7676     if level == locking.LEVEL_NODE:
# Only the primary is needed: secondaries are irrelevant for a data copy.
7677       self._LockInstancesNodes(primary_only=True)
7678     elif level == locking.LEVEL_NODE_RES:
7680       self.needed_locks[locking.LEVEL_NODE_RES] = \
7681         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7683   def BuildHooksEnv(self):
7686     This runs on master, primary and secondary nodes of the instance.
7690       "TARGET_NODE": self.op.target_node,
7691       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7693     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7696   def BuildHooksNodes(self):
7697     """Build hooks nodes.
7701       self.cfg.GetMasterNode(),
7702       self.instance.primary_node,
7703       self.op.target_node,
7707   def CheckPrereq(self):
7708     """Check prerequisites.
7710     This checks that the instance is in the cluster.
7713     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7714     assert self.instance is not None, \
7715       "Cannot retrieve locked instance %s" % self.op.instance_name
7717     node = self.cfg.GetNodeInfo(self.op.target_node)
7718     assert node is not None, \
7719       "Cannot retrieve locked node %s" % self.op.target_node
7721     self.target_node = target_node = node.name
7723     if target_node == instance.primary_node:
7724       raise errors.OpPrereqError("Instance %s is already on the node %s" %
7725                                  (instance.name, target_node),
7728     bep = self.cfg.GetClusterInfo().FillBE(instance)
# Moving is a raw copy, so only simple (LV / file) disk layouts qualify.
7730     for idx, dsk in enumerate(instance.disks):
7731       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7732         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7733                                    " cannot copy" % idx, errors.ECODE_STATE)
7735     _CheckNodeOnline(self, target_node)
7736     _CheckNodeNotDrained(self, target_node)
7737     _CheckNodeVmCapable(self, target_node)
7738     ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7739                                      self.cfg.GetNodeGroup(node.group))
7740     _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7741                             ignore=self.op.ignore_ipolicy)
# Memory is only checked when the instance will be restarted afterwards.
7743     if instance.admin_state == constants.ADMINST_UP:
7744       # check memory requirements on the secondary node
7745       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7746                            instance.name, bep[constants.BE_MAXMEM],
7747                            instance.hypervisor)
7749       self.LogInfo("Not checking memory on the secondary node as"
7750                    " instance will not be started")
7752     # check bridge existance
7753     _CheckInstanceBridgesExist(self, instance, node=target_node)
7755   def Exec(self, feedback_fn):
7756     """Move an instance.
7758     The move is done by shutting it down on its present node, copying
7759     the data over (slow) and starting it on the new node.
7762     instance = self.instance
7764     source_node = instance.primary_node
7765     target_node = self.target_node
7767     self.LogInfo("Shutting down instance %s on source node %s",
7768                  instance.name, source_node)
7770     assert (self.owned_locks(locking.LEVEL_NODE) ==
7771             self.owned_locks(locking.LEVEL_NODE_RES))
7773     result = self.rpc.call_instance_shutdown(source_node, instance,
7774                                              self.op.shutdown_timeout)
7775     msg = result.fail_msg
# presumably wrapped in "if msg:" on an elided line -- TODO confirm
7777       if self.op.ignore_consistency:
7778         self.proc.LogWarning("Could not shutdown instance %s on node %s."
7779                              " Proceeding anyway. Please make sure node"
7780                              " %s is down. Error details: %s",
7781                              instance.name, source_node, source_node, msg)
7783         raise errors.OpExecError("Could not shutdown instance %s on"
7785                                  (instance.name, source_node, msg))
7787     # create the target disks
# On creation failure, roll back: remove target disks and release any
# DRBD minors reserved for the instance before re-raising.
7789       _CreateDisks(self, instance, target_node=target_node)
7790     except errors.OpExecError:
7791       self.LogWarning("Device creation failed, reverting...")
7793       _RemoveDisks(self, instance, target_node=target_node)
7795       self.cfg.ReleaseDRBDMinors(instance.name)
7798     cluster_name = self.cfg.GetClusterInfo().cluster_name
# Copy each disk: assemble on target, export source -> target device;
# failures are collected in errs (initialized on an elided line).
7801     # activate, get path, copy the data over
7802     for idx, disk in enumerate(instance.disks):
7803       self.LogInfo("Copying data for disk %d", idx)
7804       result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7805                                                instance.name, True, idx)
7807         self.LogWarning("Can't assemble newly created disk %d: %s",
7808                         idx, result.fail_msg)
7809         errs.append(result.fail_msg)
7811       dev_path = result.payload
7812       result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7813                                              target_node, dev_path,
7816         self.LogWarning("Can't copy data over for disk %d: %s",
7817                         idx, result.fail_msg)
7818         errs.append(result.fail_msg)
# Any copy error aborts the whole move and reverts the target disks.
7822       self.LogWarning("Some disks failed to copy, aborting")
7824       _RemoveDisks(self, instance, target_node=target_node)
7826       self.cfg.ReleaseDRBDMinors(instance.name)
7827       raise errors.OpExecError("Errors during disk copy: %s" %
# Point of no return: primary node switched in the configuration.
7830     instance.primary_node = target_node
7831     self.cfg.Update(instance, feedback_fn)
7833     self.LogInfo("Removing the disks on the original node")
7834     _RemoveDisks(self, instance, target_node=source_node)
7836     # Only start the instance if it's marked as up
7837     if instance.admin_state == constants.ADMINST_UP:
7838       self.LogInfo("Starting instance %s on node %s",
7839                    instance.name, target_node)
7841       disks_ok, _ = _AssembleInstanceDisks(self, instance,
7842                                            ignore_secondaries=True)
7844         _ShutdownInstanceDisks(self, instance)
7845         raise errors.OpExecError("Can't activate the instance's disks")
7847       result = self.rpc.call_instance_start(target_node,
7848                                             (instance, None, None), False)
7849       msg = result.fail_msg
7851         _ShutdownInstanceDisks(self, instance)
7852         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7853                                  (instance.name, target_node, msg))
# LU that evacuates a node by submitting one OpInstanceMigrate job per
# primary instance on that node (returns the jobs, does not run them).
7856 class LUNodeMigrate(LogicalUnit):
7857   """Migrate all instances from a node.
7860   HPATH = "node-migrate"
7861   HTYPE = constants.HTYPE_NODE
7864   def CheckArguments(self):
7867   def ExpandNames(self):
7868     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
# Shared locks suffice: the actual migrations run as separate jobs.
7870     self.share_locks = _ShareAll()
7871     self.needed_locks = {
7872       locking.LEVEL_NODE: [self.op.node_name],
7875   def BuildHooksEnv(self):
7878     This runs on the master, the primary and all the secondaries.
7882       "NODE_NAME": self.op.node_name,
7883       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7886   def BuildHooksNodes(self):
7887     """Build hooks nodes.
7890     nl = [self.cfg.GetMasterNode()]
7893   def CheckPrereq(self):
# Build one single-opcode job per primary instance on the node.
7896   def Exec(self, feedback_fn):
7897     # Prepare jobs for migration instances
7898     allow_runtime_changes = self.op.allow_runtime_changes
7900       [opcodes.OpInstanceMigrate(instance_name=inst.name,
7903                                  iallocator=self.op.iallocator,
7904                                  target_node=self.op.target_node,
7905                                  allow_runtime_changes=allow_runtime_changes,
7906                                  ignore_ipolicy=self.op.ignore_ipolicy)]
7907       for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7910     # TODO: Run iallocator in this opcode and pass correct placement options to
7911     # OpInstanceMigrate. Since other jobs can modify the cluster between
7912     # running the iallocator and the actual migration, a good consistency model
7913     # will have to be found.
7915     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7916             frozenset([self.op.node_name]))
7918     return ResultWithJobs(jobs)
7921 class TLMigrateInstance(Tasklet):
7922 """Tasklet class for instance migration.
7925 @ivar live: whether the migration will be done live or non-live;
7926 this variable is initalized only after CheckPrereq has run
7927 @type cleanup: boolean
7928 @ivar cleanup: Wheater we cleanup from a failed migration
7929 @type iallocator: string
7930 @ivar iallocator: The iallocator used to determine target_node
7931 @type target_node: string
7932 @ivar target_node: If given, the target_node to reallocate the instance to
7933 @type failover: boolean
7934 @ivar failover: Whether operation results in failover or migration
7935 @type fallback: boolean
7936 @ivar fallback: Whether fallback to failover is allowed if migration not
7938 @type ignore_consistency: boolean
7939 @ivar ignore_consistency: Wheter we should ignore consistency between source
7941 @type shutdown_timeout: int
7942 @ivar shutdown_timeout: In case of failover timeout of the shutdown
7943 @type ignore_ipolicy: bool
7944 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7949 _MIGRATION_POLL_INTERVAL = 1 # seconds
7950 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
# Store all migration/failover parameters on the tasklet; self.live is a
# placeholder here and is decided for real in CheckPrereq.
7952   def __init__(self, lu, instance_name, cleanup=False,
7953                failover=False, fallback=False,
7954                ignore_consistency=False,
7955                allow_runtime_changes=True,
7956                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7957                ignore_ipolicy=False):
7958     """Initializes this class.
7961     Tasklet.__init__(self, lu)
7964     self.instance_name = instance_name
7965     self.cleanup = cleanup
7966     self.live = False # will be overridden later
7967     self.failover = failover
7968     self.fallback = fallback
7969     self.ignore_consistency = ignore_consistency
7970     self.shutdown_timeout = shutdown_timeout
7971     self.ignore_ipolicy = ignore_ipolicy
7972     self.allow_runtime_changes = allow_runtime_changes
# Validates the migration/failover: resolves the target node (iallocator
# or secondary), checks instance policy, memory, bridges, and decides
# whether the migration is live. May downgrade migration to failover
# when fallback is allowed. NOTE(review): several guards/else-branches
# are on elided lines in this chunk -- verify against the full file.
7974   def CheckPrereq(self):
7975     """Check prerequisites.
7977     This checks that the instance is in the cluster.
7980     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7981     instance = self.cfg.GetInstanceInfo(instance_name)
7982     assert instance is not None
7983     self.instance = instance
7984     cluster = self.cfg.GetClusterInfo()
# A down/offline instance cannot be live-migrated; switch to failover
# if the caller allowed the fallback.
7986     if (not self.cleanup and
7987         not instance.admin_state == constants.ADMINST_UP and
7988         not self.failover and self.fallback):
7989       self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7990                       " switching to failover")
7991       self.failover = True
# Only mirrored disk templates support migration/failover at all.
7993     if instance.disk_template not in constants.DTS_MIRRORED:
7998       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7999                                  " %s" % (instance.disk_template, text),
# Externally-mirrored templates: target comes from iallocator or the
# explicit target_node opcode parameter.
8002     if instance.disk_template in constants.DTS_EXT_MIRROR:
8003       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8005       if self.lu.op.iallocator:
8006         self._RunAllocator()
8008         # We set set self.target_node as it is required by
8010         self.target_node = self.lu.op.target_node
8012       # Check that the target node is correct in terms of instance policy
8013       nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8014       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8015       ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8016       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8017                               ignore=self.ignore_ipolicy)
8019       # self.target_node is already populated, either directly or by the
8021       target_node = self.target_node
8022       if self.target_node == instance.primary_node:
8023         raise errors.OpPrereqError("Cannot migrate instance %s"
8024                                    " to its primary (%s)" %
8025                                    (instance.name, instance.primary_node))
8027       if len(self.lu.tasklets) == 1:
8028         # It is safe to release locks only when we're the only tasklet
8030         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8031                       keep=[instance.primary_node, self.target_node])
# Internally-mirrored (DRBD) templates: target is always the secondary;
# an arbitrary target node cannot be honoured.
8034       secondary_nodes = instance.secondary_nodes
8035       if not secondary_nodes:
8036         raise errors.ConfigurationError("No secondary node but using"
8037                                         " %s disk template" %
8038                                         instance.disk_template)
8039       target_node = secondary_nodes[0]
8040       if self.lu.op.iallocator or (self.lu.op.target_node and
8041                                    self.lu.op.target_node != target_node):
8043           text = "failed over"
8046         raise errors.OpPrereqError("Instances with disk template %s cannot"
8047                                    " be %s to arbitrary nodes"
8048                                    " (neither an iallocator nor a target"
8049                                    " node can be passed)" %
8050                                    (instance.disk_template, text),
8052       nodeinfo = self.cfg.GetNodeInfo(target_node)
8053       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8054       ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8055       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8056                               ignore=self.ignore_ipolicy)
8058     i_be = cluster.FillBE(instance)
8060     # check memory requirements on the secondary node
# Minimum memory is enough: ballooning can shrink the instance on arrival.
8061     if (not self.cleanup and
8062         (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8063       self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8064                                                "migrating instance %s" %
8066                                                i_be[constants.BE_MINMEM],
8067                                                instance.hypervisor)
8069       self.lu.LogInfo("Not checking memory on the secondary node as"
8070                       " instance will not be started")
8072     # check if failover must be forced instead of migration
8073     if (not self.cleanup and not self.failover and
8074         i_be[constants.BE_ALWAYS_FAILOVER]):
8075       self.lu.LogInfo("Instance configured to always failover; fallback"
8077       self.failover = True
8079     # check bridge existance
8080     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8082     if not self.cleanup:
8083       _CheckNodeNotDrained(self.lu, target_node)
8084       if not self.failover:
8085         result = self.rpc.call_instance_migratable(instance.primary_node,
# Non-migratable instance: fall back to failover when allowed, else fail.
8087         if result.fail_msg and self.fallback:
8088           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8090           self.failover = True
8092           result.Raise("Can't migrate, please use failover",
8093                        prereq=True, ecode=errors.ECODE_STATE)
8095     assert not (self.failover and self.cleanup)
# Decide live vs non-live: 'live' and 'mode' opcode params are mutually
# exclusive; default mode comes from the hypervisor parameters.
8097     if not self.failover:
8098       if self.lu.op.live is not None and self.lu.op.mode is not None:
8099         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8100                                    " parameters are accepted",
8102       if self.lu.op.live is not None:
8104           self.lu.op.mode = constants.HT_MIGRATION_LIVE
8106           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8107         # reset the 'live' parameter to None so that repeated
8108         # invocations of CheckPrereq do not raise an exception
8109         self.lu.op.live = None
8110       elif self.lu.op.mode is None:
8111         # read the default value from the hypervisor
8112         i_hv = cluster.FillHV(self.instance, skip_globals=False)
8113         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8115       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8117       # Failover is never live
# Record current runtime memory for a later ballooning decision.
8120     if not (self.failover or self.cleanup):
8121       remote_info = self.rpc.call_instance_info(instance.primary_node,
8123                                                 instance.hypervisor)
8124       remote_info.Raise("Error checking instance on node %s" %
8125                         instance.primary_node)
8126       instance_running = bool(remote_info.payload)
8127       if instance_running:
8128         self.current_mem = int(remote_info.payload["memory"])
# Run the configured iallocator in relocation mode to pick the target
# node; validates the result count and stores the choice in
# self.target_node.
8130   def _RunAllocator(self):
8131     """Run the allocator based on input opcode.
8134     # FIXME: add a self.ignore_ipolicy option
8135     ial = IAllocator(self.cfg, self.rpc,
8136                      mode=constants.IALLOCATOR_MODE_RELOC,
8137                      name=self.instance_name,
8138                      relocate_from=[self.instance.primary_node],
8141     ial.Run(self.lu.op.iallocator)
# guarded by a success check on an elided line (presumably
# "if not ial.success:") -- TODO confirm
8144       raise errors.OpPrereqError("Can't compute nodes using"
8145                                  " iallocator '%s': %s" %
8146                                  (self.lu.op.iallocator, ial.info),
8148     if len(ial.result) != ial.required_nodes:
8149       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8150                                  " of nodes (%s), required %s" %
8151                                  (self.lu.op.iallocator, len(ial.result),
8152                                   ial.required_nodes), errors.ECODE_FAULT)
8153     self.target_node = ial.result[0]
8154     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8155                     self.instance_name, self.lu.op.iallocator,
8156                     utils.CommaJoin(ial.result))
# Poll all involved nodes via the drbd_wait_sync RPC until every node
# reports its disks fully resynced, reporting minimum progress found.
8158   def _WaitUntilSync(self):
8159     """Poll with custom rpc for disk sync.
8161     This uses our own step-based rpc call.
8164     self.feedback_fn("* wait until resync is done")
# loop header and all_done/min_percent initialization are on elided lines
8168       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8170                                             (self.instance.disks,
8173       for node, nres in result.items():
8174         nres.Raise("Cannot resync disks on node %s" % node)
8175         node_done, node_percent = nres.payload
8176         all_done = all_done and node_done
8177         if node_percent is not None:
8178           min_percent = min(min_percent, node_percent)
8180         if min_percent < 100:
8181           self.feedback_fn(" - progress: %.1f%%" % min_percent)
# Demote the given node's DRBD devices to the secondary role by closing
# the instance's block devices there.
8184   def _EnsureSecondary(self, node):
8185     """Demote a node to secondary.
8188     self.feedback_fn("* switching node %s to secondary mode" % node)
# SetDiskID fills in node-specific physical IDs before the RPC.
8190     for dev in self.instance.disks:
8191       self.cfg.SetDiskID(dev, node)
8193     result = self.rpc.call_blockdev_close(node, self.instance.name,
8194                                           self.instance.disks)
8195     result.Raise("Cannot change disk to secondary on node %s" % node)
# Disconnect the instance's DRBD devices from the network on all nodes
# (standalone mode), raising on any per-node failure.
8197   def _GoStandalone(self):
8198     """Disconnect from the network.
8201     self.feedback_fn("* changing into standalone mode")
8202     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8203                                                self.instance.disks)
8204     for node, nres in result.items():
8205       nres.Raise("Cannot disconnect disks node %s" % node)
# Reconnect the DRBD devices on all nodes, in dual-master mode when
# multimaster is True (needed for live migration), single-master
# otherwise. The multimaster branch of the msg assignment is elided.
8207   def _GoReconnect(self, multimaster):
8208     """Reconnect to the network.
8214       msg = "single-master"
8215     self.feedback_fn("* changing disks into %s mode" % msg)
8216     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8217                                            (self.instance.disks, self.instance),
8218                                            self.instance.name, multimaster)
8219     for node, nres in result.items():
8220       nres.Raise("Cannot change disks config on node %s" % node)
# Recover from a previously failed migration: determine which node the
# instance actually runs on, fix the configuration if it moved, demote
# the other node, and force disks back into single-master sync.
8222   def _ExecCleanup(self):
8223     """Try to cleanup after a failed migration.
8225     The cleanup is done by:
8226       - check that the instance is running only on one node
8227         (and update the config if needed)
8228       - change disks on its secondary node to secondary
8229       - wait until disks are fully synchronized
8230       - disconnect from the network
8231       - change disks into single-master mode
8232       - wait again until disks are fully synchronized
8235     instance = self.instance
8236     target_node = self.target_node
8237     source_node = self.source_node
8239     # check running on only one node
8240     self.feedback_fn("* checking where the instance actually runs"
8241                      " (if this hangs, the hypervisor might be in"
8243     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8244     for node, result in ins_l.items():
8245       result.Raise("Can't contact node %s" % node)
8247     runningon_source = instance.name in ins_l[source_node].payload
8248     runningon_target = instance.name in ins_l[target_node].payload
# Both or neither running is unrecoverable automatically; bail out with
# manual-intervention instructions.
8250     if runningon_source and runningon_target:
8251       raise errors.OpExecError("Instance seems to be running on two nodes,"
8252                                " or the hypervisor is confused; you will have"
8253                                " to ensure manually that it runs only on one"
8254                                " and restart this operation")
8256     if not (runningon_source or runningon_target):
8257       raise errors.OpExecError("Instance does not seem to be running at all;"
8258                                " in this case it's safer to repair by"
8259                                " running 'gnt-instance stop' to ensure disk"
8260                                " shutdown, and then restarting it")
8262     if runningon_target:
8263       # the migration has actually succeeded, we need to update the config
8264       self.feedback_fn("* instance running on secondary node (%s),"
8265                        " updating config" % target_node)
8266       instance.primary_node = target_node
8267       self.cfg.Update(instance, self.feedback_fn)
8268       demoted_node = source_node
8270       self.feedback_fn("* instance confirmed to be running on its"
8271                        " primary node (%s)" % source_node)
8272       demoted_node = target_node
# DRBD only: demote the losing node and resync; sync errors are ignored
# here because a standalone device cannot sync yet.
8274     if instance.disk_template in constants.DTS_INT_MIRROR:
8275       self._EnsureSecondary(demoted_node)
8277         self._WaitUntilSync()
8278       except errors.OpExecError:
8279         # we ignore here errors, since if the device is standalone, it
8280         # won't be able to sync
8282       self._GoStandalone()
8283       self._GoReconnect(False)
8284       self._WaitUntilSync()
8286     self.feedback_fn("* done")
# Best-effort rollback of disk state after a failed migration: demote
# the target, go standalone, reconnect single-master and resync. For
# externally-mirrored templates there is nothing to revert (the early
# return lives on an elided line). Note the Python 2 "except E, err"
# syntax at line 8301.
8288   def _RevertDiskStatus(self):
8289     """Try to revert the disk status after a failed migration.
8292     target_node = self.target_node
8293     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8297       self._EnsureSecondary(target_node)
8298       self._GoStandalone()
8299       self._GoReconnect(False)
8300       self._WaitUntilSync()
# Failures here only warn: the operator must recover manually.
8301     except errors.OpExecError, err:
8302       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8303                          " please try to recover the instance manually;"
8304                          " error '%s'" % str(err))
# Abort an in-flight migration on both ends: finalize (abort) on the
# destination first, then on the source. Failures are logged rather
# than raised so that disk-status rollback can still be attempted.
8306   def _AbortMigration(self):
8307     """Call the hypervisor code to abort a started migration.
8310     instance = self.instance
8311     target_node = self.target_node
8312     source_node = self.source_node
8313     migration_info = self.migration_info
8315     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8319     abort_msg = abort_result.fail_msg
# guarded by "if abort_msg:" on an elided line -- TODO confirm
8321       logging.error("Aborting migration failed on target node %s: %s",
8322                     target_node, abort_msg)
8323       # Don't raise an exception here, as we stil have to try to revert the
8324       # disk status, even if this step failed.
8326     abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8327                                                                  instance, False, self.live)
8328     abort_msg = abort_result.fail_msg
8330       logging.error("Aborting migration failed on source node %s: %s",
8331                     source_node, abort_msg)
8333 def _ExecMigration(self):
8334 """Migrate an instance.
8336 The migrate is done by:
8337 - change the disks into dual-master mode
8338 - wait until disks are fully synchronized again
8339 - migrate the instance
8340 - change disks on the new secondary node (the old primary) to secondary
8341 - wait until disks are fully synchronized
8342 - change disks into single-master mode
8345 instance = self.instance
8346 target_node = self.target_node
8347 source_node = self.source_node
8349 # Check for hypervisor version mismatch and warn the user.
8350 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8351 None, [self.instance.hypervisor])
8352 for ninfo in nodeinfo.values():
8353 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8355 (_, _, (src_info, )) = nodeinfo[source_node].payload
8356 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8358 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8359 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8360 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8361 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8362 if src_version != dst_version:
8363 self.feedback_fn("* warning: hypervisor version mismatch between"
8364 " source (%s) and target (%s) node" %
8365 (src_version, dst_version))
8367 self.feedback_fn("* checking disk consistency between source and target")
8368 for (idx, dev) in enumerate(instance.disks):
8369 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8370 raise errors.OpExecError("Disk %s is degraded or not fully"
8371 " synchronized on target node,"
8372 " aborting migration" % idx)
8374 if self.current_mem > self.tgt_free_mem:
8375 if not self.allow_runtime_changes:
8376 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8377 " free memory to fit instance %s on target"
8378 " node %s (have %dMB, need %dMB)" %
8379 (instance.name, target_node,
8380 self.tgt_free_mem, self.current_mem))
8381 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8382 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8385 rpcres.Raise("Cannot modify instance runtime memory")
8387 # First get the migration information from the remote node
8388 result = self.rpc.call_migration_info(source_node, instance)
8389 msg = result.fail_msg
8391 log_err = ("Failed fetching source migration information from %s: %s" %
8393 logging.error(log_err)
8394 raise errors.OpExecError(log_err)
8396 self.migration_info = migration_info = result.payload
8398 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8399 # Then switch the disks to master/master mode
8400 self._EnsureSecondary(target_node)
8401 self._GoStandalone()
8402 self._GoReconnect(True)
8403 self._WaitUntilSync()
8405 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8406 result = self.rpc.call_accept_instance(target_node,
8409 self.nodes_ip[target_node])
8411 msg = result.fail_msg
8413 logging.error("Instance pre-migration failed, trying to revert"
8414 " disk status: %s", msg)
8415 self.feedback_fn("Pre-migration failed, aborting")
8416 self._AbortMigration()
8417 self._RevertDiskStatus()
8418 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8419 (instance.name, msg))
8421 self.feedback_fn("* migrating instance to %s" % target_node)
8422 result = self.rpc.call_instance_migrate(source_node, instance,
8423 self.nodes_ip[target_node],
8425 msg = result.fail_msg
8427 logging.error("Instance migration failed, trying to revert"
8428 " disk status: %s", msg)
8429 self.feedback_fn("Migration failed, aborting")
8430 self._AbortMigration()
8431 self._RevertDiskStatus()
8432 raise errors.OpExecError("Could not migrate instance %s: %s" %
8433 (instance.name, msg))
8435 self.feedback_fn("* starting memory transfer")
8436 last_feedback = time.time()
8438 result = self.rpc.call_instance_get_migration_status(source_node,
8440 msg = result.fail_msg
8441 ms = result.payload # MigrationStatus instance
8442 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8443 logging.error("Instance migration failed, trying to revert"
8444 " disk status: %s", msg)
8445 self.feedback_fn("Migration failed, aborting")
8446 self._AbortMigration()
8447 self._RevertDiskStatus()
8448 raise errors.OpExecError("Could not migrate instance %s: %s" %
8449 (instance.name, msg))
8451 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8452 self.feedback_fn("* memory transfer complete")
8455 if (utils.TimeoutExpired(last_feedback,
8456 self._MIGRATION_FEEDBACK_INTERVAL) and
8457 ms.transferred_ram is not None):
8458 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8459 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8460 last_feedback = time.time()
8462 time.sleep(self._MIGRATION_POLL_INTERVAL)
8464 result = self.rpc.call_instance_finalize_migration_src(source_node,
8468 msg = result.fail_msg
8470 logging.error("Instance migration succeeded, but finalization failed"
8471 " on the source node: %s", msg)
8472 raise errors.OpExecError("Could not finalize instance migration: %s" %
8475 instance.primary_node = target_node
8477 # distribute new instance config to the other nodes
8478 self.cfg.Update(instance, self.feedback_fn)
8480 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8484 msg = result.fail_msg
8486 logging.error("Instance migration succeeded, but finalization failed"
8487 " on the target node: %s", msg)
8488 raise errors.OpExecError("Could not finalize instance migration: %s" %
8491 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8492 self._EnsureSecondary(source_node)
8493 self._WaitUntilSync()
8494 self._GoStandalone()
8495 self._GoReconnect(False)
8496 self._WaitUntilSync()
8498 # If the instance's disk template is `rbd' and there was a successful
8499 # migration, unmap the device from the source node.
8500 if self.instance.disk_template == constants.DT_RBD:
8501 disks = _ExpandCheckDisks(instance, instance.disks)
8502 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8504 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8505 msg = result.fail_msg
8507 logging.error("Migration was successful, but couldn't unmap the"
8508 " block device %s on source node %s: %s",
8509 disk.iv_name, source_node, msg)
8510 logging.error("You need to unmap the device %s manually on %s",
8511 disk.iv_name, source_node)
8513 self.feedback_fn("* done")
  def _ExecFailover(self):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    @raise errors.OpExecError: on degraded disks, shutdown, disk
        deactivation/activation or startup failures

    """
    # NOTE(review): this listing elides some source lines (numbering gaps);
    # several guard lines ("if msg:", "else:", call continuations) are
    # missing below -- code is kept exactly as shown.
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
    source_node = instance.primary_node
    target_node = self.target_node

    # Disk consistency is only checked when the instance is administratively up
    if instance.admin_state == constants.ADMINST_UP:
      self.feedback_fn("* checking disk consistency between source and target")
      for (idx, dev) in enumerate(instance.disks):
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
          # a degraded disk is tolerated only if the primary is offline
          # or consistency checking was explicitly disabled
          if primary_node.offline:
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                             (primary_node.name, idx, target_node))
          elif not self.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover" % idx)
      self.feedback_fn("* not checking disk consistency as instance is not"

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    # shutdown failures are tolerated (with a warning) only when consistency
    # is ignored or the primary node is already offline
    if self.ignore_consistency or primary_node.offline:
      self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                         " proceeding anyway; please make sure node"
                         " %s is down; error details: %s",
                         instance.name, source_node, source_node, msg)
      raise errors.OpExecError("Could not shutdown instance %s on"
                               (instance.name, source_node, msg))

    self.feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.feedback_fn("* activating the instance's disks on target node %s" %
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
                                           ignore_secondaries=True)
      # on assembly failure, roll back the disk activation before raising
      _ShutdownInstanceDisks(self.lu, instance)
      raise errors.OpExecError("Can't activate the instance's disks")

      self.feedback_fn("* starting the instance on the target node %s" %
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
      msg = result.fail_msg
      # on startup failure, deactivate the disks again before raising
      _ShutdownInstanceDisks(self.lu, instance)
      raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                               (instance.name, target_node, msg))
  def Exec(self, feedback_fn):
    """Perform the migration.

    @param feedback_fn: callable used to report progress back to the caller

    """
    # NOTE(review): the failover/cleanup/migration dispatch conditions are
    # elided in this listing; code is kept exactly as shown.
    self.feedback_fn = feedback_fn
    self.source_node = self.instance.primary_node

    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
      self.target_node = self.instance.secondary_nodes[0]
      # Otherwise self.target_node has been populated either
      # directly, or through an iallocator.

    self.all_nodes = [self.source_node, self.target_node]
    # map node name -> secondary IP, used for migration traffic
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))

    feedback_fn("Failover instance %s" % self.instance.name)
    self._ExecFailover()
    feedback_fn("Migrating instance %s" % self.instance.name)
    return self._ExecCleanup()
    return self._ExecMigration()
# NOTE(review): the tail of the signature and of the forwarded call are
# elided in this listing; code is kept exactly as shown.
def _CreateBlockDev(lu, node, instance, device, force_create, info,
  """Wrapper around L{_CreateBlockDevInner}.

  This method annotates the root device first.

  """
  (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
  return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8635 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8637 """Create a tree of block devices on a given node.
8639 If this device type has to be created on secondaries, create it and
8642 If not, just recurse to children keeping the same 'force' value.
8644 @attention: The device has to be annotated already.
8646 @param lu: the lu on whose behalf we execute
8647 @param node: the node on which to create the device
8648 @type instance: L{objects.Instance}
8649 @param instance: the instance which owns the device
8650 @type device: L{objects.Disk}
8651 @param device: the device to create
8652 @type force_create: boolean
8653 @param force_create: whether to force creation of this device; this
8654 will be change to True whenever we find a device which has
8655 CreateOnSecondary() attribute
8656 @param info: the extra 'metadata' we should attach to the device
8657 (this will be represented as a LVM tag)
8658 @type force_open: boolean
8659 @param force_open: this parameter will be passes to the
8660 L{backend.BlockdevCreate} function where it specifies
8661 whether we run on primary or not, and it affects both
8662 the child assembly and the device own Open() execution
8665 if device.CreateOnSecondary():
8669 for child in device.children:
8670 _CreateBlockDevInner(lu, node, instance, child, force_create,
8673 if not force_create:
8676 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8679 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8680 """Create a single block device on a given node.
8682 This will not recurse over children of the device, so they must be
8685 @param lu: the lu on whose behalf we execute
8686 @param node: the node on which to create the device
8687 @type instance: L{objects.Instance}
8688 @param instance: the instance which owns the device
8689 @type device: L{objects.Disk}
8690 @param device: the device to create
8691 @param info: the extra 'metadata' we should attach to the device
8692 (this will be represented as a LVM tag)
8693 @type force_open: boolean
8694 @param force_open: this parameter will be passes to the
8695 L{backend.BlockdevCreate} function where it specifies
8696 whether we run on primary or not, and it affects both
8697 the child assembly and the device own Open() execution
8700 lu.cfg.SetDiskID(device, node)
8701 result = lu.rpc.call_blockdev_create(node, device, device.size,
8702 instance.name, force_open, info)
8703 result.Raise("Can't create block device %s on"
8704 " node %s for instance %s" % (device, node, instance.name))
8705 if device.physical_id is None:
8706 device.physical_id = result.payload
# NOTE(review): the accumulator initialization, the loop header over
# "exts" and the final return are elided in this listing; code is kept
# exactly as shown.
def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
    # one cluster-unique ID per extension, suffixed with that extension
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
# NOTE(review): the params= arguments of the two LVs, the minor/secret part
# of the DRBD logical_id and the final return are elided in this listing;
# code is kept exactly as shown.
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  # exactly one VG name and one LV name for data, one of each for metadata
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  # the data LV carries the instance payload
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]),
  # the metadata LV is fixed-size (DRBD_META_SIZE)
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                          logical_id=(vgnames[1], names[1]),
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                          children=[dev_data, dev_meta],
                          iv_name=iv_name, params={})
# Disk template -> name prefix used when generating unique disk names.
# NOTE(review): the closing braces of both dicts are elided in this listing.
_DISK_TEMPLATE_NAME_PREFIX = {
  constants.DT_PLAIN: "",
  constants.DT_RBD: ".rbd",

# Disk template -> logical device type of the generated disks.
_DISK_TEMPLATE_DEVICE_TYPE = {
  constants.DT_PLAIN: constants.LD_LV,
  constants.DT_FILE: constants.LD_FILE,
  constants.DT_SHARED_FILE: constants.LD_FILE,
  constants.DT_BLOCK: constants.LD_BLOCKDEV,
  constants.DT_RBD: constants.LD_RBD,
# NOTE(review): this listing elides a number of source lines (branch bodies,
# "else:" arms, several call continuations); code is kept exactly as shown.
def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
    secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
    feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
    _req_shr_file_storage=opcodes.RequireSharedFileStorage):
  """Generate the entire disk layout for a given template type.

  The C{_req_*} parameters exist to allow overriding the storage-enabled
  checks in tests.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  if template_name == constants.DT_DISKLESS:
  elif template_name == constants.DT_DRBD8:
    # DRBD needs exactly one secondary node to mirror to
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
    drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]

    # each disk gets a pair of LV names: data and metadata volumes
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1])
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
      raise errors.ProgrammerError("Wrong template configuration")

    # file-based templates require the corresponding storage to be enabled
    if template_name == constants.DT_FILE:
    elif template_name == constants.DT_SHARED_FILE:
      _req_shr_file_storage()

    name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
    if name_prefix is None:
      names = _GenerateUniqueNames(lu, ["%s.disk%s" %
                                        (name_prefix, base_index + i)
                                        for i in range(disk_count)])

    # per-template function computing each disk's logical_id
    if template_name == constants.DT_PLAIN:
      def logical_id_fn(idx, _, disk):
        vg = disk.get(constants.IDISK_VG, vgname)
        return (vg, names[idx])
    elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
        lambda _, disk_index, disk: (file_driver,
                                     "%s/disk%d" % (file_storage_dir,
    elif template_name == constants.DT_BLOCK:
        lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
                                       disk[constants.IDISK_ADOPT])
    elif template_name == constants.DT_RBD:
      logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
      raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)

    dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      size = disk[constants.IDISK_SIZE]
      feedback_fn("* disk %s, size %s" %
                  (disk_index, utils.FormatUnit(size, "h")))
      disks.append(objects.Disk(dev_type=dev_type, size=size,
                                logical_id=logical_id_fn(idx, disk_index, disk),
                                iv_name="disk/%d" % disk_index,
                                mode=disk[constants.IDISK_MODE],
8855 def _GetInstanceInfoText(instance):
8856 """Compute that text that should be added to the disk's metadata.
8859 return "originstname+%s" % instance.name
8862 def _CalcEta(time_taken, written, total_size):
8863 """Calculates the ETA based on size written and total size.
8865 @param time_taken: The time taken so far
8866 @param written: amount written so far
8867 @param total_size: The total size of data to be written
8868 @return: The remaining time in seconds
8871 avg_time = time_taken / float(written)
8872 return (total_size - written) * avg_time
# NOTE(review): this listing elides several source lines (the try/finally
# around the wipe, offset/size initialization, success guards and some call
# continuations); code is kept exactly as shown.
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  # sync must be paused while wiping, otherwise DRBD would re-sync the
  # freshly-wiped areas
  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node,
                                                  (instance.disks, instance),
  result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)

  for idx, success in enumerate(result.payload):
      logging.warn("pause-sync of instance %s for disks %d failed",

    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur
      wipe_chunk_size = int(wipe_chunk_size)

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      start_time = time.time()
      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        # report progress at most once per minute
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))

    # resume syncing even if the wipe failed (this runs in a finally block)
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node,
                                                    (instance.disks, instance),
      lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
                    " please have a look at the status and troubleshoot"
                    " the issue: %s", node, result.fail_msg)
      for idx, success in enumerate(result.payload):
          lu.LogWarning("Resume sync of disk %d failed, please have a"
                        " look at the status and troubleshoot the issue", idx)
          logging.warn("resume-sync of instance %s for disks %d failed",
# NOTE(review): the else-branch overriding pnode/all_nodes with target_node,
# the directory-creation guard and the loop "continue" are elided in this
# listing; code is kept exactly as shown.
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation

  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes

  # file-based disks need their containing directory created first
  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
    logging.info("Creating disk %s for instance '%s'", idx, instance.name)
    for node in all_nodes:
      # creation is only forced on the primary node
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
# NOTE(review): this listing elides several source lines (the "all_result"
# initialization, per-disk target_node guard, fail_msg checks and the final
# return); code is kept exactly as shown.
def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @type ignore_failures: boolean
  @param ignore_failures: whether to release DRBD ports even if some
      removals failed

  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  ports_to_release = set()
  anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
  for (idx, device) in enumerate(anno_disks):
      # with a target_node override only that node is cleaned up...
      edata = [(target_node, device)]
      # ...otherwise every node in the device tree is
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      result = lu.rpc.call_blockdev_remove(node, disk)
        lu.LogWarning("Could not remove disk %s on node %s,"
                      " continuing anyway: %s", idx, node, result.fail_msg)
        # offline secondaries do not count as a failure
        if not (result.offline and node != instance.primary_node):

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      ports_to_release.add(device.logical_id[2])

  if all_result or ignore_failures:
    for port in ports_to_release:
      lu.cfg.AddTcpUdpPort(port)

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
# NOTE(review): this listing elides the "vgs = {}" initialization, the loop
# header over disks, the inner return and the "req_size_dict = {" opener;
# code is kept exactly as shown.
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  @raise errors.ProgrammerError: for unknown disk templates

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
      # NOTE(review): the .get() key below looks wrong -- it should
      # probably be disk[constants.IDISK_VG] (matching the assignment key),
      # otherwise per-VG sizes never accumulate; TODO confirm upstream.
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload

  # Required free disk space as a function of disk and swap space
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
# NOTE(review): the "req_size_dict = {" opener and the DT_DRBD8 key line
# are elided in this listing; code is kept exactly as shown.
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements according to disk template

  @raise errors.ProgrammerError: for unknown disk templates

  """
  # Required free disk space as a function of disk and swap space
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
    constants.DT_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
    constants.DT_SHARED_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
    constants.DT_BLOCK: 0,
    constants.DT_RBD: sum(d[constants.IDISK_SIZE] for d in disks),

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
9115 def _FilterVmNodes(lu, nodenames):
9116 """Filters out non-vm_capable nodes from a list.
9118 @type lu: L{LogicalUnit}
9119 @param lu: the logical unit for which we check
9120 @type nodenames: list
9121 @param nodenames: the list of nodes on which we should check
9123 @return: the list of vm-capable nodes
9126 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9127 return [name for name in nodenames if name not in vm_nodes]
# NOTE(review): the per-node result extraction and offline-node skip inside
# the final loop are elided in this listing; code is kept exactly as shown.
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)

  cluster = lu.cfg.GetClusterInfo()
  # merge the cluster-level defaults with the supplied overrides
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)

  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
  for node in nodenames:
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
# NOTE(review): the final argument of the call_os_validate call and the
# arguments of the closing LogInfo call are elided in this listing; code is
# kept exactly as shown.
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  """
  # hooks directory name and object type for the instance-add hooks
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  def CheckArguments(self):
    """Check arguments.

    """
    # NOTE(review): this listing elides a number of source lines (ECODE
    # arguments of several OpPrereqError calls, some guard/else lines, the
    # "try:" around certificate loading); code is kept exactly as shown.
    # do not require name_check to ease forward/backward compatibility
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
      # adoption is incompatible with import mode, iallocator and
      # non-adoptable disk templates
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

    if self.op.disk_template in constants.DTS_MUST_ADOPT:
      raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                 " but no 'adopt' parameter given" %
                                 self.op.disk_template,

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",

        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",

      self.source_instance_name = \
        netutils.GetHostname(name=src_instance_name).name
      # unknown mode: reject (this is the trailing "else:" branch)
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    # NOTE(review): the "else:" lines of the iallocator and src_node
    # branches are elided in this listing; code is kept exactly as shown.
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
      # specifying a group on instance creation and then selecting nodes from
      # that group
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidential modification)
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        # without a source node we must lock everything
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        # relative paths are resolved under the cluster export directory
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    Asks the configured iallocator plugin for node placement and stores the
    chosen primary (and, for two-node templates, secondary) node back into
    C{self.op}.

    """
    # NOTE(review): several IAllocator keyword arguments and the success
    # check after Run() are not visible in this excerpt — confirm against
    # the full source before relying on this listing.
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MAXMEM],
                     spindle_use=self.be_full[constants.BE_SPINDLE_USE],
                     hypervisor=self.op.hypervisor,
    ial.Run(self.op.iallocator)
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    # First result entry is always the primary node
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      # second entry is the secondary (mirrored disk templates)
      self.op.snode = ial.result[1]
  def BuildHooksEnv(self):
    """Build hooks environment for instance creation.

    This runs on master, primary and secondary nodes of the instance.

    """
    # NOTE(review): the "env = {" initializer line is not visible in this
    # excerpt — confirm against the full source.
      "ADD_MODE": self.op.mode,
    if self.op.mode == constants.INSTANCE_IMPORT:
      # import mode: also expose the export source to the hooks
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      minmem=self.be_full[constants.BE_MINMEM],
      maxmem=self.be_full[constants.BE_MAXMEM],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      hypervisor_name=self.op.hypervisor,
  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # hook nodes: the master node plus the instance's primary and secondaries
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      # no source node given: search all locked nodes for the export name
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      for node in exp_list:
        if exp_list[node].fail_msg:
        if src_path in exp_list[node].payload:
          # found it: remember both node and absolute path in the opcode
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    # the payload is an INI-style config, serialized as a string
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    available; user-supplied opcode values always win.

    @param einfo: the export information (parsed INI-style config)

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
        if self.op.disk_template not in constants.DISK_TEMPLATES:
          raise errors.OpPrereqError("Disk template specified in configuration"
                                     " file is not one of the allowed values:"
                                     " %s" % " ".join(constants.DISK_TEMPLATES))
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",

    if not self.op.disks:
      # TODO: import the disk iv_name too
      for idx in range(constants.MAX_DISKS):
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
      self.op.disks = disks
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",

    if not self.op.nics:
      # rebuild the NIC list from the per-index nic%d_* export options
      for idx in range(constants.MAX_NICS):
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
        # Compatibility for the old "memory" be param
        if name == constants.BE_MEMORY:
          if constants.BE_MAXMEM not in self.op.beparams:
            self.op.beparams[constants.BE_MAXMEM] = value
          if constants.BE_MINMEM not in self.op.beparams:
            self.op.beparams[constants.BE_MINMEM] = value
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value
  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    Drops every hv/be/nic/os parameter whose value equals the cluster
    default, so only explicit overrides are stored on the instance.

    @param cluster: the cluster configuration object

    """
    # hvparams: delete entries matching the filled-in cluster defaults
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]

    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]

    # nic params, per NIC
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:

    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]
  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    Sets C{self.instance_file_storage_dir}; stays C{None} for non-file
    disk templates.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      # NOTE(review): the joinargs initializer line is not visible in this
      # excerpt — confirm against the full source.
      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined")
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      # the instance name is always the last path component
      joinargs.append(self.op.instance_name)

      # pylint: disable=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
  def CheckPrereq(self): # pylint: disable=R0914
    """Check prerequisites.

    Validates hypervisor/backend/OS/NIC/disk parameters, reads export
    information for imports, runs the iallocator if requested, and verifies
    the primary and secondary nodes.

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # import: load the export data and use it for parameter defaults
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)
      self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
      self._old_instance_name = None

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if (self.op.hypervisor is None or
        self.op.hypervisor == constants.VALUE_AUTO):
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                 ",".join(enabled_hvs)),

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
    for param, value in self.op.beparams.iteritems():
      if value == constants.VALUE_AUTO:
        self.op.beparams[param] = default_beparams[param]
    objects.UpgradeBeParams(self.op.beparams)
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup: validate each requested NIC and create objects.NIC
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get(constants.INIC_MODE, None)
      nic_mode = nic_mode_req
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
      elif ip.lower() == constants.VALUE_AUTO:
        # 'auto' means: use the IP resolved from the instance name
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
        nic_ip = self.hostname1.ip
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",

      # MAC address verification
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

          # reserve the MAC so a parallel job can't grab it
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      # Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      if link == constants.VALUE_AUTO:
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
      nicparams[constants.NIC_MODE] = nic_mode
      nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
      data_vg = disk.get(constants.IDISK_VG, default_vg)
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
      if constants.IDISK_METAVG in disk:
        new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:
      # collect the source image path for each disk of the export
      for idx in range(len(self.disks)):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
          disk_images.append(False)

      self.src_images = disk_images

      if self.op.instance_name == self._old_instance_name:
        # same name as the export: reuse the exported MAC addresses
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    if self.op.iallocator is not None:
      self._RunAllocator()

    # Release all unneeded node locks
    _ReleaseLocks(self, locking.LEVEL_NODE,
                  keep=filter(None, [self.op.pnode, self.op.snode,
    _ReleaseLocks(self, locking.LEVEL_NODE_RES,
                  keep=filter(None, [self.op.pnode, self.op.snode,

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

      snode = self.cfg.GetNodeInfo(self.op.snode)
      if pnode.group != snode.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"

    nodenames = [pnode.name] + self.secondaries

    # Verify instance specs
    spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
      constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
      constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
      constants.ISPEC_DISK_COUNT: len(self.disks),
      constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
      constants.ISPEC_NIC_COUNT: len(self.nics),
      constants.ISPEC_SPINDLE_USE: spindle_use,

    # check the instance spec against the group's instance policy
    group_info = self.cfg.GetNodeGroup(pnode.group)
    ipolicy = _CalculateGroupIPolicy(cluster, group_info)
    res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
    if not self.op.ignore_ipolicy and res:
      raise errors.OpPrereqError(("Instance allocation to group %s violates"
                                  " policy: %s") % (pnode.group,
                                                    utils.CommaJoin(res)),

    if not self.adopt_disks:
      if self.op.disk_template == constants.DT_RBD:
        # _CheckRADOSFreeSpace() is just a placeholder.
        # Any function that checks prerequisites can be placed here.
        # Check if there is enough space on the RADOS cluster.
        _CheckRADOSFreeSpace()

        # Check lv size requirements, if not adopting
        req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
        _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
      for lv_name in all_lvs:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    #TODO(dynmem): use MINMEM for checking
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MAXMEM],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    Creates (or adopts) the disks, registers the instance in the
    configuration, installs or imports the OS, and optionally starts the
    instance.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Node locks differ from node resource locks"

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      # this hypervisor needs a network (console) port
      network_port = self.cfg.AllocatePort()
      network_port = None

    # This is ugly but we got a chicken-egg problem here
    # We can only take the group disk parameters, as the instance
    # has no disks yet (we are generating them right here).
    node = self.cfg.GetNodeInfo(pnode_name)
    nodegroup = self.cfg.GetNodeGroup(node.group)
    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  self.cfg.GetGroupDiskParams(nodegroup))

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_state=constants.ADMINST_DOWN,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,

    for tag in self.op.tags:

    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adoped LVs")
      feedback_fn("* creating instance disks...")
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        # creation failed: undo the partially-created disks
        self.LogWarning("Device creation failed, reverting...")
          _RemoveDisks(self, iobj)
          self.cfg.ReleaseDRBDMinors(instance)

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)

      # Something is already wrong with the disks, don't do anything else
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)

      # degraded disks: roll back the whole creation
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"

    # Release all node resource locks
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      # we need to set the disks ID to the primary node, since the
      # preceding code might or might have not done it, depending on
      # disk template and other options
      for disk in iobj.disks:
        self.cfg.SetDiskID(disk, pnode_name)
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                        not self.op.wait_for_sync)
            feedback_fn("* pausing disk sync to install instance OS")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
            for idx, success in enumerate(result.payload):
                logging.warn("pause-sync of instance %s for disk %d failed",

          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
                                          self.op.debug_level)
            feedback_fn("* resuming disk sync")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
            for idx, success in enumerate(result.payload):
                logging.warn("resume-sync of instance %s for disk %d failed",
          os_add_result.Raise("Could not add os for instance %s"
                              " on node %s" % (instance, pnode_name))

      if self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        for idx, image in enumerate(self.src_images):
          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
          transfers.append(dt)

          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        rename_from = self._old_instance_name

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make
        # a connection. In some cases stopping an instance can take a long
        # time, hence the shutdown timeout is added to the connection
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        rename_from = self.source_instance_name

        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"

      # Run rename script on newly imported instance
      assert iobj.name == instance
      feedback_fn("Running rename script for %s" % instance)
      result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                 self.op.debug_level)
      if result.fail_msg:
        # rename failure is logged but not fatal
        self.LogWarning("Failed to run rename script for %s on node"
                        " %s: %s" % (instance, pnode_name, result.fail_msg))

    assert not self.owned_locks(locking.LEVEL_NODE_RES)

      # mark the instance as up and start it on the primary node
      iobj.admin_state = constants.ADMINST_UP
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)
10280 def _CheckRADOSFreeSpace():
10281 """Compute disk size requirements inside the RADOS cluster.
10284 # For the RADOS cluster we assume there is always enough space.
class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  """
  def ExpandNames(self):
    # read-only operation: all locks can be shared
    self.share_locks = _ShareAll()
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance.

    @return: console information as produced by L{_GetInstanceConsole}

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      # instance not running: report its admin state in the error
      if instance.admin_state == constants.ADMINST_UP:
        state = constants.INSTST_ERRORDOWN
      elif instance.admin_state == constants.ADMINST_DOWN:
        state = constants.INSTST_ADMINDOWN
        state = constants.INSTST_ADMINOFFLINE
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @return: console description as a dictionary

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  # hook path and type for this LU's pre/post hooks
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  def CheckArguments(self):
    """Validate the mode/remote_node/iallocator combination.

    Delegates to the tasklet so the LU and the tasklet share a single
    validation path.

    """
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)
10372 def ExpandNames(self):
10373 self._ExpandAndLockInstance()
10375 assert locking.LEVEL_NODE not in self.needed_locks
10376 assert locking.LEVEL_NODE_RES not in self.needed_locks
10377 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10379 assert self.op.iallocator is None or self.op.remote_node is None, \
10380 "Conflicting options"
10382 if self.op.remote_node is not None:
10383 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10385 # Warning: do not remove the locking of the new secondary here
10386 # unless DRBD8.AddChildren is changed to work in parallel;
10387 # currently it doesn't since parallel invocations of
10388 # FindUnusedMinor will conflict
10389 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10390 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10392 self.needed_locks[locking.LEVEL_NODE] = []
10393 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10395 if self.op.iallocator is not None:
10396 # iallocator will select a new node in the same group
10397 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10399 self.needed_locks[locking.LEVEL_NODE_RES] = []
10401 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10402 self.op.iallocator, self.op.remote_node,
10403 self.op.disks, False, self.op.early_release,
10404 self.op.ignore_ipolicy)
10406 self.tasklets = [self.replacer]
10408 def DeclareLocks(self, level):
10409 if level == locking.LEVEL_NODEGROUP:
10410 assert self.op.remote_node is None
10411 assert self.op.iallocator is not None
10412 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10414 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10415 # Lock all groups used by instance optimistically; this requires going
10416 # via the node before it's locked, requiring verification later on
10417 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10418 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10420 elif level == locking.LEVEL_NODE:
10421 if self.op.iallocator is not None:
10422 assert self.op.remote_node is None
10423 assert not self.needed_locks[locking.LEVEL_NODE]
10425 # Lock member nodes of all locked groups
10426 self.needed_locks[locking.LEVEL_NODE] = [node_name
10427 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10428 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10430 self._LockInstancesNodes()
10431 elif level == locking.LEVEL_NODE_RES:
10433 self.needed_locks[locking.LEVEL_NODE_RES] = \
10434 self.needed_locks[locking.LEVEL_NODE]
10436 def BuildHooksEnv(self):
10437 """Build hooks env.
10439 This runs on the master, the primary and all the secondaries.
10442 instance = self.replacer.instance
10444 "MODE": self.op.mode,
10445 "NEW_SECONDARY": self.op.remote_node,
10446 "OLD_SECONDARY": instance.secondary_nodes[0],
10448 env.update(_BuildInstanceHookEnvByObject(self, instance))
10451 def BuildHooksNodes(self):
10452 """Build hooks nodes.
10455 instance = self.replacer.instance
10457 self.cfg.GetMasterNode(),
10458 instance.primary_node,
10460 if self.op.remote_node is not None:
10461 nl.append(self.op.remote_node)
10464 def CheckPrereq(self):
10465 """Check prerequisites.
10468 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10469 self.op.iallocator is None)
10471 # Verify if node group locks are still correct
10472 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10474 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10476 return LogicalUnit.CheckPrereq(self)
class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
10485 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10486 disks, delay_iallocator, early_release, ignore_ipolicy):
10487 """Initializes this class.
10490 Tasklet.__init__(self, lu)
10493 self.instance_name = instance_name
10495 self.iallocator_name = iallocator_name
10496 self.remote_node = remote_node
10498 self.delay_iallocator = delay_iallocator
10499 self.early_release = early_release
10500 self.ignore_ipolicy = ignore_ipolicy
10503 self.instance = None
10504 self.new_node = None
10505 self.target_node = None
10506 self.other_node = None
10507 self.remote_node_info = None
10508 self.node_secondary_ip = None
10511 def CheckArguments(mode, remote_node, iallocator):
10512 """Helper function for users of this class.
10515 # check for valid parameter combination
10516 if mode == constants.REPLACE_DISK_CHG:
10517 if remote_node is None and iallocator is None:
10518 raise errors.OpPrereqError("When changing the secondary either an"
10519 " iallocator script must be used or the"
10520 " new node given", errors.ECODE_INVAL)
10522 if remote_node is not None and iallocator is not None:
10523 raise errors.OpPrereqError("Give either the iallocator or the new"
10524 " secondary, not both", errors.ECODE_INVAL)
10526 elif remote_node is not None or iallocator is not None:
10527 # Not replacing the secondary
10528 raise errors.OpPrereqError("The iallocator and new node options can"
10529 " only be used when changing the"
10530 " secondary node", errors.ECODE_INVAL)
10533 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10534 """Compute a new secondary node using an IAllocator.
10537 ial = IAllocator(lu.cfg, lu.rpc,
10538 mode=constants.IALLOCATOR_MODE_RELOC,
10539 name=instance_name,
10540 relocate_from=list(relocate_from))
10542 ial.Run(iallocator_name)
10544 if not ial.success:
10545 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10546 " %s" % (iallocator_name, ial.info),
10547 errors.ECODE_NORES)
10549 if len(ial.result) != ial.required_nodes:
10550 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10551 " of nodes (%s), required %s" %
10553 len(ial.result), ial.required_nodes),
10554 errors.ECODE_FAULT)
10556 remote_node_name = ial.result[0]
10558 lu.LogInfo("Selected new secondary for instance '%s': %s",
10559 instance_name, remote_node_name)
10561 return remote_node_name
10563 def _FindFaultyDisks(self, node_name):
10564 """Wrapper for L{_FindFaultyInstanceDisks}.
10567 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10570 def _CheckDisksActivated(self, instance):
10571 """Checks if the instance disks are activated.
10573 @param instance: The instance to check disks
10574 @return: True if they are activated, False otherwise
10577 nodes = instance.all_nodes
10579 for idx, dev in enumerate(instance.disks):
10581 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10582 self.cfg.SetDiskID(dev, node)
10584 result = _BlockdevFind(self, node, dev, instance)
10588 elif result.fail_msg or not result.payload:
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    # Replacing disks is only implemented for DRBD8-based instances
    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    # The remaining checks may be delayed until Exec (see _CheckPrereq2's
    # docstring for the rationale)
    if not self.delay_iallocator:
      self._CheckPrereq2()
10616 def _CheckPrereq2(self):
10617 """Check prerequisites, second part.
10619 This function should always be part of CheckPrereq. It was separated and is
10620 now called from Exec because during node evacuation iallocator was only
10621 called with an unmodified cluster model, not taking planned changes into
10625 instance = self.instance
10626 secondary_node = instance.secondary_nodes[0]
10628 if self.iallocator_name is None:
10629 remote_node = self.remote_node
10631 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10632 instance.name, instance.secondary_nodes)
10634 if remote_node is None:
10635 self.remote_node_info = None
10637 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10638 "Remote node '%s' is not locked" % remote_node
10640 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10641 assert self.remote_node_info is not None, \
10642 "Cannot retrieve locked node %s" % remote_node
10644 if remote_node == self.instance.primary_node:
10645 raise errors.OpPrereqError("The specified node is the primary node of"
10646 " the instance", errors.ECODE_INVAL)
10648 if remote_node == secondary_node:
10649 raise errors.OpPrereqError("The specified node is already the"
10650 " secondary node of the instance",
10651 errors.ECODE_INVAL)
10653 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10654 constants.REPLACE_DISK_CHG):
10655 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10656 errors.ECODE_INVAL)
10658 if self.mode == constants.REPLACE_DISK_AUTO:
10659 if not self._CheckDisksActivated(instance):
10660 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10661 " first" % self.instance_name,
10662 errors.ECODE_STATE)
10663 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10664 faulty_secondary = self._FindFaultyDisks(secondary_node)
10666 if faulty_primary and faulty_secondary:
10667 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10668 " one node and can not be repaired"
10669 " automatically" % self.instance_name,
10670 errors.ECODE_STATE)
10673 self.disks = faulty_primary
10674 self.target_node = instance.primary_node
10675 self.other_node = secondary_node
10676 check_nodes = [self.target_node, self.other_node]
10677 elif faulty_secondary:
10678 self.disks = faulty_secondary
10679 self.target_node = secondary_node
10680 self.other_node = instance.primary_node
10681 check_nodes = [self.target_node, self.other_node]
10687 # Non-automatic modes
10688 if self.mode == constants.REPLACE_DISK_PRI:
10689 self.target_node = instance.primary_node
10690 self.other_node = secondary_node
10691 check_nodes = [self.target_node, self.other_node]
10693 elif self.mode == constants.REPLACE_DISK_SEC:
10694 self.target_node = secondary_node
10695 self.other_node = instance.primary_node
10696 check_nodes = [self.target_node, self.other_node]
10698 elif self.mode == constants.REPLACE_DISK_CHG:
10699 self.new_node = remote_node
10700 self.other_node = instance.primary_node
10701 self.target_node = secondary_node
10702 check_nodes = [self.new_node, self.other_node]
10704 _CheckNodeNotDrained(self.lu, remote_node)
10705 _CheckNodeVmCapable(self.lu, remote_node)
10707 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10708 assert old_node_info is not None
10709 if old_node_info.offline and not self.early_release:
10710 # doesn't make sense to delay the release
10711 self.early_release = True
10712 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10713 " early-release mode", secondary_node)
10716 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10719 # If not specified all disks should be replaced
10721 self.disks = range(len(self.instance.disks))
10723 # TODO: This is ugly, but right now we can't distinguish between internal
10724 # submitted opcode and external one. We should fix that.
10725 if self.remote_node_info:
10726 # We change the node, lets verify it still meets instance policy
10727 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10728 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10730 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10731 ignore=self.ignore_ipolicy)
10733 for node in check_nodes:
10734 _CheckNodeOnline(self.lu, node)
10736 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10739 if node_name is not None)
10741 # Release unneeded node and node resource locks
10742 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10743 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10745 # Release any owned node group
10746 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10747 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10749 # Check whether disks are valid
10750 for disk_idx in self.disks:
10751 instance.FindDisk(disk_idx)
10753 # Get secondary node IP addresses
10754 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10755 in self.cfg.GetMultiNodeInfo(touched_nodes))
10757 def Exec(self, feedback_fn):
10758 """Execute disk replacement.
10760 This dispatches the disk replacement to the appropriate handler.
10763 if self.delay_iallocator:
10764 self._CheckPrereq2()
10767 # Verify owned locks before starting operation
10768 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10769 assert set(owned_nodes) == set(self.node_secondary_ip), \
10770 ("Incorrect node locks, owning %s, expected %s" %
10771 (owned_nodes, self.node_secondary_ip.keys()))
10772 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10773 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10775 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10776 assert list(owned_instances) == [self.instance_name], \
10777 "Instance '%s' not locked" % self.instance_name
10779 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10780 "Should not own any node group lock at this point"
10783 feedback_fn("No disks need replacement")
10786 feedback_fn("Replacing disk(s) %s for %s" %
10787 (utils.CommaJoin(self.disks), self.instance.name))
10789 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10791 # Activate the instance disks if we're replacing them on a down instance
10793 _StartInstanceDisks(self.lu, self.instance, True)
10796 # Should we replace the secondary node?
10797 if self.new_node is not None:
10798 fn = self._ExecDrbd8Secondary
10800 fn = self._ExecDrbd8DiskOnly
10802 result = fn(feedback_fn)
10804 # Deactivate the instance disks if we're replacing them on a
10807 _SafeShutdownInstanceDisks(self.lu, self.instance)
10809 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10812 # Verify owned locks
10813 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10814 nodes = frozenset(self.node_secondary_ip)
10815 assert ((self.early_release and not owned_nodes) or
10816 (not self.early_release and not (set(owned_nodes) - nodes))), \
10817 ("Not owning the correct locks, early_release=%s, owned=%r,"
10818 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10822 def _CheckVolumeGroup(self, nodes):
10823 self.lu.LogInfo("Checking volume groups")
10825 vgname = self.cfg.GetVGName()
10827 # Make sure volume group exists on all involved nodes
10828 results = self.rpc.call_vg_list(nodes)
10830 raise errors.OpExecError("Can't list volume groups on the nodes")
10833 res = results[node]
10834 res.Raise("Error checking node %s" % node)
10835 if vgname not in res.payload:
10836 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10839 def _CheckDisksExistence(self, nodes):
10840 # Check disk existence
10841 for idx, dev in enumerate(self.instance.disks):
10842 if idx not in self.disks:
10846 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10847 self.cfg.SetDiskID(dev, node)
10849 result = _BlockdevFind(self, node, dev, self.instance)
10851 msg = result.fail_msg
10852 if msg or not result.payload:
10854 msg = "disk not found"
10855 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10858 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10859 for idx, dev in enumerate(self.instance.disks):
10860 if idx not in self.disks:
10863 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10866 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10867 on_primary, ldisk=ldisk):
10868 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10869 " replace disks for instance %s" %
10870 (node_name, self.instance.name))
10872 def _CreateNewStorage(self, node_name):
10873 """Create new storage on the primary or secondary node.
10875 This is only used for same-node replaces, not for changing the
10876 secondary node, hence we don't want to modify the existing disk.
10881 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10882 for idx, dev in enumerate(disks):
10883 if idx not in self.disks:
10886 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10888 self.cfg.SetDiskID(dev, node_name)
10890 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10891 names = _GenerateUniqueNames(self.lu, lv_names)
10893 (data_disk, meta_disk) = dev.children
10894 vg_data = data_disk.logical_id[0]
10895 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10896 logical_id=(vg_data, names[0]),
10897 params=data_disk.params)
10898 vg_meta = meta_disk.logical_id[0]
10899 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10900 logical_id=(vg_meta, names[1]),
10901 params=meta_disk.params)
10903 new_lvs = [lv_data, lv_meta]
10904 old_lvs = [child.Copy() for child in dev.children]
10905 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10907 # we pass force_create=True to force the LVM creation
10908 for new_lv in new_lvs:
10909 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10910 _GetInstanceInfoText(self.instance), False)
10914 def _CheckDevices(self, node_name, iv_names):
10915 for name, (dev, _, _) in iv_names.iteritems():
10916 self.cfg.SetDiskID(dev, node_name)
10918 result = _BlockdevFind(self, node_name, dev, self.instance)
10920 msg = result.fail_msg
10921 if msg or not result.payload:
10923 msg = "disk not found"
10924 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10927 if result.payload.is_degraded:
10928 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10930 def _RemoveOldStorage(self, node_name, iv_names):
10931 for name, (_, old_lvs, _) in iv_names.iteritems():
10932 self.lu.LogInfo("Remove logical volumes for %s" % name)
10935 self.cfg.SetDiskID(lv, node_name)
10937 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10939 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10940 hint="remove unused LVs manually")
10942 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10943 """Replace a disk on the primary or secondary for DRBD 8.
10945 The algorithm for replace is quite complicated:
10947 1. for each disk to be replaced:
10949 1. create new LVs on the target node with unique names
10950 1. detach old LVs from the drbd device
10951 1. rename old LVs to name_replaced.<time_t>
10952 1. rename new LVs to old LVs
10953 1. attach the new LVs (with the old names now) to the drbd device
10955 1. wait for sync across all devices
10957 1. for each modified disk:
10959 1. remove old LVs (which have the name name_replaces.<time_t>)
10961 Failures are not very well handled.
10966 # Step: check device activation
10967 self.lu.LogStep(1, steps_total, "Check device existence")
10968 self._CheckDisksExistence([self.other_node, self.target_node])
10969 self._CheckVolumeGroup([self.target_node, self.other_node])
10971 # Step: check other node consistency
10972 self.lu.LogStep(2, steps_total, "Check peer consistency")
10973 self._CheckDisksConsistency(self.other_node,
10974 self.other_node == self.instance.primary_node,
10977 # Step: create new storage
10978 self.lu.LogStep(3, steps_total, "Allocate new storage")
10979 iv_names = self._CreateNewStorage(self.target_node)
10981 # Step: for each lv, detach+rename*2+attach
10982 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10983 for dev, old_lvs, new_lvs in iv_names.itervalues():
10984 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10986 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10988 result.Raise("Can't detach drbd from local storage on node"
10989 " %s for device %s" % (self.target_node, dev.iv_name))
10991 #cfg.Update(instance)
10993 # ok, we created the new LVs, so now we know we have the needed
10994 # storage; as such, we proceed on the target node to rename
10995 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10996 # using the assumption that logical_id == physical_id (which in
10997 # turn is the unique_id on that node)
10999 # FIXME(iustin): use a better name for the replaced LVs
11000 temp_suffix = int(time.time())
11001 ren_fn = lambda d, suff: (d.physical_id[0],
11002 d.physical_id[1] + "_replaced-%s" % suff)
11004 # Build the rename list based on what LVs exist on the node
11005 rename_old_to_new = []
11006 for to_ren in old_lvs:
11007 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11008 if not result.fail_msg and result.payload:
11010 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11012 self.lu.LogInfo("Renaming the old LVs on the target node")
11013 result = self.rpc.call_blockdev_rename(self.target_node,
11015 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11017 # Now we rename the new LVs to the old LVs
11018 self.lu.LogInfo("Renaming the new LVs on the target node")
11019 rename_new_to_old = [(new, old.physical_id)
11020 for old, new in zip(old_lvs, new_lvs)]
11021 result = self.rpc.call_blockdev_rename(self.target_node,
11023 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11025 # Intermediate steps of in memory modifications
11026 for old, new in zip(old_lvs, new_lvs):
11027 new.logical_id = old.logical_id
11028 self.cfg.SetDiskID(new, self.target_node)
11030 # We need to modify old_lvs so that removal later removes the
11031 # right LVs, not the newly added ones; note that old_lvs is a
11033 for disk in old_lvs:
11034 disk.logical_id = ren_fn(disk, temp_suffix)
11035 self.cfg.SetDiskID(disk, self.target_node)
11037 # Now that the new lvs have the old name, we can add them to the device
11038 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11039 result = self.rpc.call_blockdev_addchildren(self.target_node,
11040 (dev, self.instance), new_lvs)
11041 msg = result.fail_msg
11043 for new_lv in new_lvs:
11044 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11047 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11048 hint=("cleanup manually the unused logical"
11050 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11052 cstep = itertools.count(5)
11054 if self.early_release:
11055 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11056 self._RemoveOldStorage(self.target_node, iv_names)
11057 # TODO: Check if releasing locks early still makes sense
11058 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11060 # Release all resource locks except those used by the instance
11061 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11062 keep=self.node_secondary_ip.keys())
11064 # Release all node locks while waiting for sync
11065 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11067 # TODO: Can the instance lock be downgraded here? Take the optional disk
11068 # shutdown in the caller into consideration.
11071 # This can fail as the old devices are degraded and _WaitForSync
11072 # does a combined result over all disks, so we don't check its return value
11073 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11074 _WaitForSync(self.lu, self.instance)
11076 # Check all devices manually
11077 self._CheckDevices(self.instance.primary_node, iv_names)
11079 # Step: remove old storage
11080 if not self.early_release:
11081 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11082 self._RemoveOldStorage(self.target_node, iv_names)
11084 def _ExecDrbd8Secondary(self, feedback_fn):
11085 """Replace the secondary node for DRBD 8.
11087 The algorithm for replace is quite complicated:
11088 - for all disks of the instance:
11089 - create new LVs on the new node with same names
11090 - shutdown the drbd device on the old secondary
11091 - disconnect the drbd network on the primary
11092 - create the drbd device on the new secondary
11093 - network attach the drbd on the primary, using an artifice:
11094 the drbd code for Attach() will connect to the network if it
11095 finds a device which is connected to the good local disks but
11096 not network enabled
11097 - wait for sync across all devices
11098 - remove all disks from the old secondary
11100 Failures are not very well handled.
11105 pnode = self.instance.primary_node
11107 # Step: check device activation
11108 self.lu.LogStep(1, steps_total, "Check device existence")
11109 self._CheckDisksExistence([self.instance.primary_node])
11110 self._CheckVolumeGroup([self.instance.primary_node])
11112 # Step: check other node consistency
11113 self.lu.LogStep(2, steps_total, "Check peer consistency")
11114 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11116 # Step: create new storage
11117 self.lu.LogStep(3, steps_total, "Allocate new storage")
11118 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11119 for idx, dev in enumerate(disks):
11120 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11121 (self.new_node, idx))
11122 # we pass force_create=True to force LVM creation
11123 for new_lv in dev.children:
11124 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11125 True, _GetInstanceInfoText(self.instance), False)
11127 # Step 4: dbrd minors and drbd setups changes
11128 # after this, we must manually remove the drbd minors on both the
11129 # error and the success paths
11130 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11131 minors = self.cfg.AllocateDRBDMinor([self.new_node
11132 for dev in self.instance.disks],
11133 self.instance.name)
11134 logging.debug("Allocated minors %r", minors)
11137 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11138 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11139 (self.new_node, idx))
11140 # create new devices on new_node; note that we create two IDs:
11141 # one without port, so the drbd will be activated without
11142 # networking information on the new node at this stage, and one
11143 # with network, for the latter activation in step 4
11144 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11145 if self.instance.primary_node == o_node1:
11148 assert self.instance.primary_node == o_node2, "Three-node instance?"
11151 new_alone_id = (self.instance.primary_node, self.new_node, None,
11152 p_minor, new_minor, o_secret)
11153 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11154 p_minor, new_minor, o_secret)
11156 iv_names[idx] = (dev, dev.children, new_net_id)
11157 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11159 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11160 logical_id=new_alone_id,
11161 children=dev.children,
11164 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11167 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11169 _GetInstanceInfoText(self.instance), False)
11170 except errors.GenericError:
11171 self.cfg.ReleaseDRBDMinors(self.instance.name)
11174 # We have new devices, shutdown the drbd on the old secondary
11175 for idx, dev in enumerate(self.instance.disks):
11176 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11177 self.cfg.SetDiskID(dev, self.target_node)
11178 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11179 (dev, self.instance)).fail_msg
11181 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11182 "node: %s" % (idx, msg),
11183 hint=("Please cleanup this device manually as"
11184 " soon as possible"))
11186 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11187 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11188 self.instance.disks)[pnode]
11190 msg = result.fail_msg
11192 # detaches didn't succeed (unlikely)
11193 self.cfg.ReleaseDRBDMinors(self.instance.name)
11194 raise errors.OpExecError("Can't detach the disks from the network on"
11195 " old node: %s" % (msg,))
11197 # if we managed to detach at least one, we update all the disks of
11198 # the instance to point to the new secondary
11199 self.lu.LogInfo("Updating instance configuration")
11200 for dev, _, new_logical_id in iv_names.itervalues():
11201 dev.logical_id = new_logical_id
11202 self.cfg.SetDiskID(dev, self.instance.primary_node)
11204 self.cfg.Update(self.instance, feedback_fn)
11206 # Release all node locks (the configuration has been updated)
11207 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11209 # and now perform the drbd attach
11210 self.lu.LogInfo("Attaching primary drbds to new secondary"
11211 " (standalone => connected)")
11212 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11214 self.node_secondary_ip,
11215 (self.instance.disks, self.instance),
11216 self.instance.name,
11218 for to_node, to_result in result.items():
11219 msg = to_result.fail_msg
11221 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11223 hint=("please do a gnt-instance info to see the"
11224 " status of disks"))
11226 cstep = itertools.count(5)
11228 if self.early_release:
11229 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11230 self._RemoveOldStorage(self.target_node, iv_names)
11231 # TODO: Check if releasing locks early still makes sense
11232 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11234 # Release all resource locks except those used by the instance
11235 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11236 keep=self.node_secondary_ip.keys())
11238 # TODO: Can the instance lock be downgraded here? Take the optional disk
11239 # shutdown in the caller into consideration.
11242 # This can fail as the old devices are degraded and _WaitForSync
11243 # does a combined result over all disks, so we don't check its return value
11244 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11245 _WaitForSync(self.lu, self.instance)
11247 # Check all devices manually
11248 self._CheckDevices(self.instance.primary_node, iv_names)
11250 # Step: remove old storage
11251 if not self.early_release:
11252 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11253 self._RemoveOldStorage(self.target_node, iv_names)
class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  # Per-node locking is declared in ExpandNames; no big ganeti lock needed
  REQ_BGL = False
  def CheckArguments(self):
    """Check that the node exists and the storage type is repairable."""
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    # Only storage types supporting the "fix consistency" operation can
    # be repaired
    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)
11273 def ExpandNames(self):
11274 self.needed_locks = {
11275 locking.LEVEL_NODE: [self.op.node_name],
11278 def _CheckFaultyDisks(self, instance, node_name):
11279 """Ensure faulty disks abort the opcode or at least warn."""
11281 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11283 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11284 " node '%s'" % (instance.name, node_name),
11285 errors.ECODE_STATE)
11286 except errors.OpPrereqError, err:
11287 if self.op.ignore_consistency:
11288 self.proc.LogWarning(str(err.args[0]))
11292 def CheckPrereq(self):
11293 """Check prerequisites.
11296 # Check whether any instance on this node has faulty disks
11297 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11298 if inst.admin_state != constants.ADMINST_UP:
11300 check_nodes = set(inst.all_nodes)
11301 check_nodes.discard(self.op.node_name)
11302 for inst_node_name in check_nodes:
11303 self._CheckFaultyDisks(inst, inst_node_name)
11305 def Exec(self, feedback_fn):
11306 feedback_fn("Repairing storage unit '%s' on %s ..." %
11307 (self.op.name, self.op.node_name))
11309 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11310 result = self.rpc.call_storage_execute(self.op.node_name,
11311 self.op.storage_type, st_args,
11313 constants.SO_FIX_CONSISTENCY)
11314 result.Raise("Failed to repair storage unit '%s' on %s" %
11315 (self.op.name, self.op.node_name))
# LU that evacuates (moves away) primary and/or secondary instances from a
# node, either via an iallocator or to an explicitly given remote node.
# NOTE(review): lines are elided from this listing (embedded numbering skips);
# code is kept byte-identical, only comments are added.
11318 class LUNodeEvacuate(NoHooksLU):
11319   """Evacuates instances off a list of nodes.
# Mapping from opcode evacuation mode to the iallocator's evacuation mode;
# the asserts below check the mapping stays total on both sides.
11324 _MODE2IALLOCATOR = {
11325 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11326 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11327 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11329 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11330 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11331 constants.IALLOCATOR_NEVAC_MODES)
# Exactly one of "iallocator"/"remote_node" must be given.
11333 def CheckArguments(self):
11334 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11336 def ExpandNames(self):
11337 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11339 if self.op.remote_node is not None:
11340 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11341 assert self.op.remote_node
# A node cannot be both the evacuated node and the evacuation target.
11343 if self.op.remote_node == self.op.node_name:
11344 raise errors.OpPrereqError("Can not use evacuated node as a new"
11345 " secondary node", errors.ECODE_INVAL)
# An explicit remote node only works for secondary-instance evacuation.
11347 if self.op.mode != constants.NODE_EVAC_SEC:
11348 raise errors.OpPrereqError("Without the use of an iallocator only"
11349 " secondary instances can be evacuated",
11350 errors.ECODE_INVAL)
# All locks are taken in shared mode.
11353 self.share_locks = _ShareAll()
11354 self.needed_locks = {
11355 locking.LEVEL_INSTANCE: [],
11356 locking.LEVEL_NODEGROUP: [],
11357 locking.LEVEL_NODE: [],
11360 # Determine nodes (via group) optimistically, needs verification once locks
11361 # have been acquired
11362 self.lock_nodes = self._DetermineNodes()
# Returns the set of nodes to lock: the evacuated node plus either its whole
# node group (iallocator case) or just the explicit remote node.
11364 def _DetermineNodes(self):
11365 """Gets the list of nodes to operate on.
11368 if self.op.remote_node is None:
11369 # Iallocator will choose any node(s) in the same group
11370 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11372 group_nodes = frozenset([self.op.remote_node])
11374 # Determine nodes to be locked
11375 return set([self.op.node_name]) | group_nodes
# Picks the instance-listing helper matching the evacuation mode and applies
# it to the evacuated node.
11377 def _DetermineInstances(self):
11378 """Builds list of instances to operate on.
11381 assert self.op.mode in constants.NODE_EVAC_MODES
11383 if self.op.mode == constants.NODE_EVAC_PRI:
11384 # Primary instances only
11385 inst_fn = _GetNodePrimaryInstances
11386 assert self.op.remote_node is None, \
11387 "Evacuating primary instances requires iallocator"
11388 elif self.op.mode == constants.NODE_EVAC_SEC:
11389 # Secondary instances only
11390 inst_fn = _GetNodeSecondaryInstances
11393 assert self.op.mode == constants.NODE_EVAC_ALL
11394 inst_fn = _GetNodeInstances
11395 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11397 raise errors.OpPrereqError("Due to an issue with the iallocator"
11398 " interface it is not possible to evacuate"
11399 " all instances at once; specify explicitly"
11400 " whether to evacuate primary or secondary"
11402 errors.ECODE_INVAL)
11404 return inst_fn(self.cfg, self.op.node_name)
# Instance and nodegroup locks are computed optimistically here and
# re-verified in CheckPrereq once everything is actually locked.
11406 def DeclareLocks(self, level):
11407 if level == locking.LEVEL_INSTANCE:
11408 # Lock instances optimistically, needs verification once node and group
11409 # locks have been acquired
11410 self.needed_locks[locking.LEVEL_INSTANCE] = \
11411 set(i.name for i in self._DetermineInstances())
11413 elif level == locking.LEVEL_NODEGROUP:
11414 # Lock node groups for all potential target nodes optimistically, needs
11415 # verification once nodes have been acquired
11416 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11417 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11419 elif level == locking.LEVEL_NODE:
11420 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
# Verify that the optimistically computed node/group/instance sets still
# match reality now that the locks are held; any drift aborts with "retry".
11422 def CheckPrereq(self):
11424 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11425 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11426 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11428 need_nodes = self._DetermineNodes()
11430 if not owned_nodes.issuperset(need_nodes):
11431 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11432 " locks were acquired, current nodes are"
11433 " are '%s', used to be '%s'; retry the"
11435 (self.op.node_name,
11436 utils.CommaJoin(need_nodes),
11437 utils.CommaJoin(owned_nodes)),
11438 errors.ECODE_STATE)
11440 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11441 if owned_groups != wanted_groups:
11442 raise errors.OpExecError("Node groups changed since locks were acquired,"
11443 " current groups are '%s', used to be '%s';"
11444 " retry the operation" %
11445 (utils.CommaJoin(wanted_groups),
11446 utils.CommaJoin(owned_groups)))
11448 # Determine affected instances
11449 self.instances = self._DetermineInstances()
11450 self.instance_names = [i.name for i in self.instances]
11452 if set(self.instance_names) != owned_instances:
11453 raise errors.OpExecError("Instances on node '%s' changed since locks"
11454 " were acquired, current instances are '%s',"
11455 " used to be '%s'; retry the operation" %
11456 (self.op.node_name,
11457 utils.CommaJoin(self.instance_names),
11458 utils.CommaJoin(owned_instances)))
11460 if self.instance_names:
11461 self.LogInfo("Evacuating instances from node '%s': %s",
11463 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11465 self.LogInfo("No instances to evacuate from node '%s'",
# The explicit target node may not be the primary of any affected instance.
11468 if self.op.remote_node is not None:
11469 for i in self.instances:
11470 if i.primary_node == self.op.remote_node:
11471 raise errors.OpPrereqError("Node %s is the primary node of"
11472 " instance %s, cannot use it as"
11474 (self.op.remote_node, i.name),
11475 errors.ECODE_INVAL)
# Build the evacuation jobs: via iallocator (result unpacked by
# _LoadNodeEvacResult) or as one replace-disks opcode per instance.
11477 def Exec(self, feedback_fn):
11478 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11480 if not self.instance_names:
11481 # No instances to evacuate
11484 elif self.op.iallocator is not None:
11485 # TODO: Implement relocation to other group
11486 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11487 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11488 instances=list(self.instance_names))
11490 ial.Run(self.op.iallocator)
11492 if not ial.success:
11493 raise errors.OpPrereqError("Can't compute node evacuation using"
11494 " iallocator '%s': %s" %
11495 (self.op.iallocator, ial.info),
11496 errors.ECODE_NORES)
11498 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11500 elif self.op.remote_node is not None:
11501 assert self.op.mode == constants.NODE_EVAC_SEC
11503 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11504 remote_node=self.op.remote_node,
11506 mode=constants.REPLACE_DISK_CHG,
11507 early_release=self.op.early_release)]
11508 for instance_name in self.instance_names
11512 raise errors.ProgrammerError("No iallocator or remote node")
11514 return ResultWithJobs(jobs)
# Best-effort setter: copies the early_release flag onto an opcode, ignoring
# opcodes that have no such attribute.  NOTE(review): the try: and return
# lines are elided from this listing; code kept byte-identical.
11517 def _SetOpEarlyRelease(early_release, op):
11518   """Sets C{early_release} flag on opcodes if available.
11522     op.early_release = early_release
# OpInstanceReplaceDisks is expected to always carry the attribute, hence
# the assert on the failure path.
11523   except AttributeError:
11524     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
# Formats an evacuation destination for display: node names when use_nodes
# is set, otherwise the group.  NOTE(review): the branch selecting between
# the two is elided from this listing; code kept byte-identical.
11529 def _NodeEvacDest(use_nodes, group, nodes):
11530   """Returns group or nodes depending on caller's choice.
11534     return utils.CommaJoin(nodes)
# Turns a node-evacuate/change-group iallocator result into job definitions,
# logging moved instances and failing hard when any instance could not be
# evacuated.  NOTE(review): some lines are elided from this listing.
11539 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11540   """Unpacks the result of change-group and node-evacuate iallocator requests.
11542   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11543   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11545   @type lu: L{LogicalUnit}
11546   @param lu: Logical unit instance
11547   @type alloc_result: tuple/list
11548   @param alloc_result: Result from iallocator
11549   @type early_release: bool
11550   @param early_release: Whether to release locks early if possible
11551   @type use_nodes: bool
11552   @param use_nodes: Whether to display node names instead of groups
# The iallocator result is a (moved, failed, jobs) triple.
11555   (moved, failed, jobs) = alloc_result
# Any failed instance is both warned about and turned into an OpExecError.
11558     failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11559                                  for (name, reason) in failed)
11560     lu.LogWarning("Unable to evacuate instances %s", failreason)
11561     raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11564     lu.LogInfo("Instances to be moved: %s",
11565                utils.CommaJoin("%s (to %s)" %
11566                                (name, _NodeEvacDest(use_nodes, group, nodes))
11567                                for (name, group, nodes) in moved))
# Deserialize each job's opcodes and propagate the early_release flag.
11569   return [map(compat.partial(_SetOpEarlyRelease, early_release),
11570               map(opcodes.OpCode.LoadOpCode, ops))
# LU that grows one disk of an instance, either by a delta or to an absolute
# size, with a dry-run pass on all nodes before the real grow.
# NOTE(review): this listing has elided lines (embedded numbering skips);
# code is kept byte-identical, only comments are added.
11574 class LUInstanceGrowDisk(LogicalUnit):
11575   """Grow a disk of an instance.
11578 HPATH = "disk-grow"
11579 HTYPE = constants.HTYPE_INSTANCE
# Node and node-resource locks are computed from the instance's nodes.
11582 def ExpandNames(self):
11583 self._ExpandAndLockInstance()
11584 self.needed_locks[locking.LEVEL_NODE] = []
11585 self.needed_locks[locking.LEVEL_NODE_RES] = []
11586 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11587 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11589 def DeclareLocks(self, level):
11590 if level == locking.LEVEL_NODE:
11591 self._LockInstancesNodes()
# NODE_RES mirrors the NODE lock list (copied, not aliased).
11592 elif level == locking.LEVEL_NODE_RES:
11594 self.needed_locks[locking.LEVEL_NODE_RES] = \
11595 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
# Hook environment exposes the disk index, the amount and the absolute flag.
11597 def BuildHooksEnv(self):
11598 """Build hooks env.
11600 This runs on the master, the primary and all the secondaries.
11604 "DISK": self.op.disk,
11605 "AMOUNT": self.op.amount,
11606 "ABSOLUTE": self.op.absolute,
11608 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
# Hooks run on the master plus all of the instance's nodes.
11611 def BuildHooksNodes(self):
11612 """Build hooks nodes.
11615 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
# Validate growability, compute delta/target (absolute vs. relative mode)
# and pre-check free disk space per volume group where applicable.
11618 def CheckPrereq(self):
11619 """Check prerequisites.
11621 This checks that the instance is in the cluster.
11624 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11625 assert instance is not None, \
11626 "Cannot retrieve locked instance %s" % self.op.instance_name
11627 nodenames = list(instance.all_nodes)
11628 for node in nodenames:
11629 _CheckNodeOnline(self, node)
11631 self.instance = instance
11633 if instance.disk_template not in constants.DTS_GROWABLE:
11634 raise errors.OpPrereqError("Instance's disk layout does not support"
11635 " growing", errors.ECODE_INVAL)
11637 self.disk = instance.FindDisk(self.op.disk)
# Absolute mode: op.amount is the new total size; shrinking is rejected.
11639 if self.op.absolute:
11640 self.target = self.op.amount
11641 self.delta = self.target - self.disk.size
11643 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11644 "current disk size (%s)" %
11645 (utils.FormatUnit(self.target, "h"),
11646 utils.FormatUnit(self.disk.size, "h")),
11647 errors.ECODE_STATE)
# Relative mode: op.amount is the increment, which must not be negative.
11649 self.delta = self.op.amount
11650 self.target = self.disk.size + self.delta
11652 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11653 utils.FormatUnit(self.delta, "h"),
11654 errors.ECODE_INVAL)
11656 if instance.disk_template not in (constants.DT_FILE,
11657 constants.DT_SHARED_FILE,
11659 # TODO: check the free disk space for file, when that feature will be
11661 _CheckNodesFreeDiskPerVG(self, nodenames,
11662 self.disk.ComputeGrowth(self.delta))
# Grow the disk: dry-run on every node first, then for real; record the new
# size in the configuration before releasing/downgrading locks.
11664 def Exec(self, feedback_fn):
11665 """Execute disk grow.
11668 instance = self.instance
11671 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11672 assert (self.owned_locks(locking.LEVEL_NODE) ==
11673 self.owned_locks(locking.LEVEL_NODE_RES))
11675 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11677 raise errors.OpExecError("Cannot activate block device to grow")
11679 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11680 (self.op.disk, instance.name,
11681 utils.FormatUnit(self.delta, "h"),
11682 utils.FormatUnit(self.target, "h")))
11684 # First run all grow ops in dry-run mode
11685 for node in instance.all_nodes:
11686 self.cfg.SetDiskID(disk, node)
11687 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11689 result.Raise("Grow request failed to node %s" % node)
11691 # We know that (as far as we can test) operations across different
11692 # nodes will succeed, time to run it for real
11693 for node in instance.all_nodes:
11694 self.cfg.SetDiskID(disk, node)
11695 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11697 result.Raise("Grow request failed to node %s" % node)
11699 # TODO: Rewrite code to work properly
11700 # DRBD goes into sync mode for a short amount of time after executing the
11701 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11702 # calling "resize" in sync mode fails. Sleeping for a short amount of
11703 # time is a work-around.
11706 disk.RecordGrow(self.delta)
11707 self.cfg.Update(instance, feedback_fn)
11709 # Changes have been recorded, release node lock
11710 _ReleaseLocks(self, locking.LEVEL_NODE)
11712 # Downgrade lock while waiting for sync
11713 self.glm.downgrade(locking.LEVEL_INSTANCE)
# Optionally wait for the resync; for instances not administratively up the
# disks are shut down again (or a warning is emitted when not waiting).
11715 if self.op.wait_for_sync:
11716 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11718 self.proc.LogWarning("Disk sync-ing has not returned a good"
11719 " status; please check the instance")
11720 if instance.admin_state != constants.ADMINST_UP:
11721 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11722 elif instance.admin_state != constants.ADMINST_UP:
11723 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11724 " not supposed to be running because no wait for"
11725 " sync mode was requested")
11727 assert self.owned_locks(locking.LEVEL_NODE_RES)
11728 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
# LU that gathers detailed runtime and configuration data for instances,
# optionally querying the nodes live (static vs. locking modes).
# NOTE(review): lines are elided from this listing (embedded numbering skips);
# code is kept byte-identical, only comments are added.
11731 class LUInstanceQueryData(NoHooksLU):
11732   """Query runtime instance data.
11737 def ExpandNames(self):
11738 self.needed_locks = {}
11740 # Use locking if requested or when non-static information is wanted
11741 if not (self.op.static or self.op.use_locking):
11742 self.LogWarning("Non-static data requested, locks need to be acquired")
11743 self.op.use_locking = True
# Explicit instance list (or no locking): resolve names now; otherwise the
# wanted set is derived from the acquired locks in CheckPrereq.
11745 if self.op.instances or not self.op.use_locking:
11746 # Expand instance names right here
11747 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11749 # Will use acquired locks
11750 self.wanted_names = None
11752 if self.op.use_locking:
11753 self.share_locks = _ShareAll()
11755 if self.wanted_names is None:
11756 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11758 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11760 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11761 self.needed_locks[locking.LEVEL_NODE] = []
11762 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11764 def DeclareLocks(self, level):
11765 if self.op.use_locking:
11766 if level == locking.LEVEL_NODEGROUP:
11767 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11769 # Lock all groups used by instances optimistically; this requires going
11770 # via the node before it's locked, requiring verification later on
11771 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11772 frozenset(group_uuid
11773 for instance_name in owned_instances
11775 self.cfg.GetInstanceNodeGroups(instance_name))
11777 elif level == locking.LEVEL_NODE:
11778 self._LockInstancesNodes()
# Resolve the wanted instances and, when locking, re-verify the optimistic
# node-group assumptions made in DeclareLocks.
11780 def CheckPrereq(self):
11781 """Check prerequisites.
11783 This only checks the optional instance list against the existing names.
11786 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11787 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11788 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11790 if self.wanted_names is None:
11791 assert self.op.use_locking, "Locking was not used"
11792 self.wanted_names = owned_instances
11794 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11796 if self.op.use_locking:
11797 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11800 assert not (owned_instances or owned_groups or owned_nodes)
11802 self.wanted_instances = instances.values()
# Live status of one block device via the blockdev_find RPC; skipped in
# static mode or when there is no node to ask.
11804 def _ComputeBlockdevStatus(self, node, instance, dev):
11805 """Returns the status of a block device
11808 if self.op.static or not node:
11811 self.cfg.SetDiskID(dev, node)
11813 result = self.rpc.call_blockdev_find(node, dev)
11817 result.Raise("Can't compute disk status for %s" % instance.name)
11819 status = result.payload
11823 return (status.dev_path, status.major, status.minor,
11824 status.sync_percent, status.estimated_time,
11825 status.is_degraded, status.ldisk_status)
# Public entry: annotates the disk with its parameters first, then delegates
# to the inner routine.
11827 def _ComputeDiskStatus(self, instance, snode, dev):
11828 """Compute block device status.
11831 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11833 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11835 def _ComputeDiskStatusInner(self, instance, snode, dev):
11836 """Compute block device status.
11838 @attention: The device has to be annotated already.
# For DRBD the secondary node is derived from the device's logical_id
# (whichever end is not the primary node).
11841 if dev.dev_type in constants.LDS_DRBD:
11842 # we change the snode then (otherwise we use the one passed in)
11843 if dev.logical_id[0] == instance.primary_node:
11844 snode = dev.logical_id[1]
11846 snode = dev.logical_id[0]
11848 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11850 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
# Child devices are computed recursively.
11853 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11860 "iv_name": dev.iv_name,
11861 "dev_type": dev.dev_type,
11862 "logical_id": dev.logical_id,
11863 "physical_id": dev.physical_id,
11864 "pstatus": dev_pstatus,
11865 "sstatus": dev_sstatus,
11866 "children": dev_children,
# Assemble the per-instance result dict, querying each primary node for the
# live state unless static mode was requested or the node is offline.
11871 def Exec(self, feedback_fn):
11872 """Gather and return data"""
11875 cluster = self.cfg.GetClusterInfo()
11877 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11878 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
11880 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11881 for node in nodes.values()))
11883 group2name_fn = lambda uuid: groups[uuid].name
11885 for instance in self.wanted_instances:
11886 pnode = nodes[instance.primary_node]
11888 if self.op.static or pnode.offline:
11889 remote_state = None
11891 self.LogWarning("Primary node %s is marked offline, returning static"
11892 " information only for instance %s" %
11893 (pnode.name, instance.name))
11895 remote_info = self.rpc.call_instance_info(instance.primary_node,
11897 instance.hypervisor)
11898 remote_info.Raise("Error checking node %s" % instance.primary_node)
11899 remote_info = remote_info.payload
11900 if remote_info and "state" in remote_info:
11901 remote_state = "up"
11903 if instance.admin_state == constants.ADMINST_UP:
11904 remote_state = "down"
11906 remote_state = instance.admin_state
11908 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11911 snodes_group_uuids = [nodes[snode_name].group
11912 for snode_name in instance.secondary_nodes]
11914 result[instance.name] = {
11915 "name": instance.name,
11916 "config_state": instance.admin_state,
11917 "run_state": remote_state,
11918 "pnode": instance.primary_node,
11919 "pnode_group_uuid": pnode.group,
11920 "pnode_group_name": group2name_fn(pnode.group),
11921 "snodes": instance.secondary_nodes,
11922 "snodes_group_uuids": snodes_group_uuids,
11923 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
11925 # this happens to be the same format used for hooks
11926 "nics": _NICListToTuple(self, instance.nics),
11927 "disk_template": instance.disk_template,
11929 "hypervisor": instance.hypervisor,
11930 "network_port": instance.network_port,
11931 "hv_instance": instance.hvparams,
11932 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11933 "be_instance": instance.beparams,
11934 "be_actual": cluster.FillBE(instance),
11935 "os_instance": instance.osparams,
11936 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11937 "serial_no": instance.serial_no,
11938 "mtime": instance.mtime,
11939 "ctime": instance.ctime,
11940 "uuid": instance.uuid,
# Extends each (op, idx, params) modification tuple with a private-data slot
# produced by private_fn (the assignment of fn is elided from this listing).
11946 def PrepareContainerMods(mods, private_fn):
11947   """Prepares a list of container modifications by adding a private data field.
11949   @type mods: list of tuples; (operation, index, parameters)
11950   @param mods: List of modifications
11951   @type private_fn: callable or None
11952   @param private_fn: Callable for constructing a private data field for a
11957   if private_fn is None:
11962   return [(op, idx, params, fn()) for (op, idx, params) in mods]
# Type check for the change lists returned by ApplyContainerMods callbacks:
# optionally a list of (name, value) two-item entries.  NOTE(review): the
# tail of this expression is elided from the listing.
11965 #: Type description for changes as returned by L{ApplyContainerMods}'s
11967 _TApplyContModsCbChanges = \
11968   ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
11969     ht.TNonEmptyString,
# Generic driver that applies add/remove/modify operations to a list
# container (disks or NICs), invoking the supplied callbacks and collecting
# change descriptions.  NOTE(review): lines are elided from this listing
# (embedded numbering skips); code kept byte-identical.
11974 def ApplyContainerMods(kind, container, chgdesc, mods,
11975                        create_fn, modify_fn, remove_fn):
11976   """Applies descriptions in C{mods} to C{container}.
11979   @param kind: One-word item description
11980   @type container: list
11981   @param container: Container to modify
11982   @type chgdesc: None or list
11983   @param chgdesc: List of applied changes
11985   @param mods: Modifications as returned by L{PrepareContainerMods}
11986   @type create_fn: callable
11987   @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
11988     receives absolute item index, parameters and private data object as added
11989     by L{PrepareContainerMods}, returns tuple containing new item and changes
11991   @type modify_fn: callable
11992   @param modify_fn: Callback for modifying an existing item
11993     (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
11994     and private data object as added by L{PrepareContainerMods}, returns
11996   @type remove_fn: callable
11997   @param remove_fn: Callback on removing item; receives absolute item index,
11998     item and private data object as added by L{PrepareContainerMods}
# Only -1 is accepted as a negative index (meaning "last item").
12001   for (op, idx, params, private) in mods:
12004       absidx = len(container) - 1
12006       raise IndexError("Not accepting negative indices other than -1")
12007     elif idx > len(container):
12008       raise IndexError("Got %s index %s, but there are only %s" %
12009                        (kind, idx, len(container)))
# Add: append for the "end" index, otherwise insert before idx.
12015     if op == constants.DDM_ADD:
12016       # Calculate where item will be added
12018       addidx = len(container)
12022       if create_fn is None:
12025       (item, changes) = create_fn(addidx, params, private)
12028       container.append(item)
12031       assert idx <= len(container)
12032       # list.insert does so before the specified index
12033       container.insert(idx, item)
12035     # Retrieve existing item
12037       item = container[absidx]
12039       raise IndexError("Invalid %s index %s" % (kind, idx))
# Remove: optional callback, then deletion from the container.
12041     if op == constants.DDM_REMOVE:
12044       if remove_fn is not None:
12045         remove_fn(absidx, item, private)
12047       changes = [("%s/%s" % (kind, absidx), "remove")]
12049       assert container[absidx] == item
12050       del container[absidx]
# Modify: the callback returns the change list; anything else is a bug.
12051     elif op == constants.DDM_MODIFY:
12052       if modify_fn is not None:
12053         changes = modify_fn(absidx, item, params, private)
12055       raise errors.ProgrammerError("Unhandled operation '%s'" % op)
# Collected changes must satisfy the declared type and are appended to the
# caller-provided change description list when one was given.
12057     assert _TApplyContModsCbChanges(changes)
12059     if not (chgdesc is None or changes is None):
12060       chgdesc.extend(changes)
12063 def _UpdateIvNames(base_index, disks):
12064 """Updates the C{iv_name} attribute of disks.
12066 @type disks: list of L{objects.Disk}
12069 for (idx, disk) in enumerate(disks):
12070 disk.iv_name = "disk/%s" % (base_index + idx, )
# Private per-NIC data holder used by LUInstanceSetParams' NIC modification
# machinery.  NOTE(review): the attribute assignments in __init__ are elided
# from this listing; code kept byte-identical.
12073 class _InstNicModPrivate:
12074   """Data structure for network interface modifications.
12076   Used by L{LUInstanceSetParams}.
12079   def __init__(self):
12084 class LUInstanceSetParams(LogicalUnit):
12085 """Modifies an instances's parameters.
12088 HPATH = "instance-modify"
12089 HTYPE = constants.HTYPE_INSTANCE
# Converts legacy 2-tuple disk/NIC modifications into the canonical
# (op, index, params) 3-tuple form; legacy add/remove get index -1 and
# everything else becomes a DDM_MODIFY.  NOTE(review): lines are elided from
# this listing; code kept byte-identical.
12093 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12094   assert ht.TList(mods)
12095   assert not mods or len(mods[0]) in (2, 3)
12097   if mods and len(mods[0]) == 2:
12101     for op, params in mods:
12102       if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12103         result.append((op, -1, params))
# Only a single legacy add/remove is supported per submission.
12107         raise errors.OpPrereqError("Only one %s add or remove operation is"
12108                                    " supported at a time" % kind,
12109                                    errors.ECODE_INVAL)
12111         result.append((constants.DDM_MODIFY, op, params))
12113   assert verify_fn(result)
# Validates disk/NIC modification parameter dicts: enforces key types,
# forbids parameters on removal, and runs the item-specific verifier for
# add/modify operations.
12120 def _CheckMods(kind, mods, key_types, item_fn):
12121   """Ensures requested disk/NIC modifications are valid.
12124   for (op, _, params) in mods:
12125     assert ht.TDict(params)
12127     utils.ForceDictType(params, key_types)
12129     if op == constants.DDM_REMOVE:
12131       raise errors.OpPrereqError("No settings should be passed when"
12132                                  " removing a %s" % kind,
12133                                  errors.ECODE_INVAL)
12134     elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12135       item_fn(op, params)
12137       raise errors.ProgrammerError("Unhandled operation '%s'" % op)
# Verifies one disk modification: on add, defaults/validates the access mode
# and requires a valid size; on modify, rejects size changes (grow-disk is
# the supported path).  NOTE(review): lines are elided from this listing.
12140 def _VerifyDiskModification(op, params):
12141   """Verifies a disk modification.
12144   if op == constants.DDM_ADD:
12145     mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12146     if mode not in constants.DISK_ACCESS_SET:
12147       raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12148                                  errors.ECODE_INVAL)
12150     size = params.get(constants.IDISK_SIZE, None)
12152       raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12153                                  constants.IDISK_SIZE, errors.ECODE_INVAL)
# The size conversion is wrapped in a (partly elided) try block.
12157     except (TypeError, ValueError), err:
12158       raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12159                                  errors.ECODE_INVAL)
12161     params[constants.IDISK_SIZE] = size
12163   elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12164     raise errors.OpPrereqError("Disk size change not possible, use"
12165                                " grow-disk", errors.ECODE_INVAL)
# Verifies one NIC modification: normalizes "none" IP/bridge/link values,
# rejects bridge+link together, and validates/handles the MAC address
# (defaulting to "auto" on add, forbidding "auto" on modify).
# NOTE(review): lines are elided from this listing; code kept byte-identical.
12168 def _VerifyNicModification(op, params):
12169   """Verifies a network interface modification.
12172   if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12173     ip = params.get(constants.INIC_IP, None)
# The string "none" (case-insensitive) unsets the IP; anything else must be
# a syntactically valid address.
12176     elif ip.lower() == constants.VALUE_NONE:
12177       params[constants.INIC_IP] = None
12178     elif not netutils.IPAddress.IsValid(ip):
12179       raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12180                                  errors.ECODE_INVAL)
# "bridge" is the legacy spelling of "link"; both at once is ambiguous.
12182     bridge = params.get("bridge", None)
12183     link = params.get(constants.INIC_LINK, None)
12184     if bridge and link:
12185       raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12186                                  " at the same time", errors.ECODE_INVAL)
12187     elif bridge and bridge.lower() == constants.VALUE_NONE:
12188       params["bridge"] = None
12189     elif link and link.lower() == constants.VALUE_NONE:
12190       params[constants.INIC_LINK] = None
12192   if op == constants.DDM_ADD:
12193     macaddr = params.get(constants.INIC_MAC, None)
12194     if macaddr is None:
12195       params[constants.INIC_MAC] = constants.VALUE_AUTO
12197   if constants.INIC_MAC in params:
12198     macaddr = params[constants.INIC_MAC]
12199     if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12200       macaddr = utils.NormalizeAndValidateMac(macaddr)
12202     if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12203       raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12204                                  " modifying an existing NIC",
12205                                  errors.ECODE_INVAL)
# Top-level argument validation for the set-params opcode: at least one
# change must be requested, legacy modification formats are upgraded, and
# mutually exclusive combinations are rejected.
12207 def CheckArguments(self):
12208   if not (self.op.nics or self.op.disks or self.op.disk_template or
12209           self.op.hvparams or self.op.beparams or self.op.os_name or
12210           self.op.offline is not None or self.op.runtime_mem):
12211     raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12213   if self.op.hvparams:
12214     _CheckGlobalHvParams(self.op.hvparams)
# Upgrade any legacy 2-tuple modifications to the 3-tuple form.
12217   self._UpgradeDiskNicMods("disk", self.op.disks,
12218                            opcodes.OpInstanceSetParams.TestDiskModifications)
12220   self._UpgradeDiskNicMods("NIC", self.op.nics,
12221                            opcodes.OpInstanceSetParams.TestNicModifications)
12223   # Check disk modifications
12224   self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12225                   self._VerifyDiskModification)
# Disk template conversion cannot be combined with other disk changes.
12227   if self.op.disks and self.op.disk_template is not None:
12228     raise errors.OpPrereqError("Disk template conversion and other disk"
12229                                " changes not supported at the same time",
12230                                errors.ECODE_INVAL)
# Converting to a mirrored template (e.g. DRBD) needs a secondary node.
12232   if (self.op.disk_template and
12233       self.op.disk_template in constants.DTS_INT_MIRROR and
12234       self.op.remote_node is None):
12235     raise errors.OpPrereqError("Changing the disk template to a mirrored"
12236                                " one requires specifying a secondary node",
12237                                errors.ECODE_INVAL)
12239   # Check NIC modifications
12240   self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12241                   self._VerifyNicModification)
# Lock the instance and prepare (empty) node/node-resource lock lists that
# are filled in DeclareLocks; node locks are exclusive on purpose.
12243 def ExpandNames(self):
12244   self._ExpandAndLockInstance()
12245   # Can't even acquire node locks in shared mode as upcoming changes in
12246   # Ganeti 2.6 will start to modify the node object on disk conversion
12247   self.needed_locks[locking.LEVEL_NODE] = []
12248   self.needed_locks[locking.LEVEL_NODE_RES] = []
12249   self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
# Node locks cover the instance's nodes plus, for a disk-template change,
# the requested remote node; NODE_RES mirrors the NODE lock list.
12251 def DeclareLocks(self, level):
12252   # TODO: Acquire group lock in shared mode (disk parameters)
12253   if level == locking.LEVEL_NODE:
12254     self._LockInstancesNodes()
12255     if self.op.disk_template and self.op.remote_node:
12256       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12257       self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12258   elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
# Copied (not aliased) so later mutations don't affect the NODE list.
12260     self.needed_locks[locking.LEVEL_NODE_RES] = \
12261       _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
# Builds the hook environment from the computed new backend parameters, the
# new NIC list and the requested template/memory changes.
# NOTE(review): lines are elided from this listing; code kept byte-identical.
12263 def BuildHooksEnv(self):
12264   """Build hooks env.
12266   This runs on the master, primary and secondaries.
# Only parameters actually being changed are exported.
12270   if constants.BE_MINMEM in self.be_new:
12271     args["minmem"] = self.be_new[constants.BE_MINMEM]
12272   if constants.BE_MAXMEM in self.be_new:
12273     args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12274   if constants.BE_VCPUS in self.be_new:
12275     args["vcpus"] = self.be_new[constants.BE_VCPUS]
12276   # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12277   # information at all.
# NICs are exported as (ip, mac, mode, link) tuples using filled parameters.
12279   if self._new_nics is not None:
12282     for nic in self._new_nics:
12283       nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12284       mode = nicparams[constants.NIC_MODE]
12285       link = nicparams[constants.NIC_LINK]
12286       nics.append((nic.ip, nic.mac, mode, link))
12288     args["nics"] = nics
12290   env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12291   if self.op.disk_template:
12292     env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12293   if self.op.runtime_mem:
12294     env["RUNTIME_MEMORY"] = self.op.runtime_mem
# Hooks run on the master node plus all of the instance's nodes.
12298 def BuildHooksNodes(self):
12299   """Build hooks nodes.
12302   nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
# Computes and validates the updated NIC parameters for one NIC change,
# checking bridges (bridged mode), IP presence (routed mode) and MAC
# uniqueness; results are stashed on the private data object.
# NOTE(review): lines are elided from this listing; code kept byte-identical.
12305 def _PrepareNicModification(self, params, private, old_ip, old_params,
# Keep only the recognized NIC parameters from the request.
12307   update_params_dict = dict([(key, params[key])
12308                              for key in constants.NICS_PARAMETERS
# Legacy "bridge" is folded into the "link" parameter.
12311   if "bridge" in params:
12312     update_params_dict[constants.NIC_LINK] = params["bridge"]
12314   new_params = _GetUpdatedParams(old_params, update_params_dict)
12315   utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12317   new_filled_params = cluster.SimpleFillNIC(new_params)
12318   objects.NIC.CheckParameterSyntax(new_filled_params)
# Bridged mode: the target bridge must exist on the primary node; with
# force the problem is only warned about.
12320   new_mode = new_filled_params[constants.NIC_MODE]
12321   if new_mode == constants.NIC_MODE_BRIDGED:
12322     bridge = new_filled_params[constants.NIC_LINK]
12323     msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12325       msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12327       self.warn.append(msg)
12329       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
# Routed mode: the NIC must end up with an IP address.
12331   elif new_mode == constants.NIC_MODE_ROUTED:
12332     ip = params.get(constants.INIC_IP, old_ip)
12334       raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12335                                  " on a routed NIC", errors.ECODE_INVAL)
# MAC handling: unsetting is forbidden; auto/generate produce a fresh MAC,
# otherwise the explicit MAC is reserved cluster-wide.
12337   if constants.INIC_MAC in params:
12338     mac = params[constants.INIC_MAC]
12340       raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12341                                  errors.ECODE_INVAL)
12342     elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12343       # otherwise generate the MAC address
12344       params[constants.INIC_MAC] = \
12345         self.cfg.GenerateMAC(self.proc.GetECId())
12347       # or validate/reserve the current one
12349         self.cfg.ReserveMAC(mac, self.proc.GetECId())
12350       except errors.ReservationError:
12351         raise errors.OpPrereqError("MAC address '%s' already in use"
12352                                    " in cluster" % mac,
12353                                    errors.ECODE_NOTUNIQUE)
12355   private.params = new_params
12356   private.filled = new_filled_params
12358 def CheckPrereq(self):
12359 """Check prerequisites.
12361 This only checks the instance list against the existing names.
12364 # checking the new params on the primary/secondary nodes
12366 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12367 cluster = self.cluster = self.cfg.GetClusterInfo()
12368 assert self.instance is not None, \
12369 "Cannot retrieve locked instance %s" % self.op.instance_name
12370 pnode = instance.primary_node
12371 nodelist = list(instance.all_nodes)
12372 pnode_info = self.cfg.GetNodeInfo(pnode)
12373 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12375 # Prepare disk/NIC modifications
12376 self.diskmod = PrepareContainerMods(self.op.disks, None)
12377 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12380 if self.op.os_name and not self.op.force:
12381 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12382 self.op.force_variant)
12383 instance_os = self.op.os_name
12385 instance_os = instance.os
12387 assert not (self.op.disk_template and self.op.disks), \
12388 "Can't modify disk template and apply disk changes at the same time"
12390 if self.op.disk_template:
12391 if instance.disk_template == self.op.disk_template:
12392 raise errors.OpPrereqError("Instance already has disk template %s" %
12393 instance.disk_template, errors.ECODE_INVAL)
12395 if (instance.disk_template,
12396 self.op.disk_template) not in self._DISK_CONVERSIONS:
12397 raise errors.OpPrereqError("Unsupported disk template conversion from"
12398 " %s to %s" % (instance.disk_template,
12399 self.op.disk_template),
12400 errors.ECODE_INVAL)
12401 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12402 msg="cannot change disk template")
12403 if self.op.disk_template in constants.DTS_INT_MIRROR:
12404 if self.op.remote_node == pnode:
12405 raise errors.OpPrereqError("Given new secondary node %s is the same"
12406 " as the primary node of the instance" %
12407 self.op.remote_node, errors.ECODE_STATE)
12408 _CheckNodeOnline(self, self.op.remote_node)
12409 _CheckNodeNotDrained(self, self.op.remote_node)
12410 # FIXME: here we assume that the old instance type is DT_PLAIN
12411 assert instance.disk_template == constants.DT_PLAIN
12412 disks = [{constants.IDISK_SIZE: d.size,
12413 constants.IDISK_VG: d.logical_id[0]}
12414 for d in instance.disks]
12415 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12416 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12418 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12419 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12420 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12421 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12422 ignore=self.op.ignore_ipolicy)
12423 if pnode_info.group != snode_info.group:
12424 self.LogWarning("The primary and secondary nodes are in two"
12425 " different node groups; the disk parameters"
12426 " from the first disk's node group will be"
12429 # hvparams processing
12430 if self.op.hvparams:
12431 hv_type = instance.hypervisor
12432 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12433 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12434 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12437 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12438 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12439 self.hv_proposed = self.hv_new = hv_new # the new actual values
12440 self.hv_inst = i_hvdict # the new dict (without defaults)
12442 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12444 self.hv_new = self.hv_inst = {}
12446 # beparams processing
12447 if self.op.beparams:
12448 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12450 objects.UpgradeBeParams(i_bedict)
12451 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12452 be_new = cluster.SimpleFillBE(i_bedict)
12453 self.be_proposed = self.be_new = be_new # the new actual values
12454 self.be_inst = i_bedict # the new dict (without defaults)
12456 self.be_new = self.be_inst = {}
12457 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12458 be_old = cluster.FillBE(instance)
12460 # CPU param validation -- checking every time a parameter is
12461 # changed to cover all cases where either CPU mask or vcpus have
12463 if (constants.BE_VCPUS in self.be_proposed and
12464 constants.HV_CPU_MASK in self.hv_proposed):
12466 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12467 # Verify mask is consistent with number of vCPUs. Can skip this
12468 # test if only 1 entry in the CPU mask, which means same mask
12469 # is applied to all vCPUs.
12470 if (len(cpu_list) > 1 and
12471 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12472 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12474 (self.be_proposed[constants.BE_VCPUS],
12475 self.hv_proposed[constants.HV_CPU_MASK]),
12476 errors.ECODE_INVAL)
12478 # Only perform this test if a new CPU mask is given
12479 if constants.HV_CPU_MASK in self.hv_new:
12480 # Calculate the largest CPU number requested
12481 max_requested_cpu = max(map(max, cpu_list))
12482 # Check that all of the instance's nodes have enough physical CPUs to
12483 # satisfy the requested CPU mask
12484 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12485 max_requested_cpu + 1, instance.hypervisor)
12487 # osparams processing
12488 if self.op.osparams:
12489 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12490 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12491 self.os_inst = i_osdict # the new dict (without defaults)
12497 #TODO(dynmem): do the appropriate check involving MINMEM
12498 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12499 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12500 mem_check_list = [pnode]
12501 if be_new[constants.BE_AUTO_BALANCE]:
12502 # either we changed auto_balance to yes or it was from before
12503 mem_check_list.extend(instance.secondary_nodes)
12504 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12505 instance.hypervisor)
12506 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12507 [instance.hypervisor])
12508 pninfo = nodeinfo[pnode]
12509 msg = pninfo.fail_msg
12511 # Assume the primary node is unreachable and go ahead
12512 self.warn.append("Can't get info from primary node %s: %s" %
12515 (_, _, (pnhvinfo, )) = pninfo.payload
12516 if not isinstance(pnhvinfo.get("memory_free", None), int):
12517 self.warn.append("Node data from primary node %s doesn't contain"
12518 " free memory information" % pnode)
12519 elif instance_info.fail_msg:
12520 self.warn.append("Can't get instance runtime information: %s" %
12521 instance_info.fail_msg)
12523 if instance_info.payload:
12524 current_mem = int(instance_info.payload["memory"])
12526 # Assume instance not running
12527 # (there is a slight race condition here, but it's not very
12528 # probable, and we have no other way to check)
12529 # TODO: Describe race condition
12531 #TODO(dynmem): do the appropriate check involving MINMEM
12532 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12533 pnhvinfo["memory_free"])
12535 raise errors.OpPrereqError("This change will prevent the instance"
12536 " from starting, due to %d MB of memory"
12537 " missing on its primary node" %
12539 errors.ECODE_NORES)
12541 if be_new[constants.BE_AUTO_BALANCE]:
12542 for node, nres in nodeinfo.items():
12543 if node not in instance.secondary_nodes:
12545 nres.Raise("Can't get info from secondary node %s" % node,
12546 prereq=True, ecode=errors.ECODE_STATE)
12547 (_, _, (nhvinfo, )) = nres.payload
12548 if not isinstance(nhvinfo.get("memory_free", None), int):
12549 raise errors.OpPrereqError("Secondary node %s didn't return free"
12550 " memory information" % node,
12551 errors.ECODE_STATE)
12552 #TODO(dynmem): do the appropriate check involving MINMEM
12553 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12554 raise errors.OpPrereqError("This change will prevent the instance"
12555 " from failover to its secondary node"
12556 " %s, due to not enough memory" % node,
12557 errors.ECODE_STATE)
12559 if self.op.runtime_mem:
12560 remote_info = self.rpc.call_instance_info(instance.primary_node,
12562 instance.hypervisor)
12563 remote_info.Raise("Error checking node %s" % instance.primary_node)
12564 if not remote_info.payload: # not running already
12565 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12566 errors.ECODE_STATE)
12568 current_memory = remote_info.payload["memory"]
12569 if (not self.op.force and
12570 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12571 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12572 raise errors.OpPrereqError("Instance %s must have memory between %d"
12573 " and %d MB of memory unless --force is"
12574 " given" % (instance.name,
12575 self.be_proposed[constants.BE_MINMEM],
12576 self.be_proposed[constants.BE_MAXMEM]),
12577 errors.ECODE_INVAL)
12579 if self.op.runtime_mem > current_memory:
12580 _CheckNodeFreeMemory(self, instance.primary_node,
12581 "ballooning memory for instance %s" %
12583 self.op.memory - current_memory,
12584 instance.hypervisor)
12586 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12587 raise errors.OpPrereqError("Disk operations not supported for"
12588 " diskless instances",
12589 errors.ECODE_INVAL)
12591 def _PrepareNicCreate(_, params, private):
12592 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12593 return (None, None)
12595 def _PrepareNicMod(_, nic, params, private):
12596 self._PrepareNicModification(params, private, nic.ip,
12597 nic.nicparams, cluster, pnode)
12600 # Verify NIC changes (operating on copy)
12601 nics = instance.nics[:]
12602 ApplyContainerMods("NIC", nics, None, self.nicmod,
12603 _PrepareNicCreate, _PrepareNicMod, None)
12604 if len(nics) > constants.MAX_NICS:
12605 raise errors.OpPrereqError("Instance has too many network interfaces"
12606 " (%d), cannot add more" % constants.MAX_NICS,
12607 errors.ECODE_STATE)
12609 # Verify disk changes (operating on a copy)
12610 disks = instance.disks[:]
12611 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12612 if len(disks) > constants.MAX_DISKS:
12613 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12614 " more" % constants.MAX_DISKS,
12615 errors.ECODE_STATE)
12617 if self.op.offline is not None:
12618 if self.op.offline:
12619 msg = "can't change to offline"
12621 msg = "can't change to online"
12622 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12624 # Pre-compute NIC changes (necessary to use result in hooks)
12625 self._nic_chgdesc = []
12627 # Operate on copies as this is still in prereq
12628 nics = [nic.Copy() for nic in instance.nics]
12629 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12630 self._CreateNewNic, self._ApplyNicMods, None)
12631 self._new_nics = nics
12633 self._new_nics = None
  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    Creates the DRBD data/meta volumes on both nodes, renames the
    original LVs underneath the new DRBD devices, updates the instance
    object in the configuration and waits for the disks to sync.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    assert instance.disk_template == constants.DT_PLAIN

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn,
    # NOTE(review): the continuation lines closing the two calls above and
    # below (presumably "self.diskparams)") are missing from this extract
    anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in anno_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in anno_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # Release node locks while waiting for sync
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    # NOTE(review): an "if disk_abort:" guard appears to be missing here
    raise errors.OpExecError("There are some degraded disks for"
                             " this instance, please cleanup manually")

    # Node resource locks will be released by caller
  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    Drops the DRBD layer, keeping only the data LVs, then removes the
    now-unused volumes (everything on the former secondary, the DRBD
    metadata on the primary).

    """
    instance = self.instance

    assert len(instance.secondary_nodes) == 1
    assert instance.disk_template == constants.DT_DRBD8

    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
    new_disks = [d.children[0] for d in instance.disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # this is a DRBD disk, return its port to the pool
    # NOTE: this must be done right before the call to cfg.Update!
    for disk in old_disks:
      tcp_port = disk.logical_id[2]
      self.cfg.AddTcpUdpPort(tcp_port)

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    # Release locks in case removing disks takes a while
    _ReleaseLocks(self, locking.LEVEL_NODE)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      # NOTE(review): an "if msg:" guard appears to be missing here
      self.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      # NOTE(review): an "if msg:" guard appears to be missing here
      self.LogWarning("Could not remove metadata for disk %d on node %s,"
                      " continuing anyway: %s", idx, pnode, msg)
  def _CreateNewDisk(self, idx, params, _):
    """Creates a new disk.

    Generates a disk object from the instance's current template and
    creates the block device on all instance nodes (best effort); the
    return value (elided in this extract) is the change description.

    """
    instance = self.instance

    # add a new disk
    if instance.disk_template in constants.DTS_FILEBASED:
      # file-based disks live next to the instance's first disk
      (file_driver, file_path) = instance.disks[0].logical_id
      file_path = os.path.dirname(file_path)
      # NOTE(review): an "else:" appears to be missing before this line
      file_driver = file_path = None

      # NOTE(review): the "disk = \" assignment target for this call
      # appears to be missing from this extract
      _GenerateDiskTemplate(self, instance.disk_template, instance.name,
                            instance.primary_node, instance.secondary_nodes,
                            [params], file_path, file_driver, idx,
                            self.Log, self.diskparams)[0]

    info = _GetInstanceInfoText(instance)

    logging.info("Creating volume %s for instance %s",
                 disk.iv_name, instance.name)
    # Note: this needs to be kept in sync with _CreateDisks
    for node in instance.all_nodes:
      f_create = (node == instance.primary_node)
        # creation failures are logged but do not abort the operation
        _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
      except errors.OpExecError, err:
        self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
                        disk.iv_name, disk, node, err)

      ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
  def _ModifyDisk(idx, disk, params, _):
    """Modifies a disk.

    Only the access mode can be changed; the (elided) return value is
    the change description list.

    """
    disk.mode = params[constants.IDISK_MODE]
      ("disk.mode/%d" % idx, disk.mode),
  def _RemoveDisk(self, idx, root, _):
    """Removes a disk.

    Removes the block devices on every node of the disk tree and, for
    DRBD disks, returns the TCP port to the pool.

    """
    (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
    for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
      self.cfg.SetDiskID(disk, node)
      msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
      # NOTE(review): an "if msg:" guard appears to be missing here
      self.LogWarning("Could not remove disk/%d on node '%s': %s,"
                      " continuing anyway", idx, node, msg)

    # if this is a DRBD disk, return its port to the pool
    if root.dev_type in constants.LDS_DRBD:
      self.cfg.AddTcpUdpPort(root.logical_id[2])
  def _CreateNewNic(idx, params, private):
    """Creates data structure for a new network interface.

    Returns the new NIC object together with the change description.

    """
    mac = params[constants.INIC_MAC]
    ip = params.get(constants.INIC_IP, None)
    # nicparams were validated and stored by _PrepareNicModification
    nicparams = private.params

    return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
       "add:mac=%s,ip=%s,mode=%s,link=%s" %
       (mac, ip, private.filled[constants.NIC_MODE],
        private.filled[constants.NIC_LINK])),
  def _ApplyNicMods(idx, nic, params, private):
    """Modifies a network interface.

    Applies MAC/IP changes directly to the NIC object and replaces its
    nicparams with the prepared ones; the (elided) return value is the
    change description list.

    """
    for key in [constants.INIC_MAC, constants.INIC_IP]:
        # NOTE(review): an "if key in params:" guard appears to be missing
        changes.append(("nic.%s/%d" % (key, idx), params[key]))
        setattr(nic, key, params[key])

      nic.nicparams = private.params

      for (key, val) in params.items():
        changes.append(("nic.%s/%d" % (key, idx), val))
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    # TODO: Replace with self.LogWarning
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    assert ((self.op.disk_template is None) ^
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning any node resource locks"

    # NOTE(review): the "result = []" initializer is missing from this
    # extract; "result" collects (name, new value) change pairs below
    instance = self.instance

    # runtime memory
    if self.op.runtime_mem:
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     self.op.runtime_mem)
      rpcres.Raise("Cannot modify instance runtime memory")
      result.append(("runtime_memory", self.op.runtime_mem))

    # Apply disk changes
    ApplyContainerMods("disk", instance.disks, result, self.diskmod,
                       self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
    _UpdateIvNames(0, instance.disks)

    if self.op.disk_template:
      # assert-only verification that we hold all the needed locks
      check_nodes = set(instance.all_nodes)
      if self.op.remote_node:
        check_nodes.add(self.op.remote_node)
      for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
        owned = self.owned_locks(level)
        assert not (check_nodes - owned), \
          ("Not owning the correct locks, owning %r, expected at least %r" %
           (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
      # NOTE(review): an "if not r_shut:" guard appears to be missing here
      raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                               " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      # NOTE(review): the "try:"/"except:"/"raise" lines that release the
      # DRBD minors only on conversion failure are missing from this extract
      self._DISK_CONVERSIONS[mode](self, feedback_fn)
      self.cfg.ReleaseDRBDMinors(instance.name)
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # Apply NIC changes
    if self._new_nics is not None:
      instance.nics = self._new_nics
      result.extend(self._nic_chgdesc)

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    if self.op.offline is None:
      # Ignore
    elif self.op.offline:
      # Mark instance as offline
      self.cfg.MarkInstanceOffline(instance.name)
      result.append(("admin_state", constants.ADMINST_OFFLINE))
      # Mark instance as online, but stopped
      self.cfg.MarkInstanceDown(instance.name)
      result.append(("admin_state", constants.ADMINST_DOWN))

    self.cfg.Update(instance, feedback_fn)

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    # NOTE(review): the final "return result" is missing from this extract
  # Mapping of supported (old template, new template) pairs to the method
  # implementing the conversion; consulted by CheckPrereq and Exec
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
class LUInstanceChangeGroup(LogicalUnit):
  """Move an instance's nodes to other node groups via the iallocator."""
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE

  # NOTE(review): several "else:" branches and closing braces appear to be
  # elided from this extract of the class; compare with upstream before use

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      # resolve user-supplied group names to UUIDs
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Verify locks, compute and sanity-check the target group set."""
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = frozenset(self.req_target_uuids)
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids
    # NOTE(review): the "env = {" / "}" / "return env" lines appear to be
    # missing from this extract
      "TARGET_GROUPS": " ".join(self.target_uuids),

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Ask the iallocator for the evacuation jobs and return them."""
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=list(self.target_uuids))

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """

  def CheckArguments(self):
    # delegate the actual query work to _ExportQuery
    self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
                             ["node", "export"], self.op.use_locking)

  def ExpandNames(self):
    self.expq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.expq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    # NOTE(review): the "result = {}" initializer, the "else:" branch and
    # the final "return result" appear to be missing from this extract;
    # the old-style result maps node -> False (unreachable) or export list
    for (node, expname) in self.expq.OldStyleQuery(self):
      if expname is None:
        result[node] = False
        result.setdefault(node, []).append(expname)
class _ExportQuery(_QueryBase):
  FIELDS = query.EXPORT_FIELDS

  #: The node name is not a unique key for this query
  SORT_FIELD = "node"

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    # NOTE(review): the "if self.names:" / "else:" pair around the next two
    # assignments appears to be missing from this extract
      self.wanted = _GetWantedNodes(lu, self.names)
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

    if self.do_locking:
      lu.share_locks = _ShareAll()
      lu.needed_locks = {
        locking.LEVEL_NODE: self.wanted,

  def DeclareLocks(self, lu, level):
    # no per-level lock work (the "pass" body is elided in this extract)

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)

    # NOTE(review): the "result = []", the "if nres.fail_msg:" / "else:"
    # pair and the final "return result" appear to be missing here
    for (node, nres) in lu.rpc.call_export_list(nodes).items():
        result.append((node, None))
        result.extend((node, expname) for expname in nres.payload)
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    For remote exports, generates an X509 certificate on the primary
    node and returns the handshake/key/CA information needed by the
    destination cluster.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      # salt for the HMAC over the X509 key name
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      # NOTE(review): the "cert_pem)" continuation, the "return {" line and
      # the closing "}" appear to be missing from this extract
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
13244 def CheckArguments(self):
13245 """Check the arguments.
13248 self.x509_key_name = self.op.x509_key_name
13249 self.dest_x509_ca_pem = self.op.destination_x509_ca
13251 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13252 if not self.x509_key_name:
13253 raise errors.OpPrereqError("Missing X509 key name for encryption",
13254 errors.ECODE_INVAL)
13256 if not self.dest_x509_ca_pem:
13257 raise errors.OpPrereqError("Missing destination X509 CA",
13258 errors.ECODE_INVAL)
13260 def ExpandNames(self):
13261 self._ExpandAndLockInstance()
13263 # Lock all nodes for local exports
13264 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13265 # FIXME: lock only instance primary and destination node
13267 # Sad but true, for now we have do lock all nodes, as we don't know where
13268 # the previous export might be, and in this LU we search for it and
13269 # remove it from its current node. In the future we could fix this by:
13270 # - making a tasklet to search (share-lock all), then create the
13271 # new one, then one to remove, after
13272 # - removing the removal operation altogether
13273 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.
  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    # NOTE(review): the "env = {" opener, the closing "}" and the final
    # "return env" appear to be missing from this extract
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13298 def BuildHooksNodes(self):
13299 """Build hooks nodes.
13302 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13304 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13305 nl.append(self.op.target_node)
# Prerequisite checks for the export: the locked instance must exist and
# its primary node be online; remove-without-shutdown of a running
# instance is rejected; mode-specific destination checks follow (local:
# target node online and not drained; remote: per-disk destination info,
# HMAC-verified X509 key name and a signed/valid destination CA);
# finally, file-based disks are rejected as unsupported for export.
# NOTE(review): this listing is elided -- several "try:"/"else:" lines
# and the "disk_info = []" initializer are not visible; verify against
# the full source.
13309 def CheckPrereq(self):
13310 """Check prerequisites.
13312 This checks that the instance and node names are valid.
13315 instance_name = self.op.instance_name
13317 self.instance = self.cfg.GetInstanceInfo(instance_name)
13318 assert self.instance is not None, \
13319 "Cannot retrieve locked instance %s" % self.op.instance_name
13320 _CheckNodeOnline(self, self.instance.primary_node)
13322 if (self.op.remove_instance and
13323 self.instance.admin_state == constants.ADMINST_UP and
13324 not self.op.shutdown):
13325 raise errors.OpPrereqError("Can not remove instance without shutting it"
13328 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13329 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13330 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13331 assert self.dst_node is not None
13333 _CheckNodeOnline(self, self.dst_node.name)
13334 _CheckNodeNotDrained(self, self.dst_node.name)
# Local exports carry no remote-export state.
13337 self.dest_disk_info = None
13338 self.dest_x509_ca = None
13340 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13341 self.dst_node = None
# In remote mode op.target_node holds per-disk destination tuples, so its
# length must match the instance's disk count.
13343 if len(self.op.target_node) != len(self.instance.disks):
13344 raise errors.OpPrereqError(("Received destination information for %s"
13345 " disks, but instance %s has %s disks") %
13346 (len(self.op.target_node), instance_name,
13347 len(self.instance.disks)),
13348 errors.ECODE_INVAL)
13350 cds = _GetClusterDomainSecret()
13352 # Check X509 key name
13354 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13355 except (TypeError, ValueError), err:
13356 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
# The key name must be authenticated with the cluster domain secret.
13358 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13359 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13360 errors.ECODE_INVAL)
13362 # Load and verify CA
13364 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13365 except OpenSSL.crypto.Error, err:
13366 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13367 (err, ), errors.ECODE_INVAL)
13369 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13370 if errcode is not None:
13371 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13372 (msg, ), errors.ECODE_INVAL)
13374 self.dest_x509_ca = cert
13376 # Verify target information
13378 for idx, disk_data in enumerate(self.op.target_node):
13380 (host, port, magic) = \
13381 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13382 except errors.GenericError, err:
13383 raise errors.OpPrereqError("Target info for disk %s: %s" %
13384 (idx, err), errors.ECODE_INVAL)
13386 disk_info.append((host, port, magic))
13388 assert len(disk_info) == len(self.op.target_node)
13389 self.dest_disk_info = disk_info
# Defensive: any other mode is a programming error, not a user error.
13392 raise errors.ProgrammerError("Unhandled export mode %r" %
13395 # instance disk type verification
13396 # TODO: Implement export support for file-based disks
13397 for disk in self.instance.disks:
13398 if disk.dev_type == constants.LD_FILE:
13399 raise errors.OpPrereqError("Export not supported for instances with"
13400 " file-based disks", errors.ECODE_INVAL)
# Removes stale exports of this instance from every node except the one
# that just received the new export; RPC failures are only warned about.
# NOTE(review): elided listing -- the guard around the feedback/RPC calls
# (likely "if nodelist:"), the loop's "continue", and an "if msg:" are
# not visible here.
13402 def _CleanupExports(self, feedback_fn):
13403 """Removes exports of current instance from all other nodes.
13405 If an instance in a cluster with nodes A..D was exported to node C, its
13406 exports will be removed from the nodes A, B and D.
13409 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13411 nodelist = self.cfg.GetNodeList()
13412 nodelist.remove(self.dst_node.name)
13414 # on one-node clusters nodelist will be empty after the removal
13415 # if we proceed the backup would be removed because OpBackupQuery
13416 # substitutes an empty list with the full cluster node list.
13417 iname = self.instance.name
13419 feedback_fn("Removing old exports for instance %s" % iname)
13420 exportlist = self.rpc.call_export_list(nodelist)
13421 for node in exportlist:
# Nodes whose export-list query failed are skipped rather than aborting.
13422 if exportlist[node].fail_msg:
13424 if iname in exportlist[node].payload:
13425 msg = self.rpc.call_export_remove(node, iname).fail_msg
13427 self.LogWarning("Could not remove older export for instance %s"
13428 " on node %s: %s", iname, node, msg)
# Performs the export: optionally shuts the instance down, activates
# disks if it was stopped, snapshots via ExportInstanceHelper, restarts
# the instance when appropriate, runs the local or remote export, then
# deactivates disks, validates results, optionally removes the instance
# and (local mode only) cleans up old exports.  Returns (fin_resu,
# dresults) -- overall finalization result plus per-disk booleans.
# NOTE(review): elided listing -- several "if ...:" / "try:" / "else:"
# lines and some continuation arguments are not visible here; verify
# control flow against the full source before editing.
13430 def Exec(self, feedback_fn):
13431 """Export an instance to an image in the cluster.
13434 assert self.op.mode in constants.EXPORT_MODES
13436 instance = self.instance
13437 src_node = instance.primary_node
13439 if self.op.shutdown:
13440 # shutdown the instance, but not the disks
13441 feedback_fn("Shutting down instance %s" % instance.name)
13442 result = self.rpc.call_instance_shutdown(src_node, instance,
13443 self.op.shutdown_timeout)
13444 # TODO: Maybe ignore failures if ignore_remove_failures is set
13445 result.Raise("Could not shutdown instance %s on"
13446 " node %s" % (instance.name, src_node))
13448 # set the disks ID correctly since call_instance_start needs the
13449 # correct drbd minor to create the symlinks
13450 for disk in instance.disks:
13451 self.cfg.SetDiskID(disk, src_node)
# Disks need explicit activation only when the instance is not running.
13453 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13456 # Activate the instance disks if we'exporting a stopped instance
13457 feedback_fn("Activating disks for %s" % instance.name)
13458 _StartInstanceDisks(self, instance, None)
13461 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13464 helper.CreateSnapshots()
# Restart the instance if we shut it down only for snapshotting.
13466 if (self.op.shutdown and
13467 instance.admin_state == constants.ADMINST_UP and
13468 not self.op.remove_instance):
13469 assert not activate_disks
13470 feedback_fn("Starting instance %s" % instance.name)
13471 result = self.rpc.call_instance_start(src_node,
13472 (instance, None, None), False)
13473 msg = result.fail_msg
13475 feedback_fn("Failed to start instance: %s" % msg)
13476 _ShutdownInstanceDisks(self, instance)
13477 raise errors.OpExecError("Could not start instance: %s" % msg)
13479 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13480 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13481 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13482 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13483 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13485 (key_name, _, _) = self.x509_key_name
13488 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13491 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13492 key_name, dest_ca_pem,
13497 # Check for backwards compatibility
13498 assert len(dresults) == len(instance.disks)
13499 assert compat.all(isinstance(i, bool) for i in dresults), \
13500 "Not all results are boolean: %r" % dresults
13504 feedback_fn("Deactivating disks for %s" % instance.name)
13505 _ShutdownInstanceDisks(self, instance)
# Collect human-readable failure descriptions before raising.
13507 if not (compat.all(dresults) and fin_resu):
13510 failures.append("export finalization")
13511 if not compat.all(dresults):
13512 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13514 failures.append("disk export: disk(s) %s" % fdsk)
13516 raise errors.OpExecError("Export failed, errors in %s" %
13517 utils.CommaJoin(failures))
13519 # At this point, the export was successful, we can cleanup/finish
13521 # Remove instance if requested
13522 if self.op.remove_instance:
13523 feedback_fn("Removing instance %s" % instance.name)
13524 _RemoveInstance(self, feedback_fn, instance,
13525 self.op.ignore_remove_failures)
13527 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13528 self._CleanupExports(feedback_fn)
13530 return fin_resu, dresults
# Removes all exports for a named instance across the whole cluster.
# Locks all nodes (exports may exist anywhere) but not the instance
# itself, so exports of already-removed instances can still be deleted.
# NOTE(review): elided listing -- the "fqdn_warn"/"found" flag handling
# and several "if msg:" / "continue" lines are not visible here.
13533 class LUBackupRemove(NoHooksLU):
13534 """Remove exports related to the named instance.
13539 def ExpandNames(self):
13540 self.needed_locks = {}
13541 # We need all nodes to be locked in order for RemoveExport to work, but we
13542 # don't need to lock the instance itself, as nothing will happen to it (and
13543 # we can remove exports also for a removed instance)
13544 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13546 def Exec(self, feedback_fn):
13547 """Remove any export.
13550 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13551 # If the instance was not found we'll try with the name that was passed in.
13552 # This will only work if it was an FQDN, though.
13554 if not instance_name:
13556 instance_name = self.op.instance_name
13558 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13559 exportlist = self.rpc.call_export_list(locked_nodes)
13561 for node in exportlist:
13562 msg = exportlist[node].fail_msg
13564 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13566 if instance_name in exportlist[node].payload:
13568 result = self.rpc.call_export_remove(node, instance_name)
13569 msg = result.fail_msg
13571 logging.error("Could not remove export for instance %s"
13572 " on node %s: %s", instance_name, node, msg)
13574 if fqdn_warn and not found:
13575 feedback_fn("Export not found. If trying to remove an export belonging"
13576 " to a deleted instance please use its Fully Qualified"
# Creates a new node group: generates its UUID up front (so the lock can
# be taken before the object exists), validates name uniqueness and the
# optional ndparams/hv_state/disk_state/diskparams/ipolicy, then adds
# the group object to the configuration.
# NOTE(review): elided listing -- "try:" / "else:" / "pass" lines and the
# env/return statements of the hook builders are not visible here.
13580 class LUGroupAdd(LogicalUnit):
13581 """Logical unit for creating node groups.
13584 HPATH = "group-add"
13585 HTYPE = constants.HTYPE_GROUP
13588 def ExpandNames(self):
13589 # We need the new group's UUID here so that we can create and acquire the
13590 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13591 # that it should not check whether the UUID exists in the configuration.
13592 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13593 self.needed_locks = {}
13594 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13596 def CheckPrereq(self):
13597 """Check prerequisites.
13599 This checks that the given group name is not an existing node group
13604 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13605 except errors.OpPrereqError:
13608 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13609 " node group (UUID: %s)" %
13610 (self.op.group_name, existing_uuid),
13611 errors.ECODE_EXISTS)
13613 if self.op.ndparams:
13614 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13616 if self.op.hv_state:
13617 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13619 self.new_hv_state = None
13621 if self.op.disk_state:
13622 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13624 self.new_disk_state = None
13626 if self.op.diskparams:
# Each per-template sub-dict must type-check before being accepted.
13627 for templ in constants.DISK_TEMPLATES:
13628 if templ in self.op.diskparams:
13629 utils.ForceDictType(self.op.diskparams[templ],
13630 constants.DISK_DT_TYPES)
13631 self.new_diskparams = self.op.diskparams
13633 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13634 except errors.OpPrereqError, err:
13635 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
13636 errors.ECODE_INVAL)
13638 self.new_diskparams = {}
13640 if self.op.ipolicy:
13641 cluster = self.cfg.GetClusterInfo()
13642 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13644 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13645 except errors.ConfigurationError, err:
13646 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13647 errors.ECODE_INVAL)
13649 def BuildHooksEnv(self):
13650 """Build hooks env.
13654 "GROUP_NAME": self.op.group_name,
13657 def BuildHooksNodes(self):
13658 """Build hooks nodes.
13661 mn = self.cfg.GetMasterNode()
13662 return ([mn], [mn])
13664 def Exec(self, feedback_fn):
13665 """Add the node group to the cluster.
13668 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13669 uuid=self.group_uuid,
13670 alloc_policy=self.op.alloc_policy,
13671 ndparams=self.op.ndparams,
13672 diskparams=self.new_diskparams,
13673 ipolicy=self.op.ipolicy,
13674 hv_state_static=self.new_hv_state,
13675 disk_state_static=self.new_disk_state)
# check_uuid=False because we generated the UUID ourselves in ExpandNames.
13677 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13678 del self.remove_locks[locking.LEVEL_NODEGROUP]
# Reassigns a set of nodes to a target node group, warning about (or,
# without --force, refusing) assignments that would split mirrored
# instances across groups.
# NOTE(review): elided listing -- a "continue" in the instance loop, the
# decorator of CheckAssignmentForSplitInstances and several "if"/"else"
# headers are not visible here.
13681 class LUGroupAssignNodes(NoHooksLU):
13682 """Logical unit for assigning nodes to groups.
13687 def ExpandNames(self):
13688 # These raise errors.OpPrereqError on their own:
13689 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13690 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13692 # We want to lock all the affected nodes and groups. We have readily
13693 # available the list of nodes, and the *destination* group. To gather the
13694 # list of "source" groups, we need to fetch node information later on.
13695 self.needed_locks = {
13696 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13697 locking.LEVEL_NODE: self.op.nodes,
13700 def DeclareLocks(self, level):
13701 if level == locking.LEVEL_NODEGROUP:
13702 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13704 # Try to get all affected nodes' groups without having the group or node
13705 # lock yet. Needs verification later in the code flow.
13706 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13708 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13710 def CheckPrereq(self):
13711 """Check prerequisites.
13714 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13715 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13716 frozenset(self.op.nodes))
# Detect nodes that changed group between optimistic locking and now.
13718 expected_locks = (set([self.group_uuid]) |
13719 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13720 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13721 if actual_locks != expected_locks:
13722 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13723 " current groups are '%s', used to be '%s'" %
13724 (utils.CommaJoin(expected_locks),
13725 utils.CommaJoin(actual_locks)))
13727 self.node_data = self.cfg.GetAllNodesInfo()
13728 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13729 instance_data = self.cfg.GetAllInstancesInfo()
13731 if self.group is None:
13732 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13733 (self.op.group_name, self.group_uuid))
13735 (new_splits, previous_splits) = \
13736 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13737 for node in self.op.nodes],
13738 self.node_data, instance_data)
13741 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13743 if not self.op.force:
13744 raise errors.OpExecError("The following instances get split by this"
13745 " change and --force was not given: %s" %
13748 self.LogWarning("This operation will split the following instances: %s",
13751 if previous_splits:
13752 self.LogWarning("In addition, these already-split instances continue"
13753 " to be split across groups: %s",
13754 utils.CommaJoin(utils.NiceSort(previous_splits)))
13756 def Exec(self, feedback_fn):
13757 """Assign nodes to a new group.
13760 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13762 self.cfg.AssignGroupNodes(mods)
# NOTE(review): upstream this is a @staticmethod; the decorator line is
# not visible in this elided listing.
13765 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13766 """Check for split instances after a node assignment.
13768 This method considers a series of node assignments as an atomic operation,
13769 and returns information about split instances after applying the set of
13772 In particular, it returns information about newly split instances, and
13773 instances that were already split, and remain so after the change.
13775 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13778 @type changes: list of (node_name, new_group_uuid) pairs.
13779 @param changes: list of node assignments to consider.
13780 @param node_data: a dict with data for all nodes
13781 @param instance_data: a dict with all instances to consider
13782 @rtype: a two-tuple
13783 @return: a list of instances that were previously okay and result split as a
13784 consequence of this change, and a list of instances that were previously
13785 split and this change does not fix.
13788 changed_nodes = dict((node, group) for node, group in changes
13789 if node_data[node].group != group)
13791 all_split_instances = set()
13792 previously_split_instances = set()
13794 def InstanceNodes(instance):
13795 return [instance.primary_node] + list(instance.secondary_nodes)
13797 for inst in instance_data.values():
13798 if inst.disk_template not in constants.DTS_INT_MIRROR:
13801 instance_nodes = InstanceNodes(inst)
# Already split: its nodes currently span more than one group.
13803 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13804 previously_split_instances.add(inst.name)
# Split after the change: apply the pending assignments, then re-check.
13806 if len(set(changed_nodes.get(node, node_data[node].group)
13807 for node in instance_nodes)) > 1:
13808 all_split_instances.add(inst.name)
13810 return (list(all_split_instances - previously_split_instances),
13811 list(previously_split_instances & all_split_instances))
# Query backend for node groups: resolves requested names/UUIDs, then
# optionally maps groups to their member nodes and primary instances.
# NOTE(review): elided listing -- the "if not self.names:" / "else:"
# branch headers, "missing = []", "node_to_group = {}" and similar
# initializers are not visible here.
13814 class _GroupQuery(_QueryBase):
13815 FIELDS = query.GROUP_FIELDS
13817 def ExpandNames(self, lu):
13818 lu.needed_locks = {}
13820 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13821 self._cluster = lu.cfg.GetClusterInfo()
13822 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13825 self.wanted = [name_to_uuid[name]
13826 for name in utils.NiceSort(name_to_uuid.keys())]
13828 # Accept names to be either names or UUIDs.
13831 all_uuid = frozenset(self._all_groups.keys())
13833 for name in self.names:
13834 if name in all_uuid:
13835 self.wanted.append(name)
13836 elif name in name_to_uuid:
13837 self.wanted.append(name_to_uuid[name])
13839 missing.append(name)
13842 raise errors.OpPrereqError("Some groups do not exist: %s" %
13843 utils.CommaJoin(missing),
13844 errors.ECODE_NOENT)
13846 def DeclareLocks(self, lu, level):
13849 def _GetQueryData(self, lu):
13850 """Computes the list of node groups and their attributes.
13853 do_nodes = query.GQ_NODE in self.requested_data
13854 do_instances = query.GQ_INST in self.requested_data
13856 group_to_nodes = None
13857 group_to_instances = None
13859 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13860 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13861 # latter GetAllInstancesInfo() is not enough, for we have to go through
13862 # instance->node. Hence, we will need to process nodes even if we only need
13863 # instance information.
13864 if do_nodes or do_instances:
13865 all_nodes = lu.cfg.GetAllNodesInfo()
13866 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13869 for node in all_nodes.values():
13870 if node.group in group_to_nodes:
13871 group_to_nodes[node.group].append(node.name)
13872 node_to_group[node.name] = node.group
13875 all_instances = lu.cfg.GetAllInstancesInfo()
13876 group_to_instances = dict((uuid, []) for uuid in self.wanted)
# Instances are attributed to the group of their primary node.
13878 for instance in all_instances.values():
13879 node = instance.primary_node
13880 if node in node_to_group:
13881 group_to_instances[node_to_group[node]].append(instance.name)
13884 # Do not pass on node information if it was not requested.
13885 group_to_nodes = None
13887 return query.GroupQueryData(self._cluster,
13888 [self._all_groups[uuid]
13889 for uuid in self.wanted],
13890 group_to_nodes, group_to_instances,
13891 query.GQ_DISKPARAMS in self.requested_data)
# Thin LU wrapper delegating all node-group query work to _GroupQuery.
# NOTE(review): elided listing -- the docstring terminator and the
# REQ_BGL attribute are not visible here.
13894 class LUGroupQuery(NoHooksLU):
13895 """Logical unit for querying node groups.
13900 def CheckArguments(self):
13901 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13902 self.op.output_fields, False)
13904 def ExpandNames(self):
13905 self.gq.ExpandNames(self)
13907 def DeclareLocks(self, level):
13908 self.gq.DeclareLocks(self, level)
13910 def Exec(self, feedback_fn):
13911 return self.gq.OldStyleQuery(self)
# Modifies parameters of an existing node group (ndparams, diskparams,
# alloc policy, hv/disk state, instance policy), verifying each new
# value and warning when the new ipolicy is violated by instances.
# NOTE(review): elided listing -- the all_changes list opener, several
# "try:" lines and continuation arguments are not visible here.
13914 class LUGroupSetParams(LogicalUnit):
13915 """Modifies the parameters of a node group.
13918 HPATH = "group-modify"
13919 HTYPE = constants.HTYPE_GROUP
13922 def CheckArguments(self):
13925 self.op.diskparams,
13926 self.op.alloc_policy,
13928 self.op.disk_state,
# At least one of the modifiable attributes must actually be set.
13932 if all_changes.count(None) == len(all_changes):
13933 raise errors.OpPrereqError("Please pass at least one modification",
13934 errors.ECODE_INVAL)
13936 def ExpandNames(self):
13937 # This raises errors.OpPrereqError on its own:
13938 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13940 self.needed_locks = {
13941 locking.LEVEL_INSTANCE: [],
13942 locking.LEVEL_NODEGROUP: [self.group_uuid],
13945 self.share_locks[locking.LEVEL_INSTANCE] = 1
13947 def DeclareLocks(self, level):
13948 if level == locking.LEVEL_INSTANCE:
13949 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13951 # Lock instances optimistically, needs verification once group lock has
13953 self.needed_locks[locking.LEVEL_INSTANCE] = \
13954 self.cfg.GetNodeGroupInstances(self.group_uuid)
13957 def _UpdateAndVerifyDiskParams(old, new):
13958 """Updates and verifies disk parameters.
13961 new_params = _GetUpdatedParams(old, new)
13962 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
13965 def CheckPrereq(self):
13966 """Check prerequisites.
13969 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13971 # Check if locked instances are still correct
13972 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13974 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13975 cluster = self.cfg.GetClusterInfo()
13977 if self.group is None:
13978 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13979 (self.op.group_name, self.group_uuid))
13981 if self.op.ndparams:
13982 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13983 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
13984 self.new_ndparams = new_ndparams
13986 if self.op.diskparams:
13987 diskparams = self.group.diskparams
13988 uavdp = self._UpdateAndVerifyDiskParams
13989 # For each disktemplate subdict update and verify the values
13990 new_diskparams = dict((dt,
13991 uavdp(diskparams.get(dt, {}),
13992 self.op.diskparams[dt]))
13993 for dt in constants.DISK_TEMPLATES
13994 if dt in self.op.diskparams)
13995 # As we've all subdicts of diskparams ready, lets merge the actual
13996 # dict with all updated subdicts
13997 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
13999 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14000 except errors.OpPrereqError, err:
14001 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14002 errors.ECODE_INVAL)
14004 if self.op.hv_state:
14005 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14006 self.group.hv_state_static)
14008 if self.op.disk_state:
14009 self.new_disk_state = \
14010 _MergeAndVerifyDiskState(self.op.disk_state,
14011 self.group.disk_state_static)
14013 if self.op.ipolicy:
14014 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14018 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14019 inst_filter = lambda inst: inst.name in owned_instances
14020 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14022 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
14024 new_ipolicy, instances)
# Policy violations only produce a warning; the change still proceeds.
14027 self.LogWarning("After the ipolicy change the following instances"
14028 " violate them: %s",
14029 utils.CommaJoin(violations))
14031 def BuildHooksEnv(self):
14032 """Build hooks env.
14036 "GROUP_NAME": self.op.group_name,
14037 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14040 def BuildHooksNodes(self):
14041 """Build hooks nodes.
14044 mn = self.cfg.GetMasterNode()
14045 return ([mn], [mn])
14047 def Exec(self, feedback_fn):
14048 """Modifies the node group.
14053 if self.op.ndparams:
14054 self.group.ndparams = self.new_ndparams
14055 result.append(("ndparams", str(self.group.ndparams)))
14057 if self.op.diskparams:
14058 self.group.diskparams = self.new_diskparams
14059 result.append(("diskparams", str(self.group.diskparams)))
14061 if self.op.alloc_policy:
14062 self.group.alloc_policy = self.op.alloc_policy
14064 if self.op.hv_state:
14065 self.group.hv_state_static = self.new_hv_state
14067 if self.op.disk_state:
14068 self.group.disk_state_static = self.new_disk_state
14070 if self.op.ipolicy:
14071 self.group.ipolicy = self.new_ipolicy
14073 self.cfg.Update(self.group, feedback_fn)
# Removes an empty node group; refuses when the group still has member
# nodes or is the last group in the cluster.
# NOTE(review): elided listing -- the "try:" around RemoveNodeGroup and
# an "if group_nodes:" header are not visible here.
14077 class LUGroupRemove(LogicalUnit):
14078 HPATH = "group-remove"
14079 HTYPE = constants.HTYPE_GROUP
14082 def ExpandNames(self):
14083 # This will raises errors.OpPrereqError on its own:
14084 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14085 self.needed_locks = {
14086 locking.LEVEL_NODEGROUP: [self.group_uuid],
14089 def CheckPrereq(self):
14090 """Check prerequisites.
14092 This checks that the given group name exists as a node group, that is
14093 empty (i.e., contains no nodes), and that is not the last group of the
14097 # Verify that the group is empty.
14098 group_nodes = [node.name
14099 for node in self.cfg.GetAllNodesInfo().values()
14100 if node.group == self.group_uuid]
14103 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14105 (self.op.group_name,
14106 utils.CommaJoin(utils.NiceSort(group_nodes))),
14107 errors.ECODE_STATE)
14109 # Verify the cluster would not be left group-less.
14110 if len(self.cfg.GetNodeGroupList()) == 1:
14111 raise errors.OpPrereqError("Group '%s' is the only group,"
14112 " cannot be removed" %
14113 self.op.group_name,
14114 errors.ECODE_STATE)
14116 def BuildHooksEnv(self):
14117 """Build hooks env.
14121 "GROUP_NAME": self.op.group_name,
14124 def BuildHooksNodes(self):
14125 """Build hooks nodes.
14128 mn = self.cfg.GetMasterNode()
14129 return ([mn], [mn])
14131 def Exec(self, feedback_fn):
14132 """Remove the node group.
14136 self.cfg.RemoveNodeGroup(self.group_uuid)
14137 except errors.ConfigurationError:
14138 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14139 (self.op.group_name, self.group_uuid))
14141 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
# Renames a node group after checking the new name is not already used
# (as a name or UUID); hooks run on the master and the group's members.
# NOTE(review): elided listing -- "try:" / "else:" / "pass" lines, the
# "run_nodes = [mn]" initializer and an "if group is None:" header are
# not visible here.
14144 class LUGroupRename(LogicalUnit):
14145 HPATH = "group-rename"
14146 HTYPE = constants.HTYPE_GROUP
14149 def ExpandNames(self):
14150 # This raises errors.OpPrereqError on its own:
14151 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14153 self.needed_locks = {
14154 locking.LEVEL_NODEGROUP: [self.group_uuid],
14157 def CheckPrereq(self):
14158 """Check prerequisites.
14160 Ensures requested new name is not yet used.
14164 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14165 except errors.OpPrereqError:
14168 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14169 " node group (UUID: %s)" %
14170 (self.op.new_name, new_name_uuid),
14171 errors.ECODE_EXISTS)
14173 def BuildHooksEnv(self):
14174 """Build hooks env.
14178 "OLD_NAME": self.op.group_name,
14179 "NEW_NAME": self.op.new_name,
14182 def BuildHooksNodes(self):
14183 """Build hooks nodes.
14186 mn = self.cfg.GetMasterNode()
14188 all_nodes = self.cfg.GetAllNodesInfo()
14189 all_nodes.pop(mn, None)
14192 run_nodes.extend(node.name for node in all_nodes.values()
14193 if node.group == self.group_uuid)
14195 return (run_nodes, run_nodes)
14197 def Exec(self, feedback_fn):
14198 """Rename the node group.
14201 group = self.cfg.GetNodeGroup(self.group_uuid)
14204 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14205 (self.op.group_name, self.group_uuid))
14207 group.name = self.op.new_name
14208 self.cfg.Update(group, feedback_fn)
14210 return self.op.new_name
# Evacuates all instances out of a node group into target groups chosen
# by the iallocator, returning the jobs that perform the moves.
# NOTE(review): elided listing -- "else:" branch headers, docstring
# terminators and some continuation arguments are not visible here.
14213 class LUGroupEvacuate(LogicalUnit):
14214 HPATH = "group-evacuate"
14215 HTYPE = constants.HTYPE_GROUP
14218 def ExpandNames(self):
14219 # This raises errors.OpPrereqError on its own:
14220 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14222 if self.op.target_groups:
14223 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14224 self.op.target_groups)
14226 self.req_target_uuids = []
# The evacuated group must not be among its own targets.
14228 if self.group_uuid in self.req_target_uuids:
14229 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14230 " as a target group (targets are %s)" %
14232 utils.CommaJoin(self.req_target_uuids)),
14233 errors.ECODE_INVAL)
14235 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14237 self.share_locks = _ShareAll()
14238 self.needed_locks = {
14239 locking.LEVEL_INSTANCE: [],
14240 locking.LEVEL_NODEGROUP: [],
14241 locking.LEVEL_NODE: [],
14244 def DeclareLocks(self, level):
14245 if level == locking.LEVEL_INSTANCE:
14246 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14248 # Lock instances optimistically, needs verification once node and group
14249 # locks have been acquired
14250 self.needed_locks[locking.LEVEL_INSTANCE] = \
14251 self.cfg.GetNodeGroupInstances(self.group_uuid)
14253 elif level == locking.LEVEL_NODEGROUP:
14254 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14256 if self.req_target_uuids:
14257 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14259 # Lock all groups used by instances optimistically; this requires going
14260 # via the node before it's locked, requiring verification later on
14261 lock_groups.update(group_uuid
14262 for instance_name in
14263 self.owned_locks(locking.LEVEL_INSTANCE)
14265 self.cfg.GetInstanceNodeGroups(instance_name))
14267 # No target groups, need to lock all of them
14268 lock_groups = locking.ALL_SET
14270 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14272 elif level == locking.LEVEL_NODE:
14273 # This will only lock the nodes in the group to be evacuated which
14274 # contain actual instances
14275 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14276 self._LockInstancesNodes()
14278 # Lock all nodes in group to be evacuated and target groups
14279 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14280 assert self.group_uuid in owned_groups
14281 member_nodes = [node_name
14282 for group in owned_groups
14283 for node_name in self.cfg.GetNodeGroup(group).members]
14284 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14286 def CheckPrereq(self):
14287 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14288 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14289 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14291 assert owned_groups.issuperset(self.req_target_uuids)
14292 assert self.group_uuid in owned_groups
14294 # Check if locked instances are still correct
14295 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14297 # Get instance information
14298 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14300 # Check if node groups for locked instances are still correct
14301 _CheckInstancesNodeGroups(self.cfg, self.instances,
14302 owned_groups, owned_nodes, self.group_uuid)
14304 if self.req_target_uuids:
14305 # User requested specific target groups
14306 self.target_uuids = self.req_target_uuids
14308 # All groups except the one to be evacuated are potential targets
14309 self.target_uuids = [group_uuid for group_uuid in owned_groups
14310 if group_uuid != self.group_uuid]
14312 if not self.target_uuids:
14313 raise errors.OpPrereqError("There are no possible target groups",
14314 errors.ECODE_INVAL)
14316 def BuildHooksEnv(self):
14317 """Build hooks env.
14321 "GROUP_NAME": self.op.group_name,
14322 "TARGET_GROUPS": " ".join(self.target_uuids),
14325 def BuildHooksNodes(self):
14326 """Build hooks nodes.
14329 mn = self.cfg.GetMasterNode()
14331 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14333 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14335 return (run_nodes, run_nodes)
14337 def Exec(self, feedback_fn):
14338 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14340 assert self.group_uuid not in self.target_uuids
# Delegate the placement decisions to the configured iallocator.
14342 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14343 instances=instances, target_groups=self.target_uuids)
14345 ial.Run(self.op.iallocator)
14347 if not ial.success:
14348 raise errors.OpPrereqError("Can't compute group evacuation using"
14349 " iallocator '%s': %s" %
14350 (self.op.iallocator, ial.info),
14351 errors.ECODE_NORES)
14353 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14355 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14356 len(jobs), self.op.group_name)
14358 return ResultWithJobs(jobs)
# Abstract base for the tag LUs: resolves the tag target (cluster, node,
# instance or node group), acquires the matching lock, and in
# CheckPrereq stores the target object on self.target.
# NOTE(review): elided listing -- the "else:" branch that handles
# unknown kinds in ExpandNames is not fully visible here.
14361 class TagsLU(NoHooksLU): # pylint: disable=W0223
14362 """Generic tags LU.
14364 This is an abstract class which is the parent of all the other tags LUs.
14367 def ExpandNames(self):
14368 self.group_uuid = None
14369 self.needed_locks = {}
14371 if self.op.kind == constants.TAG_NODE:
14372 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14373 lock_level = locking.LEVEL_NODE
14374 lock_name = self.op.name
14375 elif self.op.kind == constants.TAG_INSTANCE:
14376 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14377 lock_level = locking.LEVEL_INSTANCE
14378 lock_name = self.op.name
14379 elif self.op.kind == constants.TAG_NODEGROUP:
14380 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14381 lock_level = locking.LEVEL_NODEGROUP
14382 lock_name = self.group_uuid
14387 if lock_level and getattr(self.op, "use_locking", True):
14388 self.needed_locks[lock_level] = lock_name
14390 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14391 # not possible to acquire the BGL based on opcode parameters)
14393 def CheckPrereq(self):
14394 """Check prerequisites.
14397 if self.op.kind == constants.TAG_CLUSTER:
14398 self.target = self.cfg.GetClusterInfo()
14399 elif self.op.kind == constants.TAG_NODE:
14400 self.target = self.cfg.GetNodeInfo(self.op.name)
14401 elif self.op.kind == constants.TAG_INSTANCE:
14402 self.target = self.cfg.GetInstanceInfo(self.op.name)
14403 elif self.op.kind == constants.TAG_NODEGROUP:
14404 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14406 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14407 str(self.op.kind), errors.ECODE_INVAL)
# NOTE(review): damaged extraction -- gaps in the embedded numbering
# (14412-14415, 14418, ...) indicate dropped blank/docstring lines.
14410 class LUTagsGet(TagsLU):
14411 """Returns the tags of a given object.
14416 def ExpandNames(self):
14417 TagsLU.ExpandNames(self)
14419 # Share locks as this is only a read operation
14420 self.share_locks = _ShareAll()
# Exec: self.target was resolved by TagsLU.CheckPrereq; simply return a
# copy of its tag set as a list.
14422 def Exec(self, feedback_fn):
14423 """Returns the tag list.
14426 return list(self.target.GetTags())
# NOTE(review): damaged extraction -- the embedded numbering shows dropped
# lines, notably the "try:" opener before re.compile (orig 14444), the
# assignments initializing the targets/results (orig 14452-14454, likely
# "cfg = self.cfg" and "results = []"), and the final "return results"
# (orig 14467+). Recover the full text from VCS before editing.
14429 class LUTagsSearch(NoHooksLU):
14430 """Searches the tags for a given pattern.
# Read-only query: no locks needed.
14435 def ExpandNames(self):
14436 self.needed_locks = {}
14437 # (no further setup)
14438 def CheckPrereq(self):
14439 """Check prerequisites.
14441 This checks the pattern passed for validity by compiling it.
14445 self.re = re.compile(self.op.pattern)
14446 except re.error, err:
14447 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14448 (self.op.pattern, err), errors.ECODE_INVAL)
# Exec: walks the tags of the cluster, all instances, all nodes and all
# node groups, collecting (path, tag) pairs whose tag matches the pattern.
14450 def Exec(self, feedback_fn):
14451 """Returns the tag list.
14455 tgts = [("/cluster", cfg.GetClusterInfo())]
14456 ilist = cfg.GetAllInstancesInfo().values()
14457 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14458 nlist = cfg.GetAllNodesInfo().values()
14459 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14460 tgts.extend(("/nodegroup/%s" % n.name, n)
14461 for n in cfg.GetAllNodeGroupsInfo().values())
14463 for path, target in tgts:
14464 for tag in target.GetTags():
14465 if self.re.search(tag):
14466 results.append((path, tag))
# NOTE(review): damaged extraction -- the embedded numbering shows dropped
# lines, notably the Exec docstring and the "try:" opener (orig 14487-14490)
# matching the "except errors.TagError" below. Recover from VCS.
14470 class LUTagsSet(TagsLU):
14471 """Sets a tag on a given object.
# CheckPrereq: validate every requested tag before touching the target.
14476 def CheckPrereq(self):
14477 """Check prerequisites.
14479 This checks the type and length of the tag name and value.
14482 TagsLU.CheckPrereq(self)
14483 for tag in self.op.tags:
14484 objects.TaggableObject.ValidateTag(tag)
# Exec: add all tags, then persist the target object via the config.
14486 def Exec(self, feedback_fn):
14491 for tag in self.op.tags:
14492 self.target.AddTag(tag)
14493 except errors.TagError, err:
14494 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14495 self.cfg.Update(self.target, feedback_fn)
# NOTE(review): damaged extraction -- the embedded numbering shows dropped
# lines, notably orig 14515/14517 (presumably the guard, e.g.
# "if diff_tags:") that makes the "Tag(s) not found" raise conditional.
# Recover from VCS before editing.
14498 class LUTagsDel(TagsLU):
14499 """Delete a list of tags from a given object.
# CheckPrereq: all tags to delete must be valid AND currently present on
# the target; otherwise the missing ones are reported in one error.
14504 def CheckPrereq(self):
14505 """Check prerequisites.
14507 This checks that we have the given tag.
14510 TagsLU.CheckPrereq(self)
14511 for tag in self.op.tags:
14512 objects.TaggableObject.ValidateTag(tag)
14513 del_tags = frozenset(self.op.tags)
14514 cur_tags = self.target.GetTags()
# Tags requested for deletion that the object does not carry.
14516 diff_tags = del_tags - cur_tags
14518 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14519 raise errors.OpPrereqError("Tag(s) %s not found" %
14520 (utils.CommaJoin(diff_names), ),
14521 errors.ECODE_NOENT)
# Exec: remove the tags and persist the target via the config.
14523 def Exec(self, feedback_fn):
14524 """Remove the tag from the object.
14527 for tag in self.op.tags:
14528 self.target.RemoveTag(tag)
14529 self.cfg.Update(self.target, feedback_fn)
# NOTE(review): damaged extraction -- the embedded numbering shows dropped
# lines in Exec (orig 14572-14573 and 14577: presumably the
# "self._TestDelay()" calls and an "else:"), so the visible loop body does
# not show the actual sleep being invoked. Recover from VCS.
14532 class LUTestDelay(NoHooksLU):
14533 """Sleep for a specified amount of time.
14535 This LU sleeps on the master and/or nodes for a specified amount of
14541 def ExpandNames(self):
14542 """Expand names and set required locks.
14544 This expands the node list, if any.
14547 self.needed_locks = {}
14548 if self.op.on_nodes:
14549 # _GetWantedNodes can be used here, but is not always appropriate to use
14550 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14551 # more information.
14552 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14553 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
# _TestDelay: sleep on the master (local utils.TestDelay) and/or on the
# requested nodes via the test_delay RPC; any per-node failure raises.
14555 def _TestDelay(self):
14556 """Do the actual sleep.
14559 if self.op.on_master:
14560 if not utils.TestDelay(self.op.duration):
14561 raise errors.OpExecError("Error during master delay test")
14562 if self.op.on_nodes:
14563 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14564 for node, node_result in result.items():
14565 node_result.Raise("Failure during rpc call to node %s" % node)
# Exec: run the delay once (repeat == 0) or self.op.repeat times with a
# progress log line per iteration.
14567 def Exec(self, feedback_fn):
14568 """Execute the test delay opcode, with the wanted repetitions.
14571 if self.op.repeat == 0:
14574 top_value = self.op.repeat - 1
14575 for i in range(self.op.repeat):
14576 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
# NOTE(review): damaged extraction -- the embedded numbering shows many
# dropped lines in this class: the socket bind/listen and the callback
# invocation cb(tmpsock) (around orig 14610-14616), the try/except/finally
# scaffolding in _NotifyUsingSocket (orig 14619, 14623-14625, 14630-14631,
# 14635, 14638-14639), the "if prereq:"/"else:" in _Notify (orig
# 14663/14665) and the trailing partial(...) arguments (orig 14669-14670),
# plus the "if self.op.fail:" guard before the final raise (orig
# 14709-14710). Recover the full text from VCS before editing.
14580 class LUTestJqueue(NoHooksLU):
14581 """Utility LU to test some aspects of the job queue.
14586 # Must be lower than default timeout for WaitForJobChange to see whether it
14587 # notices changed jobs
14588 _CLIENT_CONNECT_TIMEOUT = 20.0
14589 _CLIENT_CONFIRM_TIMEOUT = 60.0
# _NotifyUsingSocket (classmethod, decorator dropped by the extraction):
# creates a throwaway AF_UNIX listening socket, hands its path to the
# client via cb, then waits for the client to connect and later to close.
14592 def _NotifyUsingSocket(cls, cb, errcls):
14593 """Opens a Unix socket and waits for another program to connect.
14596 @param cb: Callback to send socket name to client
14597 @type errcls: class
14598 @param errcls: Exception class to use for errors
14601 # Using a temporary directory as there's no easy way to create temporary
14602 # sockets without writing a custom loop around tempfile.mktemp and
14604 tmpdir = tempfile.mkdtemp()
14606 tmpsock = utils.PathJoin(tmpdir, "sock")
14608 logging.debug("Creating temporary socket at %s", tmpsock)
14609 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14614 # Send details to client
14617 # Wait for client to connect before continuing
14618 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14620 (conn, _) = sock.accept()
14621 except socket.error, err:
14622 raise errcls("Client didn't connect in time (%s)" % err)
14626 # Remove as soon as client is connected
14627 shutil.rmtree(tmpdir)
14629 # Wait for client to close
14632 # pylint: disable=E1101
14633 # Instance of '_socketobject' has no ... member
14634 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14636 except socket.error, err:
14637 raise errcls("Client failed to confirm notification (%s)" % err)
# _SendNotification: emits a job-queue-test log entry carrying the socket
# path and test details to the client.
14641 def _SendNotification(self, test, arg, sockname):
14642 """Sends a notification to the client.
14645 @param test: Test name
14646 @param arg: Test argument (depends on test)
14647 @type sockname: string
14648 @param sockname: Socket path
14651 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
# _Notify: picks the error class by phase (OpPrereqError during prereq,
# OpExecError during exec) and performs the socket-based handshake.
14653 def _Notify(self, prereq, test, arg):
14654 """Notifies the client of a test.
14657 @param prereq: Whether this is a prereq-phase test
14659 @param test: Test name
14660 @param arg: Test argument (depends on test)
14664 errcls = errors.OpPrereqError
14666 errcls = errors.OpExecError
14668 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
# CheckArguments / ExpandNames track their own invocation counts so the
# test opcode can verify the LU lifecycle ordering.
14672 def CheckArguments(self):
14673 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14674 self.expandnames_calls = 0
14676 def ExpandNames(self):
14677 checkargs_calls = getattr(self, "checkargs_calls", 0)
14678 if checkargs_calls < 1:
14679 raise errors.ProgrammerError("CheckArguments was not called")
14681 self.expandnames_calls += 1
14683 if self.op.notify_waitlock:
14684 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14686 self.LogInfo("Expanding names")
14688 # Get lock on master node (just to get a lock, not for a particular reason)
14689 self.needed_locks = {
14690 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
# Exec: optionally notifies the client, then streams the requested test
# log messages via feedback_fn and reports the count sent.
14693 def Exec(self, feedback_fn):
14694 if self.expandnames_calls < 1:
14695 raise errors.ProgrammerError("ExpandNames was not called")
14697 if self.op.notify_exec:
14698 self._Notify(False, constants.JQT_EXEC, None)
14700 self.LogInfo("Executing")
14702 if self.op.log_messages:
14703 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14704 for idx, msg in enumerate(self.op.log_messages):
14705 self.LogInfo("Sending log message %s", idx + 1)
14706 feedback_fn(constants.JQT_MSGPREFIX + msg)
14707 # Report how many test messages have been sent
14708 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14711 raise errors.OpExecError("Opcode failure was requested")
# NOTE(review): damaged extraction -- the embedded numbering shows a large
# number of dropped lines throughout this class (decorators such as
# @staticmethod, try/except openers, "else:" branches, dict literal
# openers/closers such as "data = {" / "}", loop headers like
# "for key in kwargs:" / "for key in keyset:", and the _MODE_DATA mapping's
# opening line). The visible text is NOT runnable as-is; recover the full
# class from version control before editing.
14716 class IAllocator(object):
14717 """IAllocator framework.
14719 An IAllocator instance has three sets of attributes:
14720 - cfg that is needed to query the cluster
14721 - input data (all members of the _KEYS class attribute are required)
14722 - four buffer attributes (in|out_data|text), that represent the
14723 input (to the external script) in text and data structure format,
14724 and the output from it, again in two formats
14725 - the result variables from the script (success, info, nodes) for
14729 # pylint: disable=R0902
14730 # lots of instance attributes
# __init__: stores the config/RPC runner, pre-initializes every possible
# per-mode input attribute to keep pylint quiet, validates kwargs against
# the key list declared for self.mode in _MODE_DATA, then builds the
# request text for the external allocator script.
14732 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14734 self.rpc = rpc_runner
14735 # init buffer variables
14736 self.in_text = self.out_text = self.in_data = self.out_data = None
14737 # init all input fields so that pylint is happy
14739 self.memory = self.disks = self.disk_template = self.spindle_use = None
14740 self.os = self.tags = self.nics = self.vcpus = None
14741 self.hypervisor = None
14742 self.relocate_from = None
14744 self.instances = None
14745 self.evac_mode = None
14746 self.target_groups = []
14748 self.required_nodes = None
14749 # init result fields
14750 self.success = self.info = self.result = None
# Dispatch on mode; the dropped except-branch (orig 14754) turns a
# KeyError here into a ProgrammerError for unknown modes.
14753 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14755 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14756 " IAllocator" % self.mode)
14758 keyset = [n for (n, _) in keydata]
# Every kwarg must be declared for this mode...
14761 if key not in keyset:
14762 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14763 " IAllocator" % key)
14764 setattr(self, key, kwargs[key])
# ...and every declared key must have been supplied.
14767 if key not in kwargs:
14768 raise errors.ProgrammerError("Missing input parameter '%s' to"
14769 " IAllocator" % key)
14770 self._BuildInputData(compat.partial(fn, self), keydata)
# _ComputeClusterData: gathers the mode-independent cluster snapshot
# (cluster info, node groups, static + dynamic node data, instance data)
# into self.in_data for the allocator request.
14772 def _ComputeClusterData(self):
14773 """Compute the generic allocator input data.
14775 This is the data that is independent of the actual operation.
14779 cluster_info = cfg.GetClusterInfo()
14782 "version": constants.IALLOCATOR_VERSION,
14783 "cluster_name": cfg.GetClusterName(),
14784 "cluster_tags": list(cluster_info.GetTags()),
14785 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14786 "ipolicy": cluster_info.ipolicy,
14788 ninfo = cfg.GetAllNodesInfo()
14789 iinfo = cfg.GetAllInstancesInfo().values()
14790 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
# Only vm_capable nodes are queried for dynamic (RPC) data.
14793 node_list = [n.name for n in ninfo.values() if n.vm_capable]
# The hypervisor used for the node_info query depends on the mode.
14795 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14796 hypervisor_name = self.hypervisor
14797 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14798 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14800 hypervisor_name = cluster_info.primary_hypervisor
14802 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14805 self.rpc.call_all_instances_info(node_list,
14806 cluster_info.enabled_hypervisors)
14808 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14810 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14811 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14812 i_list, config_ndata)
14813 assert len(data["nodes"]) == len(ninfo), \
14814 "Incomplete node data computed"
14816 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14818 self.in_data = data
# _ComputeNodeGroupData (staticmethod; decorator dropped by extraction):
# per-group name, allocation policy and effective instance policy.
14821 def _ComputeNodeGroupData(cfg):
14822 """Compute node groups data.
14825 cluster = cfg.GetClusterInfo()
14826 ng = dict((guuid, {
14827 "name": gdata.name,
14828 "alloc_policy": gdata.alloc_policy,
14829 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14831 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
# _ComputeBasicNodeData (staticmethod; decorator dropped): static,
# config-derived per-node attributes keyed by node name.
14836 def _ComputeBasicNodeData(cfg, node_cfg):
14837 """Compute global node data.
14840 @returns: a dict of name: (node dict, node config)
14843 # fill in static (config-based) values
14844 node_results = dict((ninfo.name, {
14845 "tags": list(ninfo.GetTags()),
14846 "primary_ip": ninfo.primary_ip,
14847 "secondary_ip": ninfo.secondary_ip,
14848 "offline": ninfo.offline,
14849 "drained": ninfo.drained,
14850 "master_candidate": ninfo.master_candidate,
14851 "group": ninfo.group,
14852 "master_capable": ninfo.master_capable,
14853 "vm_capable": ninfo.vm_capable,
14854 "ndparams": cfg.GetNdParams(ninfo),
14856 for ninfo in node_cfg.values())
14858 return node_results
# _ComputeDynamicNodeData (staticmethod; decorator dropped): merges the
# RPC-reported live node data (memory/disk/cpu) into the static dicts,
# and derives free memory accounting for under-reporting instances.
14861 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14863 """Compute global node data.
14865 @param node_results: the basic node structures as filled from the config
14868 #TODO(dynmem): compute the right data on MAX and MIN memory
14869 # make a copy of the current dict
14870 node_results = dict(node_results)
14871 for nname, nresult in node_data.items():
14872 assert nname in node_results, "Missing basic data for node %s" % nname
14873 ninfo = node_cfg[nname]
# Offline/drained nodes are skipped for live data; only reachable
# nodes must have returned valid RPC payloads.
14875 if not (ninfo.offline or ninfo.drained):
14876 nresult.Raise("Can't get data for node %s" % nname)
14877 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14879 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14881 for attr in ["memory_total", "memory_free", "memory_dom0",
14882 "vg_size", "vg_free", "cpu_total"]:
14883 if attr not in remote_info:
14884 raise errors.OpExecError("Node '%s' didn't return attribute"
14885 " '%s'" % (nname, attr))
14886 if not isinstance(remote_info[attr], int):
14887 raise errors.OpExecError("Node '%s' returned invalid value"
14889 (nname, attr, remote_info[attr]))
14890 # compute memory used by primary instances
14891 i_p_mem = i_p_up_mem = 0
14892 for iinfo, beinfo in i_list:
14893 if iinfo.primary_node == nname:
14894 i_p_mem += beinfo[constants.BE_MAXMEM]
14895 if iinfo.name not in node_iinfo[nname].payload:
# Instance reports less memory in use than its BE_MAXMEM:
# count the difference back out of the node's free memory.
14898 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14899 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14900 remote_info["memory_free"] -= max(0, i_mem_diff)
14902 if iinfo.admin_state == constants.ADMINST_UP:
14903 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14905 # compute memory used by instances
14907 "total_memory": remote_info["memory_total"],
14908 "reserved_memory": remote_info["memory_dom0"],
14909 "free_memory": remote_info["memory_free"],
14910 "total_disk": remote_info["vg_size"],
14911 "free_disk": remote_info["vg_free"],
14912 "total_cpus": remote_info["cpu_total"],
14913 "i_pri_memory": i_p_mem,
14914 "i_pri_up_memory": i_p_up_mem,
# Static config values win over dynamic ones on key collision.
14916 pnr_dyn.update(node_results[nname])
14917 node_results[nname] = pnr_dyn
14919 return node_results
# _ComputeInstanceData (staticmethod; decorator dropped): per-instance
# request dicts (NICs with filled parameters, disks, placement, BE data).
14922 def _ComputeInstanceData(cluster_info, i_list):
14923 """Compute global instance data.
14927 for iinfo, beinfo in i_list:
14929 for nic in iinfo.nics:
14930 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14934 "mode": filled_params[constants.NIC_MODE],
14935 "link": filled_params[constants.NIC_LINK],
# Legacy "bridge" key kept for older iallocator scripts.
14937 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14938 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14939 nic_data.append(nic_dict)
14941 "tags": list(iinfo.GetTags()),
14942 "admin_state": iinfo.admin_state,
14943 "vcpus": beinfo[constants.BE_VCPUS],
14944 "memory": beinfo[constants.BE_MAXMEM],
14945 "spindle_use": beinfo[constants.BE_SPINDLE_USE],
14947 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14949 "disks": [{constants.IDISK_SIZE: dsk.size,
14950 constants.IDISK_MODE: dsk.mode}
14951 for dsk in iinfo.disks],
14952 "disk_template": iinfo.disk_template,
14953 "hypervisor": iinfo.hypervisor,
14955 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14957 instance_data[iinfo.name] = pir
14959 return instance_data
# _AddNewInstance: builds the ALLOC-mode request body; mirrored disk
# templates need two nodes, everything else one.
14961 def _AddNewInstance(self):
14962 """Add new instance data to allocator structure.
14964 This in combination with _AllocatorGetClusterData will create the
14965 correct structure needed as input for the allocator.
14967 The checks for the completeness of the opcode must have already been
14971 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14973 if self.disk_template in constants.DTS_INT_MIRROR:
14974 self.required_nodes = 2
14976 self.required_nodes = 1
14980 "disk_template": self.disk_template,
14983 "vcpus": self.vcpus,
14984 "memory": self.memory,
14985 "spindle_use": self.spindle_use,
14986 "disks": self.disks,
14987 "disk_space_total": disk_space,
14989 "required_nodes": self.required_nodes,
14990 "hypervisor": self.hypervisor,
# _AddRelocateInstance: builds the RELOC-mode request body; only
# mirrored instances with exactly one secondary can be relocated.
14995 def _AddRelocateInstance(self):
14996 """Add relocate instance data to allocator structure.
14998 This in combination with _IAllocatorGetClusterData will create the
14999 correct structure needed as input for the allocator.
15001 The checks for the completeness of the opcode must have already been
15005 instance = self.cfg.GetInstanceInfo(self.name)
15006 if instance is None:
15007 raise errors.ProgrammerError("Unknown instance '%s' passed to"
15008 " IAllocator" % self.name)
15010 if instance.disk_template not in constants.DTS_MIRRORED:
15011 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
15012 errors.ECODE_INVAL)
15014 if instance.disk_template in constants.DTS_INT_MIRROR and \
15015 len(instance.secondary_nodes) != 1:
15016 raise errors.OpPrereqError("Instance has not exactly one secondary node",
15017 errors.ECODE_STATE)
15019 self.required_nodes = 1
15020 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
15021 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
15025 "disk_space_total": disk_space,
15026 "required_nodes": self.required_nodes,
15027 "relocate_from": self.relocate_from,
# _AddNodeEvacuate: builds the NODE_EVAC-mode request body.
15031 def _AddNodeEvacuate(self):
15032 """Get data for node-evacuate requests.
15036 "instances": self.instances,
15037 "evac_mode": self.evac_mode,
# _AddChangeGroup: builds the CHG_GROUP-mode request body.
# (Docstring repeats "node-evacuate"; presumably a copy/paste slip --
# confirm against upstream before correcting.)
15040 def _AddChangeGroup(self):
15041 """Get data for node-evacuate requests.
15045 "instances": self.instances,
15046 "target_groups": self.target_groups,
# _BuildInputData: computes the cluster snapshot, lets the per-mode fn
# fill the request dict, validates every declared key with its ht type
# check, then serializes everything into self.in_text.
15049 def _BuildInputData(self, fn, keydata):
15050 """Build input data structures.
15053 self._ComputeClusterData()
15056 request["type"] = self.mode
15057 for keyname, keytype in keydata:
15058 if keyname not in request:
15059 raise errors.ProgrammerError("Request parameter %s is missing" %
15061 val = request[keyname]
15062 if not keytype(val):
15063 raise errors.ProgrammerError("Request parameter %s doesn't pass"
15064 " validation, value %s, expected"
15065 " type %s" % (keyname, val, keytype))
15066 self.in_data["request"] = request
15068 self.in_text = serializer.Dump(self.in_data)
# Class-level result validators for the allocator output, built with the
# ht (huetype) combinators. The extraction dropped several closing lines
# and the assignments for _NEVAC_MOVED / _NEVAC_FAILED and "_MODE_DATA = {".
15070 _STRING_LIST = ht.TListOf(ht.TString)
15071 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
15072 # pylint: disable=E1101
15073 # Class '...' has no 'OP_ID' member
15074 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
15075 opcodes.OpInstanceMigrate.OP_ID,
15076 opcodes.OpInstanceReplaceDisks.OP_ID])
15080 ht.TListOf(ht.TAnd(ht.TIsLength(3),
15081 ht.TItems([ht.TNonEmptyString,
15082 ht.TNonEmptyString,
15083 ht.TListOf(ht.TNonEmptyString),
15086 ht.TListOf(ht.TAnd(ht.TIsLength(2),
15087 ht.TItems([ht.TNonEmptyString,
15090 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
15091 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
# _MODE_DATA maps mode -> (request-builder method, [(key, ht type)],
# result validator); consumed by __init__ above.
15094 constants.IALLOCATOR_MODE_ALLOC:
15097 ("name", ht.TString),
15098 ("memory", ht.TInt),
15099 ("spindle_use", ht.TInt),
15100 ("disks", ht.TListOf(ht.TDict)),
15101 ("disk_template", ht.TString),
15102 ("os", ht.TString),
15103 ("tags", _STRING_LIST),
15104 ("nics", ht.TListOf(ht.TDict)),
15105 ("vcpus", ht.TInt),
15106 ("hypervisor", ht.TString),
15108 constants.IALLOCATOR_MODE_RELOC:
15109 (_AddRelocateInstance,
15110 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
15112 constants.IALLOCATOR_MODE_NODE_EVAC:
15113 (_AddNodeEvacuate, [
15114 ("instances", _STRING_LIST),
15115 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
15117 constants.IALLOCATOR_MODE_CHG_GROUP:
15118 (_AddChangeGroup, [
15119 ("instances", _STRING_LIST),
15120 ("target_groups", _STRING_LIST),
# Run: invokes the named iallocator script on the master node (or a
# caller-supplied function, for tests) and validates its output.
15124 def Run(self, name, validate=True, call_fn=None):
15125 """Run an instance allocator and return the results.
15128 if call_fn is None:
15129 call_fn = self.rpc.call_iallocator_runner
15131 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
15132 result.Raise("Failure while running the iallocator script")
15134 self.out_text = result.payload
# The dropped line(s) around orig 15135 presumably gate this on the
# "validate" parameter -- confirm against upstream.
15136 self._ValidateResult()
# _ValidateResult: parses the script's JSON output, checks the mandatory
# keys, runs the mode's result validator, and applies extra per-mode
# sanity checks (RELOC results must stay within the requested groups).
15138 def _ValidateResult(self):
15139 """Process the allocator results.
15141 This will process and if successful save the result in
15142 self.out_data and the other parameters.
15146 rdict = serializer.Load(self.out_text)
15147 except Exception, err:
15148 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
15150 if not isinstance(rdict, dict):
15151 raise errors.OpExecError("Can't parse iallocator results: not a dict")
15153 # TODO: remove backwards compatiblity in later versions
15154 if "nodes" in rdict and "result" not in rdict:
15155 rdict["result"] = rdict["nodes"]
15158 for key in "success", "info", "result":
15159 if key not in rdict:
15160 raise errors.OpExecError("Can't parse iallocator results:"
15161 " missing key '%s'" % key)
15162 setattr(self, key, rdict[key])
15164 if not self._result_check(self.result):
# NOTE(review): OpExecError appears to be given errors.ECODE_INVAL as a
# second positional argument here; unlike OpPrereqError it does not take
# an error-code parameter -- verify against the errors module upstream.
15165 raise errors.OpExecError("Iallocator returned invalid result,"
15166 " expected %s, got %s" %
15167 (self._result_check, self.result),
15168 errors.ECODE_INVAL)
15170 if self.mode == constants.IALLOCATOR_MODE_RELOC:
15171 assert self.relocate_from is not None
15172 assert self.required_nodes == 1
15174 node2group = dict((name, ndata["group"])
15175 for (name, ndata) in self.in_data["nodes"].items())
15177 fn = compat.partial(self._NodesToGroups, node2group,
15178 self.in_data["nodegroups"])
15180 instance = self.cfg.GetInstanceInfo(self.name)
15181 request_groups = fn(self.relocate_from + [instance.primary_node])
15182 result_groups = fn(rdict["result"] + [instance.primary_node])
# Relocation may not move the instance to a node group that was not
# part of the original request.
15184 if self.success and not set(result_groups).issubset(request_groups):
15185 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15186 " differ from original groups (%s)" %
15187 (utils.CommaJoin(result_groups),
15188 utils.CommaJoin(request_groups)))
15190 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15191 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15193 self.out_data = rdict
# _NodesToGroups (staticmethod; decorator dropped): maps node names to a
# sorted list of unique group names, ignoring unknown nodes and falling
# back to the UUID when a group has no resolvable name.
15196 def _NodesToGroups(node2group, groups, nodes):
15197 """Returns a list of unique group names for a list of nodes.
15199 @type node2group: dict
15200 @param node2group: Map from node name to group UUID
15202 @param groups: Group information
15204 @param nodes: Node names
15211 group_uuid = node2group[node]
15213 # Ignore unknown node
15217 group = groups[group_uuid]
15219 # Can't find group, let's use UUID
15220 group_name = group_uuid
15222 group_name = group["name"]
15224 result.add(group_name)
15226 return sorted(result)
# NOTE(review): damaged extraction -- gaps in the embedded numbering show
# dropped lines (e.g. the "else:" before the "Invalid test allocator mode"
# raise at orig 15277, the IAllocator mode/name keyword lines in Exec at
# orig 15295-15296, 15300-15302, 15309-15310, 15315, 15320, the "else:" at
# orig 15323/15329, and the final "return result"). Recover from VCS.
15229 class LUTestAllocator(NoHooksLU):
15230 """Run allocator tests.
15232 This LU runs the allocator tests
# CheckPrereq: per-mode validation of the test opcode's parameters.
15235 def CheckPrereq(self):
15236 """Check prerequisites.
15238 This checks the opcode parameters depending on the director and mode test.
15241 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15242 for attr in ["memory", "disks", "disk_template",
15243 "os", "tags", "nics", "vcpus"]:
15244 if not hasattr(self.op, attr):
15245 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15246 attr, errors.ECODE_INVAL)
# For an allocation test the instance name must NOT already exist.
15247 iname = self.cfg.ExpandInstanceName(self.op.name)
15248 if iname is not None:
15249 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15250 iname, errors.ECODE_EXISTS)
15251 if not isinstance(self.op.nics, list):
15252 raise errors.OpPrereqError("Invalid parameter 'nics'",
15253 errors.ECODE_INVAL)
15254 if not isinstance(self.op.disks, list):
15255 raise errors.OpPrereqError("Invalid parameter 'disks'",
15256 errors.ECODE_INVAL)
# Each disk must be a dict with an integer size and a valid access mode.
15257 for row in self.op.disks:
15258 if (not isinstance(row, dict) or
15259 constants.IDISK_SIZE not in row or
15260 not isinstance(row[constants.IDISK_SIZE], int) or
15261 constants.IDISK_MODE not in row or
15262 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15263 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15264 " parameter", errors.ECODE_INVAL)
15265 if self.op.hypervisor is None:
15266 self.op.hypervisor = self.cfg.GetHypervisorType()
15267 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
# Relocation tests need an existing instance; default relocate_from to
# its current secondaries.
15268 fname = _ExpandInstanceName(self.cfg, self.op.name)
15269 self.op.name = fname
15270 self.relocate_from = \
15271 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15272 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15273 constants.IALLOCATOR_MODE_NODE_EVAC):
15274 if not self.op.instances:
15275 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15276 self.op.instances = _GetWantedInstances(self, self.op.instances)
15278 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15279 self.op.mode, errors.ECODE_INVAL)
# An "out" direction test actually runs a named allocator script; "in"
# only builds and returns the request text.
15281 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15282 if self.op.allocator is None:
15283 raise errors.OpPrereqError("Missing allocator name",
15284 errors.ECODE_INVAL)
15285 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15286 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15287 self.op.direction, errors.ECODE_INVAL)
# Exec: builds the per-mode IAllocator request and returns either the
# generated input text (DIR_IN) or the script's raw output (DIR_OUT).
15289 def Exec(self, feedback_fn):
15290 """Run the allocator test.
15293 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15294 ial = IAllocator(self.cfg, self.rpc,
15297 memory=self.op.memory,
15298 disks=self.op.disks,
15299 disk_template=self.op.disk_template,
15303 vcpus=self.op.vcpus,
15304 hypervisor=self.op.hypervisor,
15305 spindle_use=self.op.spindle_use,
15307 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15308 ial = IAllocator(self.cfg, self.rpc,
15311 relocate_from=list(self.relocate_from),
15313 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15314 ial = IAllocator(self.cfg, self.rpc,
15316 instances=self.op.instances,
15317 target_groups=self.op.target_groups)
15318 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15319 ial = IAllocator(self.cfg, self.rpc,
15321 instances=self.op.instances,
15322 evac_mode=self.op.evac_mode)
15324 raise errors.ProgrammerError("Uncatched mode %s in"
15325 " LUTestAllocator.Exec", self.op.mode)
15327 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15328 result = ial.in_text
15330 ial.Run(self.op.allocator, validate=False)
15331 result = ial.out_text
# NOTE(review): damaged extraction -- the "_QUERY_IMPL = {" opener (orig
# 15336) and the closing "}" (orig 15343-15344) were dropped; only the
# mapping entries and the sanity assert survive. Recover from VCS.
15335 #: Query type implementations
# Maps each opcode-reachable query resource to its implementation class.
15337 constants.QR_CLUSTER: _ClusterQuery,
15338 constants.QR_INSTANCE: _InstanceQuery,
15339 constants.QR_NODE: _NodeQuery,
15340 constants.QR_GROUP: _GroupQuery,
15341 constants.QR_OS: _OsQuery,
15342 constants.QR_EXPORT: _ExportQuery,
# Ensure the mapping covers exactly the query types exposed via opcodes.
15345 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
# NOTE(review): damaged extraction -- the "try:" / "except KeyError:" lines
# (orig 15353-15356) wrapping the lookup were dropped. Recover from VCS.
15348 def _GetQueryImplementation(name):
15349 """Returns the implementation for a query type.
15351 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15355 return _QUERY_IMPL[name]
# An unknown query resource is reported as an OpPrereqError.
15357 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15358 errors.ECODE_INVAL)