4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
61 from ganeti import rpc
62 from ganeti import runtime
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
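# Illustrative sketch, not part of the original module: an LU's Exec can hand
# follow-up work to the job queue by returning ResultWithJobs. The opcodes
# below only demonstrate the "list of lists" structure (each inner list
# becomes one job):
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpClusterVerifyConfig()],
#             [opcodes.OpClusterVerifyGroup(group_name=group)
#              for group in self.cfg.GetNodeGroupList()]]
#     return ResultWithJobs(jobs)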
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
141 # Dicts used to declare locking needs to mcpu
142 self.needed_locks = None
143 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.remove_locks = {}
146 # Used to force good behavior when calling helper functions
147 self.recalculate_locks = {}
149 self.Log = processor.Log # pylint: disable=C0103
150 self.LogWarning = processor.LogWarning # pylint: disable=C0103
151 self.LogInfo = processor.LogInfo # pylint: disable=C0103
152 self.LogStep = processor.LogStep # pylint: disable=C0103
153 # support for dry-run
154 self.dry_run_result = None
155 # support for generic debug attribute
156 if (not hasattr(self.op, "debug_level") or
157 not isinstance(self.op.debug_level, int)):
158 self.op.debug_level = 0
163 # Validate opcode parameters and set defaults
164 self.op.Validate(True)
166 self.CheckArguments()
168 def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments.
171 This method is for doing a simple syntactic check and ensuring the
172 validity of opcode parameters, without any cluster-related
173 checks. While the same can be accomplished in ExpandNames and/or
174 CheckPrereq, doing these separately is better because:
176 - ExpandNames is left as a purely lock-related function
177 - CheckPrereq is run after we have acquired locks (and possible
180 The function is allowed to change the self.op attribute so that
181 later methods need not worry about missing parameters.
186 def ExpandNames(self):
187 """Expand names for this LU.
189 This method is called before starting to execute the opcode, and it should
190 update all the parameters of the opcode to their canonical form (e.g. a
191 short node name must be fully expanded after this method has successfully
192 completed). This way locking, hooks, logging, etc. can work correctly.
194 LUs which implement this method must also populate the self.needed_locks
195 member, as a dict with lock levels as keys, and a list of needed lock names
198 - use an empty dict if you don't need any lock
199 - if you don't need any lock at a particular level omit that
200 level (note that in this case C{DeclareLocks} won't be called
201 at all for that level)
202 - if you need locks at a level, but you can't calculate it in
203 this function, initialise that level with an empty list and do
204 further processing in L{LogicalUnit.DeclareLocks} (see that
205 function's docstring)
206 - don't put anything for the BGL level
207 - if you want all locks at a level use L{locking.ALL_SET} as a value
209 If you need to share locks (rather than acquire them exclusively) at one
210 level you can modify self.share_locks, setting a true value (usually 1) for
211 that level. By default locks are not shared.
213 This function can also define a list of tasklets, which then will be
214 executed in order instead of the usual LU-level CheckPrereq and Exec
215 functions, if those are not defined by the LU.
219 # Acquire all nodes and one instance
220 self.needed_locks = {
221 locking.LEVEL_NODE: locking.ALL_SET,
222 locking.LEVEL_INSTANCE: ['instance1.example.com'],
224 # Acquire just two nodes
225 self.needed_locks = {
226 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
229 self.needed_locks = {} # No, you can't leave it to the default value None
232 # The implementation of this method is mandatory only if the new LU is
233 # concurrent, so that old LUs don't need to be changed all at the same
236 self.needed_locks = {} # Exclusive LUs don't need locks.
238 raise NotImplementedError
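# Hedged sketch, not from the original module: sharing locks as described in
# the docstring above. A hypothetical read-mostly LU could acquire all node
# locks in shared mode like this:
#
#   def ExpandNames(self):
#     self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
#     self.share_locks[locking.LEVEL_NODE] = 1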
240 def DeclareLocks(self, level):
241 """Declare LU locking needs for a level
243 While most LUs can just declare their locking needs at ExpandNames time,
244 sometimes there's the need to calculate some locks after having acquired
245 the ones before. This function is called just before acquiring locks at a
246 particular level, but after acquiring the ones at lower levels, and permits
247 such calculations. It can be used to modify self.needed_locks, and by
248 default it does nothing.
250 This function is only called if you have something already set in
251 self.needed_locks for the level.
253 @param level: Locking level which is going to be locked
254 @type level: member of L{ganeti.locking.LEVELS}
258 def CheckPrereq(self):
259 """Check prerequisites for this LU.
261 This method should check that the prerequisites for the execution
262 of this LU are fulfilled. It can do internode communication, but
263 it should be idempotent - no cluster or system changes are
266 The method should raise errors.OpPrereqError in case something is
267 not fulfilled. Its return value is ignored.
269 This method should also update all the parameters of the opcode to
270 their canonical form if it hasn't been done by ExpandNames before.
273 if self.tasklets is not None:
274 for (idx, tl) in enumerate(self.tasklets):
275 logging.debug("Checking prerequisites for tasklet %s/%s",
276 idx + 1, len(self.tasklets))
281 def Exec(self, feedback_fn):
284 This method should implement the actual work. It should raise
285 errors.OpExecError for failures that are somewhat dealt with in
289 if self.tasklets is not None:
290 for (idx, tl) in enumerate(self.tasklets):
291 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
294 raise NotImplementedError
296 def BuildHooksEnv(self):
297 """Build hooks environment for this LU.
300 @return: Dictionary containing the environment that will be used for
301 running the hooks for this LU. The keys of the dict must not be prefixed
302 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
303 will extend the environment with additional variables. If no environment
304 should be defined, an empty dictionary should be returned (not C{None}).
305 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
309 raise NotImplementedError
311 def BuildHooksNodes(self):
312 """Build list of nodes to run LU's hooks.
314 @rtype: tuple; (list, list)
315 @return: Tuple containing a list of node names on which the hook
316 should run before the execution and a list of node names on which the
317 hook should run after the execution. If no nodes are to be returned, use
318 an empty list (and not None).
319 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
323 raise NotImplementedError
325 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
326 """Notify the LU about the results of its hooks.
328 This method is called every time a hooks phase is executed, and notifies
329 the Logical Unit about the hooks' result. The LU can then use it to alter
330 its result based on the hooks. By default the method does nothing and the
331 previous result is passed back unchanged but any LU can define it if it
332 wants to use the local cluster hook-scripts somehow.
334 @param phase: one of L{constants.HOOKS_PHASE_POST} or
335 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
336 @param hook_results: the results of the multi-node hooks rpc call
337 @param feedback_fn: function used to send feedback back to the caller
338 @param lu_result: the previous Exec result this LU had, or None
340 @return: the new Exec result, based on the previous result
344 # API must be kept, thus we ignore the "unused argument" and "could
345 # be a function" warnings
346 # pylint: disable=W0613,R0201
349 def _ExpandAndLockInstance(self):
350 """Helper function to expand and lock an instance.
352 Many LUs that work on an instance take its name in self.op.instance_name
353 and need to expand it and then declare the expanded name for locking. This
354 function does it, and then updates self.op.instance_name to the expanded
355 name. It also initializes needed_locks as a dict, if this hasn't been done
359 if self.needed_locks is None:
360 self.needed_locks = {}
362 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
363 "_ExpandAndLockInstance called with instance-level locks set"
364 self.op.instance_name = _ExpandInstanceName(self.cfg,
365 self.op.instance_name)
366 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
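# Usage sketch (assumed, not from the original module): an instance LU
# normally combines this helper with _LockInstancesNodes:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()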
368 def _LockInstancesNodes(self, primary_only=False,
369 level=locking.LEVEL_NODE):
370 """Helper function to declare instances' nodes for locking.
372 This function should be called after locking one or more instances to lock
373 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
374 with all primary or secondary nodes for instances already locked and
375 present in self.needed_locks[locking.LEVEL_INSTANCE].
377 It should be called from DeclareLocks, and for safety only works if
378 self.recalculate_locks[locking.LEVEL_NODE] is set.
380 In the future it may grow parameters to just lock some instance's nodes, or
381 to just lock primaries or secondary nodes, if needed.
383 It should be called in DeclareLocks in a way similar to::
385 if level == locking.LEVEL_NODE:
386 self._LockInstancesNodes()
388 @type primary_only: boolean
389 @param primary_only: only lock primary nodes of locked instances
390 @param level: Which lock level to use for locking nodes
393 assert level in self.recalculate_locks, \
394 "_LockInstancesNodes helper function called with no nodes to recalculate"
396 # TODO: check if we've really been called with the instance locks held
398 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
399 # future we might want to have different behaviors depending on the value
400 # of self.recalculate_locks[locking.LEVEL_NODE]
402 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
403 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
404 wanted_nodes.append(instance.primary_node)
406 wanted_nodes.extend(instance.secondary_nodes)
408 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
409 self.needed_locks[level] = wanted_nodes
410 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
411 self.needed_locks[level].extend(wanted_nodes)
413 raise errors.ProgrammerError("Unknown recalculation mode")
415 del self.recalculate_locks[level]
418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
419 """Simple LU which runs no hooks.
421 This LU is intended as a parent for other LogicalUnits which will
422 run no hooks, in order to reduce duplicate code.
428 def BuildHooksEnv(self):
429 """Empty BuildHooksEnv for NoHooksLu.
431 This just raises an error.
434 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
436 def BuildHooksNodes(self):
437 """Empty BuildHooksNodes for NoHooksLU.
440 raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU,
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
463 """Check prerequisites for this tasklets.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or
486 raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
499 def __init__(self, qfilter, fields, use_locking):
500 """Initializes this class.
503 self.use_locking = use_locking
505 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
506 namefield=self.SORT_FIELD)
507 self.requested_data = self.query.RequestedData()
508 self.names = self.query.RequestedNames()
510 # Sort only if no names were requested
511 self.sort_by_name = not self.names
513 self.do_locking = None
516 def _GetNames(self, lu, all_names, lock_level):
517 """Helper function to determine names asked for in the query.
521 names = lu.owned_locks(lock_level)
525 if self.wanted == locking.ALL_SET:
526 assert not self.names
527 # caller didn't specify names, so ordering is not important
528 return utils.NiceSort(names)
530 # caller specified names and we must keep the same order
532 assert not self.do_locking or lu.glm.is_owned(lock_level)
534 missing = set(self.wanted).difference(names)
536 raise errors.OpExecError("Some items were removed before retrieving"
537 " their data: %s" % missing)
539 # Return expanded names
542 def ExpandNames(self, lu):
543 """Expand names for this query.
545 See L{LogicalUnit.ExpandNames}.
548 raise NotImplementedError()
550 def DeclareLocks(self, lu, level):
551 """Declare locks for this query.
553 See L{LogicalUnit.DeclareLocks}.
556 raise NotImplementedError()
558 def _GetQueryData(self, lu):
559 """Collects all data for this query.
561 @return: Query data object
564 raise NotImplementedError()
566 def NewStyleQuery(self, lu):
567 """Collect data and execute query.
570 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
571 sort_by_name=self.sort_by_name)
573 def OldStyleQuery(self, lu):
574 """Collect data and execute query.
577 return self.query.OldStyleQuery(self._GetQueryData(lu),
578 sort_by_name=self.sort_by_name)
582 """Returns a dict declaring all lock levels shared.
585 return dict.fromkeys(locking.LEVELS, 1)
588 def _MakeLegacyNodeInfo(data):
589 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
591 Converts the data into a single dictionary. This is fine for most use cases,
592 but some require information from more than one volume group or hypervisor.
595 (bootid, (vg_info, ), (hv_info, )) = data
597 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
602 def _AnnotateDiskParams(instance, devs, cfg):
603 """Little helper wrapper to the rpc annotation method.
605 @param instance: The instance object
606 @type devs: List of L{objects.Disk}
607 @param devs: The root devices (not any of its children!)
608 @param cfg: The config object
609 @return: The annotated disk copies
610 @see L{rpc.AnnotateDiskParams}
613 return rpc.AnnotateDiskParams(instance.disk_template, devs,
614 cfg.GetInstanceDiskParams(instance))
617 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
619 """Checks if node groups for locked instances are still correct.
621 @type cfg: L{config.ConfigWriter}
622 @param cfg: Cluster configuration
623 @type instances: dict; string as key, L{objects.Instance} as value
624 @param instances: Dictionary, instance name as key, instance object as value
625 @type owned_groups: iterable of string
626 @param owned_groups: List of owned groups
627 @type owned_nodes: iterable of string
628 @param owned_nodes: List of owned nodes
629 @type cur_group_uuid: string or None
630 @param cur_group_uuid: Optional group UUID to check against instance's groups
633 for (name, inst) in instances.items():
634 assert owned_nodes.issuperset(inst.all_nodes), \
635 "Instance %s's nodes changed while we kept the lock" % name
637 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
639 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
640 "Instance %s has no node in group %s" % (name, cur_group_uuid)
643 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
644 """Checks if the owned node groups are still correct for an instance.
646 @type cfg: L{config.ConfigWriter}
647 @param cfg: The cluster configuration
648 @type instance_name: string
649 @param instance_name: Instance name
650 @type owned_groups: set or frozenset
651 @param owned_groups: List of currently owned node groups
654 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
656 if not owned_groups.issuperset(inst_groups):
657 raise errors.OpPrereqError("Instance %s's node groups changed since"
658 " locks were acquired, current groups are"
659 " are '%s', owning groups '%s'; retry the"
662 utils.CommaJoin(inst_groups),
663 utils.CommaJoin(owned_groups)),
669 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
670 """Checks if the instances in a node group are still correct.
672 @type cfg: L{config.ConfigWriter}
673 @param cfg: The cluster configuration
674 @type group_uuid: string
675 @param group_uuid: Node group UUID
676 @type owned_instances: set or frozenset
677 @param owned_instances: List of currently owned instances
680 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
681 if owned_instances != wanted_instances:
682 raise errors.OpPrereqError("Instances in node group '%s' changed since"
683 " locks were acquired, wanted '%s', have '%s';"
684 " retry the operation" %
686 utils.CommaJoin(wanted_instances),
687 utils.CommaJoin(owned_instances)),
690 return wanted_instances
693 def _SupportsOob(cfg, node):
694 """Tells if node supports OOB.
696 @type cfg: L{config.ConfigWriter}
697 @param cfg: The cluster configuration
698 @type node: L{objects.Node}
699 @param node: The node
700 @return: The OOB script if supported or an empty string otherwise
703 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
706 def _CopyLockList(names):
707 """Makes a copy of a list of lock names.
709 Handles L{locking.ALL_SET} correctly.
712 if names == locking.ALL_SET:
713 return locking.ALL_SET
718 def _GetWantedNodes(lu, nodes):
719 """Returns list of checked and expanded node names.
721 @type lu: L{LogicalUnit}
722 @param lu: the logical unit on whose behalf we execute
724 @param nodes: list of node names or None for all nodes
726 @return: the list of nodes, sorted
727 @raise errors.ProgrammerError: if the nodes parameter is wrong type
731 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
733 return utils.NiceSort(lu.cfg.GetNodeList())
736 def _GetWantedInstances(lu, instances):
737 """Returns list of checked and expanded instance names.
739 @type lu: L{LogicalUnit}
740 @param lu: the logical unit on whose behalf we execute
741 @type instances: list
742 @param instances: list of instance names or None for all instances
744 @return: the list of instances, sorted
745 @raise errors.OpPrereqError: if the instances parameter is wrong type
746 @raise errors.OpPrereqError: if any of the passed instances is not found
750 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
752 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
756 def _GetUpdatedParams(old_params, update_dict,
757 use_default=True, use_none=False):
758 """Return the new version of a parameter dictionary.
760 @type old_params: dict
761 @param old_params: old parameters
762 @type update_dict: dict
763 @param update_dict: dict containing new parameter values, or
764 constants.VALUE_DEFAULT to reset the parameter to its default
766 @type use_default: boolean
767 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
768 values as 'to be deleted' values
769 @type use_none: boolean
770 @param use_none: whether to recognise C{None} values as 'to be
773 @return: the new parameter dictionary
776 params_copy = copy.deepcopy(old_params)
777 for key, val in update_dict.iteritems():
778 if ((use_default and val == constants.VALUE_DEFAULT) or
779 (use_none and val is None)):
785 params_copy[key] = val
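# Worked example with made-up values (not from the original module):
#
#   old = {"kernel_path": "/vmlinuz", "root_path": "/dev/vda1"}
#   upd = {"kernel_path": constants.VALUE_DEFAULT, "serial_console": True}
#   _GetUpdatedParams(old, upd)
#   --> {"root_path": "/dev/vda1", "serial_console": True}
#
# With use_default=True the VALUE_DEFAULT marker removes "kernel_path" so it
# falls back to its default, while new keys are simply added.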
789 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
790 """Return the new version of a instance policy.
792 @param group_policy: whether this policy applies to a group and thus
793 we should support removal of policy entries
796 use_none = use_default = group_policy
797 ipolicy = copy.deepcopy(old_ipolicy)
798 for key, value in new_ipolicy.items():
799 if key not in constants.IPOLICY_ALL_KEYS:
800 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
802 if key in constants.IPOLICY_ISPECS:
803 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
804 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
806 use_default=use_default)
808 if (not value or value == [constants.VALUE_DEFAULT] or
809 value == constants.VALUE_DEFAULT):
813 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
814 " on the cluster'" % key,
817 if key in constants.IPOLICY_PARAMETERS:
818 # FIXME: we assume all such values are float
820 ipolicy[key] = float(value)
821 except (TypeError, ValueError), err:
822 raise errors.OpPrereqError("Invalid value for attribute"
823 " '%s': '%s', error: %s" %
824 (key, value, err), errors.ECODE_INVAL)
826 # FIXME: we assume all others are lists; this should be redone
828 ipolicy[key] = list(value)
830 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
831 except errors.ConfigurationError, err:
832 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
837 def _UpdateAndVerifySubDict(base, updates, type_check):
838 """Updates and verifies a dict with sub dicts of the same type.
840 @param base: The dict with the old data
841 @param updates: The dict with the new data
842 @param type_check: Dict suitable to ForceDictType to verify correct types
843 @returns: A new dict with updated and verified values
847 new = _GetUpdatedParams(old, value)
848 utils.ForceDictType(new, type_check)
851 ret = copy.deepcopy(base)
852 ret.update(dict((key, fn(base.get(key, {}), value))
853 for key, value in updates.items()))
857 def _MergeAndVerifyHvState(op_input, obj_input):
858 """Combines the hv state from an opcode with the one of the object
860 @param op_input: The input dict from the opcode
861 @param obj_input: The input dict from the objects
862 @return: The verified and updated dict
866 invalid_hvs = set(op_input) - constants.HYPER_TYPES
868 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
869 " %s" % utils.CommaJoin(invalid_hvs),
871 if obj_input is None:
873 type_check = constants.HVSTS_PARAMETER_TYPES
874 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
879 def _MergeAndVerifyDiskState(op_input, obj_input):
880 """Combines the disk state from an opcode with the one of the object
882 @param op_input: The input dict from the opcode
883 @param obj_input: The input dict from the objects
884 @return: The verified and updated dict
887 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
889 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
890 utils.CommaJoin(invalid_dst),
892 type_check = constants.DSS_PARAMETER_TYPES
893 if obj_input is None:
895 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
897 for key, value in op_input.items())
902 def _ReleaseLocks(lu, level, names=None, keep=None):
903 """Releases locks owned by an LU.
905 @type lu: L{LogicalUnit}
906 @param level: Lock level
907 @type names: list or None
908 @param names: Names of locks to release
909 @type keep: list or None
910 @param keep: Names of locks to retain
913 assert not (keep is not None and names is not None), \
914 "Only one of the 'names' and the 'keep' parameters can be given"
916 if names is not None:
917 should_release = names.__contains__
919 should_release = lambda name: name not in keep
921 should_release = None
923 owned = lu.owned_locks(level)
925 # Not owning any lock at this level, do nothing
932 # Determine which locks to release
934 if should_release(name):
939 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
941 # Release just some locks
942 lu.glm.release(level, names=release)
944 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
947 lu.glm.release(level)
949 assert not lu.glm.is_owned(level), "No locks should be owned"
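# Usage sketch (assumed, not from the original module): after an LU has
# narrowed its work down to a single node it can drop the remaining node
# locks while keeping the one it still needs:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
#
# or release everything at that level:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE)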
952 def _MapInstanceDisksToNodes(instances):
953 """Creates a map from (node, volume) to instance name.
955 @type instances: list of L{objects.Instance}
956 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
959 return dict(((node, vol), inst.name)
960 for inst in instances
961 for (node, vols) in inst.MapLVsByNode().items()
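# Shape of the result, with made-up node, volume and instance names:
#
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
#    ("node2.example.com", "xenvg/disk0"): "inst2.example.com"}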
965 def _RunPostHook(lu, node_name):
966 """Runs the post-hook for an opcode on a single node.
969 hm = lu.proc.BuildHooksManager(lu)
971 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
972 except Exception, err: # pylint: disable=W0703
973 lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
976 def _CheckOutputFields(static, dynamic, selected):
977 """Checks whether all selected fields are valid.
979 @type static: L{utils.FieldSet}
980 @param static: static fields set
981 @type dynamic: L{utils.FieldSet}
982 @param dynamic: dynamic fields set
989 delta = f.NonMatching(selected)
991 raise errors.OpPrereqError("Unknown output fields selected: %s"
992 % ",".join(delta), errors.ECODE_INVAL)
995 def _CheckGlobalHvParams(params):
996 """Validates that given hypervisor params are not global ones.
998 This will ensure that instances don't get customised versions of
1002 used_globals = constants.HVC_GLOBALS.intersection(params)
1004 msg = ("The following hypervisor parameters are global and cannot"
1005 " be customized at instance level, please modify them at"
1006 " cluster level: %s" % utils.CommaJoin(used_globals))
1007 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1010 def _CheckNodeOnline(lu, node, msg=None):
1011 """Ensure that a given node is online.
1013 @param lu: the LU on behalf of which we make the check
1014 @param node: the node to check
1015 @param msg: if passed, should be a message to replace the default one
1016 @raise errors.OpPrereqError: if the node is offline
1020 msg = "Can't use offline node"
1021 if lu.cfg.GetNodeInfo(node).offline:
1022 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1025 def _CheckNodeNotDrained(lu, node):
1026 """Ensure that a given node is not drained.
1028 @param lu: the LU on behalf of which we make the check
1029 @param node: the node to check
1030 @raise errors.OpPrereqError: if the node is drained
1033 if lu.cfg.GetNodeInfo(node).drained:
1034 raise errors.OpPrereqError("Can't use drained node %s" % node,
1038 def _CheckNodeVmCapable(lu, node):
1039 """Ensure that a given node is vm capable.
1041 @param lu: the LU on behalf of which we make the check
1042 @param node: the node to check
1043 @raise errors.OpPrereqError: if the node is not vm capable
1046 if not lu.cfg.GetNodeInfo(node).vm_capable:
1047 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1051 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1052 """Ensure that a node supports a given OS.
1054 @param lu: the LU on behalf of which we make the check
1055 @param node: the node to check
1056 @param os_name: the OS to query about
1057 @param force_variant: whether to ignore variant errors
1058 @raise errors.OpPrereqError: if the node is not supporting the OS
1061 result = lu.rpc.call_os_get(node, os_name)
1062 result.Raise("OS '%s' not in supported OS list for node %s" %
1064 prereq=True, ecode=errors.ECODE_INVAL)
1065 if not force_variant:
1066 _CheckOSVariant(result.payload, os_name)
1069 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1070 """Ensure that a node has the given secondary ip.
1072 @type lu: L{LogicalUnit}
1073 @param lu: the LU on behalf of which we make the check
1075 @param node: the node to check
1076 @type secondary_ip: string
1077 @param secondary_ip: the ip to check
1078 @type prereq: boolean
1079 @param prereq: whether to throw a prerequisite or an execute error
1080 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1081 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1084 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1085 result.Raise("Failure checking secondary ip on node %s" % node,
1086 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1087 if not result.payload:
1088 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1089 " please fix and re-run this command" % secondary_ip)
1091 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1093 raise errors.OpExecError(msg)
1096 def _GetClusterDomainSecret():
1097 """Reads the cluster domain secret.
1100 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1104 def _CheckInstanceState(lu, instance, req_states, msg=None):
1105 """Ensure that an instance is in one of the required states.
1107 @param lu: the LU on behalf of which we make the check
1108 @param instance: the instance to check
1109 @param msg: if passed, should be a message to replace the default one
1110 @raise errors.OpPrereqError: if the instance is not in the required state
1114 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1115 if instance.admin_state not in req_states:
1116 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1117 (instance.name, instance.admin_state, msg),
1120 if constants.ADMINST_UP not in req_states:
1121 pnode = instance.primary_node
1122 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1123 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1124 prereq=True, ecode=errors.ECODE_ENVIRON)
1126 if instance.name in ins_l.payload:
1127 raise errors.OpPrereqError("Instance %s is running, %s" %
1128 (instance.name, msg), errors.ECODE_STATE)
1131 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1132 """Computes if value is in the desired range.
1134 @param name: name of the parameter for which we perform the check
1135 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1137 @param ipolicy: dictionary containing min, max and std values
1138 @param value: actual value that we want to use
1139 @return: None or element not meeting the criteria
1143 if value in [None, constants.VALUE_AUTO]:
1145 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1146 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1147 if value > max_v or min_v > value:
1149 fqn = "%s/%s" % (name, qualifier)
1152 return ("%s value %s is not in range [%s, %s]" %
1153 (fqn, value, min_v, max_v))
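# Hedged example (made-up numbers): checking a memory value against the
# policy's ISPECS_MIN/ISPECS_MAX bounds. The helper returns None when the
# value is inside the range, or a human-readable message otherwise:
#
#   err = _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 65536)
#   if err:
#     violations.append(err)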
1157 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1158 nic_count, disk_sizes, spindle_use,
1159 _compute_fn=_ComputeMinMaxSpec):
1160 """Verifies ipolicy against provided specs.
1163 @param ipolicy: The ipolicy
1165 @param mem_size: The memory size
1166 @type cpu_count: int
1167 @param cpu_count: Used cpu cores
1168 @type disk_count: int
1169 @param disk_count: Number of disks used
1170 @type nic_count: int
1171 @param nic_count: Number of nics used
1172 @type disk_sizes: list of ints
1173 @param disk_sizes: Disk sizes of used disks (len must match C{disk_count})
1174 @type spindle_use: int
1175 @param spindle_use: The number of spindles this instance uses
1176 @param _compute_fn: The compute function (unittest only)
1177 @return: A list of violations, or an empty list if no violations are found
1180 assert disk_count == len(disk_sizes)
1183 (constants.ISPEC_MEM_SIZE, "", mem_size),
1184 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1185 (constants.ISPEC_DISK_COUNT, "", disk_count),
1186 (constants.ISPEC_NIC_COUNT, "", nic_count),
1187 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1188 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1189 for idx, d in enumerate(disk_sizes)]
1192 (_compute_fn(name, qualifier, ipolicy, value)
1193 for (name, qualifier, value) in test_settings))
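# Call sketch with made-up values (not from the original module): verifying a
# hypothetical 2-vCPU, 1 GiB, two-disk instance spec against a policy; the
# result is an empty list when everything fits:
#
#   violations = _ComputeIPolicySpecViolation(ipolicy, mem_size=1024,
#                                             cpu_count=2, disk_count=2,
#                                             nic_count=1,
#                                             disk_sizes=[10240, 20480],
#                                             spindle_use=1)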
1196 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1197 _compute_fn=_ComputeIPolicySpecViolation):
1198 """Compute if instance meets the specs of ipolicy.
1201 @param ipolicy: The ipolicy to verify against
1202 @type instance: L{objects.Instance}
1203 @param instance: The instance to verify
1204 @param _compute_fn: The function to verify ipolicy (unittest only)
1205 @see: L{_ComputeIPolicySpecViolation}
1208 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1209 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1210 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1211 disk_count = len(instance.disks)
1212 disk_sizes = [disk.size for disk in instance.disks]
1213 nic_count = len(instance.nics)
1215 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1216 disk_sizes, spindle_use)
1219 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1220 _compute_fn=_ComputeIPolicySpecViolation):
1221 """Compute if instance specs meets the specs of ipolicy.
1224 @param ipolicy: The ipolicy to verify against
1225 @type instance_spec: dict
1226 @param instance_spec: The instance spec to verify
1227 @param _compute_fn: The function to verify ipolicy (unittest only)
1228 @see: L{_ComputeIPolicySpecViolation}
1231 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1232 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1233 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1234 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1235 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1236 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1238 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1239 disk_sizes, spindle_use)
1242 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1244 _compute_fn=_ComputeIPolicyInstanceViolation):
1245 """Compute if instance meets the specs of the new target group.
1247 @param ipolicy: The ipolicy to verify
1248 @param instance: The instance object to verify
1249 @param current_group: The current group of the instance
1250 @param target_group: The new group of the instance
1251 @param _compute_fn: The function to verify ipolicy (unittest only)
1252 @see: L{_ComputeIPolicySpecViolation}
1255 if current_group == target_group:
1258 return _compute_fn(ipolicy, instance)
1261 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1262 _compute_fn=_ComputeIPolicyNodeViolation):
1263 """Checks that the target node is correct in terms of instance policy.
1265 @param ipolicy: The ipolicy to verify
1266 @param instance: The instance object to verify
1267 @param node: The new node to relocate
1268 @param ignore: Ignore violations of the ipolicy
1269 @param _compute_fn: The function to verify ipolicy (unittest only)
1270 @see: L{_ComputeIPolicySpecViolation}
1273 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1274 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1277 msg = ("Instance does not meet target node group's (%s) instance"
1278 " policy: %s") % (node.group, utils.CommaJoin(res))
1282 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1285 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1286 """Computes a set of any instances that would violate the new ipolicy.
1288 @param old_ipolicy: The current (still in-place) ipolicy
1289 @param new_ipolicy: The new (to become) ipolicy
1290 @param instances: List of instances to verify
1291 @return: A list of instances which violate the new ipolicy but
1295 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1296 _ComputeViolatingInstances(old_ipolicy, instances))
1299 def _ExpandItemName(fn, name, kind):
1300 """Expand an item name.
1302 @param fn: the function to use for expansion
1303 @param name: requested item name
1304 @param kind: text description ('Node' or 'Instance')
1305 @return: the resolved (full) name
1306 @raise errors.OpPrereqError: if the item is not found
1309 full_name = fn(name)
1310 if full_name is None:
1311 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1316 def _ExpandNodeName(cfg, name):
1317 """Wrapper over L{_ExpandItemName} for nodes."""
1318 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1321 def _ExpandInstanceName(cfg, name):
1322 """Wrapper over L{_ExpandItemName} for instance."""
1323 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1326 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1327 minmem, maxmem, vcpus, nics, disk_template, disks,
1328 bep, hvp, hypervisor_name, tags):
1329 """Builds instance related env variables for hooks
1331 This builds the hook environment from individual variables.
1334 @param name: the name of the instance
1335 @type primary_node: string
1336 @param primary_node: the name of the instance's primary node
1337 @type secondary_nodes: list
1338 @param secondary_nodes: list of secondary nodes as strings
1339 @type os_type: string
1340 @param os_type: the name of the instance's OS
1341 @type status: string
1342 @param status: the desired status of the instance
1343 @type minmem: string
1344 @param minmem: the minimum memory size of the instance
1345 @type maxmem: string
1346 @param maxmem: the maximum memory size of the instance
1348 @param vcpus: the count of VCPUs the instance has
1350 @param nics: list of tuples (ip, mac, mode, link) representing
1351 the NICs the instance has
1352 @type disk_template: string
1353 @param disk_template: the disk template of the instance
1355 @param disks: the list of (size, mode) pairs
1357 @param bep: the backend parameters for the instance
1359 @param hvp: the hypervisor parameters for the instance
1360 @type hypervisor_name: string
1361 @param hypervisor_name: the hypervisor for the instance
1363 @param tags: list of instance tags as strings
1365 @return: the hook environment for this instance
1370 "INSTANCE_NAME": name,
1371 "INSTANCE_PRIMARY": primary_node,
1372 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1373 "INSTANCE_OS_TYPE": os_type,
1374 "INSTANCE_STATUS": status,
1375 "INSTANCE_MINMEM": minmem,
1376 "INSTANCE_MAXMEM": maxmem,
1377 # TODO(2.7) remove deprecated "memory" value
1378 "INSTANCE_MEMORY": maxmem,
1379 "INSTANCE_VCPUS": vcpus,
1380 "INSTANCE_DISK_TEMPLATE": disk_template,
1381 "INSTANCE_HYPERVISOR": hypervisor_name,
1384 nic_count = len(nics)
1385 for idx, (ip, mac, mode, link) in enumerate(nics):
1388 env["INSTANCE_NIC%d_IP" % idx] = ip
1389 env["INSTANCE_NIC%d_MAC" % idx] = mac
1390 env["INSTANCE_NIC%d_MODE" % idx] = mode
1391 env["INSTANCE_NIC%d_LINK" % idx] = link
1392 if mode == constants.NIC_MODE_BRIDGED:
1393 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1397 env["INSTANCE_NIC_COUNT"] = nic_count
1400 disk_count = len(disks)
1401 for idx, (size, mode) in enumerate(disks):
1402 env["INSTANCE_DISK%d_SIZE" % idx] = size
1403 env["INSTANCE_DISK%d_MODE" % idx] = mode
1407 env["INSTANCE_DISK_COUNT"] = disk_count
1412 env["INSTANCE_TAGS"] = " ".join(tags)
1414 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1415 for key, value in source.items():
1416 env["INSTANCE_%s_%s" % (kind, key)] = value
1421 def _NICListToTuple(lu, nics):
1422 """Build a list of nic information tuples.
1424 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1425 value in LUInstanceQueryData.
1427 @type lu: L{LogicalUnit}
1428 @param lu: the logical unit on whose behalf we execute
1429 @type nics: list of L{objects.NIC}
1430 @param nics: list of nics to convert to hooks tuples
1434 cluster = lu.cfg.GetClusterInfo()
1438 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1439 mode = filled_params[constants.NIC_MODE]
1440 link = filled_params[constants.NIC_LINK]
1441 hooks_nics.append((ip, mac, mode, link))
1445 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1446 """Builds instance related env variables for hooks from an object.
1448 @type lu: L{LogicalUnit}
1449 @param lu: the logical unit on whose behalf we execute
1450 @type instance: L{objects.Instance}
1451 @param instance: the instance for which we should build the
1453 @type override: dict
1454 @param override: dictionary with key/values that will override
1457 @return: the hook environment dictionary
1460 cluster = lu.cfg.GetClusterInfo()
1461 bep = cluster.FillBE(instance)
1462 hvp = cluster.FillHV(instance)
1464 "name": instance.name,
1465 "primary_node": instance.primary_node,
1466 "secondary_nodes": instance.secondary_nodes,
1467 "os_type": instance.os,
1468 "status": instance.admin_state,
1469 "maxmem": bep[constants.BE_MAXMEM],
1470 "minmem": bep[constants.BE_MINMEM],
1471 "vcpus": bep[constants.BE_VCPUS],
1472 "nics": _NICListToTuple(lu, instance.nics),
1473 "disk_template": instance.disk_template,
1474 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1477 "hypervisor_name": instance.hypervisor,
1478 "tags": instance.tags,
1481 args.update(override)
1482 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
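# Usage sketch (assumed): an instance LU's BuildHooksEnv usually delegates to
# this helper, optionally overriding single values through the override dict
# (keys match the _BuildInstanceHookEnv arguments, e.g. "status"):
#
#   def BuildHooksEnv(self):
#     return _BuildInstanceHookEnvByObject(self, self.instance,
#                                          override={"status": "down"})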
1485 def _AdjustCandidatePool(lu, exceptions):
1486 """Adjust the candidate pool after node operations.
1489 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1491 lu.LogInfo("Promoted nodes to master candidate role: %s",
1492 utils.CommaJoin(node.name for node in mod_list))
1493 for name in mod_list:
1494 lu.context.ReaddNode(name)
1495 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1497 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1501 def _DecideSelfPromotion(lu, exceptions=None):
1502 """Decide whether I should promote myself as a master candidate.
1505 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1506 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1507 # the new node will increase mc_max by one, so:
1508 mc_should = min(mc_should + 1, cp_size)
1509 return mc_now < mc_should
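# Worked example with made-up numbers: with candidate_pool_size = 10,
# mc_now = 4 and mc_should = 5, adding this node gives
# mc_should = min(5 + 1, 10) = 6, and since 4 < 6 the node promotes itself.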
1512 def _CalculateGroupIPolicy(cluster, group):
1513 """Calculate instance policy for group.
1516 return cluster.SimpleFillIPolicy(group.ipolicy)
1519 def _ComputeViolatingInstances(ipolicy, instances):
1520 """Computes a set of instances who violates given ipolicy.
1522 @param ipolicy: The ipolicy to verify
1523 @type instances: iterable of L{objects.Instance}
1524 @param instances: List of instances to verify
1525 @return: A frozenset of instance names violating the ipolicy
1528 return frozenset([inst.name for inst in instances
1529 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1532 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1533 """Check that the brigdes needed by a list of nics exist.
1536 cluster = lu.cfg.GetClusterInfo()
1537 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1538 brlist = [params[constants.NIC_LINK] for params in paramslist
1539 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1541 result = lu.rpc.call_bridges_exist(target_node, brlist)
1542 result.Raise("Error checking bridges on destination node '%s'" %
1543 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1546 def _CheckInstanceBridgesExist(lu, instance, node=None):
1547 """Check that the brigdes needed by an instance exist.
1551 node = instance.primary_node
1552 _CheckNicsBridgesExist(lu, instance.nics, node)
1555 def _CheckOSVariant(os_obj, name):
1556 """Check whether an OS name conforms to the os variants specification.
1558 @type os_obj: L{objects.OS}
1559 @param os_obj: OS object to check
1561 @param name: OS name passed by the user, to check for validity
1564 variant = objects.OS.GetVariant(name)
1565 if not os_obj.supported_variants:
1567 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1568 " passed)" % (os_obj.name, variant),
1572 raise errors.OpPrereqError("OS name must include a variant",
1575 if variant not in os_obj.supported_variants:
1576 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1579 def _GetNodeInstancesInner(cfg, fn):
1580 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1583 def _GetNodeInstances(cfg, node_name):
1584 """Returns a list of all primary and secondary instances on a node.
1588 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1591 def _GetNodePrimaryInstances(cfg, node_name):
1592 """Returns primary instances on a node.
1595 return _GetNodeInstancesInner(cfg,
1596 lambda inst: node_name == inst.primary_node)
1599 def _GetNodeSecondaryInstances(cfg, node_name):
1600 """Returns secondary instances on a node.
1603 return _GetNodeInstancesInner(cfg,
1604 lambda inst: node_name in inst.secondary_nodes)
1607 def _GetStorageTypeArgs(cfg, storage_type):
1608 """Returns the arguments for a storage type.
1611 # Special case for file storage
1612 if storage_type == constants.ST_FILE:
1613 # storage.FileStorage wants a list of storage directories
1614 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1619 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1622 for dev in instance.disks:
1623 cfg.SetDiskID(dev, node_name)
1625 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1627 result.Raise("Failed to get disk status from node %s" % node_name,
1628 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1630 for idx, bdev_status in enumerate(result.payload):
1631 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1637 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1638 """Check the sanity of iallocator and node arguments and use the
1639 cluster-wide iallocator if appropriate.
1641 Check that at most one of (iallocator, node) is specified. If none is
1642 specified, then the LU's opcode's iallocator slot is filled with the
1643 cluster-wide default iallocator.
1645 @type iallocator_slot: string
1646 @param iallocator_slot: the name of the opcode iallocator slot
1647 @type node_slot: string
1648 @param node_slot: the name of the opcode target node slot
1651 node = getattr(lu.op, node_slot, None)
1652 iallocator = getattr(lu.op, iallocator_slot, None)
1654 if node is not None and iallocator is not None:
1655 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1657 elif node is None and iallocator is None:
1658 default_iallocator = lu.cfg.GetDefaultIAllocator()
1659 if default_iallocator:
1660 setattr(lu.op, iallocator_slot, default_iallocator)
1662 raise errors.OpPrereqError("No iallocator or node given and no"
1663 " cluster-wide default iallocator found;"
1664 " please specify either an iallocator or a"
1665 " node, or set a cluster-wide default"
1669 def _GetDefaultIAllocator(cfg, iallocator):
1670 """Decides on which iallocator to use.
1672 @type cfg: L{config.ConfigWriter}
1673 @param cfg: Cluster configuration object
1674 @type iallocator: string or None
1675 @param iallocator: Iallocator specified in opcode
1677 @return: Iallocator name
1681 # Use default iallocator
1682 iallocator = cfg.GetDefaultIAllocator()
1685 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1686 " opcode nor as a cluster-wide default",
1692 class LUClusterPostInit(LogicalUnit):
1693 """Logical unit for running hooks after cluster initialization.
1696 HPATH = "cluster-init"
1697 HTYPE = constants.HTYPE_CLUSTER
1699 def BuildHooksEnv(self):
1704 "OP_TARGET": self.cfg.GetClusterName(),
1707 def BuildHooksNodes(self):
1708 """Build hooks nodes.
1711 return ([], [self.cfg.GetMasterNode()])
1713 def Exec(self, feedback_fn):
1720 class LUClusterDestroy(LogicalUnit):
1721 """Logical unit for destroying the cluster.
1724 HPATH = "cluster-destroy"
1725 HTYPE = constants.HTYPE_CLUSTER
1727 def BuildHooksEnv(self):
1732 "OP_TARGET": self.cfg.GetClusterName(),
1735 def BuildHooksNodes(self):
1736 """Build hooks nodes.
1741 def CheckPrereq(self):
1742 """Check prerequisites.
1744 This checks whether the cluster is empty.
1746 Any errors are signaled by raising errors.OpPrereqError.
1749 master = self.cfg.GetMasterNode()
1751 nodelist = self.cfg.GetNodeList()
1752 if len(nodelist) != 1 or nodelist[0] != master:
1753 raise errors.OpPrereqError("There are still %d node(s) in"
1754 " this cluster." % (len(nodelist) - 1),
1756 instancelist = self.cfg.GetInstanceList()
1758 raise errors.OpPrereqError("There are still %d instance(s) in"
1759 " this cluster." % len(instancelist),
1762 def Exec(self, feedback_fn):
1763 """Destroys the cluster.
1766 master_params = self.cfg.GetMasterNetworkParameters()
1768 # Run post hooks on master node before it's removed
1769 _RunPostHook(self, master_params.name)
1771 ems = self.cfg.GetUseExternalMipScript()
1772 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1775 self.LogWarning("Error disabling the master IP address: %s",
1778 return master_params.name
1781 def _VerifyCertificate(filename):
1782 """Verifies a certificate for L{LUClusterVerifyConfig}.
1784 @type filename: string
1785 @param filename: Path to PEM file
1789 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1790 utils.ReadFile(filename))
1791 except Exception, err: # pylint: disable=W0703
1792 return (LUClusterVerifyConfig.ETYPE_ERROR,
1793 "Failed to load X509 certificate %s: %s" % (filename, err))
1796 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1797 constants.SSL_CERT_EXPIRATION_ERROR)
1800 fnamemsg = "While verifying %s: %s" % (filename, msg)
1805 return (None, fnamemsg)
1806 elif errcode == utils.CERT_WARNING:
1807 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1808 elif errcode == utils.CERT_ERROR:
1809 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1811 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1814 def _GetAllHypervisorParameters(cluster, instances):
1815 """Compute the set of all hypervisor parameters.
1817 @type cluster: L{objects.Cluster}
1818 @param cluster: the cluster object
1819 @type instances: list of L{objects.Instance}
1820 @param instances: additional instances from which to obtain parameters
1821 @rtype: list of (origin, hypervisor, parameters)
1822 @return: a list with all parameters found, indicating the hypervisor they
1823 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1828 for hv_name in cluster.enabled_hypervisors:
1829 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1831 for os_name, os_hvp in cluster.os_hvp.items():
1832 for hv_name, hv_params in os_hvp.items():
1834 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1835 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1837 # TODO: collapse identical parameter values in a single one
1838 for instance in instances:
1839 if instance.hvparams:
1840 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1841 cluster.FillHV(instance)))
1846 class _VerifyErrors(object):
1847 """Mix-in for cluster/group verify LUs.
1849 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1850 self.op and self._feedback_fn to be available.)
1854 ETYPE_FIELD = "code"
1855 ETYPE_ERROR = "ERROR"
1856 ETYPE_WARNING = "WARNING"
1858 def _Error(self, ecode, item, msg, *args, **kwargs):
1859 """Format an error message.
1861 Based on the opcode's error_codes parameter, either format a
1862 parseable error code, or a simpler error string.
1864 This must be called only from Exec and functions called from Exec.
1867 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1868 itype, etxt, _ = ecode
1869 # first complete the msg
1872 # then format the whole message
1873 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1874 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1880 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1881 # and finally report it via the feedback_fn
1882 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1884 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1885 """Log an error message if the passed condition is True.
1889 or self.op.debug_simulate_errors) # pylint: disable=E1101
1891 # If the error code is in the list of ignored errors, demote the error to a
1893 (_, etxt, _) = ecode
1894 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1895 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1898 self._Error(ecode, *args, **kwargs)
1900 # do not mark the operation as failed for WARN cases only
1901 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1902 self.bad = self.bad or cond
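# Usage sketch (assumed): verification code reports problems through this
# helper, e.g. for a cluster configuration message:
#
#   self._ErrorIf(bool(msg), constants.CV_ECLUSTERCFG, None,
#                 "configuration problem: %s", msg)
#
# Errors listed in the opcode's ignore_errors are demoted to warnings and do
# not mark the operation as failed.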
1905 class LUClusterVerify(NoHooksLU):
1906 """Submits all jobs necessary to verify the cluster.
1911 def ExpandNames(self):
1912 self.needed_locks = {}
1914 def Exec(self, feedback_fn):
1917 if self.op.group_name:
1918 groups = [self.op.group_name]
1919 depends_fn = lambda: None
1921 groups = self.cfg.GetNodeGroupList()
1923 # Verify global configuration
1925 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1928 # Always depend on global verification
1929 depends_fn = lambda: [(-len(jobs), [])]
1931 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1932 ignore_errors=self.op.ignore_errors,
1933 depends=depends_fn())]
1934 for group in groups)
1936 # Fix up all parameters
1937 for op in itertools.chain(*jobs): # pylint: disable=W0142
1938 op.debug_simulate_errors = self.op.debug_simulate_errors
1939 op.verbose = self.op.verbose
1940 op.error_codes = self.op.error_codes
1942 op.skip_checks = self.op.skip_checks
1943 except AttributeError:
1944 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1946 return ResultWithJobs(jobs)
1949 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1950 """Verifies the cluster config.
1955 def _VerifyHVP(self, hvp_data):
1956 """Verifies locally the syntax of the hypervisor parameters.
1959 for item, hv_name, hv_params in hvp_data:
1960 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1963 hv_class = hypervisor.GetHypervisor(hv_name)
1964 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1965 hv_class.CheckParameterSyntax(hv_params)
1966 except errors.GenericError, err:
1967 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1969 def ExpandNames(self):
1970 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1971 self.share_locks = _ShareAll()
1973 def CheckPrereq(self):
1974 """Check prerequisites.
1977 # Retrieve all information
1978 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1979 self.all_node_info = self.cfg.GetAllNodesInfo()
1980 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1982 def Exec(self, feedback_fn):
1983 """Verify integrity of cluster, performing various test on nodes.
1987 self._feedback_fn = feedback_fn
1989 feedback_fn("* Verifying cluster config")
1991 for msg in self.cfg.VerifyConfig():
1992 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1994 feedback_fn("* Verifying cluster certificate files")
1996 for cert_filename in constants.ALL_CERT_FILES:
1997 (errcode, msg) = _VerifyCertificate(cert_filename)
1998 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2000 feedback_fn("* Verifying hypervisor parameters")
2002 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2003 self.all_inst_info.values()))
2005 feedback_fn("* Verifying all nodes belong to an existing group")
2007 # We do this verification here because, should this bogus circumstance
2008 # occur, it would never be caught by VerifyGroup, which only acts on
2009 # nodes/instances reachable from existing node groups.
2011 dangling_nodes = set(node.name for node in self.all_node_info.values()
2012 if node.group not in self.all_group_info)
2014 dangling_instances = {}
2015 no_node_instances = []
2017 for inst in self.all_inst_info.values():
2018 if inst.primary_node in dangling_nodes:
2019 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2020 elif inst.primary_node not in self.all_node_info:
2021 no_node_instances.append(inst.name)
2026 utils.CommaJoin(dangling_instances.get(node.name,
2028 for node in dangling_nodes]
2030 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2032 "the following nodes (and their instances) belong to a non"
2033 " existing group: %s", utils.CommaJoin(pretty_dangling))
2035 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2037 "the following instances have a non-existing primary-node:"
2038 " %s", utils.CommaJoin(no_node_instances))
2043 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2044 """Verifies the status of a node group.
2047 HPATH = "cluster-verify"
2048 HTYPE = constants.HTYPE_CLUSTER
2051 _HOOKS_INDENT_RE = re.compile("^", re.M)
2053 class NodeImage(object):
2054 """A class representing the logical and physical status of a node.
2057 @ivar name: the node name to which this object refers
2058 @ivar volumes: a structure as returned from
2059 L{ganeti.backend.GetVolumeList} (runtime)
2060 @ivar instances: a list of running instances (runtime)
2061 @ivar pinst: list of configured primary instances (config)
2062 @ivar sinst: list of configured secondary instances (config)
2063 @ivar sbp: dictionary of {primary-node: list of instances} for all
2064 instances for which this node is secondary (config)
2065 @ivar mfree: free memory, as reported by hypervisor (runtime)
2066 @ivar dfree: free disk, as reported by the node (runtime)
2067 @ivar offline: the offline status (config)
2068 @type rpc_fail: boolean
2069 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2070 not whether the individual keys were correct) (runtime)
2071 @type lvm_fail: boolean
2072 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2073 @type hyp_fail: boolean
2074 @ivar hyp_fail: whether the RPC call didn't return the instance list
2075 @type ghost: boolean
2076 @ivar ghost: whether this node is unknown to the configuration (config)
2077 @type os_fail: boolean
2078 @ivar os_fail: whether the RPC call didn't return valid OS data
2080 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2081 @type vm_capable: boolean
2082 @ivar vm_capable: whether the node can host instances
2085 def __init__(self, offline=False, name=None, vm_capable=True):
2094 self.offline = offline
2095 self.vm_capable = vm_capable
2096 self.rpc_fail = False
2097 self.lvm_fail = False
2098 self.hyp_fail = False
2100 self.os_fail = False
2103 def ExpandNames(self):
2104 # This raises errors.OpPrereqError on its own:
2105 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2107 # Get instances in node group; this is unsafe and needs verification later
2109 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2111 self.needed_locks = {
2112 locking.LEVEL_INSTANCE: inst_names,
2113 locking.LEVEL_NODEGROUP: [self.group_uuid],
2114 locking.LEVEL_NODE: [],
2117 self.share_locks = _ShareAll()
2119 def DeclareLocks(self, level):
2120 if level == locking.LEVEL_NODE:
2121 # Get members of node group; this is unsafe and needs verification later
2122 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2124 all_inst_info = self.cfg.GetAllInstancesInfo()
2126 # In Exec(), we warn about mirrored instances that have primary and
2127 # secondary living in separate node groups. To fully verify that
2128 # volumes for these instances are healthy, we will need to do an
2129 # extra call to their secondaries. We ensure here those nodes will be locked.
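# (Illustrative example: a DRBD instance whose primary node is in this
# group but whose secondary lives in another group gets that secondary
# node added to the node locks here, so its volumes can still be
# queried.)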
2131 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2132 # Important: access only the instances whose lock is owned
2133 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2134 nodes.update(all_inst_info[inst].secondary_nodes)
2136 self.needed_locks[locking.LEVEL_NODE] = nodes
2138 def CheckPrereq(self):
2139 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2140 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2142 group_nodes = set(self.group_info.members)
2144 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2147 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2149 unlocked_instances = \
2150 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2153 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2154 utils.CommaJoin(unlocked_nodes),
2157 if unlocked_instances:
2158 raise errors.OpPrereqError("Missing lock for instances: %s" %
2159 utils.CommaJoin(unlocked_instances),
2162 self.all_node_info = self.cfg.GetAllNodesInfo()
2163 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2165 self.my_node_names = utils.NiceSort(group_nodes)
2166 self.my_inst_names = utils.NiceSort(group_instances)
2168 self.my_node_info = dict((name, self.all_node_info[name])
2169 for name in self.my_node_names)
2171 self.my_inst_info = dict((name, self.all_inst_info[name])
2172 for name in self.my_inst_names)
2174 # We detect here the nodes that will need the extra RPC calls for verifying
2175 # split LV volumes; they should be locked.
2176 extra_lv_nodes = set()
2178 for inst in self.my_inst_info.values():
2179 if inst.disk_template in constants.DTS_INT_MIRROR:
2180 for nname in inst.all_nodes:
2181 if self.all_node_info[nname].group != self.group_uuid:
2182 extra_lv_nodes.add(nname)
2184 unlocked_lv_nodes = \
2185 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2187 if unlocked_lv_nodes:
2188 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2189 utils.CommaJoin(unlocked_lv_nodes),
2191 self.extra_lv_nodes = list(extra_lv_nodes)
2193 def _VerifyNode(self, ninfo, nresult):
2194 """Perform some basic validation on data returned from a node.
2196 - check the result data structure is well formed and has all the mandatory fields
2198 - check ganeti version
2200 @type ninfo: L{objects.Node}
2201 @param ninfo: the node to check
2202 @param nresult: the results from the node
2204 @return: whether overall this call was successful (and we can expect
2205 reasonable values in the response)
2209 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2211 # main result, nresult should be a non-empty dict
2212 test = not nresult or not isinstance(nresult, dict)
2213 _ErrorIf(test, constants.CV_ENODERPC, node,
2214 "unable to verify node: no data returned")
2218 # compares ganeti version
2219 local_version = constants.PROTOCOL_VERSION
2220 remote_version = nresult.get("version", None)
2221 test = not (remote_version and
2222 isinstance(remote_version, (list, tuple)) and
2223 len(remote_version) == 2)
2224 _ErrorIf(test, constants.CV_ENODERPC, node,
2225 "connection to node returned invalid data")
2229 test = local_version != remote_version[0]
2230 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2231 "incompatible protocol versions: master %s,"
2232 " node %s", local_version, remote_version[0])
2236 # node seems compatible, we can actually try to look into its results
2238 # full package version
2239 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2240 constants.CV_ENODEVERSION, node,
2241 "software version mismatch: master %s, node %s",
2242 constants.RELEASE_VERSION, remote_version[1],
2243 code=self.ETYPE_WARNING)
2245 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2246 if ninfo.vm_capable and isinstance(hyp_result, dict):
2247 for hv_name, hv_result in hyp_result.iteritems():
2248 test = hv_result is not None
2249 _ErrorIf(test, constants.CV_ENODEHV, node,
2250 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2252 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2253 if ninfo.vm_capable and isinstance(hvp_result, list):
2254 for item, hv_name, hv_result in hvp_result:
2255 _ErrorIf(True, constants.CV_ENODEHV, node,
2256 "hypervisor %s parameter verify failure (source %s): %s",
2257 hv_name, item, hv_result)
2259 test = nresult.get(constants.NV_NODESETUP,
2260 ["Missing NODESETUP results"])
2261 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2266 def _VerifyNodeTime(self, ninfo, nresult,
2267 nvinfo_starttime, nvinfo_endtime):
2268 """Check the node time.
2270 @type ninfo: L{objects.Node}
2271 @param ninfo: the node to check
2272 @param nresult: the remote results for the node
2273 @param nvinfo_starttime: the start time of the RPC call
2274 @param nvinfo_endtime: the end time of the RPC call
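As an illustrative summary of the check below: the node's reported time
is merged via L{utils.MergeTime} and must fall within
C{constants.NODE_MAX_CLOCK_SKEW} seconds of the [start, end] window of
the verify RPC, otherwise a C{CV_ENODETIME} error is reported.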
2278 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2280 ntime = nresult.get(constants.NV_TIME, None)
2282 ntime_merged = utils.MergeTime(ntime)
2283 except (ValueError, TypeError):
2284 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2287 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2288 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2289 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2290 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2294 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2295 "Node time diverges by at least %s from master node time",
2298 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2299 """Check the node LVM results.
2301 @type ninfo: L{objects.Node}
2302 @param ninfo: the node to check
2303 @param nresult: the remote results for the node
2304 @param vg_name: the configured VG name
2311 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2313 # checks vg existence and size > 20G
2314 vglist = nresult.get(constants.NV_VGLIST, None)
2316 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2318 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2319 constants.MIN_VG_SIZE)
2320 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2323 pvlist = nresult.get(constants.NV_PVLIST, None)
2324 test = pvlist is None
2325 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2327 # check that ':' is not present in PV names, since it's a
2328 # special character for lvcreate (denotes the range of PEs to allocate on the PV)
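# (Illustrative: lvcreate accepts a trailing "PV:from-to" extent range on
# its command line, so a PV whose own name contains ':' could not be
# passed to it unambiguously.)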
2330 for _, pvname, owner_vg in pvlist:
2331 test = ":" in pvname
2332 _ErrorIf(test, constants.CV_ENODELVM, node,
2333 "Invalid character ':' in PV '%s' of VG '%s'",
2336 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2337 """Check the node bridges.
2339 @type ninfo: L{objects.Node}
2340 @param ninfo: the node to check
2341 @param nresult: the remote results for the node
2342 @param bridges: the expected list of bridges
2349 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2351 missing = nresult.get(constants.NV_BRIDGES, None)
2352 test = not isinstance(missing, list)
2353 _ErrorIf(test, constants.CV_ENODENET, node,
2354 "did not return valid bridge information")
2356 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2357 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2359 def _VerifyNodeUserScripts(self, ninfo, nresult):
2360 """Check the results of user scripts presence and executability on the node
2362 @type ninfo: L{objects.Node}
2363 @param ninfo: the node to check
2364 @param nresult: the remote results for the node
2369 test = not constants.NV_USERSCRIPTS in nresult
2370 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2371 "did not return user scripts information")
2373 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2375 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2376 "user scripts not present or not executable: %s" %
2377 utils.CommaJoin(sorted(broken_scripts)))
2379 def _VerifyNodeNetwork(self, ninfo, nresult):
2380 """Check the node network connectivity results.
2382 @type ninfo: L{objects.Node}
2383 @param ninfo: the node to check
2384 @param nresult: the remote results for the node
2388 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2390 test = constants.NV_NODELIST not in nresult
2391 _ErrorIf(test, constants.CV_ENODESSH, node,
2392 "node hasn't returned node ssh connectivity data")
2394 if nresult[constants.NV_NODELIST]:
2395 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2396 _ErrorIf(True, constants.CV_ENODESSH, node,
2397 "ssh communication with node '%s': %s", a_node, a_msg)
2399 test = constants.NV_NODENETTEST not in nresult
2400 _ErrorIf(test, constants.CV_ENODENET, node,
2401 "node hasn't returned node tcp connectivity data")
2403 if nresult[constants.NV_NODENETTEST]:
2404 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2406 _ErrorIf(True, constants.CV_ENODENET, node,
2407 "tcp communication with node '%s': %s",
2408 anode, nresult[constants.NV_NODENETTEST][anode])
2410 test = constants.NV_MASTERIP not in nresult
2411 _ErrorIf(test, constants.CV_ENODENET, node,
2412 "node hasn't returned node master IP reachability data")
2414 if not nresult[constants.NV_MASTERIP]:
2415 if node == self.master_node:
2416 msg = "the master node cannot reach the master IP (not configured?)"
2418 msg = "cannot reach the master IP"
2419 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2421 def _VerifyInstance(self, instance, instanceconfig, node_image,
2423 """Verify an instance.
2425 This function checks to see if the required block devices are
2426 available on the instance's node.
2429 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2430 node_current = instanceconfig.primary_node
2432 node_vol_should = {}
2433 instanceconfig.MapLVsByNode(node_vol_should)
2435 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2436 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2437 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2439 for node in node_vol_should:
2440 n_img = node_image[node]
2441 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2442 # ignore missing volumes on offline or broken nodes
2444 for volume in node_vol_should[node]:
2445 test = volume not in n_img.volumes
2446 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2447 "volume %s missing on node %s", volume, node)
2449 if instanceconfig.admin_state == constants.ADMINST_UP:
2450 pri_img = node_image[node_current]
2451 test = instance not in pri_img.instances and not pri_img.offline
2452 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2453 "instance not running on its primary node %s",
2456 diskdata = [(nname, success, status, idx)
2457 for (nname, disks) in diskstatus.items()
2458 for idx, (success, status) in enumerate(disks)]
2460 for nname, success, bdev_status, idx in diskdata:
2461 # the 'ghost node' construction in Exec() ensures that we have a node_image entry here
2463 snode = node_image[nname]
2464 bad_snode = snode.ghost or snode.offline
2465 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2466 not success and not bad_snode,
2467 constants.CV_EINSTANCEFAULTYDISK, instance,
2468 "couldn't retrieve status for disk/%s on %s: %s",
2469 idx, nname, bdev_status)
2470 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2471 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2472 constants.CV_EINSTANCEFAULTYDISK, instance,
2473 "disk/%s on %s is faulty", idx, nname)
2475 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2476 """Verify if there are any unknown volumes in the cluster.
2478 The .os, .swap and backup volumes are ignored. All other volumes are
2479 reported as unknown.
2481 @type reserved: L{ganeti.utils.FieldSet}
2482 @param reserved: a FieldSet of reserved volume names
2485 for node, n_img in node_image.items():
2486 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2487 self.all_node_info[node].group != self.group_uuid):
2488 # skip non-healthy nodes
2490 for volume in n_img.volumes:
2491 test = ((node not in node_vol_should or
2492 volume not in node_vol_should[node]) and
2493 not reserved.Matches(volume))
2494 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2495 "volume %s is unknown", volume)
2497 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2498 """Verify N+1 Memory Resilience.
2500 Check that if one single node dies we can still start all the
2501 instances it was primary for.
2504 cluster_info = self.cfg.GetClusterInfo()
2505 for node, n_img in node_image.items():
2506 # This code checks that every node which is now listed as
2507 # secondary has enough memory to host all instances it is
2508 # supposed to host, should a single other node in the cluster fail.
2509 # FIXME: not ready for failover to an arbitrary node
2510 # FIXME: does not support file-backed instances
2511 # WARNING: we currently take into account down instances as well
2512 # as up ones, considering that even if they're down someone
2513 # might want to start them even in the event of a node failure.
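# Illustrative example: if this node is secondary for two auto-balanced
# instances of primary node P with minimum memory 512 and 1024 MiB, it
# must report at least 1536 MiB free, otherwise CV_ENODEN1 is raised for
# it below.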
2514 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2515 # we're skipping nodes marked offline and nodes in other groups from
2516 # the N+1 warning, since most likely we don't have good memory
2517 # information from them; we already list instances living on such
2518 # nodes, and that's enough warning
2520 #TODO(dynmem): also consider ballooning out other instances
2521 for prinode, instances in n_img.sbp.items():
2523 for instance in instances:
2524 bep = cluster_info.FillBE(instance_cfg[instance])
2525 if bep[constants.BE_AUTO_BALANCE]:
2526 needed_mem += bep[constants.BE_MINMEM]
2527 test = n_img.mfree < needed_mem
2528 self._ErrorIf(test, constants.CV_ENODEN1, node,
2529 "not enough memory to accomodate instance failovers"
2530 " should node %s fail (%dMiB needed, %dMiB available)",
2531 prinode, needed_mem, n_img.mfree)
2534 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2535 (files_all, files_opt, files_mc, files_vm)):
2536 """Verifies file checksums collected from all nodes.
2538 @param errorif: Callback for reporting errors
2539 @param nodeinfo: List of L{objects.Node} objects
2540 @param master_node: Name of master node
2541 @param all_nvinfo: RPC results
2544 # Define functions determining which nodes to consider for a file
2547 (files_mc, lambda node: (node.master_candidate or
2548 node.name == master_node)),
2549 (files_vm, lambda node: node.vm_capable),
2552 # Build mapping from filename to list of nodes which should have the file
2554 for (files, fn) in files2nodefn:
2556 filenodes = nodeinfo
2558 filenodes = filter(fn, nodeinfo)
2559 nodefiles.update((filename,
2560 frozenset(map(operator.attrgetter("name"), filenodes)))
2561 for filename in files)
2563 assert set(nodefiles) == (files_all | files_mc | files_vm)
2565 fileinfo = dict((filename, {}) for filename in nodefiles)
2566 ignore_nodes = set()
2568 for node in nodeinfo:
2570 ignore_nodes.add(node.name)
2573 nresult = all_nvinfo[node.name]
2575 if nresult.fail_msg or not nresult.payload:
2578 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2580 test = not (node_files and isinstance(node_files, dict))
2581 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2582 "Node did not return file checksum data")
2584 ignore_nodes.add(node.name)
2587 # Build per-checksum mapping from filename to nodes having it
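# (Illustrative shape: fileinfo[filename] = {checksum: set(node names)};
# a consistent file ends up with exactly one checksum key.)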
2588 for (filename, checksum) in node_files.items():
2589 assert filename in nodefiles
2590 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2592 for (filename, checksums) in fileinfo.items():
2593 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2595 # Nodes having the file
2596 with_file = frozenset(node_name
2597 for nodes in fileinfo[filename].values()
2598 for node_name in nodes) - ignore_nodes
2600 expected_nodes = nodefiles[filename] - ignore_nodes
2602 # Nodes missing file
2603 missing_file = expected_nodes - with_file
2605 if filename in files_opt:
2607 errorif(missing_file and missing_file != expected_nodes,
2608 constants.CV_ECLUSTERFILECHECK, None,
2609 "File %s is optional, but it must exist on all or no"
2610 " nodes (not found on %s)",
2611 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2613 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2614 "File %s is missing from node(s) %s", filename,
2615 utils.CommaJoin(utils.NiceSort(missing_file)))
2617 # Warn if a node has a file it shouldn't
2618 unexpected = with_file - expected_nodes
2620 constants.CV_ECLUSTERFILECHECK, None,
2621 "File %s should not exist on node(s) %s",
2622 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2624 # See if there are multiple versions of the file
2625 test = len(checksums) > 1
2627 variants = ["variant %s on %s" %
2628 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2629 for (idx, (checksum, nodes)) in
2630 enumerate(sorted(checksums.items()))]
2634 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2635 "File %s found with %s different checksums (%s)",
2636 filename, len(checksums), "; ".join(variants))
2638 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2640 """Verifies and the node DRBD status.
2642 @type ninfo: L{objects.Node}
2643 @param ninfo: the node to check
2644 @param nresult: the remote results for the node
2645 @param instanceinfo: the dict of instances
2646 @param drbd_helper: the configured DRBD usermode helper
2647 @param drbd_map: the DRBD map as returned by
2648 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
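The map is assumed here to be, roughly, a dict of node name to
{minor: instance name}, which is how the loop below consumes it.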
2652 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2655 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2656 test = (helper_result == None)
2657 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2658 "no drbd usermode helper returned")
2660 status, payload = helper_result
2662 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2663 "drbd usermode helper check unsuccessful: %s", payload)
2664 test = status and (payload != drbd_helper)
2665 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2666 "wrong drbd usermode helper: %s", payload)
2668 # compute the DRBD minors
2670 for minor, instance in drbd_map[node].items():
2671 test = instance not in instanceinfo
2672 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2673 "ghost instance '%s' in temporary DRBD map", instance)
2674 # ghost instance should not be running, but otherwise we
2675 # don't give double warnings (both ghost instance and
2676 # unallocated minor in use)
2678 node_drbd[minor] = (instance, False)
2680 instance = instanceinfo[instance]
2681 node_drbd[minor] = (instance.name,
2682 instance.admin_state == constants.ADMINST_UP)
2684 # and now check them
2685 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2686 test = not isinstance(used_minors, (tuple, list))
2687 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2688 "cannot parse drbd status file: %s", str(used_minors))
2690 # we cannot check drbd status
2693 for minor, (iname, must_exist) in node_drbd.items():
2694 test = minor not in used_minors and must_exist
2695 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2696 "drbd minor %d of instance %s is not active", minor, iname)
2697 for minor in used_minors:
2698 test = minor not in node_drbd
2699 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2700 "unallocated drbd minor %d is in use", minor)
2702 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2703 """Builds the node OS structures.
2705 @type ninfo: L{objects.Node}
2706 @param ninfo: the node to check
2707 @param nresult: the remote results for the node
2708 @param nimg: the node image object
2712 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2714 remote_os = nresult.get(constants.NV_OSLIST, None)
2715 test = (not isinstance(remote_os, list) or
2716 not compat.all(isinstance(v, list) and len(v) == 7
2717 for v in remote_os))
2719 _ErrorIf(test, constants.CV_ENODEOS, node,
2720 "node hasn't returned valid OS data")
2729 for (name, os_path, status, diagnose,
2730 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2732 if name not in os_dict:
2735 # parameters is a list of lists instead of list of tuples due to
2736 # JSON lacking a real tuple type, fix it:
2737 parameters = [tuple(v) for v in parameters]
2738 os_dict[name].append((os_path, status, diagnose,
2739 set(variants), set(parameters), set(api_ver)))
2741 nimg.oslist = os_dict
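# Illustrative shape of the result (based on the loop above):
#   nimg.oslist = {os_name: [(path, status, diagnose, set(variants),
#                             set(parameters), set(api_versions)), ...]}
# A well-behaved node reports exactly one entry per OS name.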
2743 def _VerifyNodeOS(self, ninfo, nimg, base):
2744 """Verifies the node OS list.
2746 @type ninfo: L{objects.Node}
2747 @param ninfo: the node to check
2748 @param nimg: the node image object
2749 @param base: the 'template' node we match against (e.g. from the master)
2753 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2755 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2757 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2758 for os_name, os_data in nimg.oslist.items():
2759 assert os_data, "Empty OS status for OS %s?!" % os_name
2760 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2761 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2762 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2763 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2764 "OS '%s' has multiple entries (first one shadows the rest): %s",
2765 os_name, utils.CommaJoin([v[0] for v in os_data]))
2766 # comparisons with the 'base' image
2767 test = os_name not in base.oslist
2768 _ErrorIf(test, constants.CV_ENODEOS, node,
2769 "Extra OS %s not present on reference node (%s)",
2773 assert base.oslist[os_name], "Base node has empty OS status?"
2774 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2776 # base OS is invalid, skipping
2778 for kind, a, b in [("API version", f_api, b_api),
2779 ("variants list", f_var, b_var),
2780 ("parameters", beautify_params(f_param),
2781 beautify_params(b_param))]:
2782 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2783 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2784 kind, os_name, base.name,
2785 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2787 # check any missing OSes
2788 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2789 _ErrorIf(missing, constants.CV_ENODEOS, node,
2790 "OSes present on reference node %s but missing on this node: %s",
2791 base.name, utils.CommaJoin(missing))
2793 def _VerifyOob(self, ninfo, nresult):
2794 """Verifies out of band functionality of a node.
2796 @type ninfo: L{objects.Node}
2797 @param ninfo: the node to check
2798 @param nresult: the remote results for the node
2802 # We just have to verify the paths on master and/or master candidates
2803 # as the oob helper is invoked on the master
2804 if ((ninfo.master_candidate or ninfo.master_capable) and
2805 constants.NV_OOB_PATHS in nresult):
2806 for path_result in nresult[constants.NV_OOB_PATHS]:
2807 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2809 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2810 """Verifies and updates the node volume data.
2812 This function will update a L{NodeImage}'s internal structures
2813 with data from the remote call.
2815 @type ninfo: L{objects.Node}
2816 @param ninfo: the node to check
2817 @param nresult: the remote results for the node
2818 @param nimg: the node image object
2819 @param vg_name: the configured VG name
2823 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2825 nimg.lvm_fail = True
2826 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2829 elif isinstance(lvdata, basestring):
2830 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2831 utils.SafeEncode(lvdata))
2832 elif not isinstance(lvdata, dict):
2833 _ErrorIf(True, constants.CV_ENODELVM, node,
2834 "rpc call to node failed (lvlist)")
2836 nimg.volumes = lvdata
2837 nimg.lvm_fail = False
2839 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2840 """Verifies and updates the node instance list.
2842 If the listing was successful, then updates this node's instance
2843 list. Otherwise, it marks the RPC call as failed for the instance list.
2846 @type ninfo: L{objects.Node}
2847 @param ninfo: the node to check
2848 @param nresult: the remote results for the node
2849 @param nimg: the node image object
2852 idata = nresult.get(constants.NV_INSTANCELIST, None)
2853 test = not isinstance(idata, list)
2854 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2855 "rpc call to node failed (instancelist): %s",
2856 utils.SafeEncode(str(idata)))
2858 nimg.hyp_fail = True
2860 nimg.instances = idata
2862 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2863 """Verifies and computes a node information map
2865 @type ninfo: L{objects.Node}
2866 @param ninfo: the node to check
2867 @param nresult: the remote results for the node
2868 @param nimg: the node image object
2869 @param vg_name: the configured VG name
2873 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2875 # try to read free memory (from the hypervisor)
2876 hv_info = nresult.get(constants.NV_HVINFO, None)
2877 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2878 _ErrorIf(test, constants.CV_ENODEHV, node,
2879 "rpc call to node failed (hvinfo)")
2882 nimg.mfree = int(hv_info["memory_free"])
2883 except (ValueError, TypeError):
2884 _ErrorIf(True, constants.CV_ENODERPC, node,
2885 "node returned invalid nodeinfo, check hypervisor")
2887 # FIXME: devise a free space model for file based instances as well
2888 if vg_name is not None:
2889 test = (constants.NV_VGLIST not in nresult or
2890 vg_name not in nresult[constants.NV_VGLIST])
2891 _ErrorIf(test, constants.CV_ENODELVM, node,
2892 "node didn't return data for the volume group '%s'"
2893 " - it is either missing or broken", vg_name)
2896 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2897 except (ValueError, TypeError):
2898 _ErrorIf(True, constants.CV_ENODERPC, node,
2899 "node returned invalid LVM info, check LVM status")
2901 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2902 """Gets per-disk status information for all instances.
2904 @type nodelist: list of strings
2905 @param nodelist: Node names
2906 @type node_image: dict of (name, L{objects.Node})
2907 @param node_image: Node objects
2908 @type instanceinfo: dict of (name, L{objects.Instance})
2909 @param instanceinfo: Instance objects
2910 @rtype: {instance: {node: [(success, payload)]}}
2911 @return: a dictionary of per-instance dictionaries with nodes as
2912 keys and disk information as values; the disk information is a
2913 list of tuples (success, payload)
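An illustrative (made-up) result:
{"inst1": {"node1": [(True, <status>), (False, "some error")]}}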
2916 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2919 node_disks_devonly = {}
2920 diskless_instances = set()
2921 diskless = constants.DT_DISKLESS
2923 for nname in nodelist:
2924 node_instances = list(itertools.chain(node_image[nname].pinst,
2925 node_image[nname].sinst))
2926 diskless_instances.update(inst for inst in node_instances
2927 if instanceinfo[inst].disk_template == diskless)
2928 disks = [(inst, disk)
2929 for inst in node_instances
2930 for disk in instanceinfo[inst].disks]
2933 # No need to collect data
2936 node_disks[nname] = disks
2938 # _AnnotateDiskParams already makes copies of the disks
2940 for (inst, dev) in disks:
2941 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2942 self.cfg.SetDiskID(anno_disk, nname)
2943 devonly.append(anno_disk)
2945 node_disks_devonly[nname] = devonly
2947 assert len(node_disks) == len(node_disks_devonly)
2949 # Collect data from all nodes with disks
2950 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2953 assert len(result) == len(node_disks)
2957 for (nname, nres) in result.items():
2958 disks = node_disks[nname]
2961 # No data from this node
2962 data = len(disks) * [(False, "node offline")]
2965 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2966 "while getting disk information: %s", msg)
2968 # No data from this node
2969 data = len(disks) * [(False, msg)]
2972 for idx, i in enumerate(nres.payload):
2973 if isinstance(i, (tuple, list)) and len(i) == 2:
2976 logging.warning("Invalid result from node %s, entry %d: %s",
2978 data.append((False, "Invalid result from the remote node"))
2980 for ((inst, _), status) in zip(disks, data):
2981 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2983 # Add empty entries for diskless instances.
2984 for inst in diskless_instances:
2985 assert inst not in instdisk
2988 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2989 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2990 compat.all(isinstance(s, (tuple, list)) and
2991 len(s) == 2 for s in statuses)
2992 for inst, nnames in instdisk.items()
2993 for nname, statuses in nnames.items())
2994 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2999 def _SshNodeSelector(group_uuid, all_nodes):
3000 """Create endless iterators for all potential SSH check hosts.
3003 nodes = [node for node in all_nodes
3004 if (node.group != group_uuid and
3006 keyfunc = operator.attrgetter("group")
3008 return map(itertools.cycle,
3009 [sorted(map(operator.attrgetter("name"), names))
3010 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3014 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3015 """Choose which nodes should talk to which other nodes.
3017 We will make nodes contact all nodes in their group, and one node from every other group.
3020 @warning: This algorithm has a known issue if one node group is much
3021 smaller than others (e.g. just one node). In such a case all other
3022 nodes will talk to the single node.
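Illustrative example: when verifying a group whose online nodes are
[n1, n2] while another group holds [m1, m2], both n1 and n2 are told to
check all of n1 and n2, and additionally n1 might be assigned m1 and n2
assigned m2, the foreign nodes being handed out round-robin per group by
_SshNodeSelector.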
3025 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3026 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3028 return (online_nodes,
3029 dict((name, sorted([i.next() for i in sel]))
3030 for name in online_nodes))
3032 def BuildHooksEnv(self):
3035 Cluster-Verify hooks are run only in the post phase; if they fail, their
3036 output is logged in the verify output and the verification fails.
3040 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3043 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3044 for node in self.my_node_info.values())
3048 def BuildHooksNodes(self):
3049 """Build hooks nodes.
3052 return ([], self.my_node_names)
3054 def Exec(self, feedback_fn):
3055 """Verify integrity of the node group, performing various test on nodes.
3058 # This method has too many local variables. pylint: disable=R0914
3059 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3061 if not self.my_node_names:
3063 feedback_fn("* Empty node group, skipping verification")
3067 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3068 verbose = self.op.verbose
3069 self._feedback_fn = feedback_fn
3071 vg_name = self.cfg.GetVGName()
3072 drbd_helper = self.cfg.GetDRBDHelper()
3073 cluster = self.cfg.GetClusterInfo()
3074 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3075 hypervisors = cluster.enabled_hypervisors
3076 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3078 i_non_redundant = [] # Non redundant instances
3079 i_non_a_balanced = [] # Non auto-balanced instances
3080 i_offline = 0 # Count of offline instances
3081 n_offline = 0 # Count of offline nodes
3082 n_drained = 0 # Count of nodes being drained
3083 node_vol_should = {}
3085 # FIXME: verify OS list
3088 filemap = _ComputeAncillaryFiles(cluster, False)
3090 # do local checksums
3091 master_node = self.master_node = self.cfg.GetMasterNode()
3092 master_ip = self.cfg.GetMasterIP()
3094 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3097 if self.cfg.GetUseExternalMipScript():
3098 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3100 node_verify_param = {
3101 constants.NV_FILELIST:
3102 utils.UniqueSequence(filename
3103 for files in filemap
3104 for filename in files),
3105 constants.NV_NODELIST:
3106 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3107 self.all_node_info.values()),
3108 constants.NV_HYPERVISOR: hypervisors,
3109 constants.NV_HVPARAMS:
3110 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3111 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3112 for node in node_data_list
3113 if not node.offline],
3114 constants.NV_INSTANCELIST: hypervisors,
3115 constants.NV_VERSION: None,
3116 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3117 constants.NV_NODESETUP: None,
3118 constants.NV_TIME: None,
3119 constants.NV_MASTERIP: (master_node, master_ip),
3120 constants.NV_OSLIST: None,
3121 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3122 constants.NV_USERSCRIPTS: user_scripts,
3125 if vg_name is not None:
3126 node_verify_param[constants.NV_VGLIST] = None
3127 node_verify_param[constants.NV_LVLIST] = vg_name
3128 node_verify_param[constants.NV_PVLIST] = [vg_name]
3131 node_verify_param[constants.NV_DRBDLIST] = None
3132 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3135 # FIXME: this needs to be changed per node-group, not cluster-wide
3137 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3138 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3139 bridges.add(default_nicpp[constants.NIC_LINK])
3140 for instance in self.my_inst_info.values():
3141 for nic in instance.nics:
3142 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3143 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3144 bridges.add(full_nic[constants.NIC_LINK])
3147 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3149 # Build our expected cluster state
3150 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3152 vm_capable=node.vm_capable))
3153 for node in node_data_list)
3157 for node in self.all_node_info.values():
3158 path = _SupportsOob(self.cfg, node)
3159 if path and path not in oob_paths:
3160 oob_paths.append(path)
3163 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3165 for instance in self.my_inst_names:
3166 inst_config = self.my_inst_info[instance]
3167 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3170 for nname in inst_config.all_nodes:
3171 if nname not in node_image:
3172 gnode = self.NodeImage(name=nname)
3173 gnode.ghost = (nname not in self.all_node_info)
3174 node_image[nname] = gnode
3176 inst_config.MapLVsByNode(node_vol_should)
3178 pnode = inst_config.primary_node
3179 node_image[pnode].pinst.append(instance)
3181 for snode in inst_config.secondary_nodes:
3182 nimg = node_image[snode]
3183 nimg.sinst.append(instance)
3184 if pnode not in nimg.sbp:
3185 nimg.sbp[pnode] = []
3186 nimg.sbp[pnode].append(instance)
3188 # At this point, we have the in-memory data structures complete,
3189 # except for the runtime information, which we'll gather next
3191 # Due to the way our RPC system works, exact response times cannot be
3192 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3193 # time before and after executing the request, we can at least have a time window.
3195 nvinfo_starttime = time.time()
3196 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3198 self.cfg.GetClusterName())
3199 nvinfo_endtime = time.time()
3201 if self.extra_lv_nodes and vg_name is not None:
3203 self.rpc.call_node_verify(self.extra_lv_nodes,
3204 {constants.NV_LVLIST: vg_name},
3205 self.cfg.GetClusterName())
3207 extra_lv_nvinfo = {}
3209 all_drbd_map = self.cfg.ComputeDRBDMap()
3211 feedback_fn("* Gathering disk information (%s nodes)" %
3212 len(self.my_node_names))
3213 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3216 feedback_fn("* Verifying configuration file consistency")
3218 # If not all nodes are being checked, we need to make sure the master node
3219 # and a non-checked vm_capable node are in the list.
3220 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3222 vf_nvinfo = all_nvinfo.copy()
3223 vf_node_info = list(self.my_node_info.values())
3224 additional_nodes = []
3225 if master_node not in self.my_node_info:
3226 additional_nodes.append(master_node)
3227 vf_node_info.append(self.all_node_info[master_node])
3228 # Add the first vm_capable node we find which is not included,
3229 # excluding the master node (which we already have)
3230 for node in absent_nodes:
3231 nodeinfo = self.all_node_info[node]
3232 if (nodeinfo.vm_capable and not nodeinfo.offline and
3233 node != master_node):
3234 additional_nodes.append(node)
3235 vf_node_info.append(self.all_node_info[node])
3237 key = constants.NV_FILELIST
3238 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3239 {key: node_verify_param[key]},
3240 self.cfg.GetClusterName()))
3242 vf_nvinfo = all_nvinfo
3243 vf_node_info = self.my_node_info.values()
3245 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3247 feedback_fn("* Verifying node status")
3251 for node_i in node_data_list:
3253 nimg = node_image[node]
3257 feedback_fn("* Skipping offline node %s" % (node,))
3261 if node == master_node:
3263 elif node_i.master_candidate:
3264 ntype = "master candidate"
3265 elif node_i.drained:
3271 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3273 msg = all_nvinfo[node].fail_msg
3274 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3277 nimg.rpc_fail = True
3280 nresult = all_nvinfo[node].payload
3282 nimg.call_ok = self._VerifyNode(node_i, nresult)
3283 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3284 self._VerifyNodeNetwork(node_i, nresult)
3285 self._VerifyNodeUserScripts(node_i, nresult)
3286 self._VerifyOob(node_i, nresult)
3289 self._VerifyNodeLVM(node_i, nresult, vg_name)
3290 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3293 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3294 self._UpdateNodeInstances(node_i, nresult, nimg)
3295 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3296 self._UpdateNodeOS(node_i, nresult, nimg)
3298 if not nimg.os_fail:
3299 if refos_img is None:
3301 self._VerifyNodeOS(node_i, nimg, refos_img)
3302 self._VerifyNodeBridges(node_i, nresult, bridges)
3304 # Check whether all running instances are primary for the node. (This
3305 # can no longer be done from _VerifyInstance below, since some of the
3306 # wrong instances could be from other node groups.)
3307 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3309 for inst in non_primary_inst:
3310 test = inst in self.all_inst_info
3311 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3312 "instance should not run on node %s", node_i.name)
3313 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3314 "node is running unknown instance %s", inst)
3316 for node, result in extra_lv_nvinfo.items():
3317 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3318 node_image[node], vg_name)
3320 feedback_fn("* Verifying instance status")
3321 for instance in self.my_inst_names:
3323 feedback_fn("* Verifying instance %s" % instance)
3324 inst_config = self.my_inst_info[instance]
3325 self._VerifyInstance(instance, inst_config, node_image,
3327 inst_nodes_offline = []
3329 pnode = inst_config.primary_node
3330 pnode_img = node_image[pnode]
3331 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3332 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3333 " primary node failed", instance)
3335 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3337 constants.CV_EINSTANCEBADNODE, instance,
3338 "instance is marked as running and lives on offline node %s",
3339 inst_config.primary_node)
3341 # If the instance is non-redundant we cannot survive losing its primary
3342 # node, so we are not N+1 compliant. On the other hand we have no disk
3343 # templates with more than one secondary so that situation is not well supported either.
3345 # FIXME: does not support file-backed instances
3346 if not inst_config.secondary_nodes:
3347 i_non_redundant.append(instance)
3349 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3350 constants.CV_EINSTANCELAYOUT,
3351 instance, "instance has multiple secondary nodes: %s",
3352 utils.CommaJoin(inst_config.secondary_nodes),
3353 code=self.ETYPE_WARNING)
3355 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3356 pnode = inst_config.primary_node
3357 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3358 instance_groups = {}
3360 for node in instance_nodes:
3361 instance_groups.setdefault(self.all_node_info[node].group,
3365 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3366 # Sort so that we always list the primary node first.
3367 for group, nodes in sorted(instance_groups.items(),
3368 key=lambda (_, nodes): pnode in nodes,
3371 self._ErrorIf(len(instance_groups) > 1,
3372 constants.CV_EINSTANCESPLITGROUPS,
3373 instance, "instance has primary and secondary nodes in"
3374 " different groups: %s", utils.CommaJoin(pretty_list),
3375 code=self.ETYPE_WARNING)
3377 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3378 i_non_a_balanced.append(instance)
3380 for snode in inst_config.secondary_nodes:
3381 s_img = node_image[snode]
3382 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3383 snode, "instance %s, connection to secondary node failed",
3387 inst_nodes_offline.append(snode)
3389 # warn that the instance lives on offline nodes
3390 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3391 "instance has offline secondary node(s) %s",
3392 utils.CommaJoin(inst_nodes_offline))
3393 # ... or ghost/non-vm_capable nodes
3394 for node in inst_config.all_nodes:
3395 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3396 instance, "instance lives on ghost node %s", node)
3397 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3398 instance, "instance lives on non-vm_capable node %s", node)
3400 feedback_fn("* Verifying orphan volumes")
3401 reserved = utils.FieldSet(*cluster.reserved_lvs)
3403 # We will get spurious "unknown volume" warnings if any node of this group
3404 # is secondary for an instance whose primary is in another group. To avoid
3405 # them, we find these instances and add their volumes to node_vol_should.
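# (Illustrative example: an instance whose primary node sits in another
# group but whose DRBD secondary is one of our nodes leaves LVs on that
# node; without this step those LVs would be reported as orphans.)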
3406 for inst in self.all_inst_info.values():
3407 for secondary in inst.secondary_nodes:
3408 if (secondary in self.my_node_info
3409 and inst.name not in self.my_inst_info):
3410 inst.MapLVsByNode(node_vol_should)
3413 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3415 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3416 feedback_fn("* Verifying N+1 Memory redundancy")
3417 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3419 feedback_fn("* Other Notes")
3421 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3422 % len(i_non_redundant))
3424 if i_non_a_balanced:
3425 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3426 % len(i_non_a_balanced))
3429 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3432 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3435 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3439 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3440 """Analyze the post-hooks' result
3442 This method analyses the hook result, handles it, and sends some
3443 nicely-formatted feedback back to the user.
3445 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3446 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3447 @param hooks_results: the results of the multi-node hooks rpc call
3448 @param feedback_fn: function used to send feedback back to the caller
3449 @param lu_result: previous Exec result
3450 @return: the new Exec result, based on the previous result
3454 # We only really run POST phase hooks, only for non-empty groups,
3455 # and are only interested in their results
3456 if not self.my_node_names:
3459 elif phase == constants.HOOKS_PHASE_POST:
3460 # Used to change hooks' output to proper indentation
3461 feedback_fn("* Hooks Results")
3462 assert hooks_results, "invalid result from hooks"
3464 for node_name in hooks_results:
3465 res = hooks_results[node_name]
3467 test = msg and not res.offline
3468 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3469 "Communication failure in hooks execution: %s", msg)
3470 if res.offline or msg:
3471 # No need to investigate payload if node is offline or gave an error.
3474 for script, hkr, output in res.payload:
3475 test = hkr == constants.HKR_FAIL
3476 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3477 "Script %s failed, output:", script)
3479 output = self._HOOKS_INDENT_RE.sub(" ", output)
3480 feedback_fn("%s" % output)
3486 class LUClusterVerifyDisks(NoHooksLU):
3487 """Verifies the cluster disks status.
3492 def ExpandNames(self):
3493 self.share_locks = _ShareAll()
3494 self.needed_locks = {
3495 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3498 def Exec(self, feedback_fn):
3499 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3501 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3502 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3503 for group in group_names])
3506 class LUGroupVerifyDisks(NoHooksLU):
3507 """Verifies the status of all disks in a node group.
3512 def ExpandNames(self):
3513 # Raises errors.OpPrereqError on its own if group can't be found
3514 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3516 self.share_locks = _ShareAll()
3517 self.needed_locks = {
3518 locking.LEVEL_INSTANCE: [],
3519 locking.LEVEL_NODEGROUP: [],
3520 locking.LEVEL_NODE: [],
3523 def DeclareLocks(self, level):
3524 if level == locking.LEVEL_INSTANCE:
3525 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3527 # Lock instances optimistically, needs verification once node and group
3528 # locks have been acquired
3529 self.needed_locks[locking.LEVEL_INSTANCE] = \
3530 self.cfg.GetNodeGroupInstances(self.group_uuid)
3532 elif level == locking.LEVEL_NODEGROUP:
3533 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3535 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3536 set([self.group_uuid] +
3537 # Lock all groups used by instances optimistically; this requires
3538 # going via the node before it's locked, requiring verification later on
3541 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3542 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3544 elif level == locking.LEVEL_NODE:
3545 # This will only lock the nodes in the group to be verified which contain actual instances
3547 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3548 self._LockInstancesNodes()
3550 # Lock all nodes in group to be verified
3551 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3552 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3553 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3555 def CheckPrereq(self):
3556 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3557 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3558 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3560 assert self.group_uuid in owned_groups
3562 # Check if locked instances are still correct
3563 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3565 # Get instance information
3566 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3568 # Check if node groups for locked instances are still correct
3569 _CheckInstancesNodeGroups(self.cfg, self.instances,
3570 owned_groups, owned_nodes, self.group_uuid)
3572 def Exec(self, feedback_fn):
3573 """Verify integrity of cluster disks.
3575 @rtype: tuple of three items
3576 @return: a tuple of (dict of node-to-node_error, list of instances
3577 which need activate-disks, dict of instance: (node, volume) for missing volumes)
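An illustrative (entirely made-up) return value:
({"node3": "rpc error"}, ["instance1"],
{"instance2": [["node1", "xenvg/lv-disk0"]]})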
3582 res_instances = set()
3585 nv_dict = _MapInstanceDisksToNodes([inst
3586 for inst in self.instances.values()
3587 if inst.admin_state == constants.ADMINST_UP])
3590 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3591 set(self.cfg.GetVmCapableNodeList()))
3593 node_lvs = self.rpc.call_lv_list(nodes, [])
3595 for (node, node_res) in node_lvs.items():
3596 if node_res.offline:
3599 msg = node_res.fail_msg
3601 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3602 res_nodes[node] = msg
3605 for lv_name, (_, _, lv_online) in node_res.payload.items():
3606 inst = nv_dict.pop((node, lv_name), None)
3607 if not (lv_online or inst is None):
3608 res_instances.add(inst)
3610 # any leftover items in nv_dict are missing LVs, let's arrange the data better
3612 for key, inst in nv_dict.iteritems():
3613 res_missing.setdefault(inst, []).append(list(key))
3615 return (res_nodes, list(res_instances), res_missing)
3618 class LUClusterRepairDiskSizes(NoHooksLU):
3619 """Verifies the cluster disks sizes.
3624 def ExpandNames(self):
3625 if self.op.instances:
3626 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3627 self.needed_locks = {
3628 locking.LEVEL_NODE_RES: [],
3629 locking.LEVEL_INSTANCE: self.wanted_names,
3631 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3633 self.wanted_names = None
3634 self.needed_locks = {
3635 locking.LEVEL_NODE_RES: locking.ALL_SET,
3636 locking.LEVEL_INSTANCE: locking.ALL_SET,
3638 self.share_locks = {
3639 locking.LEVEL_NODE_RES: 1,
3640 locking.LEVEL_INSTANCE: 0,
3643 def DeclareLocks(self, level):
3644 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3645 self._LockInstancesNodes(primary_only=True, level=level)
3647 def CheckPrereq(self):
3648 """Check prerequisites.
3650 This only checks the optional instance list against the existing names.
3653 if self.wanted_names is None:
3654 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3656 self.wanted_instances = \
3657 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3659 def _EnsureChildSizes(self, disk):
3660 """Ensure children of the disk have the needed disk size.
3662 This is valid mainly for DRBD8 and fixes an issue where the
3663 children have smaller disk size.
3665 @param disk: an L{ganeti.objects.Disk} object
3668 if disk.dev_type == constants.LD_DRBD8:
3669 assert disk.children, "Empty children for DRBD8?"
3670 fchild = disk.children[0]
3671 mismatch = fchild.size < disk.size
3673 self.LogInfo("Child disk has size %d, parent %d, fixing",
3674 fchild.size, disk.size)
3675 fchild.size = disk.size
3677 # and we recurse on this child only, not on the metadev
3678 return self._EnsureChildSizes(fchild) or mismatch
3682 def Exec(self, feedback_fn):
3683 """Verify the size of cluster disks.
3686 # TODO: check child disks too
3687 # TODO: check differences in size between primary/secondary nodes
3689 for instance in self.wanted_instances:
3690 pnode = instance.primary_node
3691 if pnode not in per_node_disks:
3692 per_node_disks[pnode] = []
3693 for idx, disk in enumerate(instance.disks):
3694 per_node_disks[pnode].append((instance, idx, disk))
3696 assert not (frozenset(per_node_disks.keys()) -
3697 self.owned_locks(locking.LEVEL_NODE_RES)), \
3698 "Not owning correct locks"
3699 assert not self.owned_locks(locking.LEVEL_NODE)
3702 for node, dskl in per_node_disks.items():
3703 newl = [v[2].Copy() for v in dskl]
3705 self.cfg.SetDiskID(dsk, node)
3706 result = self.rpc.call_blockdev_getsize(node, newl)
3708 self.LogWarning("Failure in blockdev_getsize call to node"
3709 " %s, ignoring", node)
3711 if len(result.payload) != len(dskl):
3712 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3713 " result.payload=%s", node, len(dskl), result.payload)
3714 self.LogWarning("Invalid result from node %s, ignoring node results",
3717 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3718 if size is None:
3719 self.LogWarning("Disk %d of instance %s did not return size"
3720 " information, ignoring", idx, instance.name)
3722 if not isinstance(size, (int, long)):
3723 self.LogWarning("Disk %d of instance %s did not return valid"
3724 " size information, ignoring", idx, instance.name)
3727 if size != disk.size:
3728 self.LogInfo("Disk %d of instance %s has mismatched size,"
3729 " correcting: recorded %d, actual %d", idx,
3730 instance.name, disk.size, size)
3731 disk.size = size
3732 self.cfg.Update(instance, feedback_fn)
3733 changed.append((instance.name, idx, size))
3734 if self._EnsureChildSizes(disk):
3735 self.cfg.Update(instance, feedback_fn)
3736 changed.append((instance.name, idx, disk.size))
3738 return changed
3740 class LUClusterRename(LogicalUnit):
3741 """Rename the cluster.
3744 HPATH = "cluster-rename"
3745 HTYPE = constants.HTYPE_CLUSTER
3747 def BuildHooksEnv(self):
3752 "OP_TARGET": self.cfg.GetClusterName(),
3753 "NEW_NAME": self.op.name,
3756 def BuildHooksNodes(self):
3757 """Build hooks nodes.
3760 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3762 def CheckPrereq(self):
3763 """Verify that the passed name is a valid one.
3766 hostname = netutils.GetHostname(name=self.op.name,
3767 family=self.cfg.GetPrimaryIPFamily())
3769 new_name = hostname.name
3770 self.ip = new_ip = hostname.ip
3771 old_name = self.cfg.GetClusterName()
3772 old_ip = self.cfg.GetMasterIP()
3773 if new_name == old_name and new_ip == old_ip:
3774 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3775 " cluster has changed",
3777 if new_ip != old_ip:
3778 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3779 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3780 " reachable on the network" %
3781 new_ip, errors.ECODE_NOTUNIQUE)
3783 self.op.name = new_name
3785 def Exec(self, feedback_fn):
3786 """Rename the cluster.
3789 clustername = self.op.name
3792 # shut down the master IP
3793 master_params = self.cfg.GetMasterNetworkParameters()
3794 ems = self.cfg.GetUseExternalMipScript()
3795 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3797 result.Raise("Could not disable the master role")
3800 cluster = self.cfg.GetClusterInfo()
3801 cluster.cluster_name = clustername
3802 cluster.master_ip = new_ip
3803 self.cfg.Update(cluster, feedback_fn)
3805 # update the known hosts file
3806 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3807 node_list = self.cfg.GetOnlineNodeList()
3808 try:
3809 node_list.remove(master_params.name)
3810 except ValueError:
3811 pass
3812 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3814 master_params.ip = new_ip
3815 result = self.rpc.call_node_activate_master_ip(master_params.name,
3817 msg = result.fail_msg
3818 if msg:
3819 self.LogWarning("Could not re-enable the master role on"
3820 " the master, please restart manually: %s", msg)
3825 def _ValidateNetmask(cfg, netmask):
3826 """Checks if a netmask is valid.
3828 @type cfg: L{config.ConfigWriter}
3829 @param cfg: The cluster configuration
3831 @param netmask: the netmask to be verified
3832 @raise errors.OpPrereqError: if the validation fails
3835 ip_family = cfg.GetPrimaryIPFamily()
3836 try:
3837 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3838 except errors.ProgrammerError:
3839 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3841 if not ipcls.ValidateNetmask(netmask):
3842 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3843 (netmask), errors.ECODE_INVAL)
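# Illustrative sketch (added for clarity, not part of the original module):
# the netmask argument is the integer prefix length carried by the opcode,
# so typical calls would look like
#
#   _ValidateNetmask(self.cfg, 24)   # usually fine on an IPv4 cluster
#   _ValidateNetmask(self.cfg, 64)   # usually fine on an IPv6 cluster
#
# with the exact accepted range decided by the ipcls.ValidateNetmask check
# above for the cluster's primary IP family.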
3846 class LUClusterSetParams(LogicalUnit):
3847 """Change the parameters of the cluster.
3850 HPATH = "cluster-modify"
3851 HTYPE = constants.HTYPE_CLUSTER
3854 def CheckArguments(self):
3858 if self.op.uid_pool:
3859 uidpool.CheckUidPool(self.op.uid_pool)
3861 if self.op.add_uids:
3862 uidpool.CheckUidPool(self.op.add_uids)
3864 if self.op.remove_uids:
3865 uidpool.CheckUidPool(self.op.remove_uids)
3867 if self.op.master_netmask is not None:
3868 _ValidateNetmask(self.cfg, self.op.master_netmask)
3870 if self.op.diskparams:
3871 for dt_params in self.op.diskparams.values():
3872 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3873 try:
3874 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3875 except errors.OpPrereqError, err:
3876 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
3879 def ExpandNames(self):
3880 # FIXME: in the future maybe other cluster params won't require checking on
3881 # all nodes to be modified.
3882 self.needed_locks = {
3883 locking.LEVEL_NODE: locking.ALL_SET,
3884 locking.LEVEL_INSTANCE: locking.ALL_SET,
3885 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3887 self.share_locks = {
3888 locking.LEVEL_NODE: 1,
3889 locking.LEVEL_INSTANCE: 1,
3890 locking.LEVEL_NODEGROUP: 1,
3893 def BuildHooksEnv(self):
3898 "OP_TARGET": self.cfg.GetClusterName(),
3899 "NEW_VG_NAME": self.op.vg_name,
3902 def BuildHooksNodes(self):
3903 """Build hooks nodes.
3906 mn = self.cfg.GetMasterNode()
3907 return ([mn], [mn])
3909 def CheckPrereq(self):
3910 """Check prerequisites.
3912 This checks whether the given params don't conflict and
3913 if the given volume group is valid.
3916 if self.op.vg_name is not None and not self.op.vg_name:
3917 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3918 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3919 " instances exist", errors.ECODE_INVAL)
3921 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3922 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3923 raise errors.OpPrereqError("Cannot disable drbd helper while"
3924 " drbd-based instances exist",
3927 node_list = self.owned_locks(locking.LEVEL_NODE)
3929 # if vg_name is not None, check the given volume group on all nodes
3930 if self.op.vg_name:
3931 vglist = self.rpc.call_vg_list(node_list)
3932 for node in node_list:
3933 msg = vglist[node].fail_msg
3934 if msg:
3935 # ignoring down node
3936 self.LogWarning("Error while gathering data on node %s"
3937 " (ignoring node): %s", node, msg)
3939 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3940 self.op.vg_name,
3941 constants.MIN_VG_SIZE)
3943 raise errors.OpPrereqError("Error on node '%s': %s" %
3944 (node, vgstatus), errors.ECODE_ENVIRON)
3946 if self.op.drbd_helper:
3947 # checks given drbd helper on all nodes
3948 helpers = self.rpc.call_drbd_helper(node_list)
3949 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3950 if ninfo.offline:
3951 self.LogInfo("Not checking drbd helper on offline node %s", node)
3952 continue
3953 msg = helpers[node].fail_msg
3954 if msg:
3955 raise errors.OpPrereqError("Error checking drbd helper on node"
3956 " '%s': %s" % (node, msg),
3957 errors.ECODE_ENVIRON)
3958 node_helper = helpers[node].payload
3959 if node_helper != self.op.drbd_helper:
3960 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3961 (node, node_helper), errors.ECODE_ENVIRON)
3963 self.cluster = cluster = self.cfg.GetClusterInfo()
3964 # validate params changes
3965 if self.op.beparams:
3966 objects.UpgradeBeParams(self.op.beparams)
3967 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3968 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3970 if self.op.ndparams:
3971 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3972 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3974 # TODO: we need a more general way to handle resetting
3975 # cluster-level parameters to default values
3976 if self.new_ndparams["oob_program"] == "":
3977 self.new_ndparams["oob_program"] = \
3978 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3980 if self.op.hv_state:
3981 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3982 self.cluster.hv_state_static)
3983 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3984 for hv, values in new_hv_state.items())
3986 if self.op.disk_state:
3987 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3988 self.cluster.disk_state_static)
3989 self.new_disk_state = \
3990 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3991 for name, values in svalues.items()))
3992 for storage, svalues in new_disk_state.items())
3994 if self.op.ipolicy:
3995 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3998 all_instances = self.cfg.GetAllInstancesInfo().values()
3999 violations = set()
4000 for group in self.cfg.GetAllNodeGroupsInfo().values():
4001 instances = frozenset([inst for inst in all_instances
4002 if compat.any(node in group.members
4003 for node in inst.all_nodes)])
4004 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4005 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
4007 new_ipolicy, instances)
4009 violations.update(new)
4012 self.LogWarning("After the ipolicy change the following instances"
4013 " violate them: %s",
4014 utils.CommaJoin(utils.NiceSort(violations)))
4016 if self.op.nicparams:
4017 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4018 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4019 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4020 nic_errors = []
4022 # check all instances for consistency
4023 for instance in self.cfg.GetAllInstancesInfo().values():
4024 for nic_idx, nic in enumerate(instance.nics):
4025 params_copy = copy.deepcopy(nic.nicparams)
4026 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4028 # check parameter syntax
4029 try:
4030 objects.NIC.CheckParameterSyntax(params_filled)
4031 except errors.ConfigurationError, err:
4032 nic_errors.append("Instance %s, nic/%d: %s" %
4033 (instance.name, nic_idx, err))
4035 # if we're moving instances to routed, check that they have an ip
4036 target_mode = params_filled[constants.NIC_MODE]
4037 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4038 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4039 " address" % (instance.name, nic_idx))
4041 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4042 "\n".join(nic_errors))
4044 # hypervisor list/parameters
4045 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4046 if self.op.hvparams:
4047 for hv_name, hv_dict in self.op.hvparams.items():
4048 if hv_name not in self.new_hvparams:
4049 self.new_hvparams[hv_name] = hv_dict
4050 else:
4051 self.new_hvparams[hv_name].update(hv_dict)
4053 # disk template parameters
4054 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4055 if self.op.diskparams:
4056 for dt_name, dt_params in self.op.diskparams.items():
4057 if dt_name not in self.new_diskparams:
4058 self.new_diskparams[dt_name] = dt_params
4059 else:
4060 self.new_diskparams[dt_name].update(dt_params)
4062 # os hypervisor parameters
4063 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4064 if self.op.os_hvp:
4065 for os_name, hvs in self.op.os_hvp.items():
4066 if os_name not in self.new_os_hvp:
4067 self.new_os_hvp[os_name] = hvs
4069 for hv_name, hv_dict in hvs.items():
4070 if hv_name not in self.new_os_hvp[os_name]:
4071 self.new_os_hvp[os_name][hv_name] = hv_dict
4072 else:
4073 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4076 self.new_osp = objects.FillDict(cluster.osparams, {})
4077 if self.op.osparams:
4078 for os_name, osp in self.op.osparams.items():
4079 if os_name not in self.new_osp:
4080 self.new_osp[os_name] = {}
4082 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4083 use_none=True)
4085 if not self.new_osp[os_name]:
4086 # we removed all parameters
4087 del self.new_osp[os_name]
4088 else:
4089 # check the parameter validity (remote check)
4090 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4091 os_name, self.new_osp[os_name])
4093 # changes to the hypervisor list
4094 if self.op.enabled_hypervisors is not None:
4095 self.hv_list = self.op.enabled_hypervisors
4096 for hv in self.hv_list:
4097 # if the hypervisor doesn't already exist in the cluster
4098 # hvparams, we initialize it to empty, and then (in both
4099 # cases) we make sure to fill the defaults, as we might not
4100 # have a complete defaults list if the hypervisor wasn't
4101 # enabled before
4102 if hv not in new_hvp:
4103 new_hvp[hv] = {}
4104 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4105 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4106 else:
4107 self.hv_list = cluster.enabled_hypervisors
4109 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4110 # either the enabled list has changed, or the parameters have, validate
4111 for hv_name, hv_params in self.new_hvparams.items():
4112 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4113 (self.op.enabled_hypervisors and
4114 hv_name in self.op.enabled_hypervisors)):
4115 # either this is a new hypervisor, or its parameters have changed
4116 hv_class = hypervisor.GetHypervisor(hv_name)
4117 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4118 hv_class.CheckParameterSyntax(hv_params)
4119 _CheckHVParams(self, node_list, hv_name, hv_params)
4121 if self.op.os_hvp:
4122 # no need to check any newly-enabled hypervisors, since the
4123 # defaults have already been checked in the above code-block
4124 for os_name, os_hvp in self.new_os_hvp.items():
4125 for hv_name, hv_params in os_hvp.items():
4126 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4127 # we need to fill in the new os_hvp on top of the actual hv_p
4128 cluster_defaults = self.new_hvparams.get(hv_name, {})
4129 new_osp = objects.FillDict(cluster_defaults, hv_params)
4130 hv_class = hypervisor.GetHypervisor(hv_name)
4131 hv_class.CheckParameterSyntax(new_osp)
4132 _CheckHVParams(self, node_list, hv_name, new_osp)
4134 if self.op.default_iallocator:
4135 alloc_script = utils.FindFile(self.op.default_iallocator,
4136 constants.IALLOCATOR_SEARCH_PATH,
4137 os.path.isfile)
4138 if alloc_script is None:
4139 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4140 " specified" % self.op.default_iallocator,
4143 def Exec(self, feedback_fn):
4144 """Change the parameters of the cluster.
4147 if self.op.vg_name is not None:
4148 new_volume = self.op.vg_name
4151 if new_volume != self.cfg.GetVGName():
4152 self.cfg.SetVGName(new_volume)
4154 feedback_fn("Cluster LVM configuration already in desired"
4155 " state, not changing")
4156 if self.op.drbd_helper is not None:
4157 new_helper = self.op.drbd_helper
4160 if new_helper != self.cfg.GetDRBDHelper():
4161 self.cfg.SetDRBDHelper(new_helper)
4162 else:
4163 feedback_fn("Cluster DRBD helper already in desired state,"
4164 " not changing")
4165 if self.op.hvparams:
4166 self.cluster.hvparams = self.new_hvparams
4167 if self.op.os_hvp:
4168 self.cluster.os_hvp = self.new_os_hvp
4169 if self.op.enabled_hypervisors is not None:
4170 self.cluster.hvparams = self.new_hvparams
4171 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4172 if self.op.beparams:
4173 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4174 if self.op.nicparams:
4175 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4176 if self.op.ipolicy:
4177 self.cluster.ipolicy = self.new_ipolicy
4178 if self.op.osparams:
4179 self.cluster.osparams = self.new_osp
4180 if self.op.ndparams:
4181 self.cluster.ndparams = self.new_ndparams
4182 if self.op.diskparams:
4183 self.cluster.diskparams = self.new_diskparams
4184 if self.op.hv_state:
4185 self.cluster.hv_state_static = self.new_hv_state
4186 if self.op.disk_state:
4187 self.cluster.disk_state_static = self.new_disk_state
4189 if self.op.candidate_pool_size is not None:
4190 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4191 # we need to update the pool size here, otherwise the save will fail
4192 _AdjustCandidatePool(self, [])
4194 if self.op.maintain_node_health is not None:
4195 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4196 feedback_fn("Note: CONFD was disabled at build time, node health"
4197 " maintenance is not useful (still enabling it)")
4198 self.cluster.maintain_node_health = self.op.maintain_node_health
4200 if self.op.prealloc_wipe_disks is not None:
4201 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4203 if self.op.add_uids is not None:
4204 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4206 if self.op.remove_uids is not None:
4207 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4209 if self.op.uid_pool is not None:
4210 self.cluster.uid_pool = self.op.uid_pool
4212 if self.op.default_iallocator is not None:
4213 self.cluster.default_iallocator = self.op.default_iallocator
4215 if self.op.reserved_lvs is not None:
4216 self.cluster.reserved_lvs = self.op.reserved_lvs
4218 if self.op.use_external_mip_script is not None:
4219 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4221 def helper_os(aname, mods, desc):
4223 lst = getattr(self.cluster, aname)
4224 for key, val in mods:
4225 if key == constants.DDM_ADD:
4226 if val in lst:
4227 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4228 else:
4229 lst.append(val)
4230 elif key == constants.DDM_REMOVE:
4231 if val in lst:
4232 lst.remove(val)
4233 else:
4234 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4235 else:
4236 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4238 if self.op.hidden_os:
4239 helper_os("hidden_os", self.op.hidden_os, "hidden")
4241 if self.op.blacklisted_os:
4242 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4244 if self.op.master_netdev:
4245 master_params = self.cfg.GetMasterNetworkParameters()
4246 ems = self.cfg.GetUseExternalMipScript()
4247 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4248 self.cluster.master_netdev)
4249 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4251 result.Raise("Could not disable the master ip")
4252 feedback_fn("Changing master_netdev from %s to %s" %
4253 (master_params.netdev, self.op.master_netdev))
4254 self.cluster.master_netdev = self.op.master_netdev
4256 if self.op.master_netmask:
4257 master_params = self.cfg.GetMasterNetworkParameters()
4258 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4259 result = self.rpc.call_node_change_master_netmask(master_params.name,
4260 master_params.netmask,
4261 self.op.master_netmask,
4262 master_params.ip,
4263 master_params.netdev)
4264 if result.fail_msg:
4265 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4266 feedback_fn(msg)
4268 self.cluster.master_netmask = self.op.master_netmask
4270 self.cfg.Update(self.cluster, feedback_fn)
4272 if self.op.master_netdev:
4273 master_params = self.cfg.GetMasterNetworkParameters()
4274 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4275 self.op.master_netdev)
4276 ems = self.cfg.GetUseExternalMipScript()
4277 result = self.rpc.call_node_activate_master_ip(master_params.name,
4278 master_params, ems)
4279 if result.fail_msg:
4280 self.LogWarning("Could not re-enable the master ip on"
4281 " the master, please restart manually: %s",
4282 result.fail_msg)
4285 def _UploadHelper(lu, nodes, fname):
4286 """Helper for uploading a file and showing warnings.
4289 if os.path.exists(fname):
4290 result = lu.rpc.call_upload_file(nodes, fname)
4291 for to_node, to_result in result.items():
4292 msg = to_result.fail_msg
4293 if msg:
4294 msg = ("Copy of file %s to node %s failed: %s" %
4295 (fname, to_node, msg))
4296 lu.proc.LogWarning(msg)
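# Illustrative usage sketch (hypothetical arguments, added for clarity):
# callers pass a node list and an absolute file name, e.g.
#
#   _UploadHelper(self, self.cfg.GetOnlineNodeList(),
#                 constants.CONFD_HMAC_KEY)
#
# Note the behaviour documented above: a locally missing file is skipped
# silently, and per-node copy failures are only logged as warnings.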
4299 def _ComputeAncillaryFiles(cluster, redist):
4300 """Compute files external to Ganeti which need to be consistent.
4302 @type redist: boolean
4303 @param redist: Whether to include files which need to be redistributed
4306 # Compute files for all nodes
4307 files_all = set([
4308 constants.SSH_KNOWN_HOSTS_FILE,
4309 constants.CONFD_HMAC_KEY,
4310 constants.CLUSTER_DOMAIN_SECRET_FILE,
4311 constants.SPICE_CERT_FILE,
4312 constants.SPICE_CACERT_FILE,
4313 constants.RAPI_USERS_FILE,
4314 ])
4316 if not redist:
4317 files_all.update(constants.ALL_CERT_FILES)
4318 files_all.update(ssconf.SimpleStore().GetFileList())
4319 else:
4320 # we need to ship at least the RAPI certificate
4321 files_all.add(constants.RAPI_CERT_FILE)
4323 if cluster.modify_etc_hosts:
4324 files_all.add(constants.ETC_HOSTS)
4326 if cluster.use_external_mip_script:
4327 files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
4329 # Files which are optional, these must:
4330 # - be present in one other category as well
4331 # - either exist or not exist on all nodes of that category (mc, vm all)
4332 files_opt = set([
4333 constants.RAPI_USERS_FILE,
4334 ])
4336 # Files which should only be on master candidates
4337 files_mc = set()
4339 if not redist:
4340 files_mc.add(constants.CLUSTER_CONF_FILE)
4342 # Files which should only be on VM-capable nodes
4343 files_vm = set(filename
4344 for hv_name in cluster.enabled_hypervisors
4345 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4347 files_opt |= set(filename
4348 for hv_name in cluster.enabled_hypervisors
4349 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4351 # Filenames in each category must be unique
4352 all_files_set = files_all | files_mc | files_vm
4353 assert (len(all_files_set) ==
4354 sum(map(len, [files_all, files_mc, files_vm]))), \
4355 "Found file listed in more than one file list"
4357 # Optional files must be present in one other category
4358 assert all_files_set.issuperset(files_opt), \
4359 "Optional file not in a different required list"
4361 return (files_all, files_opt, files_mc, files_vm)
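# Illustrative sketch of how the four returned sets are typically consumed
# (mirrors _RedistributeAncillaryFiles below; variable names are assumptions):
#
#   (files_all, files_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(lu.cfg.GetClusterInfo(), True)
#   # files_all: every node, files_mc: master candidates only,
#   # files_vm: VM-capable nodes, files_opt: allowed to be absent everywhere.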
4364 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4365 """Distribute additional files which are part of the cluster configuration.
4367 ConfigWriter takes care of distributing the config and ssconf files, but
4368 there are more files which should be distributed to all nodes. This function
4369 makes sure those are copied.
4371 @param lu: calling logical unit
4372 @param additional_nodes: list of nodes not in the config to distribute to
4373 @type additional_vm: boolean
4374 @param additional_vm: whether the additional nodes are vm-capable or not
4377 # Gather target nodes
4378 cluster = lu.cfg.GetClusterInfo()
4379 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4381 online_nodes = lu.cfg.GetOnlineNodeList()
4382 online_set = frozenset(online_nodes)
4383 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4385 if additional_nodes is not None:
4386 online_nodes.extend(additional_nodes)
4387 if additional_vm:
4388 vm_nodes.extend(additional_nodes)
4390 # Never distribute to master node
4391 for nodelist in [online_nodes, vm_nodes]:
4392 if master_info.name in nodelist:
4393 nodelist.remove(master_info.name)
4396 (files_all, _, files_mc, files_vm) = \
4397 _ComputeAncillaryFiles(cluster, True)
4399 # Never re-distribute configuration file from here
4400 assert not (constants.CLUSTER_CONF_FILE in files_all or
4401 constants.CLUSTER_CONF_FILE in files_vm)
4402 assert not files_mc, "Master candidates not handled in this function"
4404 filemap = [
4405 (online_nodes, files_all),
4406 (vm_nodes, files_vm),
4407 ]
4410 for (node_list, files) in filemap:
4411 for fname in files:
4412 _UploadHelper(lu, node_list, fname)
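# Illustrative call sketch (hypothetical values), e.g. right after a new node
# has been added and should receive the ancillary files as well:
#
#   _RedistributeAncillaryFiles(self, additional_nodes=[new_node.name],
#                               additional_vm=new_node.vm_capable)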
4415 class LUClusterRedistConf(NoHooksLU):
4416 """Force the redistribution of cluster configuration.
4418 This is a very simple LU.
4423 def ExpandNames(self):
4424 self.needed_locks = {
4425 locking.LEVEL_NODE: locking.ALL_SET,
4427 self.share_locks[locking.LEVEL_NODE] = 1
4429 def Exec(self, feedback_fn):
4430 """Redistribute the configuration.
4433 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4434 _RedistributeAncillaryFiles(self)
4437 class LUClusterActivateMasterIp(NoHooksLU):
4438 """Activate the master IP on the master node.
4441 def Exec(self, feedback_fn):
4442 """Activate the master IP.
4445 master_params = self.cfg.GetMasterNetworkParameters()
4446 ems = self.cfg.GetUseExternalMipScript()
4447 result = self.rpc.call_node_activate_master_ip(master_params.name,
4448 master_params, ems)
4449 result.Raise("Could not activate the master IP")
4452 class LUClusterDeactivateMasterIp(NoHooksLU):
4453 """Deactivate the master IP on the master node.
4456 def Exec(self, feedback_fn):
4457 """Deactivate the master IP.
4460 master_params = self.cfg.GetMasterNetworkParameters()
4461 ems = self.cfg.GetUseExternalMipScript()
4462 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4463 master_params, ems)
4464 result.Raise("Could not deactivate the master IP")
4467 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4468 """Sleep and poll for an instance's disk to sync.
4471 if not instance.disks or disks is not None and not disks:
4472 return True
4474 disks = _ExpandCheckDisks(instance, disks)
4477 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4479 node = instance.primary_node
4481 for dev in disks:
4482 lu.cfg.SetDiskID(dev, node)
4484 # TODO: Convert to utils.Retry
4487 degr_retries = 10 # in seconds, as we sleep 1 second each time
4488 while True:
4489 max_time = 0
4490 done = True
4491 cumul_degraded = False
4492 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4493 msg = rstats.fail_msg
4494 if msg:
4495 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4498 raise errors.RemoteError("Can't contact node %s for mirror data,"
4499 " aborting." % node)
4502 rstats = rstats.payload
4504 for i, mstat in enumerate(rstats):
4505 if mstat is None:
4506 lu.LogWarning("Can't compute data for node %s/%s",
4507 node, disks[i].iv_name)
4508 continue
4510 cumul_degraded = (cumul_degraded or
4511 (mstat.is_degraded and mstat.sync_percent is None))
4512 if mstat.sync_percent is not None:
4513 done = False
4514 if mstat.estimated_time is not None:
4515 rem_time = ("%s remaining (estimated)" %
4516 utils.FormatSeconds(mstat.estimated_time))
4517 max_time = mstat.estimated_time
4519 rem_time = "no time estimate"
4520 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4521 (disks[i].iv_name, mstat.sync_percent, rem_time))
4523 # if we're done but degraded, let's do a few small retries, to
4524 # make sure we see a stable and not transient situation; therefore
4525 # we force restart of the loop
4526 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4527 logging.info("Degraded disks found, %d retries left", degr_retries)
4528 degr_retries -= 1
4529 time.sleep(1)
4530 continue
4532 if done or oneshot:
4533 break
4535 time.sleep(min(60, max_time))
4538 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4539 return not cumul_degraded
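# Illustrative sketch (assumed caller, not from this section): a typical LU
# blocks until all of an instance's mirrored disks have converged, e.g.
#
#   if not _WaitForSync(self, instance):
#     raise errors.OpExecError("Disks are degraded after sync")
#
# whereas oneshot=True only reports the current status once instead of
# polling until the mirrors are in sync.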
4542 def _BlockdevFind(lu, node, dev, instance):
4543 """Wrapper around call_blockdev_find to annotate diskparams.
4545 @param lu: A reference to the lu object
4546 @param node: The node to call out
4547 @param dev: The device to find
4548 @param instance: The instance object the device belongs to
4549 @returns The result of the rpc call
4552 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4553 return lu.rpc.call_blockdev_find(node, disk)
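# Illustrative sketch (hypothetical variables): using the annotated wrapper
# instead of a bare call_blockdev_find ensures node-group disk parameters are
# applied before the RPC, e.g.
#
#   result = _BlockdevFind(self, node, dev, instance)
#   if not result.fail_msg and result.payload:
#     dev_status = result.payload  # per-device status returned by the node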
4556 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4557 """Wrapper around L{_CheckDiskConsistencyInner}.
4560 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4561 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4562 ldisk=ldisk)
4565 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4566 ldisk=False):
4567 """Check that mirrors are not degraded.
4569 @attention: The device has to be annotated already.
4571 The ldisk parameter, if True, will change the test from the
4572 is_degraded attribute (which represents overall non-ok status for
4573 the device(s)) to the ldisk (representing the local storage status).
4576 lu.cfg.SetDiskID(dev, node)
4578 result = True
4580 if on_primary or dev.AssembleOnSecondary():
4581 rstats = lu.rpc.call_blockdev_find(node, dev)
4582 msg = rstats.fail_msg
4583 if msg:
4584 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4585 result = False
4586 elif not rstats.payload:
4587 lu.LogWarning("Can't find disk on node %s", node)
4588 result = False
4589 else:
4590 if ldisk:
4591 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4592 else:
4593 result = result and not rstats.payload.is_degraded
4595 if dev.children:
4596 for child in dev.children:
4597 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4598 on_primary)
4600 return result
4603 class LUOobCommand(NoHooksLU):
4604 """Logical unit for OOB handling.
4608 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4610 def ExpandNames(self):
4611 """Gather locks we need.
4614 if self.op.node_names:
4615 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4616 lock_names = self.op.node_names
4618 lock_names = locking.ALL_SET
4620 self.needed_locks = {
4621 locking.LEVEL_NODE: lock_names,
4624 def CheckPrereq(self):
4625 """Check prerequisites.
4628 - the node exists in the configuration
4631 Any errors are signaled by raising errors.OpPrereqError.
4634 self.nodes = []
4635 self.master_node = self.cfg.GetMasterNode()
4637 assert self.op.power_delay >= 0.0
4639 if self.op.node_names:
4640 if (self.op.command in self._SKIP_MASTER and
4641 self.master_node in self.op.node_names):
4642 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4643 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4645 if master_oob_handler:
4646 additional_text = ("run '%s %s %s' if you want to operate on the"
4647 " master regardless") % (master_oob_handler,
4651 additional_text = "it does not support out-of-band operations"
4653 raise errors.OpPrereqError(("Operating on the master node %s is not"
4654 " allowed for %s; %s") %
4655 (self.master_node, self.op.command,
4656 additional_text), errors.ECODE_INVAL)
4657 else:
4658 self.op.node_names = self.cfg.GetNodeList()
4659 if self.op.command in self._SKIP_MASTER:
4660 self.op.node_names.remove(self.master_node)
4662 if self.op.command in self._SKIP_MASTER:
4663 assert self.master_node not in self.op.node_names
4665 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4667 raise errors.OpPrereqError("Node %s not found" % node_name,
4669 else:
4670 self.nodes.append(node)
4672 if (not self.op.ignore_status and
4673 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4674 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4675 " not marked offline") % node_name,
4678 def Exec(self, feedback_fn):
4679 """Execute OOB and return result if we expect any.
4682 master_node = self.master_node
4683 ret = []
4685 for idx, node in enumerate(utils.NiceSort(self.nodes,
4686 key=lambda node: node.name)):
4687 node_entry = [(constants.RS_NORMAL, node.name)]
4688 ret.append(node_entry)
4690 oob_program = _SupportsOob(self.cfg, node)
4692 if not oob_program:
4693 node_entry.append((constants.RS_UNAVAIL, None))
4694 continue
4696 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4697 self.op.command, oob_program, node.name)
4698 result = self.rpc.call_run_oob(master_node, oob_program,
4699 self.op.command, node.name,
4700 self.op.timeout)
4702 if result.fail_msg:
4703 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4704 node.name, result.fail_msg)
4705 node_entry.append((constants.RS_NODATA, None))
4706 else:
4707 try:
4708 self._CheckPayload(result)
4709 except errors.OpExecError, err:
4710 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4711 node.name, err)
4712 node_entry.append((constants.RS_NODATA, None))
4713 else:
4714 if self.op.command == constants.OOB_HEALTH:
4715 # For health we should log important events
4716 for item, status in result.payload:
4717 if status in [constants.OOB_STATUS_WARNING,
4718 constants.OOB_STATUS_CRITICAL]:
4719 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4720 item, node.name, status)
4722 if self.op.command == constants.OOB_POWER_ON:
4723 node.powered = True
4724 elif self.op.command == constants.OOB_POWER_OFF:
4725 node.powered = False
4726 elif self.op.command == constants.OOB_POWER_STATUS:
4727 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4728 if powered != node.powered:
4729 logging.warning(("Recorded power state (%s) of node '%s' does not"
4730 " match actual power state (%s)"), node.powered,
4733 # For configuration changing commands we should update the node
4734 if self.op.command in (constants.OOB_POWER_ON,
4735 constants.OOB_POWER_OFF):
4736 self.cfg.Update(node, feedback_fn)
4738 node_entry.append((constants.RS_NORMAL, result.payload))
4740 if (self.op.command == constants.OOB_POWER_ON and
4741 idx < len(self.nodes) - 1):
4742 time.sleep(self.op.power_delay)
4744 return ret
4746 def _CheckPayload(self, result):
4747 """Checks if the payload is valid.
4749 @param result: RPC result
4750 @raises errors.OpExecError: If payload is not valid
4753 errs = []
4754 if self.op.command == constants.OOB_HEALTH:
4755 if not isinstance(result.payload, list):
4756 errs.append("command 'health' is expected to return a list but got %s" %
4757 type(result.payload))
4758 else:
4759 for item, status in result.payload:
4760 if status not in constants.OOB_STATUSES:
4761 errs.append("health item '%s' has invalid status '%s'" %
4764 if self.op.command == constants.OOB_POWER_STATUS:
4765 if not isinstance(result.payload, dict):
4766 errs.append("power-status is expected to return a dict but got %s" %
4767 type(result.payload))
4769 if self.op.command in [
4770 constants.OOB_POWER_ON,
4771 constants.OOB_POWER_OFF,
4772 constants.OOB_POWER_CYCLE,
4774 if result.payload is not None:
4775 errs.append("%s is expected to not return payload but got '%s'" %
4776 (self.op.command, result.payload))
4779 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4780 utils.CommaJoin(errs))
4783 class _OsQuery(_QueryBase):
4784 FIELDS = query.OS_FIELDS
4786 def ExpandNames(self, lu):
4787 # Lock all nodes in shared mode
4788 # Temporary removal of locks, should be reverted later
4789 # TODO: reintroduce locks when they are lighter-weight
4790 lu.needed_locks = {}
4791 #self.share_locks[locking.LEVEL_NODE] = 1
4792 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4794 # The following variables interact with _QueryBase._GetNames
4795 if self.names:
4796 self.wanted = self.names
4797 else:
4798 self.wanted = locking.ALL_SET
4800 self.do_locking = self.use_locking
4802 def DeclareLocks(self, lu, level):
4803 pass
4805 @staticmethod
4806 def _DiagnoseByOS(rlist):
4807 """Remaps a per-node return list into an a per-os per-node dictionary
4809 @param rlist: a map with node names as keys and OS objects as values
4812 @return: a dictionary with osnames as keys and as value another
4813 map, with nodes as keys and tuples of (path, status, diagnose,
4814 variants, parameters, api_versions) as values, eg::
4816 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4817 (/srv/..., False, "invalid api")],
4818 "node2": [(/srv/..., True, "", [], [])]}
4822 all_os = {}
4823 # we build here the list of nodes that didn't fail the RPC (at RPC
4824 # level), so that nodes with a non-responding node daemon don't
4825 # make all OSes invalid
4826 good_nodes = [node_name for node_name in rlist
4827 if not rlist[node_name].fail_msg]
4828 for node_name, nr in rlist.items():
4829 if nr.fail_msg or not nr.payload:
4830 continue
4831 for (name, path, status, diagnose, variants,
4832 params, api_versions) in nr.payload:
4833 if name not in all_os:
4834 # build a list of nodes for this os containing empty lists
4835 # for each node in node_list
4836 all_os[name] = {}
4837 for nname in good_nodes:
4838 all_os[name][nname] = []
4839 # convert params from [name, help] to (name, help)
4840 params = [tuple(v) for v in params]
4841 all_os[name][node_name].append((path, status, diagnose,
4842 variants, params, api_versions))
4843 return all_os
4845 def _GetQueryData(self, lu):
4846 """Computes the list of nodes and their attributes.
4849 # Locking is not used
4850 assert not (compat.any(lu.glm.is_owned(level)
4851 for level in locking.LEVELS
4852 if level != locking.LEVEL_CLUSTER) or
4853 self.do_locking or self.use_locking)
4855 valid_nodes = [node.name
4856 for node in lu.cfg.GetAllNodesInfo().values()
4857 if not node.offline and node.vm_capable]
4858 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4859 cluster = lu.cfg.GetClusterInfo()
4861 data = {}
4863 for (os_name, os_data) in pol.items():
4864 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4865 hidden=(os_name in cluster.hidden_os),
4866 blacklisted=(os_name in cluster.blacklisted_os))
4868 variants = set()
4869 parameters = set()
4870 api_versions = set()
4872 for idx, osl in enumerate(os_data.values()):
4873 info.valid = bool(info.valid and osl and osl[0][1])
4874 if not info.valid:
4875 break
4877 (node_variants, node_params, node_api) = osl[0][3:6]
4878 if idx == 0:
4879 # first entry
4880 variants.update(node_variants)
4881 parameters.update(node_params)
4882 api_versions.update(node_api)
4883 else:
4884 # Filter out inconsistent values
4885 variants.intersection_update(node_variants)
4886 parameters.intersection_update(node_params)
4887 api_versions.intersection_update(node_api)
4889 info.variants = list(variants)
4890 info.parameters = list(parameters)
4891 info.api_versions = list(api_versions)
4893 data[os_name] = info
4895 # Prepare data in requested order
4896 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4897 if name in data]
4900 class LUOsDiagnose(NoHooksLU):
4901 """Logical unit for OS diagnose/query.
4906 @staticmethod
4907 def _BuildFilter(fields, names):
4908 """Builds a filter for querying OSes.
4911 name_filter = qlang.MakeSimpleFilter("name", names)
4913 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4914 # respective field is not requested
4915 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4916 for fname in ["hidden", "blacklisted"]
4917 if fname not in fields]
4918 if "valid" not in fields:
4919 status_filter.append([qlang.OP_TRUE, "valid"])
4921 if status_filter:
4922 status_filter.insert(0, qlang.OP_AND)
4923 else:
4924 status_filter = None
4926 if name_filter and status_filter:
4927 return [qlang.OP_AND, name_filter, status_filter]
4928 elif name_filter:
4929 return name_filter
4930 else:
4931 return status_filter
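# Illustrative sketch of the filter produced above (assuming "hidden",
# "blacklisted" and "valid" are not among the requested fields):
#
#   _BuildFilter(["name", "variants"], ["debian-8"])
#   # -> [qlang.OP_AND, <name filter from qlang.MakeSimpleFilter>,
#   #     [qlang.OP_AND, [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#   #                    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#   #                    [qlang.OP_TRUE, "valid"]]]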
4933 def CheckArguments(self):
4934 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4935 self.op.output_fields, False)
4937 def ExpandNames(self):
4938 self.oq.ExpandNames(self)
4940 def Exec(self, feedback_fn):
4941 return self.oq.OldStyleQuery(self)
4944 class LUNodeRemove(LogicalUnit):
4945 """Logical unit for removing a node.
4948 HPATH = "node-remove"
4949 HTYPE = constants.HTYPE_NODE
4951 def BuildHooksEnv(self):
4956 "OP_TARGET": self.op.node_name,
4957 "NODE_NAME": self.op.node_name,
4960 def BuildHooksNodes(self):
4961 """Build hooks nodes.
4963 This doesn't run on the target node in the pre phase as a failed
4964 node would then be impossible to remove.
4967 all_nodes = self.cfg.GetNodeList()
4968 try:
4969 all_nodes.remove(self.op.node_name)
4970 except ValueError:
4971 pass
4972 return (all_nodes, all_nodes)
4974 def CheckPrereq(self):
4975 """Check prerequisites.
4978 - the node exists in the configuration
4979 - it does not have primary or secondary instances
4980 - it's not the master
4982 Any errors are signaled by raising errors.OpPrereqError.
4985 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4986 node = self.cfg.GetNodeInfo(self.op.node_name)
4987 assert node is not None
4989 masternode = self.cfg.GetMasterNode()
4990 if node.name == masternode:
4991 raise errors.OpPrereqError("Node is the master node, failover to another"
4992 " node is required", errors.ECODE_INVAL)
4994 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4995 if node.name in instance.all_nodes:
4996 raise errors.OpPrereqError("Instance %s is still running on the node,"
4997 " please remove first" % instance_name,
4999 self.op.node_name = node.name
5000 self.node = node
5002 def Exec(self, feedback_fn):
5003 """Removes the node from the cluster.
5007 logging.info("Stopping the node daemon and removing configs from node %s",
5010 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5012 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5013 "Not owning BGL"
5015 # Promote nodes to master candidate as needed
5016 _AdjustCandidatePool(self, exceptions=[node.name])
5017 self.context.RemoveNode(node.name)
5019 # Run post hooks on the node before it's removed
5020 _RunPostHook(self, node.name)
5022 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5023 msg = result.fail_msg
5025 self.LogWarning("Errors encountered on the remote node while leaving"
5026 " the cluster: %s", msg)
5028 # Remove node from our /etc/hosts
5029 if self.cfg.GetClusterInfo().modify_etc_hosts:
5030 master_node = self.cfg.GetMasterNode()
5031 result = self.rpc.call_etc_hosts_modify(master_node,
5032 constants.ETC_HOSTS_REMOVE,
5033 node.name, None)
5034 result.Raise("Can't update hosts file with new host data")
5035 _RedistributeAncillaryFiles(self)
5038 class _NodeQuery(_QueryBase):
5039 FIELDS = query.NODE_FIELDS
5041 def ExpandNames(self, lu):
5042 lu.needed_locks = {}
5043 lu.share_locks = _ShareAll()
5045 if self.names:
5046 self.wanted = _GetWantedNodes(lu, self.names)
5047 else:
5048 self.wanted = locking.ALL_SET
5050 self.do_locking = (self.use_locking and
5051 query.NQ_LIVE in self.requested_data)
5053 if self.do_locking:
5054 # If any non-static field is requested we need to lock the nodes
5055 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5057 def DeclareLocks(self, lu, level):
5058 pass
5060 def _GetQueryData(self, lu):
5061 """Computes the list of nodes and their attributes.
5064 all_info = lu.cfg.GetAllNodesInfo()
5066 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5068 # Gather data as requested
5069 if query.NQ_LIVE in self.requested_data:
5070 # filter out non-vm_capable nodes
5071 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5073 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5074 [lu.cfg.GetHypervisorType()])
5075 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5076 for (name, nresult) in node_data.items()
5077 if not nresult.fail_msg and nresult.payload)
5078 else:
5079 live_data = None
5081 if query.NQ_INST in self.requested_data:
5082 node_to_primary = dict([(name, set()) for name in nodenames])
5083 node_to_secondary = dict([(name, set()) for name in nodenames])
5085 inst_data = lu.cfg.GetAllInstancesInfo()
5087 for inst in inst_data.values():
5088 if inst.primary_node in node_to_primary:
5089 node_to_primary[inst.primary_node].add(inst.name)
5090 for secnode in inst.secondary_nodes:
5091 if secnode in node_to_secondary:
5092 node_to_secondary[secnode].add(inst.name)
5093 else:
5094 node_to_primary = None
5095 node_to_secondary = None
5097 if query.NQ_OOB in self.requested_data:
5098 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5099 for name, node in all_info.iteritems())
5100 else:
5101 oob_support = None
5103 if query.NQ_GROUP in self.requested_data:
5104 groups = lu.cfg.GetAllNodeGroupsInfo()
5105 else:
5106 groups = {}
5108 return query.NodeQueryData([all_info[name] for name in nodenames],
5109 live_data, lu.cfg.GetMasterNode(),
5110 node_to_primary, node_to_secondary, groups,
5111 oob_support, lu.cfg.GetClusterInfo())
5114 class LUNodeQuery(NoHooksLU):
5115 """Logical unit for querying nodes.
5118 # pylint: disable=W0142
5121 def CheckArguments(self):
5122 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5123 self.op.output_fields, self.op.use_locking)
5125 def ExpandNames(self):
5126 self.nq.ExpandNames(self)
5128 def DeclareLocks(self, level):
5129 self.nq.DeclareLocks(self, level)
5131 def Exec(self, feedback_fn):
5132 return self.nq.OldStyleQuery(self)
5135 class LUNodeQueryvols(NoHooksLU):
5136 """Logical unit for getting volumes on node(s).
5140 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5141 _FIELDS_STATIC = utils.FieldSet("node")
5143 def CheckArguments(self):
5144 _CheckOutputFields(static=self._FIELDS_STATIC,
5145 dynamic=self._FIELDS_DYNAMIC,
5146 selected=self.op.output_fields)
5148 def ExpandNames(self):
5149 self.share_locks = _ShareAll()
5150 self.needed_locks = {}
5152 if not self.op.nodes:
5153 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5155 self.needed_locks[locking.LEVEL_NODE] = \
5156 _GetWantedNodes(self, self.op.nodes)
5158 def Exec(self, feedback_fn):
5159 """Computes the list of nodes and their attributes.
5162 nodenames = self.owned_locks(locking.LEVEL_NODE)
5163 volumes = self.rpc.call_node_volumes(nodenames)
5165 ilist = self.cfg.GetAllInstancesInfo()
5166 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5168 output = []
5169 for node in nodenames:
5170 nresult = volumes[node]
5171 if nresult.offline:
5172 continue
5173 msg = nresult.fail_msg
5174 if msg:
5175 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5176 continue
5178 node_vols = sorted(nresult.payload,
5179 key=operator.itemgetter("dev"))
5181 for vol in node_vols:
5182 node_output = []
5183 for field in self.op.output_fields:
5184 if field == "node":
5185 val = node
5186 elif field == "phys":
5187 val = vol["dev"]
5190 elif field == "name":
5191 val = vol["name"]
5192 elif field == "size":
5193 val = int(float(vol["size"]))
5194 elif field == "instance":
5195 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5197 raise errors.ParameterError(field)
5198 node_output.append(str(val))
5200 output.append(node_output)
5202 return output
5205 class LUNodeQueryStorage(NoHooksLU):
5206 """Logical unit for getting information on storage units on node(s).
5209 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5212 def CheckArguments(self):
5213 _CheckOutputFields(static=self._FIELDS_STATIC,
5214 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5215 selected=self.op.output_fields)
5217 def ExpandNames(self):
5218 self.share_locks = _ShareAll()
5219 self.needed_locks = {}
5221 if self.op.nodes:
5222 self.needed_locks[locking.LEVEL_NODE] = \
5223 _GetWantedNodes(self, self.op.nodes)
5224 else:
5225 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5227 def Exec(self, feedback_fn):
5228 """Computes the list of nodes and their attributes.
5231 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5233 # Always get name to sort by
5234 if constants.SF_NAME in self.op.output_fields:
5235 fields = self.op.output_fields[:]
5236 else:
5237 fields = [constants.SF_NAME] + self.op.output_fields
5239 # Never ask for node or type as it's only known to the LU
5240 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5241 while extra in fields:
5242 fields.remove(extra)
5244 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5245 name_idx = field_idx[constants.SF_NAME]
5247 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5248 data = self.rpc.call_storage_list(self.nodes,
5249 self.op.storage_type, st_args,
5250 self.op.name, fields)
5254 for node in utils.NiceSort(self.nodes):
5255 nresult = data[node]
5256 if nresult.offline:
5257 continue
5259 msg = nresult.fail_msg
5260 if msg:
5261 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5262 continue
5264 rows = dict([(row[name_idx], row) for row in nresult.payload])
5266 for name in utils.NiceSort(rows.keys()):
5267 row = rows[name]
5269 out = []
5271 for field in self.op.output_fields:
5272 if field == constants.SF_NODE:
5273 val = node
5274 elif field == constants.SF_TYPE:
5275 val = self.op.storage_type
5276 elif field in field_idx:
5277 val = row[field_idx[field]]
5278 else:
5279 raise errors.ParameterError(field)
5288 class _InstanceQuery(_QueryBase):
5289 FIELDS = query.INSTANCE_FIELDS
5291 def ExpandNames(self, lu):
5292 lu.needed_locks = {}
5293 lu.share_locks = _ShareAll()
5295 if self.names:
5296 self.wanted = _GetWantedInstances(lu, self.names)
5297 else:
5298 self.wanted = locking.ALL_SET
5300 self.do_locking = (self.use_locking and
5301 query.IQ_LIVE in self.requested_data)
5302 if self.do_locking:
5303 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5304 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5305 lu.needed_locks[locking.LEVEL_NODE] = []
5306 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5308 self.do_grouplocks = (self.do_locking and
5309 query.IQ_NODES in self.requested_data)
5311 def DeclareLocks(self, lu, level):
5313 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5314 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5316 # Lock all groups used by instances optimistically; this requires going
5317 # via the node before it's locked, requiring verification later on
5318 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5319 set(group_uuid
5320 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5321 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5322 elif level == locking.LEVEL_NODE:
5323 lu._LockInstancesNodes() # pylint: disable=W0212
5325 @staticmethod
5326 def _CheckGroupLocks(lu):
5327 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5328 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5330 # Check if node groups for locked instances are still correct
5331 for instance_name in owned_instances:
5332 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5334 def _GetQueryData(self, lu):
5335 """Computes the list of instances and their attributes.
5338 if self.do_grouplocks:
5339 self._CheckGroupLocks(lu)
5341 cluster = lu.cfg.GetClusterInfo()
5342 all_info = lu.cfg.GetAllInstancesInfo()
5344 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5346 instance_list = [all_info[name] for name in instance_names]
5347 nodes = frozenset(itertools.chain(*(inst.all_nodes
5348 for inst in instance_list)))
5349 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5350 bad_nodes = []
5351 offline_nodes = []
5352 wrongnode_inst = set()
5354 # Gather data as requested
5355 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5357 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5359 result = node_data[name]
5361 # offline nodes will be in both lists
5362 assert result.fail_msg
5363 offline_nodes.append(name)
5364 if result.fail_msg:
5365 bad_nodes.append(name)
5366 elif result.payload:
5367 for inst in result.payload:
5368 if inst in all_info:
5369 if all_info[inst].primary_node == name:
5370 live_data.update(result.payload)
5371 else:
5372 wrongnode_inst.add(inst)
5374 # orphan instance; we don't list it here as we don't
5375 # handle this case yet in the output of instance listing
5376 logging.warning("Orphan instance '%s' found on node %s",
5378 # else no instance is alive
5382 if query.IQ_DISKUSAGE in self.requested_data:
5383 disk_usage = dict((inst.name,
5384 _ComputeDiskSize(inst.disk_template,
5385 [{constants.IDISK_SIZE: disk.size}
5386 for disk in inst.disks]))
5387 for inst in instance_list)
5388 else:
5389 disk_usage = None
5391 if query.IQ_CONSOLE in self.requested_data:
5392 consinfo = {}
5393 for inst in instance_list:
5394 if inst.name in live_data:
5395 # Instance is running
5396 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5397 else:
5398 consinfo[inst.name] = None
5399 assert set(consinfo.keys()) == set(instance_names)
5400 else:
5401 consinfo = None
5403 if query.IQ_NODES in self.requested_data:
5404 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5405 instance_list)))
5406 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5407 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5408 for uuid in set(map(operator.attrgetter("group"),
5414 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5415 disk_usage, offline_nodes, bad_nodes,
5416 live_data, wrongnode_inst, consinfo,
5417 nodes, groups)
5420 class LUQuery(NoHooksLU):
5421 """Query for resources/items of a certain kind.
5424 # pylint: disable=W0142
5427 def CheckArguments(self):
5428 qcls = _GetQueryImplementation(self.op.what)
5430 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5432 def ExpandNames(self):
5433 self.impl.ExpandNames(self)
5435 def DeclareLocks(self, level):
5436 self.impl.DeclareLocks(self, level)
5438 def Exec(self, feedback_fn):
5439 return self.impl.NewStyleQuery(self)
5442 class LUQueryFields(NoHooksLU):
5443 """Query for resources/items of a certain kind.
5446 # pylint: disable=W0142
5449 def CheckArguments(self):
5450 self.qcls = _GetQueryImplementation(self.op.what)
5452 def ExpandNames(self):
5453 self.needed_locks = {}
5455 def Exec(self, feedback_fn):
5456 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5459 class LUNodeModifyStorage(NoHooksLU):
5460 """Logical unit for modifying a storage volume on a node.
5465 def CheckArguments(self):
5466 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5468 storage_type = self.op.storage_type
5470 try:
5471 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5472 except KeyError:
5473 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5474 " modified" % storage_type,
5477 diff = set(self.op.changes.keys()) - modifiable
5479 raise errors.OpPrereqError("The following fields can not be modified for"
5480 " storage units of type '%s': %r" %
5481 (storage_type, list(diff)),
5484 def ExpandNames(self):
5485 self.needed_locks = {
5486 locking.LEVEL_NODE: self.op.node_name,
5489 def Exec(self, feedback_fn):
5490 """Computes the list of nodes and their attributes.
5493 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5494 result = self.rpc.call_storage_modify(self.op.node_name,
5495 self.op.storage_type, st_args,
5496 self.op.name, self.op.changes)
5497 result.Raise("Failed to modify storage unit '%s' on %s" %
5498 (self.op.name, self.op.node_name))
5501 class LUNodeAdd(LogicalUnit):
5502 """Logical unit for adding node to the cluster.
5505 HPATH = "node-add"
5506 HTYPE = constants.HTYPE_NODE
5507 _NFLAGS = ["master_capable", "vm_capable"]
5509 def CheckArguments(self):
5510 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5511 # validate/normalize the node name
5512 self.hostname = netutils.GetHostname(name=self.op.node_name,
5513 family=self.primary_ip_family)
5514 self.op.node_name = self.hostname.name
5516 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5517 raise errors.OpPrereqError("Cannot readd the master node",
5520 if self.op.readd and self.op.group:
5521 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5522 " being readded", errors.ECODE_INVAL)
5524 def BuildHooksEnv(self):
5527 This will run on all nodes before, and on all nodes + the new node after.
5531 "OP_TARGET": self.op.node_name,
5532 "NODE_NAME": self.op.node_name,
5533 "NODE_PIP": self.op.primary_ip,
5534 "NODE_SIP": self.op.secondary_ip,
5535 "MASTER_CAPABLE": str(self.op.master_capable),
5536 "VM_CAPABLE": str(self.op.vm_capable),
5539 def BuildHooksNodes(self):
5540 """Build hooks nodes.
5543 # Exclude added node
5544 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5545 post_nodes = pre_nodes + [self.op.node_name, ]
5547 return (pre_nodes, post_nodes)
5549 def CheckPrereq(self):
5550 """Check prerequisites.
5553 - the new node is not already in the config
5555 - its parameters (single/dual homed) match the cluster
5557 Any errors are signaled by raising errors.OpPrereqError.
5560 cfg = self.cfg
5561 hostname = self.hostname
5562 node = hostname.name
5563 primary_ip = self.op.primary_ip = hostname.ip
5564 if self.op.secondary_ip is None:
5565 if self.primary_ip_family == netutils.IP6Address.family:
5566 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5567 " IPv4 address must be given as secondary",
5569 self.op.secondary_ip = primary_ip
5571 secondary_ip = self.op.secondary_ip
5572 if not netutils.IP4Address.IsValid(secondary_ip):
5573 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5574 " address" % secondary_ip, errors.ECODE_INVAL)
5576 node_list = cfg.GetNodeList()
5577 if not self.op.readd and node in node_list:
5578 raise errors.OpPrereqError("Node %s is already in the configuration" %
5579 node, errors.ECODE_EXISTS)
5580 elif self.op.readd and node not in node_list:
5581 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5584 self.changed_primary_ip = False
5586 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5587 if self.op.readd and node == existing_node_name:
5588 if existing_node.secondary_ip != secondary_ip:
5589 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5590 " address configuration as before",
5592 if existing_node.primary_ip != primary_ip:
5593 self.changed_primary_ip = True
5595 continue
5597 if (existing_node.primary_ip == primary_ip or
5598 existing_node.secondary_ip == primary_ip or
5599 existing_node.primary_ip == secondary_ip or
5600 existing_node.secondary_ip == secondary_ip):
5601 raise errors.OpPrereqError("New node ip address(es) conflict with"
5602 " existing node %s" % existing_node.name,
5603 errors.ECODE_NOTUNIQUE)
5605 # After this 'if' block, None is no longer a valid value for the
5606 # _capable op attributes
5607 if self.op.readd:
5608 old_node = self.cfg.GetNodeInfo(node)
5609 assert old_node is not None, "Can't retrieve locked node %s" % node
5610 for attr in self._NFLAGS:
5611 if getattr(self.op, attr) is None:
5612 setattr(self.op, attr, getattr(old_node, attr))
5613 else:
5614 for attr in self._NFLAGS:
5615 if getattr(self.op, attr) is None:
5616 setattr(self.op, attr, True)
5618 if self.op.readd and not self.op.vm_capable:
5619 pri, sec = cfg.GetNodeInstances(node)
5621 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5622 " flag set to false, but it already holds"
5623 " instances" % node,
5626 # check that the type of the node (single versus dual homed) is the
5627 # same as for the master
5628 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5629 master_singlehomed = myself.secondary_ip == myself.primary_ip
5630 newbie_singlehomed = secondary_ip == primary_ip
5631 if master_singlehomed != newbie_singlehomed:
5632 if master_singlehomed:
5633 raise errors.OpPrereqError("The master has no secondary ip but the"
5634 " new node has one",
5637 raise errors.OpPrereqError("The master has a secondary ip but the"
5638 " new node doesn't have one",
5641 # checks reachability
5642 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5643 raise errors.OpPrereqError("Node not reachable by ping",
5644 errors.ECODE_ENVIRON)
5646 if not newbie_singlehomed:
5647 # check reachability from my secondary ip to newbie's secondary ip
5648 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5649 source=myself.secondary_ip):
5650 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5651 " based ping to node daemon port",
5652 errors.ECODE_ENVIRON)
5659 if self.op.master_capable:
5660 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5662 self.master_candidate = False
5664 if self.op.readd:
5665 self.new_node = old_node
5666 else:
5667 node_group = cfg.LookupNodeGroup(self.op.group)
5668 self.new_node = objects.Node(name=node,
5669 primary_ip=primary_ip,
5670 secondary_ip=secondary_ip,
5671 master_candidate=self.master_candidate,
5672 offline=False, drained=False,
5673 group=node_group, ndparams={})
5675 if self.op.ndparams:
5676 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5678 if self.op.hv_state:
5679 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5681 if self.op.disk_state:
5682 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5684 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5685 # it a property on the base class.
5686 result = rpc.DnsOnlyRunner().call_version([node])[node]
5687 result.Raise("Can't get version information from node %s" % node)
5688 if constants.PROTOCOL_VERSION == result.payload:
5689 logging.info("Communication to node %s fine, sw version %s match",
5690 node, result.payload)
5691 else:
5692 raise errors.OpPrereqError("Version mismatch master version %s,"
5693 " node version %s" %
5694 (constants.PROTOCOL_VERSION, result.payload),
5695 errors.ECODE_ENVIRON)
5697 def Exec(self, feedback_fn):
5698 """Adds the new node to the cluster.
5701 new_node = self.new_node
5702 node = new_node.name
5704 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5705 "Not owning BGL"
5707 # We are adding a new node, so we assume it is powered
5708 new_node.powered = True
5710 # for re-adds, reset the offline/drained/master-candidate flags;
5711 # we need to reset here, otherwise offline would prevent RPC calls
5712 # later in the procedure; this also means that if the re-add
5713 # fails, we are left with a non-offlined, broken node
5714 if self.op.readd:
5715 new_node.drained = new_node.offline = False # pylint: disable=W0201
5716 self.LogInfo("Readding a node, the offline/drained flags were reset")
5717 # if we demote the node, we do cleanup later in the procedure
5718 new_node.master_candidate = self.master_candidate
5719 if self.changed_primary_ip:
5720 new_node.primary_ip = self.op.primary_ip
5722 # copy the master/vm_capable flags
5723 for attr in self._NFLAGS:
5724 setattr(new_node, attr, getattr(self.op, attr))
5726 # notify the user about any possible mc promotion
5727 if new_node.master_candidate:
5728 self.LogInfo("Node will be a master candidate")
5730 if self.op.ndparams:
5731 new_node.ndparams = self.op.ndparams
5732 else:
5733 new_node.ndparams = {}
5735 if self.op.hv_state:
5736 new_node.hv_state_static = self.new_hv_state
5738 if self.op.disk_state:
5739 new_node.disk_state_static = self.new_disk_state
5741 # Add node to our /etc/hosts, and add key to known_hosts
5742 if self.cfg.GetClusterInfo().modify_etc_hosts:
5743 master_node = self.cfg.GetMasterNode()
5744 result = self.rpc.call_etc_hosts_modify(master_node,
5745 constants.ETC_HOSTS_ADD,
5746 self.hostname.name, self.hostname.ip)
5748 result.Raise("Can't update hosts file with new host data")
5750 if new_node.secondary_ip != new_node.primary_ip:
5751 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5752 False)
5754 node_verify_list = [self.cfg.GetMasterNode()]
5755 node_verify_param = {
5756 constants.NV_NODELIST: ([node], {}),
5757 # TODO: do a node-net-test as well?
5758 }
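# Note: this asks the master node to verify that it can reach the new node
# over SSH and that hostnames resolve consistently (NV_NODELIST); any
# failure is reported per node via feedback_fn below.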
5760 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5761 self.cfg.GetClusterName())
5762 for verifier in node_verify_list:
5763 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5764 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5765 if nl_payload:
5766 for failed in nl_payload:
5767 feedback_fn("ssh/hostname verification failed"
5768 " (checking from %s): %s" %
5769 (verifier, nl_payload[failed]))
5770 raise errors.OpExecError("ssh/hostname verification failed")
5772 if self.op.readd:
5773 _RedistributeAncillaryFiles(self)
5774 self.context.ReaddNode(new_node)
5775 # make sure we redistribute the config
5776 self.cfg.Update(new_node, feedback_fn)
5777 # and make sure the new node will not have old files around
5778 if not new_node.master_candidate:
5779 result = self.rpc.call_node_demote_from_mc(new_node.name)
5780 msg = result.fail_msg
5782 self.LogWarning("Node failed to demote itself from master"
5783 " candidate status: %s" % msg)
5784 else:
5785 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5786 additional_vm=self.op.vm_capable)
5787 self.context.AddNode(new_node, self.proc.GetECId())
5790 class LUNodeSetParams(LogicalUnit):
5791 """Modifies the parameters of a node.
5793 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5794 to the node role (as _ROLE_*)
5795 @cvar _R2F: a dictionary from node role to tuples of flags
5796 @cvar _FLAGS: a list of attribute names corresponding to the flags
5799 HPATH = "node-modify"
5800 HTYPE = constants.HTYPE_NODE
5802 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
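# A node is in exactly one of the four roles above; the role is just a
# convenience view over the (master_candidate, drained, offline) flag
# triple, converted via _F2R/_R2F below.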
5803 _F2R = {
5804 (True, False, False): _ROLE_CANDIDATE,
5805 (False, True, False): _ROLE_DRAINED,
5806 (False, False, True): _ROLE_OFFLINE,
5807 (False, False, False): _ROLE_REGULAR,
5808 }
5809 _R2F = dict((v, k) for k, v in _F2R.items())
5810 _FLAGS = ["master_candidate", "drained", "offline"]
5812 def CheckArguments(self):
5813 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5814 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5815 self.op.master_capable, self.op.vm_capable,
5816 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5817 self.op.disk_state]
5818 if all_mods.count(None) == len(all_mods):
5819 raise errors.OpPrereqError("Please pass at least one modification",
5821 if all_mods.count(True) > 1:
5822 raise errors.OpPrereqError("Can't set the node into more than one"
5823 " state at the same time",
5826 # Boolean value that tells us whether we might be demoting from MC
5827 self.might_demote = (self.op.master_candidate == False or
5828 self.op.offline == True or
5829 self.op.drained == True or
5830 self.op.master_capable == False)
5832 if self.op.secondary_ip:
5833 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5834 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5835 " address" % self.op.secondary_ip,
5838 self.lock_all = self.op.auto_promote and self.might_demote
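# Demoting this node from master candidate with auto_promote set may
# require promoting another node, so in that case we must hold all node
# locks (see the _AdjustCandidatePool call in Exec).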
5839 self.lock_instances = self.op.secondary_ip is not None
5841 def _InstanceFilter(self, instance):
5842 """Filter for getting affected instances.
5845 return (instance.disk_template in constants.DTS_INT_MIRROR and
5846 self.op.node_name in instance.all_nodes)
5848 def ExpandNames(self):
5849 if self.lock_all:
5850 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5851 else:
5852 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5854 # Since modifying a node can have severe effects on currently running
5855 # operations the resource lock is at least acquired in shared mode
5856 self.needed_locks[locking.LEVEL_NODE_RES] = \
5857 self.needed_locks[locking.LEVEL_NODE]
5859 # Get node resource and instance locks in shared mode; they are not used
5860 # for anything but read-only access
5861 self.share_locks[locking.LEVEL_NODE_RES] = 1
5862 self.share_locks[locking.LEVEL_INSTANCE] = 1
5864 if self.lock_instances:
5865 self.needed_locks[locking.LEVEL_INSTANCE] = \
5866 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5868 def BuildHooksEnv(self):
5869 """Build hooks env.
5871 This runs on the master node.
5873 """
5874 return {
5875 "OP_TARGET": self.op.node_name,
5876 "MASTER_CANDIDATE": str(self.op.master_candidate),
5877 "OFFLINE": str(self.op.offline),
5878 "DRAINED": str(self.op.drained),
5879 "MASTER_CAPABLE": str(self.op.master_capable),
5880 "VM_CAPABLE": str(self.op.vm_capable),
5883 def BuildHooksNodes(self):
5884 """Build hooks nodes.
5887 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5890 def CheckPrereq(self):
5891 """Check prerequisites.
5893 This only checks the instance list against the existing names.
5896 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5898 if self.lock_instances:
5899 affected_instances = \
5900 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5902 # Verify instance locks
5903 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5904 wanted_instances = frozenset(affected_instances.keys())
5905 if wanted_instances - owned_instances:
5906 raise errors.OpPrereqError("Instances affected by changing node %s's"
5907 " secondary IP address have changed since"
5908 " locks were acquired, wanted '%s', have"
5909 " '%s'; retry the operation" %
5911 utils.CommaJoin(wanted_instances),
5912 utils.CommaJoin(owned_instances)),
5915 affected_instances = None
5917 if (self.op.master_candidate is not None or
5918 self.op.drained is not None or
5919 self.op.offline is not None):
5920 # we can't change the master's node flags
5921 if self.op.node_name == self.cfg.GetMasterNode():
5922 raise errors.OpPrereqError("The master role can be changed"
5923 " only via master-failover",
5926 if self.op.master_candidate and not node.master_capable:
5927 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5928 " it a master candidate" % node.name,
5931 if self.op.vm_capable == False:
5932 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5934 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5935 " the vm_capable flag" % node.name,
5938 if node.master_candidate and self.might_demote and not self.lock_all:
5939 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5940 # check if after removing the current node, we're missing master
5941 # candidates
5942 (mc_remaining, mc_should, _) = \
5943 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5944 if mc_remaining < mc_should:
5945 raise errors.OpPrereqError("Not enough master candidates, please"
5946 " pass auto promote option to allow"
5947 " promotion (--auto-promote or RAPI"
5948 " auto_promote=True)", errors.ECODE_STATE)
5950 self.old_flags = old_flags = (node.master_candidate,
5951 node.drained, node.offline)
5952 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5953 self.old_role = old_role = self._F2R[old_flags]
5955 # Check for ineffective changes
5956 for attr in self._FLAGS:
5957 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5958 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5959 setattr(self.op, attr, None)
5961 # Past this point, any flag change to False means a transition
5962 # away from the respective state, as only real changes are kept
5964 # TODO: We might query the real power state if it supports OOB
5965 if _SupportsOob(self.cfg, node):
5966 if self.op.offline is False and not (node.powered or
5967 self.op.powered == True):
5968 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5969 " offline status can be reset") %
5971 elif self.op.powered is not None:
5972 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5973 " as it does not support out-of-band"
5974 " handling") % self.op.node_name)
5976 # If we're being deofflined/drained, we'll MC ourself if needed
5977 if (self.op.drained == False or self.op.offline == False or
5978 (self.op.master_capable and not node.master_capable)):
5979 if _DecideSelfPromotion(self):
5980 self.op.master_candidate = True
5981 self.LogInfo("Auto-promoting node to master candidate")
5983 # If we're no longer master capable, we'll demote ourselves from MC
5984 if self.op.master_capable == False and node.master_candidate:
5985 self.LogInfo("Demoting from master candidate")
5986 self.op.master_candidate = False
5989 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5990 if self.op.master_candidate:
5991 new_role = self._ROLE_CANDIDATE
5992 elif self.op.drained:
5993 new_role = self._ROLE_DRAINED
5994 elif self.op.offline:
5995 new_role = self._ROLE_OFFLINE
5996 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5997 # False is still in new flags, which means we're un-setting (the
5999 new_role = self._ROLE_REGULAR
6000 else: # no new flags, nothing, keep old role
6003 self.new_role = new_role
6005 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6006 # Trying to transition out of offline status
6007 result = self.rpc.call_version([node.name])[node.name]
6009 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6010 " to report its version: %s" %
6011 (node.name, result.fail_msg),
6014 self.LogWarning("Transitioning node from offline to online state"
6015 " without using re-add. Please make sure the node"
6018 if self.op.secondary_ip:
6019 # Ok even without locking, because this can't be changed by any LU
6020 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6021 master_singlehomed = master.secondary_ip == master.primary_ip
6022 if master_singlehomed and self.op.secondary_ip:
6023 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
6024 " homed cluster", errors.ECODE_INVAL)
6026 assert not (frozenset(affected_instances) -
6027 self.owned_locks(locking.LEVEL_INSTANCE))
6029 if node.offline:
6030 if affected_instances:
6031 raise errors.OpPrereqError("Cannot change secondary IP address:"
6032 " offline node has instances (%s)"
6033 " configured to use it" %
6034 utils.CommaJoin(affected_instances.keys()))
6035 else:
6036 # On online nodes, check that no instances are running, and that
6037 # the node has the new ip and we can reach it.
6038 for instance in affected_instances.values():
6039 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6040 msg="cannot change secondary ip")
6042 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6043 if master.name != node.name:
6044 # check reachability from master secondary ip to new secondary ip
6045 if not netutils.TcpPing(self.op.secondary_ip,
6046 constants.DEFAULT_NODED_PORT,
6047 source=master.secondary_ip):
6048 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6049 " based ping to node daemon port",
6050 errors.ECODE_ENVIRON)
6052 if self.op.ndparams:
6053 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6054 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6055 self.new_ndparams = new_ndparams
6057 if self.op.hv_state:
6058 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6059 self.node.hv_state_static)
6061 if self.op.disk_state:
6062 self.new_disk_state = \
6063 _MergeAndVerifyDiskState(self.op.disk_state,
6064 self.node.disk_state_static)
6066 def Exec(self, feedback_fn):
6070 node = self.node
6071 old_role = self.old_role
6072 new_role = self.new_role
6074 result = []
6076 if self.op.ndparams:
6077 node.ndparams = self.new_ndparams
6079 if self.op.powered is not None:
6080 node.powered = self.op.powered
6082 if self.op.hv_state:
6083 node.hv_state_static = self.new_hv_state
6085 if self.op.disk_state:
6086 node.disk_state_static = self.new_disk_state
6088 for attr in ["master_capable", "vm_capable"]:
6089 val = getattr(self.op, attr)
6091 setattr(node, attr, val)
6092 result.append((attr, str(val)))
6094 if new_role != old_role:
6095 # Tell the node to demote itself, if no longer MC and not offline
6096 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6097 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6099 self.LogWarning("Node failed to demote itself: %s", msg)
6101 new_flags = self._R2F[new_role]
6102 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6103 if of != nf:
6104 result.append((desc, str(nf)))
6105 (node.master_candidate, node.drained, node.offline) = new_flags
6107 # we locked all nodes, we adjust the CP before updating this node
6108 if self.lock_all:
6109 _AdjustCandidatePool(self, [node.name])
6111 if self.op.secondary_ip:
6112 node.secondary_ip = self.op.secondary_ip
6113 result.append(("secondary_ip", self.op.secondary_ip))
6115 # this will trigger configuration file update, if needed
6116 self.cfg.Update(node, feedback_fn)
6118 # this will trigger job queue propagation or cleanup if the mc
6119 # flag changed
6120 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6121 self.context.ReaddNode(node)
6123 return result
6126 class LUNodePowercycle(NoHooksLU):
6127 """Powercycles a node.
6132 def CheckArguments(self):
6133 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6134 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6135 raise errors.OpPrereqError("The node is the master and the force"
6136 " parameter was not set",
6139 def ExpandNames(self):
6140 """Locking for PowercycleNode.
6142 This is a last-resort option and shouldn't block on other
6143 jobs. Therefore, we grab no locks.
6146 self.needed_locks = {}
6148 def Exec(self, feedback_fn):
6152 result = self.rpc.call_node_powercycle(self.op.node_name,
6153 self.cfg.GetHypervisorType())
6154 result.Raise("Failed to schedule the reboot")
6155 return result.payload
6158 class LUClusterQuery(NoHooksLU):
6159 """Query cluster configuration.
6164 def ExpandNames(self):
6165 self.needed_locks = {}
6167 def Exec(self, feedback_fn):
6168 """Return cluster config.
6171 cluster = self.cfg.GetClusterInfo()
6173 os_hvp = {}
6174 # Filter just for enabled hypervisors
6175 for os_name, hv_dict in cluster.os_hvp.items():
6176 os_hvp[os_name] = {}
6177 for hv_name, hv_params in hv_dict.items():
6178 if hv_name in cluster.enabled_hypervisors:
6179 os_hvp[os_name][hv_name] = hv_params
6181 # Convert ip_family to ip_version
6182 primary_ip_version = constants.IP4_VERSION
6183 if cluster.primary_ip_family == netutils.IP6Address.family:
6184 primary_ip_version = constants.IP6_VERSION
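# (the configuration stores an address family constant; clients expect a
# plain 4/6 version number, hence the conversion above)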
6187 "software_version": constants.RELEASE_VERSION,
6188 "protocol_version": constants.PROTOCOL_VERSION,
6189 "config_version": constants.CONFIG_VERSION,
6190 "os_api_version": max(constants.OS_API_VERSIONS),
6191 "export_version": constants.EXPORT_VERSION,
6192 "architecture": runtime.GetArchInfo(),
6193 "name": cluster.cluster_name,
6194 "master": cluster.master_node,
6195 "default_hypervisor": cluster.primary_hypervisor,
6196 "enabled_hypervisors": cluster.enabled_hypervisors,
6197 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6198 for hypervisor_name in cluster.enabled_hypervisors]),
6200 "beparams": cluster.beparams,
6201 "osparams": cluster.osparams,
6202 "ipolicy": cluster.ipolicy,
6203 "nicparams": cluster.nicparams,
6204 "ndparams": cluster.ndparams,
6205 "diskparams": cluster.diskparams,
6206 "candidate_pool_size": cluster.candidate_pool_size,
6207 "master_netdev": cluster.master_netdev,
6208 "master_netmask": cluster.master_netmask,
6209 "use_external_mip_script": cluster.use_external_mip_script,
6210 "volume_group_name": cluster.volume_group_name,
6211 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6212 "file_storage_dir": cluster.file_storage_dir,
6213 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6214 "maintain_node_health": cluster.maintain_node_health,
6215 "ctime": cluster.ctime,
6216 "mtime": cluster.mtime,
6217 "uuid": cluster.uuid,
6218 "tags": list(cluster.GetTags()),
6219 "uid_pool": cluster.uid_pool,
6220 "default_iallocator": cluster.default_iallocator,
6221 "reserved_lvs": cluster.reserved_lvs,
6222 "primary_ip_version": primary_ip_version,
6223 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6224 "hidden_os": cluster.hidden_os,
6225 "blacklisted_os": cluster.blacklisted_os,
6231 class LUClusterConfigQuery(NoHooksLU):
6232 """Return configuration values.
6237 def CheckArguments(self):
6238 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6240 def ExpandNames(self):
6241 self.cq.ExpandNames(self)
6243 def DeclareLocks(self, level):
6244 self.cq.DeclareLocks(self, level)
6246 def Exec(self, feedback_fn):
6247 result = self.cq.OldStyleQuery(self)
6249 assert len(result) == 1
6251 return result[0]
6254 class _ClusterQuery(_QueryBase):
6255 FIELDS = query.CLUSTER_FIELDS
6257 #: Do not sort (there is only one item)
6258 SORT_FIELD = None
6260 def ExpandNames(self, lu):
6261 lu.needed_locks = {}
6263 # The following variables interact with _QueryBase._GetNames
6264 self.wanted = locking.ALL_SET
6265 self.do_locking = self.use_locking
6268 raise errors.OpPrereqError("Can not use locking for cluster queries",
6271 def DeclareLocks(self, lu, level):
6272 pass
6274 def _GetQueryData(self, lu):
6275 """Computes the list of nodes and their attributes.
6278 # Locking is not used
6279 assert not (compat.any(lu.glm.is_owned(level)
6280 for level in locking.LEVELS
6281 if level != locking.LEVEL_CLUSTER) or
6282 self.do_locking or self.use_locking)
6284 if query.CQ_CONFIG in self.requested_data:
6285 cluster = lu.cfg.GetClusterInfo()
6286 else:
6287 cluster = NotImplemented
6289 if query.CQ_QUEUE_DRAINED in self.requested_data:
6290 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6291 else:
6292 drain_flag = NotImplemented
6294 if query.CQ_WATCHER_PAUSE in self.requested_data:
6295 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6296 else:
6297 watcher_pause = NotImplemented
6299 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6302 class LUInstanceActivateDisks(NoHooksLU):
6303 """Bring up an instance's disks.
6308 def ExpandNames(self):
6309 self._ExpandAndLockInstance()
6310 self.needed_locks[locking.LEVEL_NODE] = []
6311 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6313 def DeclareLocks(self, level):
6314 if level == locking.LEVEL_NODE:
6315 self._LockInstancesNodes()
6317 def CheckPrereq(self):
6318 """Check prerequisites.
6320 This checks that the instance is in the cluster.
6323 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6324 assert self.instance is not None, \
6325 "Cannot retrieve locked instance %s" % self.op.instance_name
6326 _CheckNodeOnline(self, self.instance.primary_node)
6328 def Exec(self, feedback_fn):
6329 """Activate the disks.
6332 disks_ok, disks_info = \
6333 _AssembleInstanceDisks(self, self.instance,
6334 ignore_size=self.op.ignore_size)
6336 raise errors.OpExecError("Cannot activate block devices")
6341 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6342 ignore_size=False):
6343 """Prepare the block devices for an instance.
6345 This sets up the block devices on all nodes.
6347 @type lu: L{LogicalUnit}
6348 @param lu: the logical unit on whose behalf we execute
6349 @type instance: L{objects.Instance}
6350 @param instance: the instance for whose disks we assemble
6351 @type disks: list of L{objects.Disk} or None
6352 @param disks: which disks to assemble (or all, if None)
6353 @type ignore_secondaries: boolean
6354 @param ignore_secondaries: if true, errors on secondary nodes
6355 won't result in an error return from the function
6356 @type ignore_size: boolean
6357 @param ignore_size: if true, the current known size of the disk
6358 will not be used during the disk activation, useful for cases
6359 when the size is wrong
6360 @return: False if the operation failed, otherwise a list of
6361 (host, instance_visible_name, node_visible_name)
6362 with the mapping from node devices to instance devices
6364 """
6365 device_info = []
6366 disks_ok = True
6367 iname = instance.name
6368 disks = _ExpandCheckDisks(instance, disks)
6370 # With the two passes mechanism we try to reduce the window of
6371 # opportunity for the race condition of switching DRBD to primary
6372 # before handshaking occured, but we do not eliminate it
6374 # The proper fix would be to wait (with some limits) until the
6375 # connection has been made and drbd transitions from WFConnection
6376 # into any other network-connected state (Connected, SyncTarget,
6377 # SyncSource, etc.)
6379 # 1st pass, assemble on all nodes in secondary mode
6380 for idx, inst_disk in enumerate(disks):
6381 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6382 if ignore_size:
6383 node_disk = node_disk.Copy()
6384 node_disk.UnsetSize()
6385 lu.cfg.SetDiskID(node_disk, node)
6386 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6387 False, idx)
6388 msg = result.fail_msg
6389 if msg:
6390 is_offline_secondary = (node in instance.secondary_nodes and
6391 result.offline)
6392 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6393 " (is_primary=False, pass=1): %s",
6394 inst_disk.iv_name, node, msg)
6395 if not (ignore_secondaries or is_offline_secondary):
6396 disks_ok = False
6398 # FIXME: race condition on drbd migration to primary
6400 # 2nd pass, do only the primary node
6401 for idx, inst_disk in enumerate(disks):
6402 dev_path = None
6404 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6405 if node != instance.primary_node:
6406 continue
6407 if ignore_size:
6408 node_disk = node_disk.Copy()
6409 node_disk.UnsetSize()
6410 lu.cfg.SetDiskID(node_disk, node)
6411 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6412 True, idx)
6413 msg = result.fail_msg
6414 if msg:
6415 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6416 " (is_primary=True, pass=2): %s",
6417 inst_disk.iv_name, node, msg)
6418 disks_ok = False
6419 else:
6420 dev_path = result.payload
6422 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6424 # leave the disks configured for the primary node
6425 # this is a workaround that would be fixed better by
6426 # improving the logical/physical id handling
6427 for disk in disks:
6428 lu.cfg.SetDiskID(disk, instance.primary_node)
6430 return disks_ok, device_info
6433 def _StartInstanceDisks(lu, instance, force):
6434 """Start the disks of an instance.
6437 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6438 ignore_secondaries=force)
6439 if not disks_ok:
6440 _ShutdownInstanceDisks(lu, instance)
6441 if force is not None and not force:
6442 lu.proc.LogWarning("", hint="If the message above refers to a"
6444 " you can retry the operation using '--force'.")
6445 raise errors.OpExecError("Disk consistency error")
6448 class LUInstanceDeactivateDisks(NoHooksLU):
6449 """Shutdown an instance's disks.
6454 def ExpandNames(self):
6455 self._ExpandAndLockInstance()
6456 self.needed_locks[locking.LEVEL_NODE] = []
6457 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6459 def DeclareLocks(self, level):
6460 if level == locking.LEVEL_NODE:
6461 self._LockInstancesNodes()
6463 def CheckPrereq(self):
6464 """Check prerequisites.
6466 This checks that the instance is in the cluster.
6469 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6470 assert self.instance is not None, \
6471 "Cannot retrieve locked instance %s" % self.op.instance_name
6473 def Exec(self, feedback_fn):
6474 """Deactivate the disks
6477 instance = self.instance
6479 _ShutdownInstanceDisks(self, instance)
6481 _SafeShutdownInstanceDisks(self, instance)
6484 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6485 """Shutdown block devices of an instance.
6487 This function checks if an instance is running, before calling
6488 _ShutdownInstanceDisks.
6491 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6492 _ShutdownInstanceDisks(lu, instance, disks=disks)
6495 def _ExpandCheckDisks(instance, disks):
6496 """Return the instance disks selected by the disks list
6498 @type disks: list of L{objects.Disk} or None
6499 @param disks: selected disks
6500 @rtype: list of L{objects.Disk}
6501 @return: selected instance disks to act on
6503 """
6504 if disks is None:
6505 return instance.disks
6506 else:
6507 if not set(disks).issubset(instance.disks):
6508 raise errors.ProgrammerError("Can only act on disks belonging to the"
6509 " target instance")
6510 return disks
6513 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6514 """Shutdown block devices of an instance.
6516 This does the shutdown on all nodes of the instance.
6518 If the ignore_primary is false, errors on the primary node are
6519 not ignored (the shutdown is reported as failed).
6521 """
6522 all_result = True
6523 disks = _ExpandCheckDisks(instance, disks)
6525 for disk in disks:
6526 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6527 lu.cfg.SetDiskID(top_disk, node)
6528 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6529 msg = result.fail_msg
6531 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6532 disk.iv_name, node, msg)
6533 if ((node == instance.primary_node and not ignore_primary) or
6534 (node != instance.primary_node and not result.offline)):
6535 all_result = False
6537 return all_result
6539 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6540 """Checks if a node has enough free memory.
6542 This function check if a given node has the needed amount of free
6543 memory. In case the node has less memory or we cannot get the
6544 information from the node, this function raise an OpPrereqError
6547 @type lu: C{LogicalUnit}
6548 @param lu: a logical unit from which we get configuration data
6550 @param node: the node to check
6551 @type reason: C{str}
6552 @param reason: string to use in the error message
6553 @type requested: C{int}
6554 @param requested: the amount of memory in MiB to check for
6555 @type hypervisor_name: C{str}
6556 @param hypervisor_name: the hypervisor to ask for memory stats
6558 @return: node current free memory
6559 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6560 we cannot check the node
6563 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6564 nodeinfo[node].Raise("Can't get data from node %s" % node,
6565 prereq=True, ecode=errors.ECODE_ENVIRON)
6566 (_, _, (hv_info, )) = nodeinfo[node].payload
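# the node_info payload is unpacked as a 3-tuple here; only the
# per-hypervisor data (third element) matters for the memory check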
6568 free_mem = hv_info.get("memory_free", None)
6569 if not isinstance(free_mem, int):
6570 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6571 " was '%s'" % (node, free_mem),
6572 errors.ECODE_ENVIRON)
6573 if requested > free_mem:
6574 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6575 " needed %s MiB, available %s MiB" %
6576 (node, reason, requested, free_mem),
6577 errors.ECODE_NORES)
6579 return free_mem
6581 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6582 """Checks if nodes have enough free disk space in the all VGs.
6584 This function check if all given nodes have the needed amount of
6585 free disk. In case any node has less disk or we cannot get the
6586 information from the node, this function raise an OpPrereqError
6589 @type lu: C{LogicalUnit}
6590 @param lu: a logical unit from which we get configuration data
6591 @type nodenames: C{list}
6592 @param nodenames: the list of node names to check
6593 @type req_sizes: C{dict}
6594 @param req_sizes: the hash of vg and corresponding amount of disk in
6595 MiB to check for
6596 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6597 or we cannot check the node
6600 for vg, req_size in req_sizes.items():
6601 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6604 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6605 """Checks if nodes have enough free disk space in the specified VG.
6607 This function check if all given nodes have the needed amount of
6608 free disk. In case any node has less disk or we cannot get the
6609 information from the node, this function raise an OpPrereqError
6612 @type lu: C{LogicalUnit}
6613 @param lu: a logical unit from which we get configuration data
6614 @type nodenames: C{list}
6615 @param nodenames: the list of node names to check
6617 @param vg: the volume group to check
6618 @type requested: C{int}
6619 @param requested: the amount of disk in MiB to check for
6620 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6621 or we cannot check the node
6624 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6625 for node in nodenames:
6626 info = nodeinfo[node]
6627 info.Raise("Cannot get current information from node %s" % node,
6628 prereq=True, ecode=errors.ECODE_ENVIRON)
6629 (_, (vg_info, ), _) = info.payload
6630 vg_free = vg_info.get("vg_free", None)
6631 if not isinstance(vg_free, int):
6632 raise errors.OpPrereqError("Can't compute free disk space on node"
6633 " %s for vg %s, result was '%s'" %
6634 (node, vg, vg_free), errors.ECODE_ENVIRON)
6635 if requested > vg_free:
6636 raise errors.OpPrereqError("Not enough disk space on target node %s"
6637 " vg %s: required %d MiB, available %d MiB" %
6638 (node, vg, requested, vg_free),
6639 errors.ECODE_NORES)
6642 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6643 """Checks if nodes have enough physical CPUs
6645 This function checks if all given nodes have the needed number of
6646 physical CPUs. In case any node has less CPUs or we cannot get the
6647 information from the node, this function raises an OpPrereqError
6650 @type lu: C{LogicalUnit}
6651 @param lu: a logical unit from which we get configuration data
6652 @type nodenames: C{list}
6653 @param nodenames: the list of node names to check
6654 @type requested: C{int}
6655 @param requested: the minimum acceptable number of physical CPUs
6656 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6657 or we cannot check the node
6660 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6661 for node in nodenames:
6662 info = nodeinfo[node]
6663 info.Raise("Cannot get current information from node %s" % node,
6664 prereq=True, ecode=errors.ECODE_ENVIRON)
6665 (_, _, (hv_info, )) = info.payload
6666 num_cpus = hv_info.get("cpu_total", None)
6667 if not isinstance(num_cpus, int):
6668 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6669 " on node %s, result was '%s'" %
6670 (node, num_cpus), errors.ECODE_ENVIRON)
6671 if requested > num_cpus:
6672 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6673 "required" % (node, num_cpus, requested),
6677 class LUInstanceStartup(LogicalUnit):
6678 """Starts an instance.
6681 HPATH = "instance-start"
6682 HTYPE = constants.HTYPE_INSTANCE
6685 def CheckArguments(self):
6687 if self.op.beparams:
6688 # fill the beparams dict
6689 objects.UpgradeBeParams(self.op.beparams)
6690 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6692 def ExpandNames(self):
6693 self._ExpandAndLockInstance()
6694 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6696 def DeclareLocks(self, level):
6697 if level == locking.LEVEL_NODE_RES:
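# starting an instance only touches its primary node, so only that
# node's resource lock is needed here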
6698 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6700 def BuildHooksEnv(self):
6701 """Build hooks env.
6703 This runs on master, primary and secondary nodes of the instance.
6705 """
6706 env = {
6707 "FORCE": self.op.force,
6708 }
6710 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6712 return env
6714 def BuildHooksNodes(self):
6715 """Build hooks nodes.
6718 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6721 def CheckPrereq(self):
6722 """Check prerequisites.
6724 This checks that the instance is in the cluster.
6727 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6728 assert self.instance is not None, \
6729 "Cannot retrieve locked instance %s" % self.op.instance_name
6732 if self.op.hvparams:
6733 # check hypervisor parameter syntax (locally)
6734 cluster = self.cfg.GetClusterInfo()
6735 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6736 filled_hvp = cluster.FillHV(instance)
6737 filled_hvp.update(self.op.hvparams)
6738 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6739 hv_type.CheckParameterSyntax(filled_hvp)
6740 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6742 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6744 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6746 if self.primary_offline and self.op.ignore_offline_nodes:
6747 self.proc.LogWarning("Ignoring offline primary node")
6749 if self.op.hvparams or self.op.beparams:
6750 self.proc.LogWarning("Overridden parameters are ignored")
6751 else:
6752 _CheckNodeOnline(self, instance.primary_node)
6754 bep = self.cfg.GetClusterInfo().FillBE(instance)
6755 bep.update(self.op.beparams)
6757 # check bridges existence
6758 _CheckInstanceBridgesExist(self, instance)
6760 remote_info = self.rpc.call_instance_info(instance.primary_node,
6761 instance.name,
6762 instance.hypervisor)
6763 remote_info.Raise("Error checking node %s" % instance.primary_node,
6764 prereq=True, ecode=errors.ECODE_ENVIRON)
6765 if not remote_info.payload: # not running already
6766 _CheckNodeFreeMemory(self, instance.primary_node,
6767 "starting instance %s" % instance.name,
6768 bep[constants.BE_MINMEM], instance.hypervisor)
6770 def Exec(self, feedback_fn):
6771 """Start the instance.
6774 instance = self.instance
6775 force = self.op.force
6777 if not self.op.no_remember:
6778 self.cfg.MarkInstanceUp(instance.name)
6780 if self.primary_offline:
6781 assert self.op.ignore_offline_nodes
6782 self.proc.LogInfo("Primary node offline, marked instance as started")
6783 else:
6784 node_current = instance.primary_node
6786 _StartInstanceDisks(self, instance, force)
6788 result = \
6789 self.rpc.call_instance_start(node_current,
6790 (instance, self.op.hvparams,
6791 self.op.beparams), force,
6792 self.op.startup_paused)
6793 msg = result.fail_msg
6794 if msg:
6795 _ShutdownInstanceDisks(self, instance)
6796 raise errors.OpExecError("Could not start instance: %s" % msg)
6799 class LUInstanceReboot(LogicalUnit):
6800 """Reboot an instance.
6803 HPATH = "instance-reboot"
6804 HTYPE = constants.HTYPE_INSTANCE
6807 def ExpandNames(self):
6808 self._ExpandAndLockInstance()
6810 def BuildHooksEnv(self):
6811 """Build hooks env.
6813 This runs on master, primary and secondary nodes of the instance.
6815 """
6816 env = {
6817 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6818 "REBOOT_TYPE": self.op.reboot_type,
6819 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6820 }
6822 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6824 return env
6826 def BuildHooksNodes(self):
6827 """Build hooks nodes.
6830 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6831 return (nl, nl)
6833 def CheckPrereq(self):
6834 """Check prerequisites.
6836 This checks that the instance is in the cluster.
6839 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6840 assert self.instance is not None, \
6841 "Cannot retrieve locked instance %s" % self.op.instance_name
6842 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6843 _CheckNodeOnline(self, instance.primary_node)
6845 # check bridges existence
6846 _CheckInstanceBridgesExist(self, instance)
6848 def Exec(self, feedback_fn):
6849 """Reboot the instance.
6852 instance = self.instance
6853 ignore_secondaries = self.op.ignore_secondaries
6854 reboot_type = self.op.reboot_type
6856 remote_info = self.rpc.call_instance_info(instance.primary_node,
6857 instance.name,
6858 instance.hypervisor)
6859 remote_info.Raise("Error checking node %s" % instance.primary_node)
6860 instance_running = bool(remote_info.payload)
6862 node_current = instance.primary_node
6864 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6865 constants.INSTANCE_REBOOT_HARD]:
6866 for disk in instance.disks:
6867 self.cfg.SetDiskID(disk, node_current)
6868 result = self.rpc.call_instance_reboot(node_current, instance,
6869 reboot_type,
6870 self.op.shutdown_timeout)
6871 result.Raise("Could not reboot instance")
6872 else:
6873 if instance_running:
6874 result = self.rpc.call_instance_shutdown(node_current, instance,
6875 self.op.shutdown_timeout)
6876 result.Raise("Could not shutdown instance for full reboot")
6877 _ShutdownInstanceDisks(self, instance)
6879 self.LogInfo("Instance %s was already stopped, starting now",
6881 _StartInstanceDisks(self, instance, ignore_secondaries)
6882 result = self.rpc.call_instance_start(node_current,
6883 (instance, None, None), False)
6884 msg = result.fail_msg
6885 if msg:
6886 _ShutdownInstanceDisks(self, instance)
6887 raise errors.OpExecError("Could not start instance for"
6888 " full reboot: %s" % msg)
6890 self.cfg.MarkInstanceUp(instance.name)
6893 class LUInstanceShutdown(LogicalUnit):
6894 """Shutdown an instance.
6897 HPATH = "instance-stop"
6898 HTYPE = constants.HTYPE_INSTANCE
6901 def ExpandNames(self):
6902 self._ExpandAndLockInstance()
6904 def BuildHooksEnv(self):
6907 This runs on master, primary and secondary nodes of the instance.
6910 env = _BuildInstanceHookEnvByObject(self, self.instance)
6911 env["TIMEOUT"] = self.op.timeout
6914 def BuildHooksNodes(self):
6915 """Build hooks nodes.
6918 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6919 return (nl, nl)
6921 def CheckPrereq(self):
6922 """Check prerequisites.
6924 This checks that the instance is in the cluster.
6927 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6928 assert self.instance is not None, \
6929 "Cannot retrieve locked instance %s" % self.op.instance_name
6931 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6933 self.primary_offline = \
6934 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6936 if self.primary_offline and self.op.ignore_offline_nodes:
6937 self.proc.LogWarning("Ignoring offline primary node")
6939 _CheckNodeOnline(self, self.instance.primary_node)
6941 def Exec(self, feedback_fn):
6942 """Shutdown the instance.
6945 instance = self.instance
6946 node_current = instance.primary_node
6947 timeout = self.op.timeout
6949 if not self.op.no_remember:
6950 self.cfg.MarkInstanceDown(instance.name)
6952 if self.primary_offline:
6953 assert self.op.ignore_offline_nodes
6954 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6955 else:
6956 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6957 msg = result.fail_msg
6959 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6961 _ShutdownInstanceDisks(self, instance)
6964 class LUInstanceReinstall(LogicalUnit):
6965 """Reinstall an instance.
6968 HPATH = "instance-reinstall"
6969 HTYPE = constants.HTYPE_INSTANCE
6972 def ExpandNames(self):
6973 self._ExpandAndLockInstance()
6975 def BuildHooksEnv(self):
6978 This runs on master, primary and secondary nodes of the instance.
6981 return _BuildInstanceHookEnvByObject(self, self.instance)
6983 def BuildHooksNodes(self):
6984 """Build hooks nodes.
6987 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6988 return (nl, nl)
6990 def CheckPrereq(self):
6991 """Check prerequisites.
6993 This checks that the instance is in the cluster and is not running.
6996 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6997 assert instance is not None, \
6998 "Cannot retrieve locked instance %s" % self.op.instance_name
6999 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7000 " offline, cannot reinstall")
7002 if instance.disk_template == constants.DT_DISKLESS:
7003 raise errors.OpPrereqError("Instance '%s' has no disks" %
7004 self.op.instance_name,
7005 errors.ECODE_INVAL)
7006 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7008 if self.op.os_type is not None:
7010 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7011 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7012 instance_os = self.op.os_type
7013 else:
7014 instance_os = instance.os
7016 nodelist = list(instance.all_nodes)
7018 if self.op.osparams:
7019 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7020 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7021 self.os_inst = i_osdict # the new dict (without defaults)
7022 else:
7023 self.os_inst = {}
7025 self.instance = instance
7027 def Exec(self, feedback_fn):
7028 """Reinstall the instance.
7031 inst = self.instance
7033 if self.op.os_type is not None:
7034 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7035 inst.os = self.op.os_type
7036 # Write to configuration
7037 self.cfg.Update(inst, feedback_fn)
7039 _StartInstanceDisks(self, inst, None)
7041 feedback_fn("Running the instance OS create scripts...")
7042 # FIXME: pass debug option from opcode to backend
7043 result = self.rpc.call_instance_os_add(inst.primary_node,
7044 (inst, self.os_inst), True,
7045 self.op.debug_level)
7046 result.Raise("Could not install OS for instance %s on node %s" %
7047 (inst.name, inst.primary_node))
7048 finally:
7049 _ShutdownInstanceDisks(self, inst)
7052 class LUInstanceRecreateDisks(LogicalUnit):
7053 """Recreate an instance's missing disks.
7056 HPATH = "instance-recreate-disks"
7057 HTYPE = constants.HTYPE_INSTANCE
7060 _MODIFYABLE = frozenset([
7061 constants.IDISK_SIZE,
7062 constants.IDISK_MODE,
7063 ])
7065 # New or changed disk parameters may have different semantics
7066 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7067 constants.IDISK_ADOPT,
7069 # TODO: Implement support changing VG while recreating
7070 constants.IDISK_VG,
7071 constants.IDISK_METAVG,
7072 ]))
7074 def CheckArguments(self):
7075 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7076 # Normalize and convert deprecated list of disk indices
7077 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
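# e.g. the deprecated form [2, 0] becomes [(0, {}), (2, {})]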
7079 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7081 raise errors.OpPrereqError("Some disks have been specified more than"
7082 " once: %s" % utils.CommaJoin(duplicates),
7085 for (idx, params) in self.op.disks:
7086 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7087 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7089 raise errors.OpPrereqError("Parameters for disk %s try to change"
7090 " unmodifyable parameter(s): %s" %
7091 (idx, utils.CommaJoin(unsupported)),
7094 def ExpandNames(self):
7095 self._ExpandAndLockInstance()
7096 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7097 if self.op.nodes:
7098 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7099 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7100 else:
7101 self.needed_locks[locking.LEVEL_NODE] = []
7102 self.needed_locks[locking.LEVEL_NODE_RES] = []
7104 def DeclareLocks(self, level):
7105 if level == locking.LEVEL_NODE:
7106 # if we replace the nodes, we only need to lock the old primary,
7107 # otherwise we need to lock all nodes for disk re-creation
7108 primary_only = bool(self.op.nodes)
7109 self._LockInstancesNodes(primary_only=primary_only)
7110 elif level == locking.LEVEL_NODE_RES:
7112 self.needed_locks[locking.LEVEL_NODE_RES] = \
7113 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7115 def BuildHooksEnv(self):
7118 This runs on master, primary and secondary nodes of the instance.
7121 return _BuildInstanceHookEnvByObject(self, self.instance)
7123 def BuildHooksNodes(self):
7124 """Build hooks nodes.
7127 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7128 return (nl, nl)
7130 def CheckPrereq(self):
7131 """Check prerequisites.
7133 This checks that the instance is in the cluster and is not running.
7136 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7137 assert instance is not None, \
7138 "Cannot retrieve locked instance %s" % self.op.instance_name
7139 if self.op.nodes:
7140 if len(self.op.nodes) != len(instance.all_nodes):
7141 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7142 " %d replacement nodes were specified" %
7143 (instance.name, len(instance.all_nodes),
7144 len(self.op.nodes)),
7145 errors.ECODE_INVAL)
7146 assert instance.disk_template != constants.DT_DRBD8 or \
7147 len(self.op.nodes) == 2
7148 assert instance.disk_template != constants.DT_PLAIN or \
7149 len(self.op.nodes) == 1
7150 primary_node = self.op.nodes[0]
7151 else:
7152 primary_node = instance.primary_node
7153 _CheckNodeOnline(self, primary_node)
7155 if instance.disk_template == constants.DT_DISKLESS:
7156 raise errors.OpPrereqError("Instance '%s' has no disks" %
7157 self.op.instance_name, errors.ECODE_INVAL)
7159 # if we replace nodes *and* the old primary is offline, we don't
7161 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7162 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7163 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7164 if not (self.op.nodes and old_pnode.offline):
7165 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7166 msg="cannot recreate disks")
7168 if self.op.disks:
7169 self.disks = dict(self.op.disks)
7170 else:
7171 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
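# an empty override dict for a disk index means "recreate this disk with
# its current size/mode" (see the disk.Update call in Exec)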
7173 maxidx = max(self.disks.keys())
7174 if maxidx >= len(instance.disks):
7175 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7178 if (self.op.nodes and
7179 sorted(self.disks.keys()) != range(len(instance.disks))):
7180 raise errors.OpPrereqError("Can't recreate disks partially and"
7181 " change the nodes at the same time",
7184 self.instance = instance
7186 def Exec(self, feedback_fn):
7187 """Recreate the disks.
7190 instance = self.instance
7192 assert (self.owned_locks(locking.LEVEL_NODE) ==
7193 self.owned_locks(locking.LEVEL_NODE_RES))
7195 to_skip = []
7196 mods = [] # keeps track of needed changes
7198 for idx, disk in enumerate(instance.disks):
7199 try:
7200 changes = self.disks[idx]
7201 except KeyError:
7202 # Disk should not be recreated
7203 to_skip.append(idx)
7204 continue
7206 # update secondaries for disks, if needed
7207 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7208 # need to update the nodes and minors
7209 assert len(self.op.nodes) == 2
7210 assert len(disk.logical_id) == 6 # otherwise disk internals
7212 (_, _, old_port, _, _, old_secret) = disk.logical_id
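# DRBD8 logical_id layout (per the 6-element assert above):
# (node_a, node_b, port, minor_a, minor_b, shared_secret)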
7213 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7214 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7215 new_minors[0], new_minors[1], old_secret)
7216 assert len(disk.logical_id) == len(new_id)
7217 else:
7218 new_id = None
7220 mods.append((idx, new_id, changes))
7222 # now that we have passed all asserts above, we can apply the mods
7223 # in a single run (to avoid partial changes)
7224 for idx, new_id, changes in mods:
7225 disk = instance.disks[idx]
7226 if new_id is not None:
7227 assert disk.dev_type == constants.LD_DRBD8
7228 disk.logical_id = new_id
7229 if changes:
7230 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7231 mode=changes.get(constants.IDISK_MODE, None))
7233 # change primary node, if needed
7234 if self.op.nodes:
7235 instance.primary_node = self.op.nodes[0]
7236 self.LogWarning("Changing the instance's nodes, you will have to"
7237 " remove any disks left on the older nodes manually")
7240 self.cfg.Update(instance, feedback_fn)
7242 _CreateDisks(self, instance, to_skip=to_skip)
7245 class LUInstanceRename(LogicalUnit):
7246 """Rename an instance.
7249 HPATH = "instance-rename"
7250 HTYPE = constants.HTYPE_INSTANCE
7252 def CheckArguments(self):
7256 if self.op.ip_check and not self.op.name_check:
7257 # TODO: make the ip check more flexible and not depend on the name check
7258 raise errors.OpPrereqError("IP address check requires a name check",
7261 def BuildHooksEnv(self):
7264 This runs on master, primary and secondary nodes of the instance.
7267 env = _BuildInstanceHookEnvByObject(self, self.instance)
7268 env["INSTANCE_NEW_NAME"] = self.op.new_name
7271 def BuildHooksNodes(self):
7272 """Build hooks nodes.
7275 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7276 return (nl, nl)
7278 def CheckPrereq(self):
7279 """Check prerequisites.
7281 This checks that the instance is in the cluster and is not running.
7284 self.op.instance_name = _ExpandInstanceName(self.cfg,
7285 self.op.instance_name)
7286 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7287 assert instance is not None
7288 _CheckNodeOnline(self, instance.primary_node)
7289 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7290 msg="cannot rename")
7291 self.instance = instance
7293 new_name = self.op.new_name
7294 if self.op.name_check:
7295 hostname = netutils.GetHostname(name=new_name)
7296 if hostname.name != new_name:
7297 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7299 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7300 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7301 " same as given hostname '%s'") %
7302 (hostname.name, self.op.new_name),
7303 errors.ECODE_INVAL)
7304 new_name = self.op.new_name = hostname.name
7305 if (self.op.ip_check and
7306 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7307 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7308 (hostname.ip, new_name),
7309 errors.ECODE_NOTUNIQUE)
7311 instance_list = self.cfg.GetInstanceList()
7312 if new_name in instance_list and new_name != instance.name:
7313 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7314 new_name, errors.ECODE_EXISTS)
7316 def Exec(self, feedback_fn):
7317 """Rename the instance.
7320 inst = self.instance
7321 old_name = inst.name
7323 rename_file_storage = False
7324 if (inst.disk_template in constants.DTS_FILEBASED and
7325 self.op.new_name != inst.name):
7326 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7327 rename_file_storage = True
7329 self.cfg.RenameInstance(inst.name, self.op.new_name)
7330 # Change the instance lock. This is definitely safe while we hold the BGL.
7331 # Otherwise the new lock would have to be added in acquired mode.
7333 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7334 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7336 # re-read the instance from the configuration after rename
7337 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7339 if rename_file_storage:
7340 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7341 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7342 old_file_storage_dir,
7343 new_file_storage_dir)
7344 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7345 " (but the instance has been renamed in Ganeti)" %
7346 (inst.primary_node, old_file_storage_dir,
7347 new_file_storage_dir))
7349 _StartInstanceDisks(self, inst, None)
7350 try:
7351 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7352 old_name, self.op.debug_level)
7353 msg = result.fail_msg
7355 msg = ("Could not run OS rename script for instance %s on node %s"
7356 " (but the instance has been renamed in Ganeti): %s" %
7357 (inst.name, inst.primary_node, msg))
7358 self.proc.LogWarning(msg)
7359 finally:
7360 _ShutdownInstanceDisks(self, inst)
7362 return inst.name
7365 class LUInstanceRemove(LogicalUnit):
7366 """Remove an instance.
7369 HPATH = "instance-remove"
7370 HTYPE = constants.HTYPE_INSTANCE
7373 def ExpandNames(self):
7374 self._ExpandAndLockInstance()
7375 self.needed_locks[locking.LEVEL_NODE] = []
7376 self.needed_locks[locking.LEVEL_NODE_RES] = []
7377 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7379 def DeclareLocks(self, level):
7380 if level == locking.LEVEL_NODE:
7381 self._LockInstancesNodes()
7382 elif level == locking.LEVEL_NODE_RES:
7384 self.needed_locks[locking.LEVEL_NODE_RES] = \
7385 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7387 def BuildHooksEnv(self):
7390 This runs on master, primary and secondary nodes of the instance.
7393 env = _BuildInstanceHookEnvByObject(self, self.instance)
7394 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7397 def BuildHooksNodes(self):
7398 """Build hooks nodes.
7401 nl = [self.cfg.GetMasterNode()]
7402 nl_post = list(self.instance.all_nodes) + nl
7403 return (nl, nl_post)
7405 def CheckPrereq(self):
7406 """Check prerequisites.
7408 This checks that the instance is in the cluster.
7411 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7412 assert self.instance is not None, \
7413 "Cannot retrieve locked instance %s" % self.op.instance_name
7415 def Exec(self, feedback_fn):
7416 """Remove the instance.
7419 instance = self.instance
7420 logging.info("Shutting down instance %s on node %s",
7421 instance.name, instance.primary_node)
7423 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7424 self.op.shutdown_timeout)
7425 msg = result.fail_msg
7426 if msg:
7427 if self.op.ignore_failures:
7428 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7429 else:
7430 raise errors.OpExecError("Could not shutdown instance %s on"
7431 " node %s: %s" %
7432 (instance.name, instance.primary_node, msg))
7434 assert (self.owned_locks(locking.LEVEL_NODE) ==
7435 self.owned_locks(locking.LEVEL_NODE_RES))
7436 assert not (set(instance.all_nodes) -
7437 self.owned_locks(locking.LEVEL_NODE)), \
7438 "Not owning correct locks"
7440 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7443 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7444 """Utility function to remove an instance.
7447 logging.info("Removing block devices for instance %s", instance.name)
7449 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7450 if not ignore_failures:
7451 raise errors.OpExecError("Can't remove instance's disks")
7452 feedback_fn("Warning: can't remove instance's disks")
7454 logging.info("Removing instance %s out of cluster config", instance.name)
7456 lu.cfg.RemoveInstance(instance.name)
7458 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7459 "Instance lock removal conflict"
7461 # Remove lock for the instance
7462 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7465 class LUInstanceQuery(NoHooksLU):
7466 """Logical unit for querying instances.
7469 # pylint: disable=W0142
7472 def CheckArguments(self):
7473 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7474 self.op.output_fields, self.op.use_locking)
7476 def ExpandNames(self):
7477 self.iq.ExpandNames(self)
7479 def DeclareLocks(self, level):
7480 self.iq.DeclareLocks(self, level)
7482 def Exec(self, feedback_fn):
7483 return self.iq.OldStyleQuery(self)
7486 class LUInstanceFailover(LogicalUnit):
7487 """Failover an instance.
7490 HPATH = "instance-failover"
7491 HTYPE = constants.HTYPE_INSTANCE
7494 def CheckArguments(self):
7495 """Check the arguments.
7498 self.iallocator = getattr(self.op, "iallocator", None)
7499 self.target_node = getattr(self.op, "target_node", None)
7501 def ExpandNames(self):
7502 self._ExpandAndLockInstance()
7504 if self.op.target_node is not None:
7505 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7507 self.needed_locks[locking.LEVEL_NODE] = []
7508 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7510 self.needed_locks[locking.LEVEL_NODE_RES] = []
7511 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7513 ignore_consistency = self.op.ignore_consistency
7514 shutdown_timeout = self.op.shutdown_timeout
7515 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7516 cleanup=False,
7517 failover=True,
7518 ignore_consistency=ignore_consistency,
7519 shutdown_timeout=shutdown_timeout,
7520 ignore_ipolicy=self.op.ignore_ipolicy)
7521 self.tasklets = [self._migrater]
7523 def DeclareLocks(self, level):
7524 if level == locking.LEVEL_NODE:
7525 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7526 if instance.disk_template in constants.DTS_EXT_MIRROR:
7527 if self.op.target_node is None:
7528 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7530 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7531 self.op.target_node]
7532 del self.recalculate_locks[locking.LEVEL_NODE]
7534 self._LockInstancesNodes()
7535 elif level == locking.LEVEL_NODE_RES:
7537 self.needed_locks[locking.LEVEL_NODE_RES] = \
7538 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7540 def BuildHooksEnv(self):
7543 This runs on master, primary and secondary nodes of the instance.
7546 instance = self._migrater.instance
7547 source_node = instance.primary_node
7548 target_node = self.op.target_node
7550 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7551 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7552 "OLD_PRIMARY": source_node,
7553 "NEW_PRIMARY": target_node,
7556 if instance.disk_template in constants.DTS_INT_MIRROR:
7557 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7558 env["NEW_SECONDARY"] = source_node
7560 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7562 env.update(_BuildInstanceHookEnvByObject(self, instance))
7566 def BuildHooksNodes(self):
7567 """Build hooks nodes.
7570 instance = self._migrater.instance
7571 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7572 return (nl, nl + [instance.primary_node])
7575 class LUInstanceMigrate(LogicalUnit):
7576 """Migrate an instance.
7578 This is migration without shutting down, compared to the failover,
7579 which is done with shutdown.
7582 HPATH = "instance-migrate"
7583 HTYPE = constants.HTYPE_INSTANCE
7586 def ExpandNames(self):
7587 self._ExpandAndLockInstance()
7589 if self.op.target_node is not None:
7590 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7592 self.needed_locks[locking.LEVEL_NODE] = []
7593 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7595 self.needed_locks[locking.LEVEL_NODE] = []
7596 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7599 TLMigrateInstance(self, self.op.instance_name,
7600 cleanup=self.op.cleanup,
7602 fallback=self.op.allow_failover,
7603 allow_runtime_changes=self.op.allow_runtime_changes,
7604 ignore_ipolicy=self.op.ignore_ipolicy)
7605 self.tasklets = [self._migrater]
7607 def DeclareLocks(self, level):
7608 if level == locking.LEVEL_NODE:
7609 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7610 if instance.disk_template in constants.DTS_EXT_MIRROR:
7611 if self.op.target_node is None:
7612 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7614 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7615 self.op.target_node]
7616 del self.recalculate_locks[locking.LEVEL_NODE]
7618 self._LockInstancesNodes()
7619 elif level == locking.LEVEL_NODE_RES:
7621 self.needed_locks[locking.LEVEL_NODE_RES] = \
7622 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7624 def BuildHooksEnv(self):
7627 This runs on master, primary and secondary nodes of the instance.
7630 instance = self._migrater.instance
7631 source_node = instance.primary_node
7632 target_node = self.op.target_node
7633 env = _BuildInstanceHookEnvByObject(self, instance)
7635 "MIGRATE_LIVE": self._migrater.live,
7636 "MIGRATE_CLEANUP": self.op.cleanup,
7637 "OLD_PRIMARY": source_node,
7638 "NEW_PRIMARY": target_node,
7639 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7642 if instance.disk_template in constants.DTS_INT_MIRROR:
7643 env["OLD_SECONDARY"] = target_node
7644 env["NEW_SECONDARY"] = source_node
7646 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7650 def BuildHooksNodes(self):
7651 """Build hooks nodes.
7654 instance = self._migrater.instance
7655 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7656 return (nl, nl + [instance.primary_node])
7659 class LUInstanceMove(LogicalUnit):
7660 """Move an instance by data-copying.
7663 HPATH = "instance-move"
7664 HTYPE = constants.HTYPE_INSTANCE
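# Usage note (added for illustration): this LU backs the "gnt-instance move"
# command (typically invoked as "gnt-instance move -n <target_node> <instance>").
# The data is copied disk by disk in Exec() via blockdev_export, which is why
# CheckPrereq only accepts plain LV and file based disk layouts.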
7667 def ExpandNames(self):
7668 self._ExpandAndLockInstance()
7669 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7670 self.op.target_node = target_node
7671 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7672 self.needed_locks[locking.LEVEL_NODE_RES] = []
7673 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7675 def DeclareLocks(self, level):
7676 if level == locking.LEVEL_NODE:
7677 self._LockInstancesNodes(primary_only=True)
7678 elif level == locking.LEVEL_NODE_RES:
7680 self.needed_locks[locking.LEVEL_NODE_RES] = \
7681 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7683 def BuildHooksEnv(self):
7686 This runs on master, primary and secondary nodes of the instance.
7690 "TARGET_NODE": self.op.target_node,
7691 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7693 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7696 def BuildHooksNodes(self):
7697 """Build hooks nodes.
7701 self.cfg.GetMasterNode(),
7702 self.instance.primary_node,
7703 self.op.target_node,
7707 def CheckPrereq(self):
7708 """Check prerequisites.
7710 This checks that the instance is in the cluster.
7713 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7714 assert self.instance is not None, \
7715 "Cannot retrieve locked instance %s" % self.op.instance_name
7717 node = self.cfg.GetNodeInfo(self.op.target_node)
7718 assert node is not None, \
7719 "Cannot retrieve locked node %s" % self.op.target_node
7721 self.target_node = target_node = node.name
7723 if target_node == instance.primary_node:
7724 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7725 (instance.name, target_node),
7728 bep = self.cfg.GetClusterInfo().FillBE(instance)
7730 for idx, dsk in enumerate(instance.disks):
7731 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7732 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7733 " cannot copy" % idx, errors.ECODE_STATE)
7735 _CheckNodeOnline(self, target_node)
7736 _CheckNodeNotDrained(self, target_node)
7737 _CheckNodeVmCapable(self, target_node)
7738 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7739 self.cfg.GetNodeGroup(node.group))
7740 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7741 ignore=self.op.ignore_ipolicy)
7743 if instance.admin_state == constants.ADMINST_UP:
7744 # check memory requirements on the secondary node
7745 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7746 instance.name, bep[constants.BE_MAXMEM],
7747 instance.hypervisor)
7749 self.LogInfo("Not checking memory on the secondary node as"
7750 " instance will not be started")
7752 # check bridge existence
7753 _CheckInstanceBridgesExist(self, instance, node=target_node)
7755 def Exec(self, feedback_fn):
7756 """Move an instance.
7758 The move is done by shutting it down on its present node, copying
7759 the data over (slow) and starting it on the new node.
7762 instance = self.instance
7764 source_node = instance.primary_node
7765 target_node = self.target_node
7767 self.LogInfo("Shutting down instance %s on source node %s",
7768 instance.name, source_node)
7770 assert (self.owned_locks(locking.LEVEL_NODE) ==
7771 self.owned_locks(locking.LEVEL_NODE_RES))
7773 result = self.rpc.call_instance_shutdown(source_node, instance,
7774 self.op.shutdown_timeout)
7775 msg = result.fail_msg
7777 if self.op.ignore_consistency:
7778 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7779 " Proceeding anyway. Please make sure node"
7780 " %s is down. Error details: %s",
7781 instance.name, source_node, source_node, msg)
7783 raise errors.OpExecError("Could not shutdown instance %s on"
7785 (instance.name, source_node, msg))
7787 # create the target disks
7789 _CreateDisks(self, instance, target_node=target_node)
7790 except errors.OpExecError:
7791 self.LogWarning("Device creation failed, reverting...")
7793 _RemoveDisks(self, instance, target_node=target_node)
7795 self.cfg.ReleaseDRBDMinors(instance.name)
7798 cluster_name = self.cfg.GetClusterInfo().cluster_name
7801 # activate, get path, copy the data over
7802 for idx, disk in enumerate(instance.disks):
7803 self.LogInfo("Copying data for disk %d", idx)
7804 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7805 instance.name, True, idx)
7807 self.LogWarning("Can't assemble newly created disk %d: %s",
7808 idx, result.fail_msg)
7809 errs.append(result.fail_msg)
7811 dev_path = result.payload
7812 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7813 target_node, dev_path,
7816 self.LogWarning("Can't copy data over for disk %d: %s",
7817 idx, result.fail_msg)
7818 errs.append(result.fail_msg)
7822 self.LogWarning("Some disks failed to copy, aborting")
7824 _RemoveDisks(self, instance, target_node=target_node)
7826 self.cfg.ReleaseDRBDMinors(instance.name)
7827 raise errors.OpExecError("Errors during disk copy: %s" %
7830 instance.primary_node = target_node
7831 self.cfg.Update(instance, feedback_fn)
7833 self.LogInfo("Removing the disks on the original node")
7834 _RemoveDisks(self, instance, target_node=source_node)
7836 # Only start the instance if it's marked as up
7837 if instance.admin_state == constants.ADMINST_UP:
7838 self.LogInfo("Starting instance %s on node %s",
7839 instance.name, target_node)
7841 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7842 ignore_secondaries=True)
7844 _ShutdownInstanceDisks(self, instance)
7845 raise errors.OpExecError("Can't activate the instance's disks")
7847 result = self.rpc.call_instance_start(target_node,
7848 (instance, None, None), False)
7849 msg = result.fail_msg
7851 _ShutdownInstanceDisks(self, instance)
7852 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7853 (instance.name, target_node, msg))
7856 class LUNodeMigrate(LogicalUnit):
7857 """Migrate all instances from a node.
7860 HPATH = "node-migrate"
7861 HTYPE = constants.HTYPE_NODE
7864 def CheckArguments(self):
7867 def ExpandNames(self):
7868 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7870 self.share_locks = _ShareAll()
7871 self.needed_locks = {
7872 locking.LEVEL_NODE: [self.op.node_name],
7875 def BuildHooksEnv(self):
7878 This runs on the master, the primary and all the secondaries.
7882 "NODE_NAME": self.op.node_name,
7883 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7886 def BuildHooksNodes(self):
7887 """Build hooks nodes.
7890 nl = [self.cfg.GetMasterNode()]
7893 def CheckPrereq(self):
7896 def Exec(self, feedback_fn):
7897 # Prepare jobs for migration instances
7898 allow_runtime_changes = self.op.allow_runtime_changes
7900 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7903 iallocator=self.op.iallocator,
7904 target_node=self.op.target_node,
7905 allow_runtime_changes=allow_runtime_changes,
7906 ignore_ipolicy=self.op.ignore_ipolicy)]
7907 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7910 # TODO: Run iallocator in this opcode and pass correct placement options to
7911 # OpInstanceMigrate. Since other jobs can modify the cluster between
7912 # running the iallocator and the actual migration, a good consistency model
7913 # will have to be found.
7915 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7916 frozenset([self.op.node_name]))
7918 return ResultWithJobs(jobs)
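# Illustrative note: this is what "gnt-node migrate <node>" ends up running --
# one separate OpInstanceMigrate job per primary instance of the node, so the
# individual migrations are scheduled (and can fail) independently of each other.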
7921 class TLMigrateInstance(Tasklet):
7922 """Tasklet class for instance migration.
7925 @ivar live: whether the migration will be done live or non-live;
7926 this variable is initialized only after CheckPrereq has run
7927 @type cleanup: boolean
7928 @ivar cleanup: Whether we clean up after a failed migration
7929 @type iallocator: string
7930 @ivar iallocator: The iallocator used to determine target_node
7931 @type target_node: string
7932 @ivar target_node: If given, the target_node to reallocate the instance to
7933 @type failover: boolean
7934 @ivar failover: Whether operation results in failover or migration
7935 @type fallback: boolean
7936 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
7938 @type ignore_consistency: boolean
7939 @ivar ignore_consistency: Whether we should ignore consistency between source and target node
7941 @type shutdown_timeout: int
7942 @ivar shutdown_timeout: Timeout of the instance shutdown in case of failover
7943 @type ignore_ipolicy: bool
7944 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7949 _MIGRATION_POLL_INTERVAL = 1 # seconds
7950 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7952 def __init__(self, lu, instance_name, cleanup=False,
7953 failover=False, fallback=False,
7954 ignore_consistency=False,
7955 allow_runtime_changes=True,
7956 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7957 ignore_ipolicy=False):
7958 """Initializes this class.
7961 Tasklet.__init__(self, lu)
7964 self.instance_name = instance_name
7965 self.cleanup = cleanup
7966 self.live = False # will be overridden later
7967 self.failover = failover
7968 self.fallback = fallback
7969 self.ignore_consistency = ignore_consistency
7970 self.shutdown_timeout = shutdown_timeout
7971 self.ignore_ipolicy = ignore_ipolicy
7972 self.allow_runtime_changes = allow_runtime_changes
7974 def CheckPrereq(self):
7975 """Check prerequisites.
7977 This checks that the instance is in the cluster.
7980 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7981 instance = self.cfg.GetInstanceInfo(instance_name)
7982 assert instance is not None
7983 self.instance = instance
7984 cluster = self.cfg.GetClusterInfo()
7986 if (not self.cleanup and
7987 not instance.admin_state == constants.ADMINST_UP and
7988 not self.failover and self.fallback):
7989 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7990 " switching to failover")
7991 self.failover = True
7993 if instance.disk_template not in constants.DTS_MIRRORED:
7998 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7999 " %s" % (instance.disk_template, text),
8002 if instance.disk_template in constants.DTS_EXT_MIRROR:
8003 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8005 if self.lu.op.iallocator:
8006 self._RunAllocator()
8008 # We set self.target_node as it is required by
8010 self.target_node = self.lu.op.target_node
8012 # Check that the target node is correct in terms of instance policy
8013 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8014 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8015 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8016 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8017 ignore=self.ignore_ipolicy)
8019 # self.target_node is already populated, either directly or by the
8021 target_node = self.target_node
8022 if self.target_node == instance.primary_node:
8023 raise errors.OpPrereqError("Cannot migrate instance %s"
8024 " to its primary (%s)" %
8025 (instance.name, instance.primary_node))
8027 if len(self.lu.tasklets) == 1:
8028 # It is safe to release locks only when we're the only tasklet
8030 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8031 keep=[instance.primary_node, self.target_node])
8034 secondary_nodes = instance.secondary_nodes
8035 if not secondary_nodes:
8036 raise errors.ConfigurationError("No secondary node but using"
8037 " %s disk template" %
8038 instance.disk_template)
8039 target_node = secondary_nodes[0]
8040 if self.lu.op.iallocator or (self.lu.op.target_node and
8041 self.lu.op.target_node != target_node):
8043 text = "failed over"
8046 raise errors.OpPrereqError("Instances with disk template %s cannot"
8047 " be %s to arbitrary nodes"
8048 " (neither an iallocator nor a target"
8049 " node can be passed)" %
8050 (instance.disk_template, text),
8052 nodeinfo = self.cfg.GetNodeInfo(target_node)
8053 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8054 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8055 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8056 ignore=self.ignore_ipolicy)
8058 i_be = cluster.FillBE(instance)
8060 # check memory requirements on the secondary node
8061 if (not self.cleanup and
8062 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8063 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8064 "migrating instance %s" %
8066 i_be[constants.BE_MINMEM],
8067 instance.hypervisor)
8069 self.lu.LogInfo("Not checking memory on the secondary node as"
8070 " instance will not be started")
8072 # check if failover must be forced instead of migration
8073 if (not self.cleanup and not self.failover and
8074 i_be[constants.BE_ALWAYS_FAILOVER]):
8075 self.lu.LogInfo("Instance configured to always failover; fallback"
8077 self.failover = True
8079 # check bridge existence
8080 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8082 if not self.cleanup:
8083 _CheckNodeNotDrained(self.lu, target_node)
8084 if not self.failover:
8085 result = self.rpc.call_instance_migratable(instance.primary_node,
8087 if result.fail_msg and self.fallback:
8088 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8090 self.failover = True
8092 result.Raise("Can't migrate, please use failover",
8093 prereq=True, ecode=errors.ECODE_STATE)
8095 assert not (self.failover and self.cleanup)
8097 if not self.failover:
8098 if self.lu.op.live is not None and self.lu.op.mode is not None:
8099 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8100 " parameters are accepted",
8102 if self.lu.op.live is not None:
8104 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8106 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8107 # reset the 'live' parameter to None so that repeated
8108 # invocations of CheckPrereq do not raise an exception
8109 self.lu.op.live = None
8110 elif self.lu.op.mode is None:
8111 # read the default value from the hypervisor
8112 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8113 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8115 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8117 # Failover is never live
8120 if not (self.failover or self.cleanup):
8121 remote_info = self.rpc.call_instance_info(instance.primary_node,
8123 instance.hypervisor)
8124 remote_info.Raise("Error checking instance on node %s" %
8125 instance.primary_node)
8126 instance_running = bool(remote_info.payload)
8127 if instance_running:
8128 self.current_mem = int(remote_info.payload["memory"])
8130 def _RunAllocator(self):
8131 """Run the allocator based on input opcode.
8134 # FIXME: add a self.ignore_ipolicy option
8135 ial = IAllocator(self.cfg, self.rpc,
8136 mode=constants.IALLOCATOR_MODE_RELOC,
8137 name=self.instance_name,
8138 relocate_from=[self.instance.primary_node],
8141 ial.Run(self.lu.op.iallocator)
8144 raise errors.OpPrereqError("Can't compute nodes using"
8145 " iallocator '%s': %s" %
8146 (self.lu.op.iallocator, ial.info),
8148 if len(ial.result) != ial.required_nodes:
8149 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8150 " of nodes (%s), required %s" %
8151 (self.lu.op.iallocator, len(ial.result),
8152 ial.required_nodes), errors.ECODE_FAULT)
8153 self.target_node = ial.result[0]
8154 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8155 self.instance_name, self.lu.op.iallocator,
8156 utils.CommaJoin(ial.result))
8158 def _WaitUntilSync(self):
8159 """Poll with custom rpc for disk sync.
8161 This uses our own step-based rpc call.
8164 self.feedback_fn("* wait until resync is done")
8168 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8170 (self.instance.disks,
8173 for node, nres in result.items():
8174 nres.Raise("Cannot resync disks on node %s" % node)
8175 node_done, node_percent = nres.payload
8176 all_done = all_done and node_done
8177 if node_percent is not None:
8178 min_percent = min(min_percent, node_percent)
8180 if min_percent < 100:
8181 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8184 def _EnsureSecondary(self, node):
8185 """Demote a node to secondary.
8188 self.feedback_fn("* switching node %s to secondary mode" % node)
8190 for dev in self.instance.disks:
8191 self.cfg.SetDiskID(dev, node)
8193 result = self.rpc.call_blockdev_close(node, self.instance.name,
8194 self.instance.disks)
8195 result.Raise("Cannot change disk to secondary on node %s" % node)
8197 def _GoStandalone(self):
8198 """Disconnect from the network.
8201 self.feedback_fn("* changing into standalone mode")
8202 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8203 self.instance.disks)
8204 for node, nres in result.items():
8205 nres.Raise("Cannot disconnect disks node %s" % node)
8207 def _GoReconnect(self, multimaster):
8208 """Reconnect to the network.
8214 msg = "single-master"
8215 self.feedback_fn("* changing disks into %s mode" % msg)
8216 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8217 (self.instance.disks, self.instance),
8218 self.instance.name, multimaster)
8219 for node, nres in result.items():
8220 nres.Raise("Cannot change disks config on node %s" % node)
8222 def _ExecCleanup(self):
8223 """Try to cleanup after a failed migration.
8225 The cleanup is done by:
8226 - check that the instance is running only on one node
8227 (and update the config if needed)
8228 - change disks on its secondary node to secondary
8229 - wait until disks are fully synchronized
8230 - disconnect from the network
8231 - change disks into single-master mode
8232 - wait again until disks are fully synchronized
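
This is the code path behind "gnt-instance migrate --cleanup": LUInstanceMigrate
passes cleanup=True down into this tasklet, and Exec() dispatches here instead
of starting a new migration.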
8235 instance = self.instance
8236 target_node = self.target_node
8237 source_node = self.source_node
8239 # check running on only one node
8240 self.feedback_fn("* checking where the instance actually runs"
8241 " (if this hangs, the hypervisor might be in"
8243 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8244 for node, result in ins_l.items():
8245 result.Raise("Can't contact node %s" % node)
8247 runningon_source = instance.name in ins_l[source_node].payload
8248 runningon_target = instance.name in ins_l[target_node].payload
8250 if runningon_source and runningon_target:
8251 raise errors.OpExecError("Instance seems to be running on two nodes,"
8252 " or the hypervisor is confused; you will have"
8253 " to ensure manually that it runs only on one"
8254 " and restart this operation")
8256 if not (runningon_source or runningon_target):
8257 raise errors.OpExecError("Instance does not seem to be running at all;"
8258 " in this case it's safer to repair by"
8259 " running 'gnt-instance stop' to ensure disk"
8260 " shutdown, and then restarting it")
8262 if runningon_target:
8263 # the migration has actually succeeded, we need to update the config
8264 self.feedback_fn("* instance running on secondary node (%s),"
8265 " updating config" % target_node)
8266 instance.primary_node = target_node
8267 self.cfg.Update(instance, self.feedback_fn)
8268 demoted_node = source_node
8270 self.feedback_fn("* instance confirmed to be running on its"
8271 " primary node (%s)" % source_node)
8272 demoted_node = target_node
8274 if instance.disk_template in constants.DTS_INT_MIRROR:
8275 self._EnsureSecondary(demoted_node)
8277 self._WaitUntilSync()
8278 except errors.OpExecError:
8279 # we ignore errors here, since if the device is standalone, it
8280 # won't be able to sync
8282 self._GoStandalone()
8283 self._GoReconnect(False)
8284 self._WaitUntilSync()
8286 self.feedback_fn("* done")
8288 def _RevertDiskStatus(self):
8289 """Try to revert the disk status after a failed migration.
8292 target_node = self.target_node
8293 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8297 self._EnsureSecondary(target_node)
8298 self._GoStandalone()
8299 self._GoReconnect(False)
8300 self._WaitUntilSync()
8301 except errors.OpExecError, err:
8302 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8303 " please try to recover the instance manually;"
8304 " error '%s'" % str(err))
8306 def _AbortMigration(self):
8307 """Call the hypervisor code to abort a started migration.
8310 instance = self.instance
8311 target_node = self.target_node
8312 source_node = self.source_node
8313 migration_info = self.migration_info
8315 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8319 abort_msg = abort_result.fail_msg
8321 logging.error("Aborting migration failed on target node %s: %s",
8322 target_node, abort_msg)
8323 # Don't raise an exception here, as we still have to try to revert the
8324 # disk status, even if this step failed.
8326 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8327 instance, False, self.live)
8328 abort_msg = abort_result.fail_msg
8330 logging.error("Aborting migration failed on source node %s: %s",
8331 source_node, abort_msg)
8333 def _ExecMigration(self):
8334 """Migrate an instance.
8336 The migrate is done by:
8337 - change the disks into dual-master mode
8338 - wait until disks are fully synchronized again
8339 - migrate the instance
8340 - change disks on the new secondary node (the old primary) to secondary
8341 - wait until disks are fully synchronized
8342 - change disks into single-master mode
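
Note that for externally mirrored disk templates (constants.DTS_EXT_MIRROR,
e.g. RBD) the disk mode-switching steps are skipped; see the disk_template
checks further down in this method.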
8345 instance = self.instance
8346 target_node = self.target_node
8347 source_node = self.source_node
8349 # Check for hypervisor version mismatch and warn the user.
8350 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8351 None, [self.instance.hypervisor])
8352 for ninfo in nodeinfo.values():
8353 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8355 (_, _, (src_info, )) = nodeinfo[source_node].payload
8356 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8358 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8359 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8360 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8361 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8362 if src_version != dst_version:
8363 self.feedback_fn("* warning: hypervisor version mismatch between"
8364 " source (%s) and target (%s) node" %
8365 (src_version, dst_version))
8367 self.feedback_fn("* checking disk consistency between source and target")
8368 for (idx, dev) in enumerate(instance.disks):
8369 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8370 raise errors.OpExecError("Disk %s is degraded or not fully"
8371 " synchronized on target node,"
8372 " aborting migration" % idx)
8374 if self.current_mem > self.tgt_free_mem:
8375 if not self.allow_runtime_changes:
8376 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8377 " free memory to fit instance %s on target"
8378 " node %s (have %dMB, need %dMB)" %
8379 (instance.name, target_node,
8380 self.tgt_free_mem, self.current_mem))
8381 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8382 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8385 rpcres.Raise("Cannot modify instance runtime memory")
8387 # First get the migration information from the remote node
8388 result = self.rpc.call_migration_info(source_node, instance)
8389 msg = result.fail_msg
8391 log_err = ("Failed fetching source migration information from %s: %s" %
8393 logging.error(log_err)
8394 raise errors.OpExecError(log_err)
8396 self.migration_info = migration_info = result.payload
8398 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8399 # Then switch the disks to master/master mode
8400 self._EnsureSecondary(target_node)
8401 self._GoStandalone()
8402 self._GoReconnect(True)
8403 self._WaitUntilSync()
8405 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8406 result = self.rpc.call_accept_instance(target_node,
8409 self.nodes_ip[target_node])
8411 msg = result.fail_msg
8413 logging.error("Instance pre-migration failed, trying to revert"
8414 " disk status: %s", msg)
8415 self.feedback_fn("Pre-migration failed, aborting")
8416 self._AbortMigration()
8417 self._RevertDiskStatus()
8418 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8419 (instance.name, msg))
8421 self.feedback_fn("* migrating instance to %s" % target_node)
8422 result = self.rpc.call_instance_migrate(source_node, instance,
8423 self.nodes_ip[target_node],
8425 msg = result.fail_msg
8427 logging.error("Instance migration failed, trying to revert"
8428 " disk status: %s", msg)
8429 self.feedback_fn("Migration failed, aborting")
8430 self._AbortMigration()
8431 self._RevertDiskStatus()
8432 raise errors.OpExecError("Could not migrate instance %s: %s" %
8433 (instance.name, msg))
8435 self.feedback_fn("* starting memory transfer")
8436 last_feedback = time.time()
8438 result = self.rpc.call_instance_get_migration_status(source_node,
8440 msg = result.fail_msg
8441 ms = result.payload # MigrationStatus instance
8442 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8443 logging.error("Instance migration failed, trying to revert"
8444 " disk status: %s", msg)
8445 self.feedback_fn("Migration failed, aborting")
8446 self._AbortMigration()
8447 self._RevertDiskStatus()
8448 raise errors.OpExecError("Could not migrate instance %s: %s" %
8449 (instance.name, msg))
8451 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8452 self.feedback_fn("* memory transfer complete")
8455 if (utils.TimeoutExpired(last_feedback,
8456 self._MIGRATION_FEEDBACK_INTERVAL) and
8457 ms.transferred_ram is not None):
8458 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8459 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8460 last_feedback = time.time()
8462 time.sleep(self._MIGRATION_POLL_INTERVAL)
8464 result = self.rpc.call_instance_finalize_migration_src(source_node,
8468 msg = result.fail_msg
8470 logging.error("Instance migration succeeded, but finalization failed"
8471 " on the source node: %s", msg)
8472 raise errors.OpExecError("Could not finalize instance migration: %s" %
8475 instance.primary_node = target_node
8477 # distribute new instance config to the other nodes
8478 self.cfg.Update(instance, self.feedback_fn)
8480 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8484 msg = result.fail_msg
8486 logging.error("Instance migration succeeded, but finalization failed"
8487 " on the target node: %s", msg)
8488 raise errors.OpExecError("Could not finalize instance migration: %s" %
8491 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8492 self._EnsureSecondary(source_node)
8493 self._WaitUntilSync()
8494 self._GoStandalone()
8495 self._GoReconnect(False)
8496 self._WaitUntilSync()
8498 # If the instance's disk template is `rbd' and there was a successful
8499 # migration, unmap the device from the source node.
8500 if self.instance.disk_template == constants.DT_RBD:
8501 disks = _ExpandCheckDisks(instance, instance.disks)
8502 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8504 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8505 msg = result.fail_msg
8507 logging.error("Migration was successful, but couldn't unmap the"
8508 " block device %s on source node %s: %s",
8509 disk.iv_name, source_node, msg)
8510 logging.error("You need to unmap the device %s manually on %s",
8511 disk.iv_name, source_node)
8513 self.feedback_fn("* done")
8515 def _ExecFailover(self):
8516 """Failover an instance.
8518 The failover is done by shutting it down on its present node and
8519 starting it on the secondary.
8522 instance = self.instance
8523 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8525 source_node = instance.primary_node
8526 target_node = self.target_node
8528 if instance.admin_state == constants.ADMINST_UP:
8529 self.feedback_fn("* checking disk consistency between source and target")
8530 for (idx, dev) in enumerate(instance.disks):
8531 # for drbd, these are drbd over lvm
8532 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8534 if primary_node.offline:
8535 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8537 (primary_node.name, idx, target_node))
8538 elif not self.ignore_consistency:
8539 raise errors.OpExecError("Disk %s is degraded on target node,"
8540 " aborting failover" % idx)
8542 self.feedback_fn("* not checking disk consistency as instance is not"
8545 self.feedback_fn("* shutting down instance on source node")
8546 logging.info("Shutting down instance %s on node %s",
8547 instance.name, source_node)
8549 result = self.rpc.call_instance_shutdown(source_node, instance,
8550 self.shutdown_timeout)
8551 msg = result.fail_msg
8553 if self.ignore_consistency or primary_node.offline:
8554 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8555 " proceeding anyway; please make sure node"
8556 " %s is down; error details: %s",
8557 instance.name, source_node, source_node, msg)
8559 raise errors.OpExecError("Could not shutdown instance %s on"
8561 (instance.name, source_node, msg))
8563 self.feedback_fn("* deactivating the instance's disks on source node")
8564 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8565 raise errors.OpExecError("Can't shut down the instance's disks")
8567 instance.primary_node = target_node
8568 # distribute new instance config to the other nodes
8569 self.cfg.Update(instance, self.feedback_fn)
8571 # Only start the instance if it's marked as up
8572 if instance.admin_state == constants.ADMINST_UP:
8573 self.feedback_fn("* activating the instance's disks on target node %s" %
8575 logging.info("Starting instance %s on node %s",
8576 instance.name, target_node)
8578 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8579 ignore_secondaries=True)
8581 _ShutdownInstanceDisks(self.lu, instance)
8582 raise errors.OpExecError("Can't activate the instance's disks")
8584 self.feedback_fn("* starting the instance on the target node %s" %
8586 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8588 msg = result.fail_msg
8590 _ShutdownInstanceDisks(self.lu, instance)
8591 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8592 (instance.name, target_node, msg))
8594 def Exec(self, feedback_fn):
8595 """Perform the migration.
8598 self.feedback_fn = feedback_fn
8599 self.source_node = self.instance.primary_node
8601 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8602 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8603 self.target_node = self.instance.secondary_nodes[0]
8604 # Otherwise self.target_node has been populated either
8605 # directly, or through an iallocator.
8607 self.all_nodes = [self.source_node, self.target_node]
8608 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8609 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8612 feedback_fn("Failover instance %s" % self.instance.name)
8613 self._ExecFailover()
8615 feedback_fn("Migrating instance %s" % self.instance.name)
8618 return self._ExecCleanup()
8620 return self._ExecMigration()
8623 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8625 """Wrapper around L{_CreateBlockDevInner}.
8627 This method annotates the root device first.
8630 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8631 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8635 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8637 """Create a tree of block devices on a given node.
8639 If this device type has to be created on secondaries, create it and
8642 If not, just recurse to children keeping the same 'force' value.
8644 @attention: The device has to be annotated already.
8646 @param lu: the lu on whose behalf we execute
8647 @param node: the node on which to create the device
8648 @type instance: L{objects.Instance}
8649 @param instance: the instance which owns the device
8650 @type device: L{objects.Disk}
8651 @param device: the device to create
8652 @type force_create: boolean
8653 @param force_create: whether to force creation of this device; this
8654 will be changed to True whenever we find a device which has
8655 CreateOnSecondary() attribute
8656 @param info: the extra 'metadata' we should attach to the device
8657 (this will be represented as a LVM tag)
8658 @type force_open: boolean
8659 @param force_open: this parameter will be passed to the
8660 L{backend.BlockdevCreate} function where it specifies
8661 whether we run on primary or not, and it affects both
8662 the child assembly and the device's own Open() execution
8665 if device.CreateOnSecondary():
8669 for child in device.children:
8670 _CreateBlockDevInner(lu, node, instance, child, force_create,
8673 if not force_create:
8676 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
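# Illustrative walk-through (based on the callers visible in this file):
# _CreateDisks() invokes this with force_create=True only on the instance's
# primary node, so on a secondary node a device subtree is only created if
# some device in it answers CreateOnSecondary(); that flag is then propagated
# down to all of its children via force_create.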
8679 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8680 """Create a single block device on a given node.
8682 This will not recurse over children of the device, so they must be created in advance.
8685 @param lu: the lu on whose behalf we execute
8686 @param node: the node on which to create the device
8687 @type instance: L{objects.Instance}
8688 @param instance: the instance which owns the device
8689 @type device: L{objects.Disk}
8690 @param device: the device to create
8691 @param info: the extra 'metadata' we should attach to the device
8692 (this will be represented as a LVM tag)
8693 @type force_open: boolean
8694 @param force_open: this parameter will be passed to the
8695 L{backend.BlockdevCreate} function where it specifies
8696 whether we run on primary or not, and it affects both
8697 the child assembly and the device's own Open() execution
8700 lu.cfg.SetDiskID(device, node)
8701 result = lu.rpc.call_blockdev_create(node, device, device.size,
8702 instance.name, force_open, info)
8703 result.Raise("Can't create block device %s on"
8704 " node %s for instance %s" % (device, node, instance.name))
8705 if device.physical_id is None:
8706 device.physical_id = result.payload
8709 def _GenerateUniqueNames(lu, exts):
8710 """Generate a suitable LV name.
8712 This will generate a logical volume name for the given instance.
8717 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8718 results.append("%s%s" % (new_id, val))
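# Illustrative result: for exts == [".disk0", ".disk1"] this returns one name
# per extension of the form "<generated-uuid>.disk0", "<generated-uuid>.disk1";
# the DRBD code path below then appends "_data"/"_meta" to such prefixes.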
8722 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8723 iv_name, p_minor, s_minor):
8724 """Generate a drbd8 device complete with its children.
8727 assert len(vgnames) == len(names) == 2
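# Illustrative sketch of the device tree assembled below for one DRBD8 disk:
#
#   LD_DRBD8 (size=size, logical_id=(primary, secondary, port, p_minor,
#             s_minor, shared_secret))
#     +-- LD_LV names[0] on vgnames[0]   (data volume, size=size)
#     +-- LD_LV names[1] on vgnames[1]   (metadata volume, size=DRBD_META_SIZE)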
8728 port = lu.cfg.AllocatePort()
8729 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8731 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8732 logical_id=(vgnames[0], names[0]),
8734 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8735 logical_id=(vgnames[1], names[1]),
8737 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8738 logical_id=(primary, secondary, port,
8741 children=[dev_data, dev_meta],
8742 iv_name=iv_name, params={})
8746 _DISK_TEMPLATE_NAME_PREFIX = {
8747 constants.DT_PLAIN: "",
8748 constants.DT_RBD: ".rbd",
8752 _DISK_TEMPLATE_DEVICE_TYPE = {
8753 constants.DT_PLAIN: constants.LD_LV,
8754 constants.DT_FILE: constants.LD_FILE,
8755 constants.DT_SHARED_FILE: constants.LD_FILE,
8756 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8757 constants.DT_RBD: constants.LD_RBD,
8761 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8762 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8763 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8764 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8765 """Generate the entire disk layout for a given template type.
8768 #TODO: compute space requirements
8770 vgname = lu.cfg.GetVGName()
8771 disk_count = len(disk_info)
8774 if template_name == constants.DT_DISKLESS:
8776 elif template_name == constants.DT_DRBD8:
8777 if len(secondary_nodes) != 1:
8778 raise errors.ProgrammerError("Wrong template configuration")
8779 remote_node = secondary_nodes[0]
8780 minors = lu.cfg.AllocateDRBDMinor(
8781 [primary_node, remote_node] * len(disk_info), instance_name)
8783 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8785 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8788 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8789 for i in range(disk_count)]):
8790 names.append(lv_prefix + "_data")
8791 names.append(lv_prefix + "_meta")
8792 for idx, disk in enumerate(disk_info):
8793 disk_index = idx + base_index
8794 data_vg = disk.get(constants.IDISK_VG, vgname)
8795 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8796 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8797 disk[constants.IDISK_SIZE],
8799 names[idx * 2:idx * 2 + 2],
8800 "disk/%d" % disk_index,
8801 minors[idx * 2], minors[idx * 2 + 1])
8802 disk_dev.mode = disk[constants.IDISK_MODE]
8803 disks.append(disk_dev)
8806 raise errors.ProgrammerError("Wrong template configuration")
8808 if template_name == constants.DT_FILE:
8810 elif template_name == constants.DT_SHARED_FILE:
8811 _req_shr_file_storage()
8813 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8814 if name_prefix is None:
8817 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8818 (name_prefix, base_index + i)
8819 for i in range(disk_count)])
8821 if template_name == constants.DT_PLAIN:
8822 def logical_id_fn(idx, _, disk):
8823 vg = disk.get(constants.IDISK_VG, vgname)
8824 return (vg, names[idx])
8825 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8827 lambda _, disk_index, disk: (file_driver,
8828 "%s/disk%d" % (file_storage_dir,
8830 elif template_name == constants.DT_BLOCK:
8832 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8833 disk[constants.IDISK_ADOPT])
8834 elif template_name == constants.DT_RBD:
8835 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8837 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8839 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8841 for idx, disk in enumerate(disk_info):
8842 disk_index = idx + base_index
8843 size = disk[constants.IDISK_SIZE]
8844 feedback_fn("* disk %s, size %s" %
8845 (disk_index, utils.FormatUnit(size, "h")))
8846 disks.append(objects.Disk(dev_type=dev_type, size=size,
8847 logical_id=logical_id_fn(idx, disk_index, disk),
8848 iv_name="disk/%d" % disk_index,
8849 mode=disk[constants.IDISK_MODE],
8855 def _GetInstanceInfoText(instance):
8856 Compute the text that should be added to the disk's metadata.
8859 return "originstname+%s" % instance.name
8862 def _CalcEta(time_taken, written, total_size):
8863 """Calculates the ETA based on size written and total size.
8865 @param time_taken: The time taken so far
8866 @param written: amount written so far
8867 @param total_size: The total size of data to be written
8868 @return: The remaining time in seconds
8871 avg_time = time_taken / float(written)
8872 return (total_size - written) * avg_time
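# Worked example (hypothetical numbers, added for illustration): if 2048 units
# out of 8192 were written in 60 seconds, avg_time is 60/2048, roughly 0.0293
# seconds per unit, so the ETA is (8192 - 2048) * 0.0293, roughly 180 seconds.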
8875 def _WipeDisks(lu, instance):
8876 """Wipes instance disks.
8878 @type lu: L{LogicalUnit}
8879 @param lu: the logical unit on whose behalf we execute
8880 @type instance: L{objects.Instance}
8881 @param instance: the instance whose disks we should create
8882 @return: the success of the wipe
8885 node = instance.primary_node
8887 for device in instance.disks:
8888 lu.cfg.SetDiskID(device, node)
8890 logging.info("Pause sync of instance %s disks", instance.name)
8891 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8892 (instance.disks, instance),
8894 result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)
8896 for idx, success in enumerate(result.payload):
8898 logging.warn("pause-sync of instance %s for disk %d failed",
8902 for idx, device in enumerate(instance.disks):
8903 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk, but
8904 # at most MAX_WIPE_CHUNK
8905 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8906 constants.MIN_WIPE_CHUNK_PERCENT)
8907 # we _must_ make this an int, otherwise rounding errors will
8909 wipe_chunk_size = int(wipe_chunk_size)
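# Illustrative calculation (constant values assumed, see constants.py): for a
# 102400 MB disk and MIN_WIPE_CHUNK_PERCENT == 10, the raw chunk would be
# 10240 MB; min() then caps it at MAX_WIPE_CHUNK (e.g. 1024 MB), so the disk
# is wiped in roughly 1 GB steps.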
8911 lu.LogInfo("* Wiping disk %d", idx)
8912 logging.info("Wiping disk %d for instance %s, node %s using"
8913 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8918 start_time = time.time()
8920 while offset < size:
8921 wipe_size = min(wipe_chunk_size, size - offset)
8922 logging.debug("Wiping disk %d, offset %s, chunk %s",
8923 idx, offset, wipe_size)
8924 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
8926 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8927 (idx, offset, wipe_size))
8930 if now - last_output >= 60:
8931 eta = _CalcEta(now - start_time, offset, size)
8932 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8933 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8936 logging.info("Resume sync of instance %s disks", instance.name)
8938 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8939 (instance.disks, instance),
8943 lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
8944 " please have a look at the status and troubleshoot"
8945 " the issue: %s", node, result.fail_msg)
8947 for idx, success in enumerate(result.payload):
8949 lu.LogWarning("Resume sync of disk %d failed, please have a"
8950 " look at the status and troubleshoot the issue", idx)
8951 logging.warn("resume-sync of instance %s for disk %d failed",
8955 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8956 """Create all disks for an instance.
8958 This abstracts away some work from AddInstance.
8960 @type lu: L{LogicalUnit}
8961 @param lu: the logical unit on whose behalf we execute
8962 @type instance: L{objects.Instance}
8963 @param instance: the instance whose disks we should create
8965 @param to_skip: list of indices to skip
8966 @type target_node: string
8967 @param target_node: if passed, overrides the target node for creation
8969 @return: the success of the creation
8972 info = _GetInstanceInfoText(instance)
8973 if target_node is None:
8974 pnode = instance.primary_node
8975 all_nodes = instance.all_nodes
8980 if instance.disk_template in constants.DTS_FILEBASED:
8981 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8982 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8984 result.Raise("Failed to create directory '%s' on"
8985 " node %s" % (file_storage_dir, pnode))
8987 # Note: this needs to be kept in sync with adding of disks in
8988 # LUInstanceSetParams
8989 for idx, device in enumerate(instance.disks):
8990 if to_skip and idx in to_skip:
8992 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
8994 for node in all_nodes:
8995 f_create = node == pnode
8996 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8999 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9000 """Remove all disks for an instance.
9002 This abstracts away some work from `AddInstance()` and
9003 `RemoveInstance()`. Note that in case some of the devices couldn't
9004 be removed, the removal will continue with the other ones (compare
9005 with `_CreateDisks()`).
9007 @type lu: L{LogicalUnit}
9008 @param lu: the logical unit on whose behalf we execute
9009 @type instance: L{objects.Instance}
9010 @param instance: the instance whose disks we should remove
9011 @type target_node: string
9012 @param target_node: used to override the node on which to remove the disks
9014 @return: the success of the removal
9017 logging.info("Removing block devices for instance %s", instance.name)
9020 ports_to_release = set()
9021 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9022 for (idx, device) in enumerate(anno_disks):
9024 edata = [(target_node, device)]
9026 edata = device.ComputeNodeTree(instance.primary_node)
9027 for node, disk in edata:
9028 lu.cfg.SetDiskID(disk, node)
9029 result = lu.rpc.call_blockdev_remove(node, disk)
9031 lu.LogWarning("Could not remove disk %s on node %s,"
9032 " continuing anyway: %s", idx, node, result.fail_msg)
9033 if not (result.offline and node != instance.primary_node):
9036 # if this is a DRBD disk, return its port to the pool
9037 if device.dev_type in constants.LDS_DRBD:
9038 ports_to_release.add(device.logical_id[2])
9040 if all_result or ignore_failures:
9041 for port in ports_to_release:
9042 lu.cfg.AddTcpUdpPort(port)
9044 if instance.disk_template == constants.DT_FILE:
9045 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9049 tgt = instance.primary_node
9050 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9052 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9053 file_storage_dir, instance.primary_node, result.fail_msg)
9059 def _ComputeDiskSizePerVG(disk_template, disks):
9060 """Compute disk size requirements in the volume group
9063 def _compute(disks, payload):
9064 """Universal algorithm.
9069 vgs[disk[constants.IDISK_VG]] = \
9070 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9074 # Required free disk space as a function of disk and swap space
9076 constants.DT_DISKLESS: {},
9077 constants.DT_PLAIN: _compute(disks, 0),
9078 # 128 MB are added for drbd metadata for each disk
9079 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9080 constants.DT_FILE: {},
9081 constants.DT_SHARED_FILE: {},
9084 if disk_template not in req_size_dict:
9085 raise errors.ProgrammerError("Disk template '%s' size requirement"
9086 " is unknown" % disk_template)
9088 return req_size_dict[disk_template]
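# Illustrative example (hypothetical input): for disk_template=constants.DT_DRBD8
# and disks=[{constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 1024},
#            {constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 512}]
# the result is {"xenvg": 1024 + 512 + 2 * DRBD_META_SIZE}, i.e. each DRBD
# disk also accounts for its metadata volume in that volume group.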
9091 def _ComputeDiskSize(disk_template, disks):
9092 """Compute disk size requirements according to disk template
9095 # Required free disk space as a function of disk and swap space
9097 constants.DT_DISKLESS: None,
9098 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9099 # 128 MB are added for drbd metadata for each disk
9101 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9102 constants.DT_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9103 constants.DT_SHARED_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9104 constants.DT_BLOCK: 0,
9105 constants.DT_RBD: sum(d[constants.IDISK_SIZE] for d in disks),
9108 if disk_template not in req_size_dict:
9109 raise errors.ProgrammerError("Disk template '%s' size requirement"
9110 " is unknown" % disk_template)
9112 return req_size_dict[disk_template]
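# Illustrative example (hypothetical input): two disks of 1024 and 2048
# megabytes need 3072 MB with constants.DT_PLAIN, 3072 + 2 * DRBD_META_SIZE MB
# with constants.DT_DRBD8, 0 MB with constants.DT_BLOCK (adopted devices), and
# None (no local space) with constants.DT_DISKLESS.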
9115 def _FilterVmNodes(lu, nodenames):
9116 """Filters out non-vm_capable nodes from a list.
9118 @type lu: L{LogicalUnit}
9119 @param lu: the logical unit for which we check
9120 @type nodenames: list
9121 @param nodenames: the list of nodes on which we should check
9123 @return: the list of vm-capable nodes
9126 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9127 return [name for name in nodenames if name not in non_vm_nodes]
9130 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9131 """Hypervisor parameter validation.
9133 This function abstracts the hypervisor parameter validation to be
9134 used in both instance create and instance modify.
9136 @type lu: L{LogicalUnit}
9137 @param lu: the logical unit for which we check
9138 @type nodenames: list
9139 @param nodenames: the list of nodes on which we should check
9140 @type hvname: string
9141 @param hvname: the name of the hypervisor we should use
9142 @type hvparams: dict
9143 @param hvparams: the parameters which we need to check
9144 @raise errors.OpPrereqError: if the parameters are not valid
9147 nodenames = _FilterVmNodes(lu, nodenames)
9149 cluster = lu.cfg.GetClusterInfo()
9150 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9152 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9153 for node in nodenames:
9157 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9160 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9161 """OS parameters validation.
9163 @type lu: L{LogicalUnit}
9164 @param lu: the logical unit for which we check
9165 @type required: boolean
9166 @param required: whether the validation should fail if the OS is not found
9168 @type nodenames: list
9169 @param nodenames: the list of nodes on which we should check
9170 @type osname: string
9171 @param osname: the name of the OS we should use
9172 @type osparams: dict
9173 @param osparams: the parameters which we need to check
9174 @raise errors.OpPrereqError: if the parameters are not valid
9177 nodenames = _FilterVmNodes(lu, nodenames)
9178 result = lu.rpc.call_os_validate(nodenames, required, osname,
9179 [constants.OS_VALIDATE_PARAMETERS],
9181 for node, nres in result.items():
9182 # we don't check for offline cases since this should be run only
9183 # against the master node and/or an instance's nodes
9184 nres.Raise("OS Parameters validation failed on node %s" % node)
9185 if not nres.payload:
9186 lu.LogInfo("OS %s not found on node %s, validation skipped",
9190 class LUInstanceCreate(LogicalUnit):
9191 """Create an instance.
9194 HPATH = "instance-add"
9195 HTYPE = constants.HTYPE_INSTANCE
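# A minimal client-side sketch (parameter set assumed from this module's usage;
# consult opcodes.py for the authoritative list): creating a plain-LVM instance
# typically means submitting something like
#   opcodes.OpInstanceCreate(instance_name="inst1.example.com",
#                            mode=constants.INSTANCE_CREATE,
#                            disk_template=constants.DT_PLAIN,
#                            disks=[{constants.IDISK_SIZE: 10240}],
#                            nics=[{}], os_type="debootstrap",
#                            pnode="node1.example.com")
# CheckArguments below validates exactly this kind of input.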
9198 def CheckArguments(self):
9202 # do not require name_check to ease forward/backward compatibility
9204 if self.op.no_install and self.op.start:
9205 self.LogInfo("No-installation mode selected, disabling startup")
9206 self.op.start = False
9207 # validate/normalize the instance name
9208 self.op.instance_name = \
9209 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9211 if self.op.ip_check and not self.op.name_check:
9212 # TODO: make the ip check more flexible and not depend on the name check
9213 raise errors.OpPrereqError("Cannot do IP address check without a name"
9214 " check", errors.ECODE_INVAL)
9216 # check nics' parameter names
9217 for nic in self.op.nics:
9218 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9220 # check disks. parameter names and consistent adopt/no-adopt strategy
9221 has_adopt = has_no_adopt = False
9222 for disk in self.op.disks:
9223 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9224 if constants.IDISK_ADOPT in disk:
9228 if has_adopt and has_no_adopt:
9229 raise errors.OpPrereqError("Either all disks are adopted or none is",
9232 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9233 raise errors.OpPrereqError("Disk adoption is not supported for the"
9234 " '%s' disk template" %
9235 self.op.disk_template,
9237 if self.op.iallocator is not None:
9238 raise errors.OpPrereqError("Disk adoption not allowed with an"
9239 " iallocator script", errors.ECODE_INVAL)
9240 if self.op.mode == constants.INSTANCE_IMPORT:
9241 raise errors.OpPrereqError("Disk adoption not allowed for"
9242 " instance import", errors.ECODE_INVAL)
9244 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9245 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9246 " but no 'adopt' parameter given" %
9247 self.op.disk_template,
9250 self.adopt_disks = has_adopt
9252 # instance name verification
9253 if self.op.name_check:
9254 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9255 self.op.instance_name = self.hostname1.name
9256 # used in CheckPrereq for ip ping check
9257 self.check_ip = self.hostname1.ip
9259 self.check_ip = None
9261 # file storage checks
9262 if (self.op.file_driver and
9263 not self.op.file_driver in constants.FILE_DRIVER):
9264 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9265 self.op.file_driver, errors.ECODE_INVAL)
9267 if self.op.disk_template == constants.DT_FILE:
9268 opcodes.RequireFileStorage()
9269 elif self.op.disk_template == constants.DT_SHARED_FILE:
9270 opcodes.RequireSharedFileStorage()
9272 ### Node/iallocator related checks
9273 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9275 if self.op.pnode is not None:
9276 if self.op.disk_template in constants.DTS_INT_MIRROR:
9277 if self.op.snode is None:
9278 raise errors.OpPrereqError("The networked disk templates need"
9279 " a mirror node", errors.ECODE_INVAL)
9281 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9283 self.op.snode = None
9285 self._cds = _GetClusterDomainSecret()
9287 if self.op.mode == constants.INSTANCE_IMPORT:
9288 # On import force_variant must be True, because if we forced it at
9289 # initial install, our only chance when importing it back is that it
9291 self.op.force_variant = True
9293 if self.op.no_install:
9294 self.LogInfo("No-installation mode has no effect during import")
9296 elif self.op.mode == constants.INSTANCE_CREATE:
9297 if self.op.os_type is None:
9298 raise errors.OpPrereqError("No guest OS specified",
9300 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9301 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9302 " installation" % self.op.os_type,
9304 if self.op.disk_template is None:
9305 raise errors.OpPrereqError("No disk template specified",
9308 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9309 # Check handshake to ensure both clusters have the same domain secret
9310 src_handshake = self.op.source_handshake
9311 if not src_handshake:
9312 raise errors.OpPrereqError("Missing source handshake",
9315 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9318 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9321 # Load and check source CA
9322 self.source_x509_ca_pem = self.op.source_x509_ca
9323 if not self.source_x509_ca_pem:
9324 raise errors.OpPrereqError("Missing source X509 CA",
9328 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9330 except OpenSSL.crypto.Error, err:
9331 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9332 (err, ), errors.ECODE_INVAL)
9334 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9335 if errcode is not None:
9336 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9339 self.source_x509_ca = cert
9341 src_instance_name = self.op.source_instance_name
9342 if not src_instance_name:
9343 raise errors.OpPrereqError("Missing source instance name",
9346 self.source_instance_name = \
9347 netutils.GetHostname(name=src_instance_name).name
9350 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9351 self.op.mode, errors.ECODE_INVAL)
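  # Illustrative sketch (not wired into the LU): the all-or-nothing adoption
  # rule checked above, restated over plain disk dictionaries. The helper name
  # and the literal "adopt"/"size" keys are hypothetical stand-ins for the real
  # IDISK_* constants.
  def _ExampleAdoptionIsConsistent(disks):
    """Return True iff either every disk or no disk requests adoption."""
    adopted = [d for d in disks if "adopt" in d]
    return len(adopted) in (0, len(disks))

  # Example: mixing adopted and newly-created disks is rejected.
  #   _ExampleAdoptionIsConsistent([{"size": 1024}, {"size": 2048}])      -> True
  #   _ExampleAdoptionIsConsistent([{"adopt": "xenvg/lv0"}, {"size": 1}]) -> False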
9353 def ExpandNames(self):
9354 """ExpandNames for CreateInstance.
9356 Figure out the right locks for instance creation.
9359 self.needed_locks = {}
9361 instance_name = self.op.instance_name
9362 # this is just a preventive check, but someone might still add this
9363 # instance in the meantime, and creation will fail at lock-add time
9364 if instance_name in self.cfg.GetInstanceList():
9365 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9366 instance_name, errors.ECODE_EXISTS)
9368 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9370 if self.op.iallocator:
9371 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9372 # specifying a group on instance creation and then selecting nodes from
9374 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9375 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9377 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9378 nodelist = [self.op.pnode]
9379 if self.op.snode is not None:
9380 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9381 nodelist.append(self.op.snode)
9382 self.needed_locks[locking.LEVEL_NODE] = nodelist
9383 # Lock resources of instance's primary and secondary nodes (copy to
9384       # prevent accidental modification)
9385 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9387 # in case of import lock the source node too
9388 if self.op.mode == constants.INSTANCE_IMPORT:
9389 src_node = self.op.src_node
9390 src_path = self.op.src_path
9392 if src_path is None:
9393 self.op.src_path = src_path = self.op.instance_name
9395 if src_node is None:
9396 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9397 self.op.src_node = None
9398 if os.path.isabs(src_path):
9399 raise errors.OpPrereqError("Importing an instance from a path"
9400 " requires a source node option",
9403 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9404 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9405 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9406 if not os.path.isabs(src_path):
9407 self.op.src_path = src_path = \
9408 utils.PathJoin(constants.EXPORT_DIR, src_path)
9410 def _RunAllocator(self):
9411 """Run the allocator based on input opcode.
9414 nics = [n.ToDict() for n in self.nics]
9415 ial = IAllocator(self.cfg, self.rpc,
9416 mode=constants.IALLOCATOR_MODE_ALLOC,
9417 name=self.op.instance_name,
9418 disk_template=self.op.disk_template,
9421 vcpus=self.be_full[constants.BE_VCPUS],
9422 memory=self.be_full[constants.BE_MAXMEM],
9423 spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9426 hypervisor=self.op.hypervisor,
9429 ial.Run(self.op.iallocator)
9432 raise errors.OpPrereqError("Can't compute nodes using"
9433 " iallocator '%s': %s" %
9434 (self.op.iallocator, ial.info),
9436 if len(ial.result) != ial.required_nodes:
9437 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9438 " of nodes (%s), required %s" %
9439 (self.op.iallocator, len(ial.result),
9440 ial.required_nodes), errors.ECODE_FAULT)
9441 self.op.pnode = ial.result[0]
9442 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9443 self.op.instance_name, self.op.iallocator,
9444 utils.CommaJoin(ial.result))
9445 if ial.required_nodes == 2:
9446 self.op.snode = ial.result[1]
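  # Illustrative sketch (not used by the code above): the result validation
  # that _RunAllocator performs, on a plain list of node names. "required"
  # mirrors ial.required_nodes; the function name is hypothetical.
  def _ExampleValidateAllocation(result, required):
    """Return (pnode, snode or None) or raise ValueError on a bad result."""
    if len(result) != required:
      raise ValueError("allocator returned %d node(s), %d required" %
                       (len(result), required))
    if required == 2:
      return (result[0], result[1])
    return (result[0], None)

  # Example: a DRBD instance needs two nodes, a plain one only needs one.
  #   _ExampleValidateAllocation(["node1", "node2"], 2) -> ("node1", "node2")
  #   _ExampleValidateAllocation(["node1"], 2)          -> raises ValueError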
9448 def BuildHooksEnv(self):
9451 This runs on master, primary and secondary nodes of the instance.
9455 "ADD_MODE": self.op.mode,
9457 if self.op.mode == constants.INSTANCE_IMPORT:
9458 env["SRC_NODE"] = self.op.src_node
9459 env["SRC_PATH"] = self.op.src_path
9460 env["SRC_IMAGES"] = self.src_images
9462 env.update(_BuildInstanceHookEnv(
9463 name=self.op.instance_name,
9464 primary_node=self.op.pnode,
9465 secondary_nodes=self.secondaries,
9466 status=self.op.start,
9467 os_type=self.op.os_type,
9468 minmem=self.be_full[constants.BE_MINMEM],
9469 maxmem=self.be_full[constants.BE_MAXMEM],
9470 vcpus=self.be_full[constants.BE_VCPUS],
9471 nics=_NICListToTuple(self, self.nics),
9472 disk_template=self.op.disk_template,
9473 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9474 for d in self.disks],
9477 hypervisor_name=self.op.hypervisor,
9483 def BuildHooksNodes(self):
9484 """Build hooks nodes.
9487 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9490 def _ReadExportInfo(self):
9491 """Reads the export information from disk.
9493 It will override the opcode source node and path with the actual
9494 information, if these two were not specified before.
9496 @return: the export information
9499 assert self.op.mode == constants.INSTANCE_IMPORT
9501 src_node = self.op.src_node
9502 src_path = self.op.src_path
9504 if src_node is None:
9505 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9506 exp_list = self.rpc.call_export_list(locked_nodes)
9508 for node in exp_list:
9509 if exp_list[node].fail_msg:
9511 if src_path in exp_list[node].payload:
9513 self.op.src_node = src_node = node
9514 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9518 raise errors.OpPrereqError("No export found for relative path %s" %
9519 src_path, errors.ECODE_INVAL)
9521 _CheckNodeOnline(self, src_node)
9522 result = self.rpc.call_export_info(src_node, src_path)
9523 result.Raise("No export or invalid export found in dir %s" % src_path)
9525 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9526 if not export_info.has_section(constants.INISECT_EXP):
9527 raise errors.ProgrammerError("Corrupted export config",
9528 errors.ECODE_ENVIRON)
9530 ei_version = export_info.get(constants.INISECT_EXP, "version")
9531 if (int(ei_version) != constants.EXPORT_VERSION):
9532 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9533 (ei_version, constants.EXPORT_VERSION),
9534 errors.ECODE_ENVIRON)
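  # Illustrative sketch (independent of the RPC layer): the export data parsed
  # above is a plain INI document, so the version check can be reproduced with
  # only the standard library. The "export"/"version" names and the expected
  # version 0 are hard-coded here for the example only.
  def _ExampleCheckExportVersion(export_text, wanted_version=0):
    """Parse an export INI string and verify its version field."""
    import ConfigParser
    import StringIO
    parser = ConfigParser.SafeConfigParser()
    parser.readfp(StringIO.StringIO(export_text))
    if not parser.has_section("export"):
      raise ValueError("Corrupted export config")
    version = parser.getint("export", "version")
    if version != wanted_version:
      raise ValueError("Wrong export version %s (wanted %d)" %
                       (version, wanted_version))
    return parser

  # Example input:
  #   [export]
  #   version = 0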
9537 def _ReadExportParams(self, einfo):
9538 """Use export parameters as defaults.
9540 In case the opcode doesn't specify (as in override) some instance
9541 parameters, then try to use them from the export information, if
9545 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9547 if self.op.disk_template is None:
9548 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9549 self.op.disk_template = einfo.get(constants.INISECT_INS,
9551 if self.op.disk_template not in constants.DISK_TEMPLATES:
9552 raise errors.OpPrereqError("Disk template specified in configuration"
9553 " file is not one of the allowed values:"
9554 " %s" % " ".join(constants.DISK_TEMPLATES))
9556 raise errors.OpPrereqError("No disk template specified and the export"
9557 " is missing the disk_template information",
9560 if not self.op.disks:
9562 # TODO: import the disk iv_name too
9563 for idx in range(constants.MAX_DISKS):
9564 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9565 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9566 disks.append({constants.IDISK_SIZE: disk_sz})
9567 self.op.disks = disks
9568 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9569 raise errors.OpPrereqError("No disk info specified and the export"
9570 " is missing the disk information",
9573 if not self.op.nics:
9575 for idx in range(constants.MAX_NICS):
9576 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9578 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9579 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9586 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9587 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9589 if (self.op.hypervisor is None and
9590 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9591 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9593 if einfo.has_section(constants.INISECT_HYP):
9594 # use the export parameters but do not override the ones
9595 # specified by the user
9596 for name, value in einfo.items(constants.INISECT_HYP):
9597 if name not in self.op.hvparams:
9598 self.op.hvparams[name] = value
9600 if einfo.has_section(constants.INISECT_BEP):
9601 # use the parameters, without overriding
9602 for name, value in einfo.items(constants.INISECT_BEP):
9603 if name not in self.op.beparams:
9604 self.op.beparams[name] = value
9605 # Compatibility for the old "memory" be param
9606 if name == constants.BE_MEMORY:
9607 if constants.BE_MAXMEM not in self.op.beparams:
9608 self.op.beparams[constants.BE_MAXMEM] = value
9609 if constants.BE_MINMEM not in self.op.beparams:
9610 self.op.beparams[constants.BE_MINMEM] = value
9612 # try to read the parameters old style, from the main section
9613 for name in constants.BES_PARAMETERS:
9614 if (name not in self.op.beparams and
9615 einfo.has_option(constants.INISECT_INS, name)):
9616 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9618 if einfo.has_section(constants.INISECT_OSP):
9619 # use the parameters, without overriding
9620 for name, value in einfo.items(constants.INISECT_OSP):
9621 if name not in self.op.osparams:
9622 self.op.osparams[name] = value
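  # Illustrative sketch (plain dicts instead of the real opcode/export
  # objects): how export-provided values act as defaults only, and how the
  # legacy "memory" backend parameter fans out to maxmem/minmem when those are
  # not set. The function name and literal keys are examples only.
  def _ExampleMergeBeParams(opcode_bep, export_bep):
    """Fill opcode beparams from export values without overriding them."""
    merged = dict(opcode_bep)
    for name, value in export_bep.items():
      if name not in merged:
        merged[name] = value
      if name == "memory":
        merged.setdefault("maxmem", value)
        merged.setdefault("minmem", value)
    return merged

  # Example: the user-specified maxmem wins, minmem falls back to the old
  # export value:
  #   _ExampleMergeBeParams({"maxmem": 512}, {"memory": 256})
  #     -> {"maxmem": 512, "memory": 256, "minmem": 256}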
9624 def _RevertToDefaults(self, cluster):
9625 """Revert the instance parameters to the default values.
9629 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9630 for name in self.op.hvparams.keys():
9631 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9632 del self.op.hvparams[name]
9634 be_defs = cluster.SimpleFillBE({})
9635 for name in self.op.beparams.keys():
9636 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9637 del self.op.beparams[name]
9639 nic_defs = cluster.SimpleFillNIC({})
9640 for nic in self.op.nics:
9641 for name in constants.NICS_PARAMETERS:
9642 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9645 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9646 for name in self.op.osparams.keys():
9647 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9648 del self.op.osparams[name]
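  # Illustrative counterpart of _RevertToDefaults on plain dicts: values equal
  # to the filled cluster defaults are dropped, so only genuine overrides
  # remain. A minimal sketch with a hypothetical helper name.
  def _ExampleStripDefaults(params, defaults):
    """Return a copy of params without entries equal to the defaults."""
    return dict((name, value) for name, value in params.items()
                if defaults.get(name) != value)

  # Example: _ExampleStripDefaults({"maxmem": 128, "vcpus": 4}, {"maxmem": 128})
  #   -> {"vcpus": 4}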
9650 def _CalculateFileStorageDir(self):
9651 """Calculate final instance file storage dir.
9654 # file storage dir calculation/check
9655 self.instance_file_storage_dir = None
9656 if self.op.disk_template in constants.DTS_FILEBASED:
9657 # build the full file storage dir path
9660 if self.op.disk_template == constants.DT_SHARED_FILE:
9661 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9663 get_fsd_fn = self.cfg.GetFileStorageDir
9665 cfg_storagedir = get_fsd_fn()
9666 if not cfg_storagedir:
9667 raise errors.OpPrereqError("Cluster file storage dir not defined")
9668 joinargs.append(cfg_storagedir)
9670 if self.op.file_storage_dir is not None:
9671 joinargs.append(self.op.file_storage_dir)
9673 joinargs.append(self.op.instance_name)
9675 # pylint: disable=W0142
9676 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
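  # Illustrative sketch of the path computation above, using the standard
  # os.path module instead of utils.PathJoin: the cluster-wide storage
  # directory, an optional user-supplied subdirectory and the instance name
  # are simply joined. The argument names and example paths are hypothetical.
  def _ExampleFileStorageDir(cluster_dir, user_subdir, instance_name):
    """Compute a file-storage directory the same way as above."""
    import os.path  # local import only to keep the sketch self-contained
    parts = [cluster_dir]
    if user_subdir is not None:
      parts.append(user_subdir)
    parts.append(instance_name)
    return os.path.join(*parts)

  # Example: _ExampleFileStorageDir("/srv/ganeti/file-storage", None, "inst1")
  #   -> "/srv/ganeti/file-storage/inst1"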
9678 def CheckPrereq(self): # pylint: disable=R0914
9679 """Check prerequisites.
9682 self._CalculateFileStorageDir()
9684 if self.op.mode == constants.INSTANCE_IMPORT:
9685 export_info = self._ReadExportInfo()
9686 self._ReadExportParams(export_info)
9687 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9689 self._old_instance_name = None
9691 if (not self.cfg.GetVGName() and
9692 self.op.disk_template not in constants.DTS_NOT_LVM):
9693 raise errors.OpPrereqError("Cluster does not support lvm-based"
9694 " instances", errors.ECODE_STATE)
9696 if (self.op.hypervisor is None or
9697 self.op.hypervisor == constants.VALUE_AUTO):
9698 self.op.hypervisor = self.cfg.GetHypervisorType()
9700 cluster = self.cfg.GetClusterInfo()
9701 enabled_hvs = cluster.enabled_hypervisors
9702 if self.op.hypervisor not in enabled_hvs:
9703 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9704 " cluster (%s)" % (self.op.hypervisor,
9705 ",".join(enabled_hvs)),
9708 # Check tag validity
9709 for tag in self.op.tags:
9710 objects.TaggableObject.ValidateTag(tag)
9712 # check hypervisor parameter syntax (locally)
9713 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9714 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9716 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9717 hv_type.CheckParameterSyntax(filled_hvp)
9718 self.hv_full = filled_hvp
9719 # check that we don't specify global parameters on an instance
9720 _CheckGlobalHvParams(self.op.hvparams)
9722 # fill and remember the beparams dict
9723 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9724 for param, value in self.op.beparams.iteritems():
9725 if value == constants.VALUE_AUTO:
9726 self.op.beparams[param] = default_beparams[param]
9727 objects.UpgradeBeParams(self.op.beparams)
9728 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9729 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9731 # build os parameters
9732 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9734 # now that hvp/bep are in final format, let's reset to defaults,
9736 if self.op.identify_defaults:
9737 self._RevertToDefaults(cluster)
9741 for idx, nic in enumerate(self.op.nics):
9742 nic_mode_req = nic.get(constants.INIC_MODE, None)
9743 nic_mode = nic_mode_req
9744 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9745 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9747 # in routed mode, for the first nic, the default ip is 'auto'
9748 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9749 default_ip_mode = constants.VALUE_AUTO
9751 default_ip_mode = constants.VALUE_NONE
9753 # ip validity checks
9754 ip = nic.get(constants.INIC_IP, default_ip_mode)
9755 if ip is None or ip.lower() == constants.VALUE_NONE:
9757 elif ip.lower() == constants.VALUE_AUTO:
9758 if not self.op.name_check:
9759 raise errors.OpPrereqError("IP address set to auto but name checks"
9760 " have been skipped",
9762 nic_ip = self.hostname1.ip
9764 if not netutils.IPAddress.IsValid(ip):
9765 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9769 # TODO: check the ip address for uniqueness
9770 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9771 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9774 # MAC address verification
9775 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9776 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9777 mac = utils.NormalizeAndValidateMac(mac)
9780 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9781 except errors.ReservationError:
9782 raise errors.OpPrereqError("MAC address %s already in use"
9783 " in cluster" % mac,
9784 errors.ECODE_NOTUNIQUE)
9786 # Build nic parameters
9787 link = nic.get(constants.INIC_LINK, None)
9788 if link == constants.VALUE_AUTO:
9789 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9792 nicparams[constants.NIC_MODE] = nic_mode
9794 nicparams[constants.NIC_LINK] = link
9796 check_params = cluster.SimpleFillNIC(nicparams)
9797 objects.NIC.CheckParameterSyntax(check_params)
9798 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9800 # disk checks/pre-build
9801 default_vg = self.cfg.GetVGName()
9803 for disk in self.op.disks:
9804 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9805 if mode not in constants.DISK_ACCESS_SET:
9806 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9807 mode, errors.ECODE_INVAL)
9808 size = disk.get(constants.IDISK_SIZE, None)
9810 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9813 except (TypeError, ValueError):
9814 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9817 data_vg = disk.get(constants.IDISK_VG, default_vg)
9819 constants.IDISK_SIZE: size,
9820 constants.IDISK_MODE: mode,
9821 constants.IDISK_VG: data_vg,
9823 if constants.IDISK_METAVG in disk:
9824 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9825 if constants.IDISK_ADOPT in disk:
9826 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9827 self.disks.append(new_disk)
9829 if self.op.mode == constants.INSTANCE_IMPORT:
9831 for idx in range(len(self.disks)):
9832 option = "disk%d_dump" % idx
9833 if export_info.has_option(constants.INISECT_INS, option):
9834 # FIXME: are the old os-es, disk sizes, etc. useful?
9835 export_name = export_info.get(constants.INISECT_INS, option)
9836 image = utils.PathJoin(self.op.src_path, export_name)
9837 disk_images.append(image)
9839 disk_images.append(False)
9841 self.src_images = disk_images
9843 if self.op.instance_name == self._old_instance_name:
9844 for idx, nic in enumerate(self.nics):
9845 if nic.mac == constants.VALUE_AUTO:
9846 nic_mac_ini = "nic%d_mac" % idx
9847 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9849 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9851 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9852 if self.op.ip_check:
9853 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9854 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9855 (self.check_ip, self.op.instance_name),
9856 errors.ECODE_NOTUNIQUE)
9858 #### mac address generation
9859     # By generating the MAC address here, both the allocator and the hooks get
9860     # the real final MAC address rather than the 'auto' or 'generate' value.
9861 # There is a race condition between the generation and the instance object
9862 # creation, which means that we know the mac is valid now, but we're not
9863 # sure it will be when we actually add the instance. If things go bad
9864 # adding the instance will abort because of a duplicate mac, and the
9865 # creation job will fail.
9866 for nic in self.nics:
9867 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9868 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9872 if self.op.iallocator is not None:
9873 self._RunAllocator()
9875 # Release all unneeded node locks
9876 _ReleaseLocks(self, locking.LEVEL_NODE,
9877 keep=filter(None, [self.op.pnode, self.op.snode,
9879 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9880 keep=filter(None, [self.op.pnode, self.op.snode,
9883 #### node related checks
9885 # check primary node
9886 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9887 assert self.pnode is not None, \
9888 "Cannot retrieve locked node %s" % self.op.pnode
9890 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9891 pnode.name, errors.ECODE_STATE)
9893 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9894 pnode.name, errors.ECODE_STATE)
9895 if not pnode.vm_capable:
9896 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9897 " '%s'" % pnode.name, errors.ECODE_STATE)
9899 self.secondaries = []
9901 # mirror node verification
9902 if self.op.disk_template in constants.DTS_INT_MIRROR:
9903 if self.op.snode == pnode.name:
9904 raise errors.OpPrereqError("The secondary node cannot be the"
9905 " primary node", errors.ECODE_INVAL)
9906 _CheckNodeOnline(self, self.op.snode)
9907 _CheckNodeNotDrained(self, self.op.snode)
9908 _CheckNodeVmCapable(self, self.op.snode)
9909 self.secondaries.append(self.op.snode)
9911 snode = self.cfg.GetNodeInfo(self.op.snode)
9912 if pnode.group != snode.group:
9913 self.LogWarning("The primary and secondary nodes are in two"
9914 " different node groups; the disk parameters"
9915 " from the first disk's node group will be"
9918 nodenames = [pnode.name] + self.secondaries
9920 # Verify instance specs
9921 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
9923 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9924 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9925 constants.ISPEC_DISK_COUNT: len(self.disks),
9926 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9927 constants.ISPEC_NIC_COUNT: len(self.nics),
9928 constants.ISPEC_SPINDLE_USE: spindle_use,
9931 group_info = self.cfg.GetNodeGroup(pnode.group)
9932 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9933 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9934 if not self.op.ignore_ipolicy and res:
9935 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9936 " policy: %s") % (pnode.group,
9937 utils.CommaJoin(res)),
9940 if not self.adopt_disks:
9941 if self.op.disk_template == constants.DT_RBD:
9942 # _CheckRADOSFreeSpace() is just a placeholder.
9943 # Any function that checks prerequisites can be placed here.
9944 # Check if there is enough space on the RADOS cluster.
9945 _CheckRADOSFreeSpace()
9947 # Check lv size requirements, if not adopting
9948 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9949 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9951 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9952 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9953 disk[constants.IDISK_ADOPT])
9954 for disk in self.disks])
9955 if len(all_lvs) != len(self.disks):
9956 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9958 for lv_name in all_lvs:
9960 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9961 # to ReserveLV uses the same syntax
9962 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9963 except errors.ReservationError:
9964 raise errors.OpPrereqError("LV named %s used by another instance" %
9965 lv_name, errors.ECODE_NOTUNIQUE)
9967 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9968 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9970 node_lvs = self.rpc.call_lv_list([pnode.name],
9971 vg_names.payload.keys())[pnode.name]
9972 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9973 node_lvs = node_lvs.payload
9975 delta = all_lvs.difference(node_lvs.keys())
9977 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9978 utils.CommaJoin(delta),
9980 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9982 raise errors.OpPrereqError("Online logical volumes found, cannot"
9983 " adopt: %s" % utils.CommaJoin(online_lvs),
9985 # update the size of disk based on what is found
9986 for dsk in self.disks:
9987 dsk[constants.IDISK_SIZE] = \
9988 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9989 dsk[constants.IDISK_ADOPT])][0]))
9991 elif self.op.disk_template == constants.DT_BLOCK:
9992 # Normalize and de-duplicate device paths
9993 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9994 for disk in self.disks])
9995 if len(all_disks) != len(self.disks):
9996 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9998 baddisks = [d for d in all_disks
9999 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10001 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10002 " cannot be adopted" %
10003 (", ".join(baddisks),
10004 constants.ADOPTABLE_BLOCKDEV_ROOT),
10005 errors.ECODE_INVAL)
10007 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10008 list(all_disks))[pnode.name]
10009 node_disks.Raise("Cannot get block device information from node %s" %
10011 node_disks = node_disks.payload
10012 delta = all_disks.difference(node_disks.keys())
10014 raise errors.OpPrereqError("Missing block device(s): %s" %
10015 utils.CommaJoin(delta),
10016 errors.ECODE_INVAL)
10017 for dsk in self.disks:
10018 dsk[constants.IDISK_SIZE] = \
10019 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10021 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10023 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10024 # check OS parameters (remotely)
10025 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10027 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10029 # memory check on primary node
10030 #TODO(dynmem): use MINMEM for checking
10032 _CheckNodeFreeMemory(self, self.pnode.name,
10033 "creating instance %s" % self.op.instance_name,
10034 self.be_full[constants.BE_MAXMEM],
10035 self.op.hypervisor)
10037 self.dry_run_result = list(nodenames)
10039 def Exec(self, feedback_fn):
10040 """Create and add the instance to the cluster.
10043 instance = self.op.instance_name
10044 pnode_name = self.pnode.name
10046 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10047 self.owned_locks(locking.LEVEL_NODE)), \
10048 "Node locks differ from node resource locks"
10050 ht_kind = self.op.hypervisor
10051 if ht_kind in constants.HTS_REQ_PORT:
10052 network_port = self.cfg.AllocatePort()
10054 network_port = None
10056     # This is ugly, but we have a chicken-and-egg problem here.
10057 # We can only take the group disk parameters, as the instance
10058 # has no disks yet (we are generating them right here).
10059 node = self.cfg.GetNodeInfo(pnode_name)
10060 nodegroup = self.cfg.GetNodeGroup(node.group)
10061 disks = _GenerateDiskTemplate(self,
10062 self.op.disk_template,
10063 instance, pnode_name,
10066 self.instance_file_storage_dir,
10067 self.op.file_driver,
10070 self.cfg.GetGroupDiskParams(nodegroup))
10072 iobj = objects.Instance(name=instance, os=self.op.os_type,
10073 primary_node=pnode_name,
10074 nics=self.nics, disks=disks,
10075 disk_template=self.op.disk_template,
10076 admin_state=constants.ADMINST_DOWN,
10077 network_port=network_port,
10078 beparams=self.op.beparams,
10079 hvparams=self.op.hvparams,
10080 hypervisor=self.op.hypervisor,
10081 osparams=self.op.osparams,
10085 for tag in self.op.tags:
10088 if self.adopt_disks:
10089 if self.op.disk_template == constants.DT_PLAIN:
10090 # rename LVs to the newly-generated names; we need to construct
10091 # 'fake' LV disks with the old data, plus the new unique_id
10092 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10094 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10095 rename_to.append(t_dsk.logical_id)
10096 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10097 self.cfg.SetDiskID(t_dsk, pnode_name)
10098 result = self.rpc.call_blockdev_rename(pnode_name,
10099 zip(tmp_disks, rename_to))
10100         result.Raise("Failed to rename adopted LVs")
10102 feedback_fn("* creating instance disks...")
10104 _CreateDisks(self, iobj)
10105 except errors.OpExecError:
10106 self.LogWarning("Device creation failed, reverting...")
10108 _RemoveDisks(self, iobj)
10110 self.cfg.ReleaseDRBDMinors(instance)
10113 feedback_fn("adding instance %s to cluster config" % instance)
10115 self.cfg.AddInstance(iobj, self.proc.GetECId())
10117 # Declare that we don't want to remove the instance lock anymore, as we've
10118 # added the instance to the config
10119 del self.remove_locks[locking.LEVEL_INSTANCE]
10121 if self.op.mode == constants.INSTANCE_IMPORT:
10122 # Release unused nodes
10123 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10125 # Release all nodes
10126 _ReleaseLocks(self, locking.LEVEL_NODE)
10129 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10130 feedback_fn("* wiping instance disks...")
10132 _WipeDisks(self, iobj)
10133 except errors.OpExecError, err:
10134 logging.exception("Wiping disks failed")
10135 self.LogWarning("Wiping instance disks failed (%s)", err)
10139 # Something is already wrong with the disks, don't do anything else
10141 elif self.op.wait_for_sync:
10142 disk_abort = not _WaitForSync(self, iobj)
10143 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10144 # make sure the disks are not degraded (still sync-ing is ok)
10145 feedback_fn("* checking mirrors status")
10146 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10151 _RemoveDisks(self, iobj)
10152 self.cfg.RemoveInstance(iobj.name)
10153 # Make sure the instance lock gets removed
10154 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10155 raise errors.OpExecError("There are some degraded disks for"
10158 # Release all node resource locks
10159 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10161 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10162 # we need to set the disks ID to the primary node, since the
10163       # preceding code might or might not have done it, depending on
10164 # disk template and other options
10165 for disk in iobj.disks:
10166 self.cfg.SetDiskID(disk, pnode_name)
10167 if self.op.mode == constants.INSTANCE_CREATE:
10168 if not self.op.no_install:
10169 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10170 not self.op.wait_for_sync)
10172 feedback_fn("* pausing disk sync to install instance OS")
10173 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10176 for idx, success in enumerate(result.payload):
10178 logging.warn("pause-sync of instance %s for disk %d failed",
10181 feedback_fn("* running the instance OS create scripts...")
10182 # FIXME: pass debug option from opcode to backend
10184 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10185 self.op.debug_level)
10187 feedback_fn("* resuming disk sync")
10188 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10191 for idx, success in enumerate(result.payload):
10193 logging.warn("resume-sync of instance %s for disk %d failed",
10196 os_add_result.Raise("Could not add os for instance %s"
10197 " on node %s" % (instance, pnode_name))
10200 if self.op.mode == constants.INSTANCE_IMPORT:
10201 feedback_fn("* running the instance OS import scripts...")
10205 for idx, image in enumerate(self.src_images):
10209 # FIXME: pass debug option from opcode to backend
10210 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10211 constants.IEIO_FILE, (image, ),
10212 constants.IEIO_SCRIPT,
10213 (iobj.disks[idx], idx),
10215 transfers.append(dt)
10218 masterd.instance.TransferInstanceData(self, feedback_fn,
10219 self.op.src_node, pnode_name,
10220 self.pnode.secondary_ip,
10222 if not compat.all(import_result):
10223 self.LogWarning("Some disks for instance %s on node %s were not"
10224 " imported successfully" % (instance, pnode_name))
10226 rename_from = self._old_instance_name
10228 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10229 feedback_fn("* preparing remote import...")
10230 # The source cluster will stop the instance before attempting to make
10231 # a connection. In some cases stopping an instance can take a long
10232 # time, hence the shutdown timeout is added to the connection
10234 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10235 self.op.source_shutdown_timeout)
10236 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10238 assert iobj.primary_node == self.pnode.name
10240 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10241 self.source_x509_ca,
10242 self._cds, timeouts)
10243 if not compat.all(disk_results):
10244 # TODO: Should the instance still be started, even if some disks
10245 # failed to import (valid for local imports, too)?
10246 self.LogWarning("Some disks for instance %s on node %s were not"
10247 " imported successfully" % (instance, pnode_name))
10249 rename_from = self.source_instance_name
10252 # also checked in the prereq part
10253 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10256 # Run rename script on newly imported instance
10257 assert iobj.name == instance
10258 feedback_fn("Running rename script for %s" % instance)
10259 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10261 self.op.debug_level)
10262 if result.fail_msg:
10263 self.LogWarning("Failed to run rename script for %s on node"
10264 " %s: %s" % (instance, pnode_name, result.fail_msg))
10266 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10269 iobj.admin_state = constants.ADMINST_UP
10270 self.cfg.Update(iobj, feedback_fn)
10271 logging.info("Starting instance %s on node %s", instance, pnode_name)
10272 feedback_fn("* starting instance...")
10273 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10275 result.Raise("Could not start instance")
10277 return list(iobj.all_nodes)
10280 def _CheckRADOSFreeSpace():
10281 """Compute disk size requirements inside the RADOS cluster.
10284 # For the RADOS cluster we assume there is always enough space.
10288 class LUInstanceConsole(NoHooksLU):
10289 """Connect to an instance's console.
10291 This is somewhat special in that it returns the command line that
10292 you need to run on the master node in order to connect to the
10298 def ExpandNames(self):
10299 self.share_locks = _ShareAll()
10300 self._ExpandAndLockInstance()
10302 def CheckPrereq(self):
10303 """Check prerequisites.
10305 This checks that the instance is in the cluster.
10308 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10309 assert self.instance is not None, \
10310 "Cannot retrieve locked instance %s" % self.op.instance_name
10311 _CheckNodeOnline(self, self.instance.primary_node)
10313 def Exec(self, feedback_fn):
10314 """Connect to the console of an instance
10317 instance = self.instance
10318 node = instance.primary_node
10320 node_insts = self.rpc.call_instance_list([node],
10321 [instance.hypervisor])[node]
10322 node_insts.Raise("Can't get node information from %s" % node)
10324 if instance.name not in node_insts.payload:
10325 if instance.admin_state == constants.ADMINST_UP:
10326 state = constants.INSTST_ERRORDOWN
10327 elif instance.admin_state == constants.ADMINST_DOWN:
10328 state = constants.INSTST_ADMINDOWN
10330 state = constants.INSTST_ADMINOFFLINE
10331 raise errors.OpExecError("Instance %s is not running (state %s)" %
10332 (instance.name, state))
10334 logging.debug("Connecting to console of %s on %s", instance.name, node)
10336 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10339 def _GetInstanceConsole(cluster, instance):
10340 """Returns console information for an instance.
10342 @type cluster: L{objects.Cluster}
10343 @type instance: L{objects.Instance}
10347 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10348 # beparams and hvparams are passed separately, to avoid editing the
10349 # instance and then saving the defaults in the instance itself.
10350 hvparams = cluster.FillHV(instance)
10351 beparams = cluster.FillBE(instance)
10352 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10354 assert console.instance == instance.name
10355 assert console.Validate()
10357 return console.ToDict()
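# Illustrative sketch of the fill pattern used above: cluster-level defaults
# are overlaid with the instance's own overrides in a fresh dict, so neither
# the cluster nor the instance object is modified. Plain dicts and a
# hypothetical helper name; the parameter names in the example are just
# placeholders.
def _ExampleFillParams(cluster_defaults, instance_overrides):
  """Return cluster defaults overridden by instance-specific values."""
  filled = dict(cluster_defaults)
  filled.update(instance_overrides)
  return filled

# Example:
#   _ExampleFillParams({"serial_console": True, "kernel_path": "/boot/vmlinuz"},
#                      {"serial_console": False})
#     -> {"serial_console": False, "kernel_path": "/boot/vmlinuz"}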
10360 class LUInstanceReplaceDisks(LogicalUnit):
10361 """Replace the disks of an instance.
10364 HPATH = "mirrors-replace"
10365 HTYPE = constants.HTYPE_INSTANCE
10368 def CheckArguments(self):
10369 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10370 self.op.iallocator)
10372 def ExpandNames(self):
10373 self._ExpandAndLockInstance()
10375 assert locking.LEVEL_NODE not in self.needed_locks
10376 assert locking.LEVEL_NODE_RES not in self.needed_locks
10377 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10379 assert self.op.iallocator is None or self.op.remote_node is None, \
10380 "Conflicting options"
10382 if self.op.remote_node is not None:
10383 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10385 # Warning: do not remove the locking of the new secondary here
10386 # unless DRBD8.AddChildren is changed to work in parallel;
10387 # currently it doesn't since parallel invocations of
10388 # FindUnusedMinor will conflict
10389 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10390 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10392 self.needed_locks[locking.LEVEL_NODE] = []
10393 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10395 if self.op.iallocator is not None:
10396 # iallocator will select a new node in the same group
10397 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10399 self.needed_locks[locking.LEVEL_NODE_RES] = []
10401 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10402 self.op.iallocator, self.op.remote_node,
10403 self.op.disks, False, self.op.early_release,
10404 self.op.ignore_ipolicy)
10406 self.tasklets = [self.replacer]
10408 def DeclareLocks(self, level):
10409 if level == locking.LEVEL_NODEGROUP:
10410 assert self.op.remote_node is None
10411 assert self.op.iallocator is not None
10412 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10414 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10415 # Lock all groups used by instance optimistically; this requires going
10416 # via the node before it's locked, requiring verification later on
10417 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10418 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10420 elif level == locking.LEVEL_NODE:
10421 if self.op.iallocator is not None:
10422 assert self.op.remote_node is None
10423 assert not self.needed_locks[locking.LEVEL_NODE]
10425 # Lock member nodes of all locked groups
10426 self.needed_locks[locking.LEVEL_NODE] = [node_name
10427 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10428 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10430 self._LockInstancesNodes()
10431 elif level == locking.LEVEL_NODE_RES:
10433 self.needed_locks[locking.LEVEL_NODE_RES] = \
10434 self.needed_locks[locking.LEVEL_NODE]
10436 def BuildHooksEnv(self):
10437 """Build hooks env.
10439 This runs on the master, the primary and all the secondaries.
10442 instance = self.replacer.instance
10444 "MODE": self.op.mode,
10445 "NEW_SECONDARY": self.op.remote_node,
10446 "OLD_SECONDARY": instance.secondary_nodes[0],
10448 env.update(_BuildInstanceHookEnvByObject(self, instance))
10451 def BuildHooksNodes(self):
10452 """Build hooks nodes.
10455 instance = self.replacer.instance
10457 self.cfg.GetMasterNode(),
10458 instance.primary_node,
10460 if self.op.remote_node is not None:
10461 nl.append(self.op.remote_node)
10464 def CheckPrereq(self):
10465 """Check prerequisites.
10468 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10469 self.op.iallocator is None)
10471 # Verify if node group locks are still correct
10472 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10474 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10476 return LogicalUnit.CheckPrereq(self)
10479 class TLReplaceDisks(Tasklet):
10480 """Replaces disks for an instance.
10482 Note: Locking is not within the scope of this class.
10485 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10486 disks, delay_iallocator, early_release, ignore_ipolicy):
10487 """Initializes this class.
10490 Tasklet.__init__(self, lu)
10493 self.instance_name = instance_name
10495 self.iallocator_name = iallocator_name
10496 self.remote_node = remote_node
10498 self.delay_iallocator = delay_iallocator
10499 self.early_release = early_release
10500 self.ignore_ipolicy = ignore_ipolicy
10503 self.instance = None
10504 self.new_node = None
10505 self.target_node = None
10506 self.other_node = None
10507 self.remote_node_info = None
10508 self.node_secondary_ip = None
10511 def CheckArguments(mode, remote_node, iallocator):
10512 """Helper function for users of this class.
10515 # check for valid parameter combination
10516 if mode == constants.REPLACE_DISK_CHG:
10517 if remote_node is None and iallocator is None:
10518 raise errors.OpPrereqError("When changing the secondary either an"
10519 " iallocator script must be used or the"
10520 " new node given", errors.ECODE_INVAL)
10522 if remote_node is not None and iallocator is not None:
10523 raise errors.OpPrereqError("Give either the iallocator or the new"
10524 " secondary, not both", errors.ECODE_INVAL)
10526 elif remote_node is not None or iallocator is not None:
10527 # Not replacing the secondary
10528 raise errors.OpPrereqError("The iallocator and new node options can"
10529 " only be used when changing the"
10530 " secondary node", errors.ECODE_INVAL)
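  # Illustrative sketch (plain booleans instead of opcode fields) of the
  # argument matrix enforced above: only the "change secondary" mode takes a
  # node source, and it takes exactly one of the two. Hypothetical helper name.
  def _ExampleReplaceArgsValid(changing_secondary, has_remote_node,
                               has_iallocator):
    """Return True iff the remote_node/iallocator combination is acceptable."""
    if changing_secondary:
      # exactly one source for the new secondary node
      return has_remote_node != has_iallocator
    # other modes must not specify either
    return not (has_remote_node or has_iallocator)

  # Examples:
  #   _ExampleReplaceArgsValid(True, True, False)   -> True
  #   _ExampleReplaceArgsValid(True, True, True)    -> False
  #   _ExampleReplaceArgsValid(False, False, False) -> True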
10533 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10534 """Compute a new secondary node using an IAllocator.
10537 ial = IAllocator(lu.cfg, lu.rpc,
10538 mode=constants.IALLOCATOR_MODE_RELOC,
10539 name=instance_name,
10540 relocate_from=list(relocate_from))
10542 ial.Run(iallocator_name)
10544 if not ial.success:
10545 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10546 " %s" % (iallocator_name, ial.info),
10547 errors.ECODE_NORES)
10549 if len(ial.result) != ial.required_nodes:
10550 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10551 " of nodes (%s), required %s" %
10553 len(ial.result), ial.required_nodes),
10554 errors.ECODE_FAULT)
10556 remote_node_name = ial.result[0]
10558 lu.LogInfo("Selected new secondary for instance '%s': %s",
10559 instance_name, remote_node_name)
10561 return remote_node_name
10563 def _FindFaultyDisks(self, node_name):
10564 """Wrapper for L{_FindFaultyInstanceDisks}.
10567 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10570 def _CheckDisksActivated(self, instance):
10571 """Checks if the instance disks are activated.
10573 @param instance: The instance to check disks
10574 @return: True if they are activated, False otherwise
10577 nodes = instance.all_nodes
10579 for idx, dev in enumerate(instance.disks):
10581 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10582 self.cfg.SetDiskID(dev, node)
10584 result = _BlockdevFind(self, node, dev, instance)
10588 elif result.fail_msg or not result.payload:
10593 def CheckPrereq(self):
10594 """Check prerequisites.
10596 This checks that the instance is in the cluster.
10599 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10600 assert instance is not None, \
10601 "Cannot retrieve locked instance %s" % self.instance_name
10603 if instance.disk_template != constants.DT_DRBD8:
10604 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10605 " instances", errors.ECODE_INVAL)
10607 if len(instance.secondary_nodes) != 1:
10608 raise errors.OpPrereqError("The instance has a strange layout,"
10609 " expected one secondary but found %d" %
10610 len(instance.secondary_nodes),
10611 errors.ECODE_FAULT)
10613 if not self.delay_iallocator:
10614 self._CheckPrereq2()
10616 def _CheckPrereq2(self):
10617 """Check prerequisites, second part.
10619 This function should always be part of CheckPrereq. It was separated and is
10620 now called from Exec because during node evacuation iallocator was only
10621     called with an unmodified cluster model, not taking planned changes into account.
10625 instance = self.instance
10626 secondary_node = instance.secondary_nodes[0]
10628 if self.iallocator_name is None:
10629 remote_node = self.remote_node
10631 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10632 instance.name, instance.secondary_nodes)
10634 if remote_node is None:
10635 self.remote_node_info = None
10637 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10638 "Remote node '%s' is not locked" % remote_node
10640 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10641 assert self.remote_node_info is not None, \
10642 "Cannot retrieve locked node %s" % remote_node
10644 if remote_node == self.instance.primary_node:
10645 raise errors.OpPrereqError("The specified node is the primary node of"
10646 " the instance", errors.ECODE_INVAL)
10648 if remote_node == secondary_node:
10649 raise errors.OpPrereqError("The specified node is already the"
10650 " secondary node of the instance",
10651 errors.ECODE_INVAL)
10653 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10654 constants.REPLACE_DISK_CHG):
10655 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10656 errors.ECODE_INVAL)
10658 if self.mode == constants.REPLACE_DISK_AUTO:
10659 if not self._CheckDisksActivated(instance):
10660 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10661 " first" % self.instance_name,
10662 errors.ECODE_STATE)
10663 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10664 faulty_secondary = self._FindFaultyDisks(secondary_node)
10666 if faulty_primary and faulty_secondary:
10667 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10668 " one node and can not be repaired"
10669 " automatically" % self.instance_name,
10670 errors.ECODE_STATE)
10673 self.disks = faulty_primary
10674 self.target_node = instance.primary_node
10675 self.other_node = secondary_node
10676 check_nodes = [self.target_node, self.other_node]
10677 elif faulty_secondary:
10678 self.disks = faulty_secondary
10679 self.target_node = secondary_node
10680 self.other_node = instance.primary_node
10681 check_nodes = [self.target_node, self.other_node]
10687 # Non-automatic modes
10688 if self.mode == constants.REPLACE_DISK_PRI:
10689 self.target_node = instance.primary_node
10690 self.other_node = secondary_node
10691 check_nodes = [self.target_node, self.other_node]
10693 elif self.mode == constants.REPLACE_DISK_SEC:
10694 self.target_node = secondary_node
10695 self.other_node = instance.primary_node
10696 check_nodes = [self.target_node, self.other_node]
10698 elif self.mode == constants.REPLACE_DISK_CHG:
10699 self.new_node = remote_node
10700 self.other_node = instance.primary_node
10701 self.target_node = secondary_node
10702 check_nodes = [self.new_node, self.other_node]
10704 _CheckNodeNotDrained(self.lu, remote_node)
10705 _CheckNodeVmCapable(self.lu, remote_node)
10707 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10708 assert old_node_info is not None
10709 if old_node_info.offline and not self.early_release:
10710 # doesn't make sense to delay the release
10711 self.early_release = True
10712 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10713 " early-release mode", secondary_node)
10716 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10719 # If not specified all disks should be replaced
10721 self.disks = range(len(self.instance.disks))
10723     # TODO: This is ugly, but right now we can't distinguish between an internally
10724     # submitted opcode and an external one. We should fix that.
10725 if self.remote_node_info:
10726       # We change the node, let's verify it still meets instance policy
10727 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10728 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10730 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10731 ignore=self.ignore_ipolicy)
10733 for node in check_nodes:
10734 _CheckNodeOnline(self.lu, node)
10736 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10739 if node_name is not None)
10741 # Release unneeded node and node resource locks
10742 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10743 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10745 # Release any owned node group
10746 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10747 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10749 # Check whether disks are valid
10750 for disk_idx in self.disks:
10751 instance.FindDisk(disk_idx)
10753 # Get secondary node IP addresses
10754 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10755 in self.cfg.GetMultiNodeInfo(touched_nodes))
10757 def Exec(self, feedback_fn):
10758 """Execute disk replacement.
10760 This dispatches the disk replacement to the appropriate handler.
10763 if self.delay_iallocator:
10764 self._CheckPrereq2()
10767 # Verify owned locks before starting operation
10768 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10769 assert set(owned_nodes) == set(self.node_secondary_ip), \
10770 ("Incorrect node locks, owning %s, expected %s" %
10771 (owned_nodes, self.node_secondary_ip.keys()))
10772 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10773 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10775 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10776 assert list(owned_instances) == [self.instance_name], \
10777 "Instance '%s' not locked" % self.instance_name
10779 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10780 "Should not own any node group lock at this point"
10783 feedback_fn("No disks need replacement for instance '%s'" %
10784 self.instance.name)
10787 feedback_fn("Replacing disk(s) %s for instance '%s'" %
10788 (utils.CommaJoin(self.disks), self.instance.name))
10789     feedback_fn("Current primary node: %s" % self.instance.primary_node)
10790     feedback_fn("Current secondary node: %s" %
10791                 utils.CommaJoin(self.instance.secondary_nodes))
10793 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10795 # Activate the instance disks if we're replacing them on a down instance
10797 _StartInstanceDisks(self.lu, self.instance, True)
10800 # Should we replace the secondary node?
10801 if self.new_node is not None:
10802 fn = self._ExecDrbd8Secondary
10804 fn = self._ExecDrbd8DiskOnly
10806 result = fn(feedback_fn)
10808 # Deactivate the instance disks if we're replacing them on a
10811 _SafeShutdownInstanceDisks(self.lu, self.instance)
10813 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10816 # Verify owned locks
10817 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10818 nodes = frozenset(self.node_secondary_ip)
10819 assert ((self.early_release and not owned_nodes) or
10820 (not self.early_release and not (set(owned_nodes) - nodes))), \
10821 ("Not owning the correct locks, early_release=%s, owned=%r,"
10822 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10826 def _CheckVolumeGroup(self, nodes):
10827 self.lu.LogInfo("Checking volume groups")
10829 vgname = self.cfg.GetVGName()
10831 # Make sure volume group exists on all involved nodes
10832 results = self.rpc.call_vg_list(nodes)
10834 raise errors.OpExecError("Can't list volume groups on the nodes")
10837 res = results[node]
10838 res.Raise("Error checking node %s" % node)
10839 if vgname not in res.payload:
10840 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10843 def _CheckDisksExistence(self, nodes):
10844 # Check disk existence
10845 for idx, dev in enumerate(self.instance.disks):
10846 if idx not in self.disks:
10850 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10851 self.cfg.SetDiskID(dev, node)
10853 result = _BlockdevFind(self, node, dev, self.instance)
10855 msg = result.fail_msg
10856 if msg or not result.payload:
10858 msg = "disk not found"
10859 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10862 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10863 for idx, dev in enumerate(self.instance.disks):
10864 if idx not in self.disks:
10867 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10870 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10871 on_primary, ldisk=ldisk):
10872 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10873 " replace disks for instance %s" %
10874 (node_name, self.instance.name))
10876 def _CreateNewStorage(self, node_name):
10877 """Create new storage on the primary or secondary node.
10879 This is only used for same-node replaces, not for changing the
10880 secondary node, hence we don't want to modify the existing disk.
10885 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10886 for idx, dev in enumerate(disks):
10887 if idx not in self.disks:
10890 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10892 self.cfg.SetDiskID(dev, node_name)
10894 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10895 names = _GenerateUniqueNames(self.lu, lv_names)
10897 (data_disk, meta_disk) = dev.children
10898 vg_data = data_disk.logical_id[0]
10899 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10900 logical_id=(vg_data, names[0]),
10901 params=data_disk.params)
10902 vg_meta = meta_disk.logical_id[0]
10903 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10904 logical_id=(vg_meta, names[1]),
10905 params=meta_disk.params)
10907 new_lvs = [lv_data, lv_meta]
10908 old_lvs = [child.Copy() for child in dev.children]
10909 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10911 # we pass force_create=True to force the LVM creation
10912 for new_lv in new_lvs:
10913 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10914 _GetInstanceInfoText(self.instance), False)
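    # Illustrative sketch of the naming scheme above: each replaced disk gets
    # a fresh data/meta LV pair whose names combine a unique prefix with a
    # ".diskN_data"/".diskN_meta" suffix. The uuid module stands in for the
    # cluster's real unique-ID generator; the helper name is hypothetical.
    def _ExampleNewLvNames(disk_index):
      """Return (data_lv_name, meta_lv_name) for one replaced disk."""
      import uuid
      return (str(uuid.uuid4()) + ".disk%d_data" % disk_index,
              str(uuid.uuid4()) + ".disk%d_meta" % disk_index)

    # Example: _ExampleNewLvNames(0)
    #   -> ("<uuid>.disk0_data", "<uuid>.disk0_meta")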
10918 def _CheckDevices(self, node_name, iv_names):
10919 for name, (dev, _, _) in iv_names.iteritems():
10920 self.cfg.SetDiskID(dev, node_name)
10922 result = _BlockdevFind(self, node_name, dev, self.instance)
10924 msg = result.fail_msg
10925 if msg or not result.payload:
10927 msg = "disk not found"
10928 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10931 if result.payload.is_degraded:
10932 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10934 def _RemoveOldStorage(self, node_name, iv_names):
10935 for name, (_, old_lvs, _) in iv_names.iteritems():
10936 self.lu.LogInfo("Remove logical volumes for %s" % name)
10939 self.cfg.SetDiskID(lv, node_name)
10941 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10943 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10944 hint="remove unused LVs manually")
10946 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10947 """Replace a disk on the primary or secondary for DRBD 8.
10949 The algorithm for replace is quite complicated:
10951 1. for each disk to be replaced:
10953 1. create new LVs on the target node with unique names
10954 1. detach old LVs from the drbd device
10955 1. rename old LVs to name_replaced.<time_t>
10956 1. rename new LVs to old LVs
10957 1. attach the new LVs (with the old names now) to the drbd device
10959 1. wait for sync across all devices
10961 1. for each modified disk:
10963       1. remove old LVs (which have the name name_replaced.<time_t>)
10965 Failures are not very well handled.
10970 # Step: check device activation
10971 self.lu.LogStep(1, steps_total, "Check device existence")
10972 self._CheckDisksExistence([self.other_node, self.target_node])
10973 self._CheckVolumeGroup([self.target_node, self.other_node])
10975 # Step: check other node consistency
10976 self.lu.LogStep(2, steps_total, "Check peer consistency")
10977 self._CheckDisksConsistency(self.other_node,
10978 self.other_node == self.instance.primary_node,
10981 # Step: create new storage
10982 self.lu.LogStep(3, steps_total, "Allocate new storage")
10983 iv_names = self._CreateNewStorage(self.target_node)
10985 # Step: for each lv, detach+rename*2+attach
10986 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10987 for dev, old_lvs, new_lvs in iv_names.itervalues():
10988 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10990 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10992 result.Raise("Can't detach drbd from local storage on node"
10993 " %s for device %s" % (self.target_node, dev.iv_name))
10995 #cfg.Update(instance)
10997 # ok, we created the new LVs, so now we know we have the needed
10998 # storage; as such, we proceed on the target node to rename
10999 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11000 # using the assumption that logical_id == physical_id (which in
11001 # turn is the unique_id on that node)
11003 # FIXME(iustin): use a better name for the replaced LVs
11004 temp_suffix = int(time.time())
11005 ren_fn = lambda d, suff: (d.physical_id[0],
11006 d.physical_id[1] + "_replaced-%s" % suff)
11008 # Build the rename list based on what LVs exist on the node
11009 rename_old_to_new = []
11010 for to_ren in old_lvs:
11011 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11012 if not result.fail_msg and result.payload:
11014 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11016 self.lu.LogInfo("Renaming the old LVs on the target node")
11017 result = self.rpc.call_blockdev_rename(self.target_node,
11019 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11021 # Now we rename the new LVs to the old LVs
11022 self.lu.LogInfo("Renaming the new LVs on the target node")
11023 rename_new_to_old = [(new, old.physical_id)
11024 for old, new in zip(old_lvs, new_lvs)]
11025       result = self.rpc.call_blockdev_rename(self.target_node,
11026                                              rename_new_to_old)
11027 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11029 # Intermediate steps of in memory modifications
11030 for old, new in zip(old_lvs, new_lvs):
11031 new.logical_id = old.logical_id
11032 self.cfg.SetDiskID(new, self.target_node)
11034 # We need to modify old_lvs so that removal later removes the
11035       # right LVs, not the newly added ones; note that old_lvs is a
11036       # copy here
11037 for disk in old_lvs:
11038 disk.logical_id = ren_fn(disk, temp_suffix)
11039 self.cfg.SetDiskID(disk, self.target_node)
11041 # Now that the new lvs have the old name, we can add them to the device
11042 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11043 result = self.rpc.call_blockdev_addchildren(self.target_node,
11044 (dev, self.instance), new_lvs)
11045       msg = result.fail_msg
11046       if msg:
11047         for new_lv in new_lvs:
11048           msg2 = self.rpc.call_blockdev_remove(self.target_node,
11049                                                new_lv).fail_msg
11050           if msg2:
11051             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11052                                hint=("cleanup manually the unused logical"
11053                                      " volumes"))
11054 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
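11055     # Step numbering continues via cstep; order depends on early_release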
11056 cstep = itertools.count(5)
11058 if self.early_release:
11059 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11060 self._RemoveOldStorage(self.target_node, iv_names)
11061 # TODO: Check if releasing locks early still makes sense
11062 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11063     else:
11064       # Release all resource locks except those used by the instance
11065       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11066                     keep=self.node_secondary_ip.keys())
11068 # Release all node locks while waiting for sync
11069 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11071 # TODO: Can the instance lock be downgraded here? Take the optional disk
11072 # shutdown in the caller into consideration.
11075 # This can fail as the old devices are degraded and _WaitForSync
11076 # does a combined result over all disks, so we don't check its return value
11077 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11078 _WaitForSync(self.lu, self.instance)
11080 # Check all devices manually
11081 self._CheckDevices(self.instance.primary_node, iv_names)
11083 # Step: remove old storage
11084 if not self.early_release:
11085 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11086 self._RemoveOldStorage(self.target_node, iv_names)
11088 def _ExecDrbd8Secondary(self, feedback_fn):
11089 """Replace the secondary node for DRBD 8.
11091 The algorithm for replace is quite complicated:
11092 - for all disks of the instance:
11093 - create new LVs on the new node with same names
11094 - shutdown the drbd device on the old secondary
11095 - disconnect the drbd network on the primary
11096 - create the drbd device on the new secondary
11097 - network attach the drbd on the primary, using an artifice:
11098 the drbd code for Attach() will connect to the network if it
11099 finds a device which is connected to the good local disks but
11100 not network enabled
11101 - wait for sync across all devices
11102 - remove all disks from the old secondary
11104     Failures are not very well handled.
11106     """
11107     steps_total = 6
11109 pnode = self.instance.primary_node
11111 # Step: check device activation
11112 self.lu.LogStep(1, steps_total, "Check device existence")
11113 self._CheckDisksExistence([self.instance.primary_node])
11114 self._CheckVolumeGroup([self.instance.primary_node])
11116 # Step: check other node consistency
11117 self.lu.LogStep(2, steps_total, "Check peer consistency")
11118 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11120 # Step: create new storage
11121 self.lu.LogStep(3, steps_total, "Allocate new storage")
11122 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11123 for idx, dev in enumerate(disks):
11124 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11125 (self.new_node, idx))
11126 # we pass force_create=True to force LVM creation
11127 for new_lv in dev.children:
11128 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11129 True, _GetInstanceInfoText(self.instance), False)
11131     # Step 4: drbd minors and drbd setup changes
11132 # after this, we must manually remove the drbd minors on both the
11133 # error and the success paths
11134 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11135 minors = self.cfg.AllocateDRBDMinor([self.new_node
11136 for dev in self.instance.disks],
11137 self.instance.name)
11138     logging.debug("Allocated minors %r", minors)
11140     iv_names = {}
11141 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11142 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11143 (self.new_node, idx))
11144 # create new devices on new_node; note that we create two IDs:
11145 # one without port, so the drbd will be activated without
11146 # networking information on the new node at this stage, and one
11147 # with network, for the latter activation in step 4
11148 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11149       if self.instance.primary_node == o_node1:
11150         p_minor = o_minor1
11151       else:
11152         assert self.instance.primary_node == o_node2, "Three-node instance?"
11153         p_minor = o_minor2
11155 new_alone_id = (self.instance.primary_node, self.new_node, None,
11156 p_minor, new_minor, o_secret)
11157 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11158 p_minor, new_minor, o_secret)
11160 iv_names[idx] = (dev, dev.children, new_net_id)
11161       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11162                     new_net_id)
11163       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11164                               logical_id=new_alone_id,
11165                               children=dev.children,
11166                               size=dev.size,
11167                               params={})
11168       (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11169                                              self.cfg)
11170       try:
11171         _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11172                               anno_new_drbd,
11173                               _GetInstanceInfoText(self.instance), False)
11174 except errors.GenericError:
11175         self.cfg.ReleaseDRBDMinors(self.instance.name)
11176         raise
11178 # We have new devices, shutdown the drbd on the old secondary
11179 for idx, dev in enumerate(self.instance.disks):
11180 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11181 self.cfg.SetDiskID(dev, self.target_node)
11182 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11183 (dev, self.instance)).fail_msg
11184       if msg:
11185         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11186                            " node: %s" % (idx, msg),
11187 hint=("Please cleanup this device manually as"
11188 " soon as possible"))
11190 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11191 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11192 self.instance.disks)[pnode]
11194     msg = result.fail_msg
11195     if msg:
11196 # detaches didn't succeed (unlikely)
11197 self.cfg.ReleaseDRBDMinors(self.instance.name)
11198 raise errors.OpExecError("Can't detach the disks from the network on"
11199 " old node: %s" % (msg,))
11201 # if we managed to detach at least one, we update all the disks of
11202 # the instance to point to the new secondary
11203 self.lu.LogInfo("Updating instance configuration")
11204 for dev, _, new_logical_id in iv_names.itervalues():
11205 dev.logical_id = new_logical_id
11206 self.cfg.SetDiskID(dev, self.instance.primary_node)
11208 self.cfg.Update(self.instance, feedback_fn)
11210 # Release all node locks (the configuration has been updated)
11211 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11213 # and now perform the drbd attach
11214 self.lu.LogInfo("Attaching primary drbds to new secondary"
11215 " (standalone => connected)")
11216     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11217                                             self.new_node],
11218                                            self.node_secondary_ip,
11219                                            (self.instance.disks, self.instance),
11220                                            self.instance.name,
11221                                            False)
11222 for to_node, to_result in result.items():
11223 msg = to_result.fail_msg
11224       if msg:
11225         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11226                            to_node, msg,
11227                            hint=("please do a gnt-instance info to see the"
11228                                  " status of disks"))
11230 cstep = itertools.count(5)
11232 if self.early_release:
11233 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11234 self._RemoveOldStorage(self.target_node, iv_names)
11235 # TODO: Check if releasing locks early still makes sense
11236 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11237     else:
11238       # Release all resource locks except those used by the instance
11239       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11240                     keep=self.node_secondary_ip.keys())
11242 # TODO: Can the instance lock be downgraded here? Take the optional disk
11243 # shutdown in the caller into consideration.
11246 # This can fail as the old devices are degraded and _WaitForSync
11247 # does a combined result over all disks, so we don't check its return value
11248 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11249 _WaitForSync(self.lu, self.instance)
11251 # Check all devices manually
11252 self._CheckDevices(self.instance.primary_node, iv_names)
11254 # Step: remove old storage
11255 if not self.early_release:
11256 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11257 self._RemoveOldStorage(self.target_node, iv_names)
11260 class LURepairNodeStorage(NoHooksLU):
11261   """Repairs the volume group on a node.
11263   """
11264   REQ_BGL = False
11266 def CheckArguments(self):
11267 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11269 storage_type = self.op.storage_type
11271 if (constants.SO_FIX_CONSISTENCY not in
11272 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11273 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11274 " repaired" % storage_type,
11275 errors.ECODE_INVAL)
11277 def ExpandNames(self):
11278 self.needed_locks = {
11279       locking.LEVEL_NODE: [self.op.node_name],
11280       }
11282 def _CheckFaultyDisks(self, instance, node_name):
11283 """Ensure faulty disks abort the opcode or at least warn."""
11284     try:
11285       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11286                                   node_name, True):
11287 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11288 " node '%s'" % (instance.name, node_name),
11289 errors.ECODE_STATE)
11290 except errors.OpPrereqError, err:
11291 if self.op.ignore_consistency:
11292         self.proc.LogWarning(str(err.args[0]))
11293       else:
11294         raise
11296 def CheckPrereq(self):
11297     """Check prerequisites.
11299     """
11300 # Check whether any instance on this node has faulty disks
11301 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11302       if inst.admin_state != constants.ADMINST_UP:
11303         continue
11304 check_nodes = set(inst.all_nodes)
11305 check_nodes.discard(self.op.node_name)
11306 for inst_node_name in check_nodes:
11307 self._CheckFaultyDisks(inst, inst_node_name)
11309 def Exec(self, feedback_fn):
11310 feedback_fn("Repairing storage unit '%s' on %s ..." %
11311 (self.op.name, self.op.node_name))
11313 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11314 result = self.rpc.call_storage_execute(self.op.node_name,
11315                                            self.op.storage_type, st_args,
11316                                            self.op.name,
11317 constants.SO_FIX_CONSISTENCY)
11318 result.Raise("Failed to repair storage unit '%s' on %s" %
11319 (self.op.name, self.op.node_name))
11322 class LUNodeEvacuate(NoHooksLU):
11323   """Evacuates instances off a list of nodes.
11325   """
11326   REQ_BGL = False
11328 _MODE2IALLOCATOR = {
11329 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11330 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11331     constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11332     }
11333 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11334 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11335 constants.IALLOCATOR_NEVAC_MODES)
11337 def CheckArguments(self):
11338 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11340 def ExpandNames(self):
11341 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11343 if self.op.remote_node is not None:
11344 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11345 assert self.op.remote_node
11347 if self.op.remote_node == self.op.node_name:
11348 raise errors.OpPrereqError("Can not use evacuated node as a new"
11349 " secondary node", errors.ECODE_INVAL)
11351 if self.op.mode != constants.NODE_EVAC_SEC:
11352 raise errors.OpPrereqError("Without the use of an iallocator only"
11353 " secondary instances can be evacuated",
11354 errors.ECODE_INVAL)
11357 self.share_locks = _ShareAll()
11358 self.needed_locks = {
11359 locking.LEVEL_INSTANCE: [],
11360 locking.LEVEL_NODEGROUP: [],
11361       locking.LEVEL_NODE: [],
11362       }
11364 # Determine nodes (via group) optimistically, needs verification once locks
11365 # have been acquired
11366 self.lock_nodes = self._DetermineNodes()
11368 def _DetermineNodes(self):
11369     """Gets the list of nodes to operate on.
11371     """
11372 if self.op.remote_node is None:
11373 # Iallocator will choose any node(s) in the same group
11374       group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11375     else:
11376 group_nodes = frozenset([self.op.remote_node])
11378 # Determine nodes to be locked
11379 return set([self.op.node_name]) | group_nodes
11381 def _DetermineInstances(self):
11382     """Builds list of instances to operate on.
11384     """
11385 assert self.op.mode in constants.NODE_EVAC_MODES
11387 if self.op.mode == constants.NODE_EVAC_PRI:
11388 # Primary instances only
11389 inst_fn = _GetNodePrimaryInstances
11390 assert self.op.remote_node is None, \
11391 "Evacuating primary instances requires iallocator"
11392 elif self.op.mode == constants.NODE_EVAC_SEC:
11393 # Secondary instances only
11394 inst_fn = _GetNodeSecondaryInstances
11396     else:
11397       assert self.op.mode == constants.NODE_EVAC_ALL
11398 inst_fn = _GetNodeInstances
11399       # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11400       # instead of a list of instances
11401 raise errors.OpPrereqError("Due to an issue with the iallocator"
11402 " interface it is not possible to evacuate"
11403 " all instances at once; specify explicitly"
11404                                  " whether to evacuate primary or secondary"
11405                                  " instances",
11406 errors.ECODE_INVAL)
11408 return inst_fn(self.cfg, self.op.node_name)
11410 def DeclareLocks(self, level):
11411 if level == locking.LEVEL_INSTANCE:
11412 # Lock instances optimistically, needs verification once node and group
11413 # locks have been acquired
11414 self.needed_locks[locking.LEVEL_INSTANCE] = \
11415 set(i.name for i in self._DetermineInstances())
11417 elif level == locking.LEVEL_NODEGROUP:
11418 # Lock node groups for all potential target nodes optimistically, needs
11419 # verification once nodes have been acquired
11420 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11421 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11423 elif level == locking.LEVEL_NODE:
11424 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11426 def CheckPrereq(self):
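11427     # Verify that the optimistically acquired locks still cover what is needed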
11428 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11429 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11430 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11432 need_nodes = self._DetermineNodes()
11434 if not owned_nodes.issuperset(need_nodes):
11435 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11436                                  " locks were acquired, current nodes are"
11437                                  " '%s', used to be '%s'; retry the"
11438                                  " operation" %
11439                                  (self.op.node_name,
11440 utils.CommaJoin(need_nodes),
11441 utils.CommaJoin(owned_nodes)),
11442 errors.ECODE_STATE)
11444 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11445 if owned_groups != wanted_groups:
11446 raise errors.OpExecError("Node groups changed since locks were acquired,"
11447 " current groups are '%s', used to be '%s';"
11448 " retry the operation" %
11449 (utils.CommaJoin(wanted_groups),
11450 utils.CommaJoin(owned_groups)))
11452 # Determine affected instances
11453 self.instances = self._DetermineInstances()
11454 self.instance_names = [i.name for i in self.instances]
11456 if set(self.instance_names) != owned_instances:
11457 raise errors.OpExecError("Instances on node '%s' changed since locks"
11458 " were acquired, current instances are '%s',"
11459 " used to be '%s'; retry the operation" %
11460 (self.op.node_name,
11461 utils.CommaJoin(self.instance_names),
11462 utils.CommaJoin(owned_instances)))
11464 if self.instance_names:
11465       self.LogInfo("Evacuating instances from node '%s': %s",
11466                    self.op.node_name,
11467                    utils.CommaJoin(utils.NiceSort(self.instance_names)))
11468     else:
11469       self.LogInfo("No instances to evacuate from node '%s'",
11470                    self.op.node_name)
11472 if self.op.remote_node is not None:
11473 for i in self.instances:
11474 if i.primary_node == self.op.remote_node:
11475 raise errors.OpPrereqError("Node %s is the primary node of"
11476                                      " instance %s, cannot use it as"
11477                                      " secondary" %
11478 (self.op.remote_node, i.name),
11479 errors.ECODE_INVAL)
11481 def Exec(self, feedback_fn):
11482 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11484 if not self.instance_names:
11485       # No instances to evacuate
11486       jobs = []
11488 elif self.op.iallocator is not None:
11489 # TODO: Implement relocation to other group
11490 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11491 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11492 instances=list(self.instance_names))
11494 ial.Run(self.op.iallocator)
11496 if not ial.success:
11497 raise errors.OpPrereqError("Can't compute node evacuation using"
11498 " iallocator '%s': %s" %
11499 (self.op.iallocator, ial.info),
11500 errors.ECODE_NORES)
11502 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11504 elif self.op.remote_node is not None:
11505       assert self.op.mode == constants.NODE_EVAC_SEC
11506       jobs = [
11507         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11508                                         remote_node=self.op.remote_node,
11509                                         disks=[],
11510                                         mode=constants.REPLACE_DISK_CHG,
11511                                         early_release=self.op.early_release)]
11512         for instance_name in self.instance_names
11513         ]
11515     else:
11516       raise errors.ProgrammerError("No iallocator or remote node")
11518 return ResultWithJobs(jobs)
11521 def _SetOpEarlyRelease(early_release, op):
11522   """Sets C{early_release} flag on opcodes if available.
11524   """
11525   try:
11526 op.early_release = early_release
11527 except AttributeError:
11528     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11530   return op
11533 def _NodeEvacDest(use_nodes, group, nodes):
11534   """Returns group or nodes depending on caller's choice.
11536   """
11537   if use_nodes:
11538     return utils.CommaJoin(nodes)
11539   else:
11540     return group
11543 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11544 """Unpacks the result of change-group and node-evacuate iallocator requests.
11546 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11547 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11549 @type lu: L{LogicalUnit}
11550 @param lu: Logical unit instance
11551 @type alloc_result: tuple/list
11552 @param alloc_result: Result from iallocator
11553 @type early_release: bool
11554 @param early_release: Whether to release locks early if possible
11555 @type use_nodes: bool
11556 @param use_nodes: Whether to display node names instead of groups
11558   """
11559   (moved, failed, jobs) = alloc_result
11561   if failed:
11562 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11563 for (name, reason) in failed)
11564 lu.LogWarning("Unable to evacuate instances %s", failreason)
11565 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11567   if moved:
11568     lu.LogInfo("Instances to be moved: %s",
11569                utils.CommaJoin("%s (to %s)" %
11570                                (name, _NodeEvacDest(use_nodes, group, nodes))
11571                                for (name, group, nodes) in moved))
11573 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11574               map(opcodes.OpCode.LoadOpCode, ops))
11575           for ops in jobs]
11578 class LUInstanceGrowDisk(LogicalUnit):
11579   """Grow a disk of an instance.
11581   """
11582 HPATH = "disk-grow"
11583   HTYPE = constants.HTYPE_INSTANCE
11584   REQ_BGL = False
11586 def ExpandNames(self):
11587 self._ExpandAndLockInstance()
11588 self.needed_locks[locking.LEVEL_NODE] = []
11589 self.needed_locks[locking.LEVEL_NODE_RES] = []
11590 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11591 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11593 def DeclareLocks(self, level):
11594 if level == locking.LEVEL_NODE:
11595 self._LockInstancesNodes()
11596 elif level == locking.LEVEL_NODE_RES:
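11597       # Copy the node locks to the resource-lock level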
11598 self.needed_locks[locking.LEVEL_NODE_RES] = \
11599 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
11601 def BuildHooksEnv(self):
11602 """Build hooks env.
11604     This runs on the master, the primary and all the secondaries.
11606     """
11607     env = {
11608       "DISK": self.op.disk,
11609 "AMOUNT": self.op.amount,
11610 "ABSOLUTE": self.op.absolute,
11611       }
11612     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11613     return env
11615 def BuildHooksNodes(self):
11616     """Build hooks nodes.
11618     """
11619     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11620     return (nl, nl)
11622 def CheckPrereq(self):
11623 """Check prerequisites.
11625     This checks that the instance is in the cluster.
11627     """
11628 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11629 assert instance is not None, \
11630 "Cannot retrieve locked instance %s" % self.op.instance_name
11631 nodenames = list(instance.all_nodes)
11632 for node in nodenames:
11633 _CheckNodeOnline(self, node)
11635 self.instance = instance
11637 if instance.disk_template not in constants.DTS_GROWABLE:
11638 raise errors.OpPrereqError("Instance's disk layout does not support"
11639 " growing", errors.ECODE_INVAL)
11641 self.disk = instance.FindDisk(self.op.disk)
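11642     # Compute target size and delta (absolute or relative request)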
11643 if self.op.absolute:
11644 self.target = self.op.amount
11645       self.delta = self.target - self.disk.size
11646       if self.delta < 0:
11647 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11648 "current disk size (%s)" %
11649 (utils.FormatUnit(self.target, "h"),
11650 utils.FormatUnit(self.disk.size, "h")),
11651 errors.ECODE_STATE)
11652     else:
11653       self.delta = self.op.amount
11654       self.target = self.disk.size + self.delta
11655       if self.delta < 0:
11656 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11657 utils.FormatUnit(self.delta, "h"),
11658 errors.ECODE_INVAL)
11660 if instance.disk_template not in (constants.DT_FILE,
11661                                       constants.DT_SHARED_FILE,
11662                                       constants.DT_RBD):
11663       # TODO: check the free disk space for file, when that feature will be
11664       # supported
11665 _CheckNodesFreeDiskPerVG(self, nodenames,
11666 self.disk.ComputeGrowth(self.delta))
11668 def Exec(self, feedback_fn):
11669     """Execute disk grow.
11671     """
11672     instance = self.instance
11673     disk = self.disk
11675 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11676 assert (self.owned_locks(locking.LEVEL_NODE) ==
11677 self.owned_locks(locking.LEVEL_NODE_RES))
11679     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11680     if not disks_ok:
11681 raise errors.OpExecError("Cannot activate block device to grow")
11683 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11684 (self.op.disk, instance.name,
11685 utils.FormatUnit(self.delta, "h"),
11686 utils.FormatUnit(self.target, "h")))
11688 # First run all grow ops in dry-run mode
11689 for node in instance.all_nodes:
11690 self.cfg.SetDiskID(disk, node)
11691       result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11692                                            True)
11693 result.Raise("Grow request failed to node %s" % node)
11695 # We know that (as far as we can test) operations across different
11696 # nodes will succeed, time to run it for real
11697 for node in instance.all_nodes:
11698 self.cfg.SetDiskID(disk, node)
11699       result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11700                                            False)
11701 result.Raise("Grow request failed to node %s" % node)
11703 # TODO: Rewrite code to work properly
11704 # DRBD goes into sync mode for a short amount of time after executing the
11705 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11706 # calling "resize" in sync mode fails. Sleeping for a short amount of
11707     # time is a work-around.
11708     time.sleep(5)
11710 disk.RecordGrow(self.delta)
11711 self.cfg.Update(instance, feedback_fn)
11713 # Changes have been recorded, release node lock
11714 _ReleaseLocks(self, locking.LEVEL_NODE)
11716 # Downgrade lock while waiting for sync
11717 self.glm.downgrade(locking.LEVEL_INSTANCE)
11719 if self.op.wait_for_sync:
11720       disk_abort = not _WaitForSync(self, instance, disks=[disk])
11721       if disk_abort:
11722 self.proc.LogWarning("Disk sync-ing has not returned a good"
11723 " status; please check the instance")
11724 if instance.admin_state != constants.ADMINST_UP:
11725 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11726 elif instance.admin_state != constants.ADMINST_UP:
11727 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11728 " not supposed to be running because no wait for"
11729 " sync mode was requested")
11731 assert self.owned_locks(locking.LEVEL_NODE_RES)
11732 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11735 class LUInstanceQueryData(NoHooksLU):
11736   """Query runtime instance data.
11738   """
11739   REQ_BGL = False
11741 def ExpandNames(self):
11742 self.needed_locks = {}
11744 # Use locking if requested or when non-static information is wanted
11745 if not (self.op.static or self.op.use_locking):
11746 self.LogWarning("Non-static data requested, locks need to be acquired")
11747 self.op.use_locking = True
11749 if self.op.instances or not self.op.use_locking:
11750 # Expand instance names right here
11751 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11752     else:
11753       # Will use acquired locks
11754 self.wanted_names = None
11756 if self.op.use_locking:
11757 self.share_locks = _ShareAll()
11759 if self.wanted_names is None:
11760 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11761       else:
11762         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11764 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11765 self.needed_locks[locking.LEVEL_NODE] = []
11766 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11768 def DeclareLocks(self, level):
11769 if self.op.use_locking:
11770 if level == locking.LEVEL_NODEGROUP:
11771 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11773 # Lock all groups used by instances optimistically; this requires going
11774 # via the node before it's locked, requiring verification later on
11775 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11776 frozenset(group_uuid
11777                     for instance_name in owned_instances
11778                     for group_uuid in
11779 self.cfg.GetInstanceNodeGroups(instance_name))
11781 elif level == locking.LEVEL_NODE:
11782 self._LockInstancesNodes()
11784 def CheckPrereq(self):
11785 """Check prerequisites.
11787     This only checks the optional instance list against the existing names.
11789     """
11790 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11791 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11792 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11794 if self.wanted_names is None:
11795 assert self.op.use_locking, "Locking was not used"
11796 self.wanted_names = owned_instances
11798 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11800 if self.op.use_locking:
11801       _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11802                                 None)
11803     else:
11804 assert not (owned_instances or owned_groups or owned_nodes)
11806 self.wanted_instances = instances.values()
11808 def _ComputeBlockdevStatus(self, node, instance, dev):
11809     """Returns the status of a block device
11811     """
11812     if self.op.static or not node:
11813       return None
11815     self.cfg.SetDiskID(dev, node)
11817     result = self.rpc.call_blockdev_find(node, dev)
11818     if result.offline:
11819       return None
11821     result.Raise("Can't compute disk status for %s" % instance.name)
11823     status = result.payload
11824     if status is None:
11825       return None
11827 return (status.dev_path, status.major, status.minor,
11828 status.sync_percent, status.estimated_time,
11829 status.is_degraded, status.ldisk_status)
11831 def _ComputeDiskStatus(self, instance, snode, dev):
11832     """Compute block device status.
11834     """
11835 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11837 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11839 def _ComputeDiskStatusInner(self, instance, snode, dev):
11840 """Compute block device status.
11842     @attention: The device has to be annotated already.
11844     """
11845 if dev.dev_type in constants.LDS_DRBD:
11846 # we change the snode then (otherwise we use the one passed in)
11847 if dev.logical_id[0] == instance.primary_node:
11848 snode = dev.logical_id[1]
11849       else:
11850         snode = dev.logical_id[0]
11852     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11853                                               instance, dev)
11854 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11856     if dev.children:
11857       dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11858                                         instance, snode),
11859                          dev.children)
11860     else:
11861       dev_children = []
11863     return {
11864 "iv_name": dev.iv_name,
11865 "dev_type": dev.dev_type,
11866 "logical_id": dev.logical_id,
11867 "physical_id": dev.physical_id,
11868 "pstatus": dev_pstatus,
11869 "sstatus": dev_sstatus,
11870       "children": dev_children,
11871       "mode": dev.mode,
11872       "size": dev.size,
11873       }
11875 def Exec(self, feedback_fn):
11876     """Gather and return data"""
11877     result = {}
11879 cluster = self.cfg.GetClusterInfo()
11881 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11882 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
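11883     # Resolve the node group objects for every node used by the instances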
11884 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11885 for node in nodes.values()))
11887 group2name_fn = lambda uuid: groups[uuid].name
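11888     # Helper mapping a node group UUID to its name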
11889 for instance in self.wanted_instances:
11890 pnode = nodes[instance.primary_node]
11892       if self.op.static or pnode.offline:
11893         remote_state = None
11894         if pnode.offline:
11895           self.LogWarning("Primary node %s is marked offline, returning static"
11896                           " information only for instance %s" %
11897                           (pnode.name, instance.name))
11898       else:
11899         remote_info = self.rpc.call_instance_info(instance.primary_node,
11900                                                   instance.name,
11901                                                   instance.hypervisor)
11902 remote_info.Raise("Error checking node %s" % instance.primary_node)
11903 remote_info = remote_info.payload
11904 if remote_info and "state" in remote_info:
11905 remote_state = "up"
11906         else:
11907           if instance.admin_state == constants.ADMINST_UP:
11908             remote_state = "down"
11909           else:
11910             remote_state = instance.admin_state
11912       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11913                   instance.disks)
11915 snodes_group_uuids = [nodes[snode_name].group
11916 for snode_name in instance.secondary_nodes]
11918 result[instance.name] = {
11919 "name": instance.name,
11920 "config_state": instance.admin_state,
11921 "run_state": remote_state,
11922 "pnode": instance.primary_node,
11923 "pnode_group_uuid": pnode.group,
11924 "pnode_group_name": group2name_fn(pnode.group),
11925 "snodes": instance.secondary_nodes,
11926 "snodes_group_uuids": snodes_group_uuids,
11927         "snodes_group_names": map(group2name_fn, snodes_group_uuids),
11928         "os": instance.os,
11929 # this happens to be the same format used for hooks
11930 "nics": _NICListToTuple(self, instance.nics),
11931         "disk_template": instance.disk_template,
11932         "disks": disks,
11933 "hypervisor": instance.hypervisor,
11934 "network_port": instance.network_port,
11935 "hv_instance": instance.hvparams,
11936 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11937 "be_instance": instance.beparams,
11938 "be_actual": cluster.FillBE(instance),
11939 "os_instance": instance.osparams,
11940 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11941 "serial_no": instance.serial_no,
11942 "mtime": instance.mtime,
11943 "ctime": instance.ctime,
11944         "uuid": instance.uuid,
11945         }
11947     return result
11950 def PrepareContainerMods(mods, private_fn):
11951 """Prepares a list of container modifications by adding a private data field.
11953 @type mods: list of tuples; (operation, index, parameters)
11954 @param mods: List of modifications
11955 @type private_fn: callable or None
11956   @param private_fn: Callable for constructing a private data field for a
11957     modification
11958   @rtype: list
11960   """
11961   if private_fn is None:
11962     fn = lambda: None
11963   else:
11964     fn = private_fn
11966 return [(op, idx, params, fn()) for (op, idx, params) in mods]
11969 #: Type description for changes as returned by L{ApplyContainerMods}'s
11970 #: callbacks
11971 _TApplyContModsCbChanges = \
11972 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
11973     ht.TNonEmptyString,
11974     ht.TAny,
11975     ])))
11978 def ApplyContainerMods(kind, container, chgdesc, mods,
11979 create_fn, modify_fn, remove_fn):
11980   """Applies descriptions in C{mods} to C{container}.
11982   @type kind: string
11983 @param kind: One-word item description
11984 @type container: list
11985 @param container: Container to modify
11986 @type chgdesc: None or list
11987   @param chgdesc: List of applied changes
11988   @type mods: list
11989 @param mods: Modifications as returned by L{PrepareContainerMods}
11990 @type create_fn: callable
11991 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
11992 receives absolute item index, parameters and private data object as added
11993     by L{PrepareContainerMods}, returns tuple containing new item and changes
11994     as list
11995 @type modify_fn: callable
11996 @param modify_fn: Callback for modifying an existing item
11997 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
11998     and private data object as added by L{PrepareContainerMods}, returns
11999     changes as list
12000 @type remove_fn: callable
12001 @param remove_fn: Callback on removing item; receives absolute item index,
12002     item and private data object as added by L{PrepareContainerMods}
12004   """
12005 for (op, idx, params, private) in mods:
12006     if idx == -1:
12007       # Append
12008       absidx = len(container) - 1
12009     elif idx < 0:
12010       raise IndexError("Not accepting negative indices other than -1")
12011 elif idx > len(container):
12012 raise IndexError("Got %s index %s, but there are only %s" %
12013                        (kind, idx, len(container)))
12014     else:
12015       absidx = idx
12017     changes = None
12019 if op == constants.DDM_ADD:
12020       # Calculate where item will be added
12021       if idx == -1:
12022         addidx = len(container)
12023       else:
12024         addidx = idx
12026       if create_fn is None:
12027         item = params
12028       else:
12029         (item, changes) = create_fn(addidx, params, private)
12031       if idx == -1:
12032         container.append(item)
12033       else:
12035         assert idx <= len(container)
12036 # list.insert does so before the specified index
12037 container.insert(idx, item)
12038     else:
12039       # Retrieve existing item
12040       try:
12041         item = container[absidx]
12042       except IndexError:
12043         raise IndexError("Invalid %s index %s" % (kind, idx))
12045       if op == constants.DDM_REMOVE:
12046         assert not params
12048 if remove_fn is not None:
12049 remove_fn(absidx, item, private)
12051 changes = [("%s/%s" % (kind, absidx), "remove")]
12053 assert container[absidx] == item
12054 del container[absidx]
12055 elif op == constants.DDM_MODIFY:
12056 if modify_fn is not None:
12057 changes = modify_fn(absidx, item, params, private)
12058       else:
12059         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12061 assert _TApplyContModsCbChanges(changes)
12063 if not (chgdesc is None or changes is None):
12064 chgdesc.extend(changes)
12067 def _UpdateIvNames(base_index, disks):
12068 """Updates the C{iv_name} attribute of disks.
12070   @type disks: list of L{objects.Disk}
12072   """
12073 for (idx, disk) in enumerate(disks):
12074 disk.iv_name = "disk/%s" % (base_index + idx, )
12077 class _InstNicModPrivate:
12078 """Data structure for network interface modifications.
12080   Used by L{LUInstanceSetParams}.
12082   """
12083   def __init__(self):
12084     self.params = None
12085     self.filled = None
12088 class LUInstanceSetParams(LogicalUnit):
12089   """Modifies an instance's parameters.
12091   """
12092 HPATH = "instance-modify"
12093   HTYPE = constants.HTYPE_INSTANCE
12094   REQ_BGL = False
12096   @staticmethod
12097 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12098 assert ht.TList(mods)
12099 assert not mods or len(mods[0]) in (2, 3)
12101     if mods and len(mods[0]) == 2:
12102       result = []
12103       addremove = 0
12105       for op, params in mods:
12106 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12107           result.append((op, -1, params))
12108           addremove += 1
12110           if addremove > 1:
12111             raise errors.OpPrereqError("Only one %s add or remove operation is"
12112                                        " supported at a time" % kind,
12113                                        errors.ECODE_INVAL)
12114         else:
12115           result.append((constants.DDM_MODIFY, op, params))
12117       assert verify_fn(result)
12118     else:
12119       result = mods
12121     return result
12123   @staticmethod
12124 def _CheckMods(kind, mods, key_types, item_fn):
12125     """Ensures requested disk/NIC modifications are valid.
12127     """
12128 for (op, _, params) in mods:
12129 assert ht.TDict(params)
12131 utils.ForceDictType(params, key_types)
12133       if op == constants.DDM_REMOVE:
12134         if params:
12135           raise errors.OpPrereqError("No settings should be passed when"
12136                                      " removing a %s" % kind,
12137                                      errors.ECODE_INVAL)
12138 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12139 item_fn(op, params)
12141 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12143   @staticmethod
12144   def _VerifyDiskModification(op, params):
12145     """Verifies a disk modification.
12147     """
12148 if op == constants.DDM_ADD:
12149 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12150 if mode not in constants.DISK_ACCESS_SET:
12151 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12152 errors.ECODE_INVAL)
12154 size = params.get(constants.IDISK_SIZE, None)
12155       if size is None:
12156         raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12157                                    constants.IDISK_SIZE, errors.ECODE_INVAL)
12159       try:
12160         size = int(size)
12161       except (TypeError, ValueError), err:
12162 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12163 errors.ECODE_INVAL)
12165 params[constants.IDISK_SIZE] = size
12167 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12168 raise errors.OpPrereqError("Disk size change not possible, use"
12169 " grow-disk", errors.ECODE_INVAL)
12171   @staticmethod
12172   def _VerifyNicModification(op, params):
12173     """Verifies a network interface modification.
12175     """
12176 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12177       ip = params.get(constants.INIC_IP, None)
12178       if ip is None:
12179         pass
12180 elif ip.lower() == constants.VALUE_NONE:
12181 params[constants.INIC_IP] = None
12182 elif not netutils.IPAddress.IsValid(ip):
12183 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12184 errors.ECODE_INVAL)
12186 bridge = params.get("bridge", None)
12187 link = params.get(constants.INIC_LINK, None)
12188 if bridge and link:
12189 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12190 " at the same time", errors.ECODE_INVAL)
12191 elif bridge and bridge.lower() == constants.VALUE_NONE:
12192 params["bridge"] = None
12193 elif link and link.lower() == constants.VALUE_NONE:
12194 params[constants.INIC_LINK] = None
12196 if op == constants.DDM_ADD:
12197 macaddr = params.get(constants.INIC_MAC, None)
12198 if macaddr is None:
12199 params[constants.INIC_MAC] = constants.VALUE_AUTO
12201 if constants.INIC_MAC in params:
12202 macaddr = params[constants.INIC_MAC]
12203 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12204 macaddr = utils.NormalizeAndValidateMac(macaddr)
12206 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12207 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12208 " modifying an existing NIC",
12209 errors.ECODE_INVAL)
12211 def CheckArguments(self):
12212 if not (self.op.nics or self.op.disks or self.op.disk_template or
12213 self.op.hvparams or self.op.beparams or self.op.os_name or
12214 self.op.offline is not None or self.op.runtime_mem):
12215 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12217 if self.op.hvparams:
12218 _CheckGlobalHvParams(self.op.hvparams)
12220     self.op.disks = \
12221       self._UpgradeDiskNicMods("disk", self.op.disks,
12222 opcodes.OpInstanceSetParams.TestDiskModifications)
12223     self.op.nics = \
12224       self._UpgradeDiskNicMods("NIC", self.op.nics,
12225 opcodes.OpInstanceSetParams.TestNicModifications)
12227 # Check disk modifications
12228 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12229 self._VerifyDiskModification)
12231 if self.op.disks and self.op.disk_template is not None:
12232 raise errors.OpPrereqError("Disk template conversion and other disk"
12233 " changes not supported at the same time",
12234 errors.ECODE_INVAL)
12236 if (self.op.disk_template and
12237 self.op.disk_template in constants.DTS_INT_MIRROR and
12238 self.op.remote_node is None):
12239 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12240 " one requires specifying a secondary node",
12241 errors.ECODE_INVAL)
12243 # Check NIC modifications
12244 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12245 self._VerifyNicModification)
12247 def ExpandNames(self):
12248 self._ExpandAndLockInstance()
12249 # Can't even acquire node locks in shared mode as upcoming changes in
12250 # Ganeti 2.6 will start to modify the node object on disk conversion
12251 self.needed_locks[locking.LEVEL_NODE] = []
12252 self.needed_locks[locking.LEVEL_NODE_RES] = []
12253 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12255 def DeclareLocks(self, level):
12256 # TODO: Acquire group lock in shared mode (disk parameters)
12257 if level == locking.LEVEL_NODE:
12258 self._LockInstancesNodes()
12259 if self.op.disk_template and self.op.remote_node:
12260 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12261 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12262 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
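12263       # Copy the node locks to the resource level for the disk conversion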
12264 self.needed_locks[locking.LEVEL_NODE_RES] = \
12265 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12267 def BuildHooksEnv(self):
12268 """Build hooks env.
12270     This runs on the master, primary and secondaries.
12272     """
12273     args = {}
12274 if constants.BE_MINMEM in self.be_new:
12275 args["minmem"] = self.be_new[constants.BE_MINMEM]
12276 if constants.BE_MAXMEM in self.be_new:
12277 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12278 if constants.BE_VCPUS in self.be_new:
12279 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12280 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12281 # information at all.
12283     if self._new_nics is not None:
12284       nics = []
12286 for nic in self._new_nics:
12287 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12288 mode = nicparams[constants.NIC_MODE]
12289 link = nicparams[constants.NIC_LINK]
12290 nics.append((nic.ip, nic.mac, mode, link))
12292 args["nics"] = nics
12294 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12295 if self.op.disk_template:
12296 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12297 if self.op.runtime_mem:
12298       env["RUNTIME_MEMORY"] = self.op.runtime_mem
12300     return env
12302 def BuildHooksNodes(self):
12303     """Build hooks nodes.
12305     """
12306     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12307     return (nl, nl)
12309   def _PrepareNicModification(self, params, private, old_ip, old_params,
12310                               cluster, pnode):
12311 update_params_dict = dict([(key, params[key])
12312                                for key in constants.NICS_PARAMETERS
12313                                if key in params])
12315     if "bridge" in params:
12316 update_params_dict[constants.NIC_LINK] = params["bridge"]
12318 new_params = _GetUpdatedParams(old_params, update_params_dict)
12319 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12321 new_filled_params = cluster.SimpleFillNIC(new_params)
12322 objects.NIC.CheckParameterSyntax(new_filled_params)
12324 new_mode = new_filled_params[constants.NIC_MODE]
12325 if new_mode == constants.NIC_MODE_BRIDGED:
12326 bridge = new_filled_params[constants.NIC_LINK]
12327 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12328       if msg:
12329         msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12330         if self.op.force:
12331           self.warn.append(msg)
12332         else:
12333           raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12335 elif new_mode == constants.NIC_MODE_ROUTED:
12336 ip = params.get(constants.INIC_IP, old_ip)
12337       if ip is None:
12338         raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12339                                    " on a routed NIC", errors.ECODE_INVAL)
12341 if constants.INIC_MAC in params:
12342 mac = params[constants.INIC_MAC]
12343       if mac is None:
12344         raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12345                                    errors.ECODE_INVAL)
12346 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12347 # otherwise generate the MAC address
12348 params[constants.INIC_MAC] = \
12349 self.cfg.GenerateMAC(self.proc.GetECId())
12350       else:
12351         # or validate/reserve the current one
12352         try:
12353           self.cfg.ReserveMAC(mac, self.proc.GetECId())
12354 except errors.ReservationError:
12355 raise errors.OpPrereqError("MAC address '%s' already in use"
12356 " in cluster" % mac,
12357 errors.ECODE_NOTUNIQUE)
12359 private.params = new_params
12360 private.filled = new_filled_params
12362 def CheckPrereq(self):
12363 """Check prerequisites.
12365     This only checks the instance list against the existing names.
12367     """
12368 # checking the new params on the primary/secondary nodes
12370 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12371 cluster = self.cluster = self.cfg.GetClusterInfo()
12372 assert self.instance is not None, \
12373 "Cannot retrieve locked instance %s" % self.op.instance_name
12374 pnode = instance.primary_node
12375 nodelist = list(instance.all_nodes)
12376 pnode_info = self.cfg.GetNodeInfo(pnode)
12377 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12379 # Prepare disk/NIC modifications
12380 self.diskmod = PrepareContainerMods(self.op.disks, None)
12381 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
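12383     # OS change: verify the new OS unless --force was given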
12384 if self.op.os_name and not self.op.force:
12385 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12386 self.op.force_variant)
12387 instance_os = self.op.os_name
12388     else:
12389       instance_os = instance.os
12391 assert not (self.op.disk_template and self.op.disks), \
12392 "Can't modify disk template and apply disk changes at the same time"
12394 if self.op.disk_template:
12395 if instance.disk_template == self.op.disk_template:
12396 raise errors.OpPrereqError("Instance already has disk template %s" %
12397 instance.disk_template, errors.ECODE_INVAL)
12399 if (instance.disk_template,
12400 self.op.disk_template) not in self._DISK_CONVERSIONS:
12401 raise errors.OpPrereqError("Unsupported disk template conversion from"
12402 " %s to %s" % (instance.disk_template,
12403 self.op.disk_template),
12404 errors.ECODE_INVAL)
12405 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12406 msg="cannot change disk template")
12407 if self.op.disk_template in constants.DTS_INT_MIRROR:
12408 if self.op.remote_node == pnode:
12409 raise errors.OpPrereqError("Given new secondary node %s is the same"
12410 " as the primary node of the instance" %
12411 self.op.remote_node, errors.ECODE_STATE)
12412 _CheckNodeOnline(self, self.op.remote_node)
12413 _CheckNodeNotDrained(self, self.op.remote_node)
12414 # FIXME: here we assume that the old instance type is DT_PLAIN
12415 assert instance.disk_template == constants.DT_PLAIN
12416 disks = [{constants.IDISK_SIZE: d.size,
12417 constants.IDISK_VG: d.logical_id[0]}
12418 for d in instance.disks]
12419 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12420 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12422 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12423 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12424 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12425 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12426 ignore=self.op.ignore_ipolicy)
12427 if pnode_info.group != snode_info.group:
12428 self.LogWarning("The primary and secondary nodes are in two"
12429 " different node groups; the disk parameters"
12430                         " from the first disk's node group will be"
12431                         " used")
12433 # hvparams processing
12434 if self.op.hvparams:
12435 hv_type = instance.hypervisor
12436 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12437 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12438 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
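12440       # Local syntax check first; _CheckHVParams then validates on all nodes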
12441 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12442 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12443 self.hv_proposed = self.hv_new = hv_new # the new actual values
12444 self.hv_inst = i_hvdict # the new dict (without defaults)
12445     else:
12446       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12447                                               instance.hvparams)
12448 self.hv_new = self.hv_inst = {}
12450 # beparams processing
12451 if self.op.beparams:
12452       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12453                                    use_none=True)
12454 objects.UpgradeBeParams(i_bedict)
12455 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12456 be_new = cluster.SimpleFillBE(i_bedict)
12457 self.be_proposed = self.be_new = be_new # the new actual values
12458 self.be_inst = i_bedict # the new dict (without defaults)
12459     else:
12460       self.be_new = self.be_inst = {}
12461 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12462 be_old = cluster.FillBE(instance)
12464 # CPU param validation -- checking every time a parameter is
12465     # changed to cover all cases where either CPU mask or vcpus have
12466     # been changed
12467 if (constants.BE_VCPUS in self.be_proposed and
12468 constants.HV_CPU_MASK in self.hv_proposed):
12469       cpu_list = \
12470         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12471 # Verify mask is consistent with number of vCPUs. Can skip this
12472 # test if only 1 entry in the CPU mask, which means same mask
12473 # is applied to all vCPUs.
12474 if (len(cpu_list) > 1 and
12475 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12476         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12477                                    " CPU mask [%s]" %
12478 (self.be_proposed[constants.BE_VCPUS],
12479 self.hv_proposed[constants.HV_CPU_MASK]),
12480 errors.ECODE_INVAL)
12482 # Only perform this test if a new CPU mask is given
12483 if constants.HV_CPU_MASK in self.hv_new:
12484 # Calculate the largest CPU number requested
12485 max_requested_cpu = max(map(max, cpu_list))
12486 # Check that all of the instance's nodes have enough physical CPUs to
12487 # satisfy the requested CPU mask
12488 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12489 max_requested_cpu + 1, instance.hypervisor)
12491 # osparams processing
12492 if self.op.osparams:
12493 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12494 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12495       self.os_inst = i_osdict # the new dict (without defaults)
12496     else:
12497       self.os_inst = {}
12499     self.warn = []
12501 #TODO(dynmem): do the appropriate check involving MINMEM
12502 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12503 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12504 mem_check_list = [pnode]
12505 if be_new[constants.BE_AUTO_BALANCE]:
12506 # either we changed auto_balance to yes or it was from before
12507 mem_check_list.extend(instance.secondary_nodes)
12508 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12509 instance.hypervisor)
12510 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12511 [instance.hypervisor])
12512 pninfo = nodeinfo[pnode]
12513       msg = pninfo.fail_msg
12514       if msg:
12515         # Assume the primary node is unreachable and go ahead
12516         self.warn.append("Can't get info from primary node %s: %s" %
12517                          (pnode, msg))
12518       else:
12519 (_, _, (pnhvinfo, )) = pninfo.payload
12520 if not isinstance(pnhvinfo.get("memory_free", None), int):
12521 self.warn.append("Node data from primary node %s doesn't contain"
12522 " free memory information" % pnode)
12523 elif instance_info.fail_msg:
12524 self.warn.append("Can't get instance runtime information: %s" %
12525 instance_info.fail_msg)
12526         else:
12527           if instance_info.payload:
12528             current_mem = int(instance_info.payload["memory"])
12529           else:
12530             # Assume instance not running
12531             # (there is a slight race condition here, but it's not very
12532             # probable, and we have no other way to check)
12533             # TODO: Describe race condition
12534             current_mem = 0
12535 #TODO(dynmem): do the appropriate check involving MINMEM
12536 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12537                       pnhvinfo["memory_free"])
12538           if miss_mem > 0:
12539 raise errors.OpPrereqError("This change will prevent the instance"
12540 " from starting, due to %d MB of memory"
12541                                        " missing on its primary node" %
12542                                        miss_mem,
12543 errors.ECODE_NORES)
12545 if be_new[constants.BE_AUTO_BALANCE]:
12546 for node, nres in nodeinfo.items():
12547           if node not in instance.secondary_nodes:
12548             continue
12549 nres.Raise("Can't get info from secondary node %s" % node,
12550 prereq=True, ecode=errors.ECODE_STATE)
12551 (_, _, (nhvinfo, )) = nres.payload
12552 if not isinstance(nhvinfo.get("memory_free", None), int):
12553 raise errors.OpPrereqError("Secondary node %s didn't return free"
12554 " memory information" % node,
12555 errors.ECODE_STATE)
12556 #TODO(dynmem): do the appropriate check involving MINMEM
12557 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12558 raise errors.OpPrereqError("This change will prevent the instance"
12559 " from failover to its secondary node"
12560 " %s, due to not enough memory" % node,
12561 errors.ECODE_STATE)
12563 if self.op.runtime_mem:
12564       remote_info = self.rpc.call_instance_info(instance.primary_node,
12565                                                 instance.name,
12566 instance.hypervisor)
12567 remote_info.Raise("Error checking node %s" % instance.primary_node)
12568 if not remote_info.payload: # not running already
12569 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12570 errors.ECODE_STATE)
12572 current_memory = remote_info.payload["memory"]
12573 if (not self.op.force and
12574 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12575 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12576 raise errors.OpPrereqError("Instance %s must have memory between %d"
12577 " and %d MB of memory unless --force is"
12578 " given" % (instance.name,
12579 self.be_proposed[constants.BE_MINMEM],
12580 self.be_proposed[constants.BE_MAXMEM]),
12581 errors.ECODE_INVAL)
12583 if self.op.runtime_mem > current_memory:
12584 _CheckNodeFreeMemory(self, instance.primary_node,
12585                              "ballooning memory for instance %s" %
12586                              instance.name,
12587                              self.op.runtime_mem - current_memory,
12588 instance.hypervisor)
12590 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12591 raise errors.OpPrereqError("Disk operations not supported for"
12592 " diskless instances",
12593 errors.ECODE_INVAL)
12595 def _PrepareNicCreate(_, params, private):
12596 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12597 return (None, None)
12599 def _PrepareNicMod(_, nic, params, private):
12600 self._PrepareNicModification(params, private, nic.ip,
12601                                    nic.nicparams, cluster, pnode)
12602       return None
12604 # Verify NIC changes (operating on copy)
12605 nics = instance.nics[:]
12606 ApplyContainerMods("NIC", nics, None, self.nicmod,
12607 _PrepareNicCreate, _PrepareNicMod, None)
12608 if len(nics) > constants.MAX_NICS:
12609 raise errors.OpPrereqError("Instance has too many network interfaces"
12610 " (%d), cannot add more" % constants.MAX_NICS,
12611 errors.ECODE_STATE)
12613 # Verify disk changes (operating on a copy)
12614 disks = instance.disks[:]
12615 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12616 if len(disks) > constants.MAX_DISKS:
12617 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12618 " more" % constants.MAX_DISKS,
12619 errors.ECODE_STATE)
12621 if self.op.offline is not None:
12622 if self.op.offline:
12623 msg = "can't change to offline"
12624       else:
12625         msg = "can't change to online"
12626 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12628 # Pre-compute NIC changes (necessary to use result in hooks)
12629 self._nic_chgdesc = []
12630     if self.nicmod:
12631       # Operate on copies as this is still in prereq
12632       nics = [nic.Copy() for nic in instance.nics]
12633 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12634 self._CreateNewNic, self._ApplyNicMods, None)
12635 self._new_nics = nics
12636     else:
12637       self._new_nics = None
12639 def _ConvertPlainToDrbd(self, feedback_fn):
12640     """Converts an instance from plain to drbd.
12642     """
12643 feedback_fn("Converting template to drbd")
12644 instance = self.instance
12645 pnode = instance.primary_node
12646 snode = self.op.remote_node
12648 assert instance.disk_template == constants.DT_PLAIN
12650 # create a fake disk info for _GenerateDiskTemplate
12651 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12652 constants.IDISK_VG: d.logical_id[0]}
12653 for d in instance.disks]
12654 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12655 instance.name, pnode, [snode],
12656                                       disk_info, None, None, 0, feedback_fn,
12657                                       self.diskparams)
12658     anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12659                                         self.diskparams)
12660 info = _GetInstanceInfoText(instance)
12661 feedback_fn("Creating additional volumes...")
12662 # first, create the missing data and meta devices
12663 for disk in anno_disks:
12664 # unfortunately this is... not too nice
12665       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12666                             info, True)
12667 for child in disk.children:
12668 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12669     # at this stage, all new LVs have been created, we can rename the
12670     # old ones
12671 feedback_fn("Renaming original volumes...")
12672 rename_list = [(o, n.children[0].logical_id)
12673 for (o, n) in zip(instance.disks, new_disks)]
12674 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12675 result.Raise("Failed to rename original LVs")
12677 feedback_fn("Initializing DRBD devices...")
12678 # all child devices are in place, we can now create the DRBD devices
12679 for disk in anno_disks:
12680 for node in [pnode, snode]:
12681 f_create = node == pnode
12682 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12684 # at this point, the instance has been modified
12685 instance.disk_template = constants.DT_DRBD8
12686 instance.disks = new_disks
12687 self.cfg.Update(instance, feedback_fn)
12689 # Release node locks while waiting for sync
12690 _ReleaseLocks(self, locking.LEVEL_NODE)
12692 # disks are created, waiting for sync
12693 disk_abort = not _WaitForSync(self, instance,
12694                                   oneshot=not self.op.wait_for_sync)
12695     if disk_abort:
12696 raise errors.OpExecError("There are some degraded disks for"
12697 " this instance, please cleanup manually")
12699 # Node resource locks will be released by caller
12701 def _ConvertDrbdToPlain(self, feedback_fn):
12702 """Converts an instance from drbd to plain.
12705 instance = self.instance
12707 assert len(instance.secondary_nodes) == 1
12708 assert instance.disk_template == constants.DT_DRBD8
12710 pnode = instance.primary_node
12711 snode = instance.secondary_nodes[0]
12712 feedback_fn("Converting template to plain")
12714 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12715 new_disks = [d.children[0] for d in instance.disks]
12717 # copy over size and mode
12718 for parent, child in zip(old_disks, new_disks):
12719 child.size = parent.size
12720 child.mode = parent.mode
12722 # this is a DRBD disk, return its port to the pool
12723 # NOTE: this must be done right before the call to cfg.Update!
12724 for disk in old_disks:
12725 tcp_port = disk.logical_id[2]
12726 self.cfg.AddTcpUdpPort(tcp_port)
12728 # update instance structure
12729 instance.disks = new_disks
12730 instance.disk_template = constants.DT_PLAIN
12731 self.cfg.Update(instance, feedback_fn)
12733 # Release locks in case removing disks takes a while
12734 _ReleaseLocks(self, locking.LEVEL_NODE)
12736 feedback_fn("Removing volumes on the secondary node...")
12737 for disk in old_disks:
12738 self.cfg.SetDiskID(disk, snode)
12739 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12740 if msg:
12741 self.LogWarning("Could not remove block device %s on node %s,"
12742 " continuing anyway: %s", disk.iv_name, snode, msg)
12744 feedback_fn("Removing unneeded volumes on the primary node...")
12745 for idx, disk in enumerate(old_disks):
12746 meta = disk.children[1]
12747 self.cfg.SetDiskID(meta, pnode)
12748 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12749 if msg:
12750 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12751 " continuing anyway: %s", idx, pnode, msg)
12753 def _CreateNewDisk(self, idx, params, _):
12754 """Creates a new disk.
12757 instance = self.instance
12760 if instance.disk_template in constants.DTS_FILEBASED:
12761 (file_driver, file_path) = instance.disks[0].logical_id
12762 file_path = os.path.dirname(file_path)
12763 else:
12764 file_driver = file_path = None
12766 disk = \
12767 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12768 instance.primary_node, instance.secondary_nodes,
12769 [params], file_path, file_driver, idx,
12770 self.Log, self.diskparams)[0]
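# Only the first element is kept: the template generator returns a list of
# disk objects even when, as here, a single [params] entry is passed in.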
12772 info = _GetInstanceInfoText(instance)
12774 logging.info("Creating volume %s for instance %s",
12775 disk.iv_name, instance.name)
12776 # Note: this needs to be kept in sync with _CreateDisks
12778 for node in instance.all_nodes:
12779 f_create = (node == instance.primary_node)
12780 try:
12781 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12782 except errors.OpExecError, err:
12783 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12784 disk.iv_name, disk, node, err)
12787 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12791 def _ModifyDisk(idx, disk, params, _):
12792 """Modifies a disk.
12795 disk.mode = params[constants.IDISK_MODE]
12798 ("disk.mode/%d" % idx, disk.mode),
12801 def _RemoveDisk(self, idx, root, _):
12805 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
12806 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
12807 self.cfg.SetDiskID(disk, node)
12808 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12809 if msg:
12810 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12811 " continuing anyway", idx, node, msg)
12813 # if this is a DRBD disk, return its port to the pool
12814 if root.dev_type in constants.LDS_DRBD:
12815 self.cfg.AddTcpUdpPort(root.logical_id[2])
12818 def _CreateNewNic(idx, params, private):
12819 """Creates data structure for a new network interface.
12822 mac = params[constants.INIC_MAC]
12823 ip = params.get(constants.INIC_IP, None)
12824 nicparams = private.params
12826 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12828 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12829 (mac, ip, private.filled[constants.NIC_MODE],
12830 private.filled[constants.NIC_LINK])),
12834 def _ApplyNicMods(idx, nic, params, private):
12835 """Modifies a network interface.
12840 for key in [constants.INIC_MAC, constants.INIC_IP]:
12842 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12843 setattr(nic, key, params[key])
12846 nic.nicparams = private.params
12848 for (key, val) in params.items():
12849 changes.append(("nic.%s/%d" % (key, idx), val))
12853 def Exec(self, feedback_fn):
12854 """Modifies an instance.
12856 All parameters take effect only at the next restart of the instance.
12859 # Process here the warnings from CheckPrereq, as we don't have a
12860 # feedback_fn there.
12861 # TODO: Replace with self.LogWarning
12862 for warn in self.warn:
12863 feedback_fn("WARNING: %s" % warn)
12865 assert ((self.op.disk_template is None) ^
12866 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12867 "Not owning any node resource locks"
12869 result = []
12870 instance = self.instance
12873 if self.op.runtime_mem:
12874 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12876 self.op.runtime_mem)
12877 rpcres.Raise("Cannot modify instance runtime memory")
12878 result.append(("runtime_memory", self.op.runtime_mem))
12880 # Apply disk changes
12881 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12882 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12883 _UpdateIvNames(0, instance.disks)
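# Disk template conversion (plain <-> drbd): verify that the needed node and
# node-resource locks are held, shut down the instance disks, then run the
# matching converter from _DISK_CONVERSIONS below.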
12885 if self.op.disk_template:
12887 check_nodes = set(instance.all_nodes)
12888 if self.op.remote_node:
12889 check_nodes.add(self.op.remote_node)
12890 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12891 owned = self.owned_locks(level)
12892 assert not (check_nodes - owned), \
12893 ("Not owning the correct locks, owning %r, expected at least %r" %
12894 (owned, check_nodes))
12896 r_shut = _ShutdownInstanceDisks(self, instance)
12897 if not r_shut:
12898 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12899 " proceed with disk template conversion")
12900 mode = (instance.disk_template, self.op.disk_template)
12901 try:
12902 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12903 except:
12904 self.cfg.ReleaseDRBDMinors(instance.name)
12905 raise
12906 result.append(("disk_template", self.op.disk_template))
12908 assert instance.disk_template == self.op.disk_template, \
12909 ("Expected disk template '%s', found '%s'" %
12910 (self.op.disk_template, instance.disk_template))
12912 # Release node and resource locks if there are any (they might already have
12913 # been released during disk conversion)
12914 _ReleaseLocks(self, locking.LEVEL_NODE)
12915 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12917 # Apply NIC changes
12918 if self._new_nics is not None:
12919 instance.nics = self._new_nics
12920 result.extend(self._nic_chgdesc)
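# The hvparams/beparams/os/osparams updates below use the hv_inst, be_inst
# and os_inst dictionaries computed during the prerequisite checks; only the
# keys actually passed in the opcode are reported in the result list.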
12923 if self.op.hvparams:
12924 instance.hvparams = self.hv_inst
12925 for key, val in self.op.hvparams.iteritems():
12926 result.append(("hv/%s" % key, val))
12929 if self.op.beparams:
12930 instance.beparams = self.be_inst
12931 for key, val in self.op.beparams.iteritems():
12932 result.append(("be/%s" % key, val))
12935 if self.op.os_name:
12936 instance.os = self.op.os_name
12939 if self.op.osparams:
12940 instance.osparams = self.os_inst
12941 for key, val in self.op.osparams.iteritems():
12942 result.append(("os/%s" % key, val))
12944 if self.op.offline is None:
12945 # Ignore
12946 pass
12947 elif self.op.offline:
12948 # Mark instance as offline
12949 self.cfg.MarkInstanceOffline(instance.name)
12950 result.append(("admin_state", constants.ADMINST_OFFLINE))
12951 else:
12952 # Mark instance as online, but stopped
12953 self.cfg.MarkInstanceDown(instance.name)
12954 result.append(("admin_state", constants.ADMINST_DOWN))
12956 self.cfg.Update(instance, feedback_fn)
12958 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12959 self.owned_locks(locking.LEVEL_NODE)), \
12960 "All node locks should have been released by now"
12962 return result
12964 _DISK_CONVERSIONS = {
12965 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12966 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12967 }
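# For reference, these converters are reached through the instance set-params
# opcode; on the command line that is roughly "gnt-instance modify -t drbd
# -n <new-secondary> <instance>" for the plain-to-drbd direction (shown as an
# illustration only).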
12970 class LUInstanceChangeGroup(LogicalUnit):
12971 HPATH = "instance-change-group"
12972 HTYPE = constants.HTYPE_INSTANCE
12975 def ExpandNames(self):
12976 self.share_locks = _ShareAll()
12977 self.needed_locks = {
12978 locking.LEVEL_NODEGROUP: [],
12979 locking.LEVEL_NODE: [],
12982 self._ExpandAndLockInstance()
12984 if self.op.target_groups:
12985 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12986 self.op.target_groups)
12987 else:
12988 self.req_target_uuids = None
12990 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12992 def DeclareLocks(self, level):
12993 if level == locking.LEVEL_NODEGROUP:
12994 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12996 if self.req_target_uuids:
12997 lock_groups = set(self.req_target_uuids)
12999 # Lock all groups used by instance optimistically; this requires going
13000 # via the node before it's locked, requiring verification later on
13001 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13002 lock_groups.update(instance_groups)
13003 else:
13004 # No target groups, need to lock all of them
13005 lock_groups = locking.ALL_SET
13007 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13009 elif level == locking.LEVEL_NODE:
13010 if self.req_target_uuids:
13011 # Lock all nodes used by instances
13012 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13013 self._LockInstancesNodes()
13015 # Lock all nodes in all potential target groups
13016 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13017 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13018 member_nodes = [node_name
13019 for group in lock_groups
13020 for node_name in self.cfg.GetNodeGroup(group).members]
13021 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13022 else:
13023 # Lock all nodes as all groups are potential targets
13024 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13026 def CheckPrereq(self):
13027 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13028 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13029 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13031 assert (self.req_target_uuids is None or
13032 owned_groups.issuperset(self.req_target_uuids))
13033 assert owned_instances == set([self.op.instance_name])
13035 # Get instance information
13036 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13038 # Check if node groups for locked instance are still correct
13039 assert owned_nodes.issuperset(self.instance.all_nodes), \
13040 ("Instance %s's nodes changed while we kept the lock" %
13041 self.op.instance_name)
13043 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13046 if self.req_target_uuids:
13047 # User requested specific target groups
13048 self.target_uuids = frozenset(self.req_target_uuids)
13049 else:
13050 # All groups except those used by the instance are potential targets
13051 self.target_uuids = owned_groups - inst_groups
13053 conflicting_groups = self.target_uuids & inst_groups
13054 if conflicting_groups:
13055 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13056 " used by the instance '%s'" %
13057 (utils.CommaJoin(conflicting_groups),
13058 self.op.instance_name),
13059 errors.ECODE_INVAL)
13061 if not self.target_uuids:
13062 raise errors.OpPrereqError("There are no possible target groups",
13063 errors.ECODE_INVAL)
13065 def BuildHooksEnv(self):
13066 """Build hooks env.
13069 assert self.target_uuids
13072 "TARGET_GROUPS": " ".join(self.target_uuids),
13075 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13079 def BuildHooksNodes(self):
13080 """Build hooks nodes.
13083 mn = self.cfg.GetMasterNode()
13084 return ([mn], [mn])
13086 def Exec(self, feedback_fn):
13087 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13089 assert instances == [self.op.instance_name], "Instance not locked"
13091 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13092 instances=instances, target_groups=list(self.target_uuids))
13094 ial.Run(self.op.iallocator)
13096 if not ial.success:
13097 raise errors.OpPrereqError("Can't compute solution for changing group of"
13098 " instance '%s' using iallocator '%s': %s" %
13099 (self.op.instance_name, self.op.iallocator,
13100 ial.info),
13101 errors.ECODE_NORES)
13103 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13105 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13106 " instance '%s'", len(jobs), self.op.instance_name)
13108 return ResultWithJobs(jobs)
13111 class LUBackupQuery(NoHooksLU):
13112 """Query the exports list
13117 def CheckArguments(self):
13118 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13119 ["node", "export"], self.op.use_locking)
13121 def ExpandNames(self):
13122 self.expq.ExpandNames(self)
13124 def DeclareLocks(self, level):
13125 self.expq.DeclareLocks(self, level)
13127 def Exec(self, feedback_fn):
13128 result = {}
13130 for (node, expname) in self.expq.OldStyleQuery(self):
13131 if expname is None:
13132 result[node] = False
13133 else:
13134 result.setdefault(node, []).append(expname)
13136 return result
13139 class _ExportQuery(_QueryBase):
13140 FIELDS = query.EXPORT_FIELDS
13142 #: The node name is not a unique key for this query
13143 SORT_FIELD = "node"
13145 def ExpandNames(self, lu):
13146 lu.needed_locks = {}
13148 # The following variables interact with _QueryBase._GetNames
13149 if self.names:
13150 self.wanted = _GetWantedNodes(lu, self.names)
13151 else:
13152 self.wanted = locking.ALL_SET
13154 self.do_locking = self.use_locking
13156 if self.do_locking:
13157 lu.share_locks = _ShareAll()
13158 lu.needed_locks = {
13159 locking.LEVEL_NODE: self.wanted,
13162 def DeclareLocks(self, lu, level):
13165 def _GetQueryData(self, lu):
13166 """Computes the list of nodes and their attributes.
13169 # Locking is not used
13171 assert not (compat.any(lu.glm.is_owned(level)
13172 for level in locking.LEVELS
13173 if level != locking.LEVEL_CLUSTER) or
13174 self.do_locking or self.use_locking)
13176 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
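# Build a flat list of (node, export_name) pairs; nodes that could not be
# queried are recorded as (node, None).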
13178 result = []
13180 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13181 if nres.fail_msg:
13182 result.append((node, None))
13183 else:
13184 result.extend((node, expname) for expname in nres.payload)
13186 return result
13189 class LUBackupPrepare(NoHooksLU):
13190 """Prepares an instance for an export and returns useful information.
13195 def ExpandNames(self):
13196 self._ExpandAndLockInstance()
13198 def CheckPrereq(self):
13199 """Check prerequisites.
13202 instance_name = self.op.instance_name
13204 self.instance = self.cfg.GetInstanceInfo(instance_name)
13205 assert self.instance is not None, \
13206 "Cannot retrieve locked instance %s" % self.op.instance_name
13207 _CheckNodeOnline(self, self.instance.primary_node)
13209 self._cds = _GetClusterDomainSecret()
13211 def Exec(self, feedback_fn):
13212 """Prepares an instance for an export.
13215 instance = self.instance
13217 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13218 salt = utils.GenerateSecret(8)
13220 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13221 result = self.rpc.call_x509_cert_create(instance.primary_node,
13222 constants.RIE_CERT_VALIDITY)
13223 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13225 (name, cert_pem) = result.payload
13227 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13231 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13232 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13234 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13240 class LUBackupExport(LogicalUnit):
13241 """Export an instance to an image in the cluster.
13244 HPATH = "instance-export"
13245 HTYPE = constants.HTYPE_INSTANCE
13248 def CheckArguments(self):
13249 """Check the arguments.
13252 self.x509_key_name = self.op.x509_key_name
13253 self.dest_x509_ca_pem = self.op.destination_x509_ca
13255 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13256 if not self.x509_key_name:
13257 raise errors.OpPrereqError("Missing X509 key name for encryption",
13258 errors.ECODE_INVAL)
13260 if not self.dest_x509_ca_pem:
13261 raise errors.OpPrereqError("Missing destination X509 CA",
13262 errors.ECODE_INVAL)
13264 def ExpandNames(self):
13265 self._ExpandAndLockInstance()
13267 # Lock all nodes for local exports
13268 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13269 # FIXME: lock only instance primary and destination node
13271 # Sad but true, for now we have to lock all nodes, as we don't know where
13272 # the previous export might be, and in this LU we search for it and
13273 # remove it from its current node. In the future we could fix this by:
13274 # - making a tasklet to search (share-lock all), then create the
13275 # new one, then one to remove, after
13276 # - removing the removal operation altogether
13277 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13279 def DeclareLocks(self, level):
13280 """Last minute lock declaration."""
13281 # All nodes are locked anyway, so nothing to do here.
13283 def BuildHooksEnv(self):
13284 """Build hooks env.
13286 This will run on the master, primary node and target node.
13290 "EXPORT_MODE": self.op.mode,
13291 "EXPORT_NODE": self.op.target_node,
13292 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13293 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13294 # TODO: Generic function for boolean env variables
13295 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13298 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13302 def BuildHooksNodes(self):
13303 """Build hooks nodes.
13306 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13308 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13309 nl.append(self.op.target_node)
13313 def CheckPrereq(self):
13314 """Check prerequisites.
13316 This checks that the instance and node names are valid.
13319 instance_name = self.op.instance_name
13321 self.instance = self.cfg.GetInstanceInfo(instance_name)
13322 assert self.instance is not None, \
13323 "Cannot retrieve locked instance %s" % self.op.instance_name
13324 _CheckNodeOnline(self, self.instance.primary_node)
13326 if (self.op.remove_instance and
13327 self.instance.admin_state == constants.ADMINST_UP and
13328 not self.op.shutdown):
13329 raise errors.OpPrereqError("Can not remove instance without shutting it"
13330 " down before export",
13331 errors.ECODE_STATE)
13332 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13333 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13334 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13335 assert self.dst_node is not None
13337 _CheckNodeOnline(self, self.dst_node.name)
13338 _CheckNodeNotDrained(self, self.dst_node.name)
13341 self.dest_disk_info = None
13342 self.dest_x509_ca = None
13344 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13345 self.dst_node = None
13347 if len(self.op.target_node) != len(self.instance.disks):
13348 raise errors.OpPrereqError(("Received destination information for %s"
13349 " disks, but instance %s has %s disks") %
13350 (len(self.op.target_node), instance_name,
13351 len(self.instance.disks)),
13352 errors.ECODE_INVAL)
13354 cds = _GetClusterDomainSecret()
13356 # Check X509 key name
13357 try:
13358 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13359 except (TypeError, ValueError), err:
13360 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13362 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13363 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13364 errors.ECODE_INVAL)
13366 # Load and verify CA
13367 try:
13368 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13369 except OpenSSL.crypto.Error, err:
13370 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13371 (err, ), errors.ECODE_INVAL)
13373 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13374 if errcode is not None:
13375 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13376 (msg, ), errors.ECODE_INVAL)
13378 self.dest_x509_ca = cert
13380 # Verify target information
13382 for idx, disk_data in enumerate(self.op.target_node):
13384 (host, port, magic) = \
13385 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13386 except errors.GenericError, err:
13387 raise errors.OpPrereqError("Target info for disk %s: %s" %
13388 (idx, err), errors.ECODE_INVAL)
13390 disk_info.append((host, port, magic))
13392 assert len(disk_info) == len(self.op.target_node)
13393 self.dest_disk_info = disk_info
13396 raise errors.ProgrammerError("Unhandled export mode %r" %
13399 # instance disk type verification
13400 # TODO: Implement export support for file-based disks
13401 for disk in self.instance.disks:
13402 if disk.dev_type == constants.LD_FILE:
13403 raise errors.OpPrereqError("Export not supported for instances with"
13404 " file-based disks", errors.ECODE_INVAL)
13406 def _CleanupExports(self, feedback_fn):
13407 """Removes exports of current instance from all other nodes.
13409 If an instance in a cluster with nodes A..D was exported to node C, its
13410 exports will be removed from the nodes A, B and D.
13413 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13415 nodelist = self.cfg.GetNodeList()
13416 nodelist.remove(self.dst_node.name)
13418 # on one-node clusters nodelist will be empty after the removal
13419 # if we proceed the backup would be removed because OpBackupQuery
13420 # substitutes an empty list with the full cluster node list.
13421 iname = self.instance.name
13422 if nodelist:
13423 feedback_fn("Removing old exports for instance %s" % iname)
13424 exportlist = self.rpc.call_export_list(nodelist)
13425 for node in exportlist:
13426 if exportlist[node].fail_msg:
13427 continue
13428 if iname in exportlist[node].payload:
13429 msg = self.rpc.call_export_remove(node, iname).fail_msg
13430 if msg:
13431 self.LogWarning("Could not remove older export for instance %s"
13432 " on node %s: %s", iname, node, msg)
13434 def Exec(self, feedback_fn):
13435 """Export an instance to an image in the cluster.
13438 assert self.op.mode in constants.EXPORT_MODES
13440 instance = self.instance
13441 src_node = instance.primary_node
13443 if self.op.shutdown:
13444 # shutdown the instance, but not the disks
13445 feedback_fn("Shutting down instance %s" % instance.name)
13446 result = self.rpc.call_instance_shutdown(src_node, instance,
13447 self.op.shutdown_timeout)
13448 # TODO: Maybe ignore failures if ignore_remove_failures is set
13449 result.Raise("Could not shutdown instance %s on"
13450 " node %s" % (instance.name, src_node))
13452 # set the disks ID correctly since call_instance_start needs the
13453 # correct drbd minor to create the symlinks
13454 for disk in instance.disks:
13455 self.cfg.SetDiskID(disk, src_node)
13457 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13459 if activate_disks:
13460 # Activate the instance disks if we're exporting a stopped instance
13461 feedback_fn("Activating disks for %s" % instance.name)
13462 _StartInstanceDisks(self, instance, None)
13464 try:
13465 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13466 instance)
13468 helper.CreateSnapshots()
13469 try:
13470 if (self.op.shutdown and
13471 instance.admin_state == constants.ADMINST_UP and
13472 not self.op.remove_instance):
13473 assert not activate_disks
13474 feedback_fn("Starting instance %s" % instance.name)
13475 result = self.rpc.call_instance_start(src_node,
13476 (instance, None, None), False)
13477 msg = result.fail_msg
13478 if msg:
13479 feedback_fn("Failed to start instance: %s" % msg)
13480 _ShutdownInstanceDisks(self, instance)
13481 raise errors.OpExecError("Could not start instance: %s" % msg)
13483 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13484 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13485 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13486 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13487 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13489 (key_name, _, _) = self.x509_key_name
13491 dest_ca_pem = \
13492 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13493 self.dest_x509_ca)
13495 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13496 key_name, dest_ca_pem,
13497 timeouts)
13498 finally:
13499 helper.Cleanup()
13501 # Check for backwards compatibility
13502 assert len(dresults) == len(instance.disks)
13503 assert compat.all(isinstance(i, bool) for i in dresults), \
13504 "Not all results are boolean: %r" % dresults
13506 finally:
13507 if activate_disks:
13508 feedback_fn("Deactivating disks for %s" % instance.name)
13509 _ShutdownInstanceDisks(self, instance)
13511 if not (compat.all(dresults) and fin_resu):
13512 failures = []
13513 if not fin_resu:
13514 failures.append("export finalization")
13515 if not compat.all(dresults):
13516 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13517 if not dsk)
13518 failures.append("disk export: disk(s) %s" % fdsk)
13520 raise errors.OpExecError("Export failed, errors in %s" %
13521 utils.CommaJoin(failures))
13523 # At this point, the export was successful, we can cleanup/finish
13525 # Remove instance if requested
13526 if self.op.remove_instance:
13527 feedback_fn("Removing instance %s" % instance.name)
13528 _RemoveInstance(self, feedback_fn, instance,
13529 self.op.ignore_remove_failures)
13531 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13532 self._CleanupExports(feedback_fn)
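# The final value is the finalization status plus one boolean per disk, e.g.
# (True, [True, True]) for a fully successful export of a two-disk instance
# (example values for illustration only).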
13534 return fin_resu, dresults
13537 class LUBackupRemove(NoHooksLU):
13538 """Remove exports related to the named instance.
13543 def ExpandNames(self):
13544 self.needed_locks = {}
13545 # We need all nodes to be locked in order for RemoveExport to work, but we
13546 # don't need to lock the instance itself, as nothing will happen to it (and
13547 # we can remove exports also for a removed instance)
13548 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13550 def Exec(self, feedback_fn):
13551 """Remove any export.
13554 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13555 # If the instance was not found we'll try with the name that was passed in.
13556 # This will only work if it was an FQDN, though.
13557 fqdn_warn = False
13558 if not instance_name:
13559 fqdn_warn = True
13560 instance_name = self.op.instance_name
13562 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13563 exportlist = self.rpc.call_export_list(locked_nodes)
13564 found = False
13565 for node in exportlist:
13566 msg = exportlist[node].fail_msg
13567 if msg:
13568 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13569 continue
13570 if instance_name in exportlist[node].payload:
13571 found = True
13572 result = self.rpc.call_export_remove(node, instance_name)
13573 msg = result.fail_msg
13574 if msg:
13575 logging.error("Could not remove export for instance %s"
13576 " on node %s: %s", instance_name, node, msg)
13578 if fqdn_warn and not found:
13579 feedback_fn("Export not found. If trying to remove an export belonging"
13580 " to a deleted instance please use its Fully Qualified"
13581 " Domain Name.")
13584 class LUGroupAdd(LogicalUnit):
13585 """Logical unit for creating node groups.
13588 HPATH = "group-add"
13589 HTYPE = constants.HTYPE_GROUP
13592 def ExpandNames(self):
13593 # We need the new group's UUID here so that we can create and acquire the
13594 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13595 # that it should not check whether the UUID exists in the configuration.
13596 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13597 self.needed_locks = {}
13598 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13600 def CheckPrereq(self):
13601 """Check prerequisites.
13603 This checks that the given group name is not an existing node group
13607 try:
13608 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13609 except errors.OpPrereqError:
13610 pass
13611 else:
13612 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13613 " node group (UUID: %s)" %
13614 (self.op.group_name, existing_uuid),
13615 errors.ECODE_EXISTS)
13617 if self.op.ndparams:
13618 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13620 if self.op.hv_state:
13621 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13622 else:
13623 self.new_hv_state = None
13625 if self.op.disk_state:
13626 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13627 else:
13628 self.new_disk_state = None
13630 if self.op.diskparams:
13631 for templ in constants.DISK_TEMPLATES:
13632 if templ in self.op.diskparams:
13633 utils.ForceDictType(self.op.diskparams[templ],
13634 constants.DISK_DT_TYPES)
13635 self.new_diskparams = self.op.diskparams
13636 try:
13637 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13638 except errors.OpPrereqError, err:
13639 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
13640 errors.ECODE_INVAL)
13641 else:
13642 self.new_diskparams = {}
13644 if self.op.ipolicy:
13645 cluster = self.cfg.GetClusterInfo()
13646 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13647 try:
13648 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13649 except errors.ConfigurationError, err:
13650 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13651 errors.ECODE_INVAL)
13653 def BuildHooksEnv(self):
13654 """Build hooks env.
13657 return {
13658 "GROUP_NAME": self.op.group_name,
13659 }
13661 def BuildHooksNodes(self):
13662 """Build hooks nodes.
13665 mn = self.cfg.GetMasterNode()
13666 return ([mn], [mn])
13668 def Exec(self, feedback_fn):
13669 """Add the node group to the cluster.
13672 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13673 uuid=self.group_uuid,
13674 alloc_policy=self.op.alloc_policy,
13675 ndparams=self.op.ndparams,
13676 diskparams=self.new_diskparams,
13677 ipolicy=self.op.ipolicy,
13678 hv_state_static=self.new_hv_state,
13679 disk_state_static=self.new_disk_state)
13681 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13682 del self.remove_locks[locking.LEVEL_NODEGROUP]
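# This LU is normally reached through the "gnt-group add" command; the
# ndparams, diskparams, ipolicy and hv/disk state values validated in
# CheckPrereq all come from the submitted opcode.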
13685 class LUGroupAssignNodes(NoHooksLU):
13686 """Logical unit for assigning nodes to groups.
13691 def ExpandNames(self):
13692 # These raise errors.OpPrereqError on their own:
13693 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13694 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13696 # We want to lock all the affected nodes and groups. We have readily
13697 # available the list of nodes, and the *destination* group. To gather the
13698 # list of "source" groups, we need to fetch node information later on.
13699 self.needed_locks = {
13700 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13701 locking.LEVEL_NODE: self.op.nodes,
13704 def DeclareLocks(self, level):
13705 if level == locking.LEVEL_NODEGROUP:
13706 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13708 # Try to get all affected nodes' groups without having the group or node
13709 # lock yet. Needs verification later in the code flow.
13710 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13712 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13714 def CheckPrereq(self):
13715 """Check prerequisites.
13718 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13719 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13720 frozenset(self.op.nodes))
13722 expected_locks = (set([self.group_uuid]) |
13723 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13724 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13725 if actual_locks != expected_locks:
13726 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13727 " current groups are '%s', used to be '%s'" %
13728 (utils.CommaJoin(expected_locks),
13729 utils.CommaJoin(actual_locks)))
13731 self.node_data = self.cfg.GetAllNodesInfo()
13732 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13733 instance_data = self.cfg.GetAllInstancesInfo()
13735 if self.group is None:
13736 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13737 (self.op.group_name, self.group_uuid))
13739 (new_splits, previous_splits) = \
13740 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13741 for node in self.op.nodes],
13742 self.node_data, instance_data)
13744 if new_splits:
13745 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13747 if not self.op.force:
13748 raise errors.OpExecError("The following instances get split by this"
13749 " change and --force was not given: %s" %
13750 fmt_new_splits)
13751 else:
13752 self.LogWarning("This operation will split the following instances: %s",
13753 fmt_new_splits)
13755 if previous_splits:
13756 self.LogWarning("In addition, these already-split instances continue"
13757 " to be split across groups: %s",
13758 utils.CommaJoin(utils.NiceSort(previous_splits)))
13760 def Exec(self, feedback_fn):
13761 """Assign nodes to a new group.
13764 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13766 self.cfg.AssignGroupNodes(mods)
13769 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13770 """Check for split instances after a node assignment.
13772 This method considers a series of node assignments as an atomic operation,
13773 and returns information about split instances after applying the set of
13776 In particular, it returns information about newly split instances, and
13777 instances that were already split, and remain so after the change.
13779 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13782 @type changes: list of (node_name, new_group_uuid) pairs.
13783 @param changes: list of node assignments to consider.
13784 @param node_data: a dict with data for all nodes
13785 @param instance_data: a dict with all instances to consider
13786 @rtype: a two-tuple
13787 @return: a list of instances that were previously okay and result split as a
13788 consequence of this change, and a list of instances that were previously
13789 split and this change does not fix.
13792 changed_nodes = dict((node, group) for node, group in changes
13793 if node_data[node].group != group)
13795 all_split_instances = set()
13796 previously_split_instances = set()
13798 def InstanceNodes(instance):
13799 return [instance.primary_node] + list(instance.secondary_nodes)
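# Example (hypothetical names): a DRBD instance on nodes "n1" and "n2" that
# currently share a group is reported in the first returned list if the
# changes move only one of the two nodes to another group; if its nodes are
# already in different groups and stay split, it ends up in the second list.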
13801 for inst in instance_data.values():
13802 if inst.disk_template not in constants.DTS_INT_MIRROR:
13803 continue
13805 instance_nodes = InstanceNodes(inst)
13807 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13808 previously_split_instances.add(inst.name)
13810 if len(set(changed_nodes.get(node, node_data[node].group)
13811 for node in instance_nodes)) > 1:
13812 all_split_instances.add(inst.name)
13814 return (list(all_split_instances - previously_split_instances),
13815 list(previously_split_instances & all_split_instances))
13818 class _GroupQuery(_QueryBase):
13819 FIELDS = query.GROUP_FIELDS
13821 def ExpandNames(self, lu):
13822 lu.needed_locks = {}
13824 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13825 self._cluster = lu.cfg.GetClusterInfo()
13826 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13829 self.wanted = [name_to_uuid[name]
13830 for name in utils.NiceSort(name_to_uuid.keys())]
13832 # Accept names to be either names or UUIDs.
13835 all_uuid = frozenset(self._all_groups.keys())
13837 for name in self.names:
13838 if name in all_uuid:
13839 self.wanted.append(name)
13840 elif name in name_to_uuid:
13841 self.wanted.append(name_to_uuid[name])
13843 missing.append(name)
13846 raise errors.OpPrereqError("Some groups do not exist: %s" %
13847 utils.CommaJoin(missing),
13848 errors.ECODE_NOENT)
13850 def DeclareLocks(self, lu, level):
13853 def _GetQueryData(self, lu):
13854 """Computes the list of node groups and their attributes.
13857 do_nodes = query.GQ_NODE in self.requested_data
13858 do_instances = query.GQ_INST in self.requested_data
13860 group_to_nodes = None
13861 group_to_instances = None
13863 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13864 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13865 # latter GetAllInstancesInfo() is not enough, for we have to go through
13866 # instance->node. Hence, we will need to process nodes even if we only need
13867 # instance information.
13868 if do_nodes or do_instances:
13869 all_nodes = lu.cfg.GetAllNodesInfo()
13870 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13871 node_to_group = {}
13873 for node in all_nodes.values():
13874 if node.group in group_to_nodes:
13875 group_to_nodes[node.group].append(node.name)
13876 node_to_group[node.name] = node.group
13878 if do_instances:
13879 all_instances = lu.cfg.GetAllInstancesInfo()
13880 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13882 for instance in all_instances.values():
13883 node = instance.primary_node
13884 if node in node_to_group:
13885 group_to_instances[node_to_group[node]].append(instance.name)
13887 if not do_nodes:
13888 # Do not pass on node information if it was not requested.
13889 group_to_nodes = None
13891 return query.GroupQueryData(self._cluster,
13892 [self._all_groups[uuid]
13893 for uuid in self.wanted],
13894 group_to_nodes, group_to_instances,
13895 query.GQ_DISKPARAMS in self.requested_data)
13898 class LUGroupQuery(NoHooksLU):
13899 """Logical unit for querying node groups.
13904 def CheckArguments(self):
13905 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13906 self.op.output_fields, False)
13908 def ExpandNames(self):
13909 self.gq.ExpandNames(self)
13911 def DeclareLocks(self, level):
13912 self.gq.DeclareLocks(self, level)
13914 def Exec(self, feedback_fn):
13915 return self.gq.OldStyleQuery(self)
13918 class LUGroupSetParams(LogicalUnit):
13919 """Modifies the parameters of a node group.
13922 HPATH = "group-modify"
13923 HTYPE = constants.HTYPE_GROUP
13926 def CheckArguments(self):
13929 self.op.diskparams,
13930 self.op.alloc_policy,
13932 self.op.disk_state,
13936 if all_changes.count(None) == len(all_changes):
13937 raise errors.OpPrereqError("Please pass at least one modification",
13938 errors.ECODE_INVAL)
13940 def ExpandNames(self):
13941 # This raises errors.OpPrereqError on its own:
13942 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13944 self.needed_locks = {
13945 locking.LEVEL_INSTANCE: [],
13946 locking.LEVEL_NODEGROUP: [self.group_uuid],
13949 self.share_locks[locking.LEVEL_INSTANCE] = 1
13951 def DeclareLocks(self, level):
13952 if level == locking.LEVEL_INSTANCE:
13953 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13955 # Lock instances optimistically, needs verification once group lock has
13957 self.needed_locks[locking.LEVEL_INSTANCE] = \
13958 self.cfg.GetNodeGroupInstances(self.group_uuid)
13961 def _UpdateAndVerifyDiskParams(old, new):
13962 """Updates and verifies disk parameters.
13965 new_params = _GetUpdatedParams(old, new)
13966 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
13967 return new_params
13969 def CheckPrereq(self):
13970 """Check prerequisites.
13973 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13975 # Check if locked instances are still correct
13976 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13978 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13979 cluster = self.cfg.GetClusterInfo()
13981 if self.group is None:
13982 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13983 (self.op.group_name, self.group_uuid))
13985 if self.op.ndparams:
13986 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13987 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
13988 self.new_ndparams = new_ndparams
13990 if self.op.diskparams:
13991 diskparams = self.group.diskparams
13992 uavdp = self._UpdateAndVerifyDiskParams
13993 # For each disktemplate subdict update and verify the values
13994 new_diskparams = dict((dt,
13995 uavdp(diskparams.get(dt, {}),
13996 self.op.diskparams[dt]))
13997 for dt in constants.DISK_TEMPLATES
13998 if dt in self.op.diskparams)
13999 # As we've all subdicts of diskparams ready, lets merge the actual
14000 # dict with all updated subdicts
14001 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14002 try:
14003 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14004 except errors.OpPrereqError, err:
14005 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14006 errors.ECODE_INVAL)
14008 if self.op.hv_state:
14009 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14010 self.group.hv_state_static)
14012 if self.op.disk_state:
14013 self.new_disk_state = \
14014 _MergeAndVerifyDiskState(self.op.disk_state,
14015 self.group.disk_state_static)
14017 if self.op.ipolicy:
14018 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14022 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14023 inst_filter = lambda inst: inst.name in owned_instances
14024 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14026 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
14028 new_ipolicy, instances)
14031 self.LogWarning("After the ipolicy change the following instances"
14032 " violate them: %s",
14033 utils.CommaJoin(violations))
14035 def BuildHooksEnv(self):
14036 """Build hooks env.
14039 return {
14040 "GROUP_NAME": self.op.group_name,
14041 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14042 }
14044 def BuildHooksNodes(self):
14045 """Build hooks nodes.
14048 mn = self.cfg.GetMasterNode()
14049 return ([mn], [mn])
14051 def Exec(self, feedback_fn):
14052 """Modifies the node group.
14057 if self.op.ndparams:
14058 self.group.ndparams = self.new_ndparams
14059 result.append(("ndparams", str(self.group.ndparams)))
14061 if self.op.diskparams:
14062 self.group.diskparams = self.new_diskparams
14063 result.append(("diskparams", str(self.group.diskparams)))
14065 if self.op.alloc_policy:
14066 self.group.alloc_policy = self.op.alloc_policy
14068 if self.op.hv_state:
14069 self.group.hv_state_static = self.new_hv_state
14071 if self.op.disk_state:
14072 self.group.disk_state_static = self.new_disk_state
14074 if self.op.ipolicy:
14075 self.group.ipolicy = self.new_ipolicy
14077 self.cfg.Update(self.group, feedback_fn)
14081 class LUGroupRemove(LogicalUnit):
14082 HPATH = "group-remove"
14083 HTYPE = constants.HTYPE_GROUP
14086 def ExpandNames(self):
14087 # This will raise errors.OpPrereqError on its own:
14088 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14089 self.needed_locks = {
14090 locking.LEVEL_NODEGROUP: [self.group_uuid],
14093 def CheckPrereq(self):
14094 """Check prerequisites.
14096 This checks that the given group name exists as a node group, that is
14097 empty (i.e., contains no nodes), and that is not the last group of the
14101 # Verify that the group is empty.
14102 group_nodes = [node.name
14103 for node in self.cfg.GetAllNodesInfo().values()
14104 if node.group == self.group_uuid]
14106 if group_nodes:
14107 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14108 " nodes: %s" %
14109 (self.op.group_name,
14110 utils.CommaJoin(utils.NiceSort(group_nodes))),
14111 errors.ECODE_STATE)
14113 # Verify the cluster would not be left group-less.
14114 if len(self.cfg.GetNodeGroupList()) == 1:
14115 raise errors.OpPrereqError("Group '%s' is the only group,"
14116 " cannot be removed" %
14117 self.op.group_name,
14118 errors.ECODE_STATE)
14120 def BuildHooksEnv(self):
14121 """Build hooks env.
14124 return {
14125 "GROUP_NAME": self.op.group_name,
14126 }
14128 def BuildHooksNodes(self):
14129 """Build hooks nodes.
14132 mn = self.cfg.GetMasterNode()
14133 return ([mn], [mn])
14135 def Exec(self, feedback_fn):
14136 """Remove the node group.
14139 try:
14140 self.cfg.RemoveNodeGroup(self.group_uuid)
14141 except errors.ConfigurationError:
14142 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14143 (self.op.group_name, self.group_uuid))
14145 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14148 class LUGroupRename(LogicalUnit):
14149 HPATH = "group-rename"
14150 HTYPE = constants.HTYPE_GROUP
14153 def ExpandNames(self):
14154 # This raises errors.OpPrereqError on its own:
14155 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14157 self.needed_locks = {
14158 locking.LEVEL_NODEGROUP: [self.group_uuid],
14161 def CheckPrereq(self):
14162 """Check prerequisites.
14164 Ensures requested new name is not yet used.
14167 try:
14168 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14169 except errors.OpPrereqError:
14170 pass
14171 else:
14172 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14173 " node group (UUID: %s)" %
14174 (self.op.new_name, new_name_uuid),
14175 errors.ECODE_EXISTS)
14177 def BuildHooksEnv(self):
14178 """Build hooks env.
14181 return {
14182 "OLD_NAME": self.op.group_name,
14183 "NEW_NAME": self.op.new_name,
14184 }
14186 def BuildHooksNodes(self):
14187 """Build hooks nodes.
14190 mn = self.cfg.GetMasterNode()
14192 all_nodes = self.cfg.GetAllNodesInfo()
14193 all_nodes.pop(mn, None)
14195 run_nodes = [mn]
14196 run_nodes.extend(node.name for node in all_nodes.values()
14197 if node.group == self.group_uuid)
14199 return (run_nodes, run_nodes)
14201 def Exec(self, feedback_fn):
14202 """Rename the node group.
14205 group = self.cfg.GetNodeGroup(self.group_uuid)
14207 if group is None:
14208 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14209 (self.op.group_name, self.group_uuid))
14211 group.name = self.op.new_name
14212 self.cfg.Update(group, feedback_fn)
14214 return self.op.new_name
14217 class LUGroupEvacuate(LogicalUnit):
14218 HPATH = "group-evacuate"
14219 HTYPE = constants.HTYPE_GROUP
14222 def ExpandNames(self):
14223 # This raises errors.OpPrereqError on its own:
14224 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14226 if self.op.target_groups:
14227 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14228 self.op.target_groups)
14230 self.req_target_uuids = []
14232 if self.group_uuid in self.req_target_uuids:
14233 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14234 " as a target group (targets are %s)" %
14235 (self.group_uuid,
14236 utils.CommaJoin(self.req_target_uuids)),
14237 errors.ECODE_INVAL)
14239 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14241 self.share_locks = _ShareAll()
14242 self.needed_locks = {
14243 locking.LEVEL_INSTANCE: [],
14244 locking.LEVEL_NODEGROUP: [],
14245 locking.LEVEL_NODE: [],
14248 def DeclareLocks(self, level):
14249 if level == locking.LEVEL_INSTANCE:
14250 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14252 # Lock instances optimistically, needs verification once node and group
14253 # locks have been acquired
14254 self.needed_locks[locking.LEVEL_INSTANCE] = \
14255 self.cfg.GetNodeGroupInstances(self.group_uuid)
14257 elif level == locking.LEVEL_NODEGROUP:
14258 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14260 if self.req_target_uuids:
14261 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14263 # Lock all groups used by instances optimistically; this requires going
14264 # via the node before it's locked, requiring verification later on
14265 lock_groups.update(group_uuid
14266 for instance_name in
14267 self.owned_locks(locking.LEVEL_INSTANCE)
14268 for group_uuid in
14269 self.cfg.GetInstanceNodeGroups(instance_name))
14270 else:
14271 # No target groups, need to lock all of them
14272 lock_groups = locking.ALL_SET
14274 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14276 elif level == locking.LEVEL_NODE:
14277 # This will only lock the nodes in the group to be evacuated which
14278 # contain actual instances
14279 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14280 self._LockInstancesNodes()
14282 # Lock all nodes in group to be evacuated and target groups
14283 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14284 assert self.group_uuid in owned_groups
14285 member_nodes = [node_name
14286 for group in owned_groups
14287 for node_name in self.cfg.GetNodeGroup(group).members]
14288 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14290 def CheckPrereq(self):
14291 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14292 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14293 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14295 assert owned_groups.issuperset(self.req_target_uuids)
14296 assert self.group_uuid in owned_groups
14298 # Check if locked instances are still correct
14299 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14301 # Get instance information
14302 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14304 # Check if node groups for locked instances are still correct
14305 _CheckInstancesNodeGroups(self.cfg, self.instances,
14306 owned_groups, owned_nodes, self.group_uuid)
14308 if self.req_target_uuids:
14309 # User requested specific target groups
14310 self.target_uuids = self.req_target_uuids
14312 # All groups except the one to be evacuated are potential targets
14313 self.target_uuids = [group_uuid for group_uuid in owned_groups
14314 if group_uuid != self.group_uuid]
14316 if not self.target_uuids:
14317 raise errors.OpPrereqError("There are no possible target groups",
14318 errors.ECODE_INVAL)
14320 def BuildHooksEnv(self):
14321 """Build hooks env.
14324 return {
14325 "GROUP_NAME": self.op.group_name,
14326 "TARGET_GROUPS": " ".join(self.target_uuids),
14327 }
14329 def BuildHooksNodes(self):
14330 """Build hooks nodes.
14333 mn = self.cfg.GetMasterNode()
14335 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14337 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14339 return (run_nodes, run_nodes)
14341 def Exec(self, feedback_fn):
14342 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14344 assert self.group_uuid not in self.target_uuids
14346 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14347 instances=instances, target_groups=self.target_uuids)
14349 ial.Run(self.op.iallocator)
14351 if not ial.success:
14352 raise errors.OpPrereqError("Can't compute group evacuation using"
14353 " iallocator '%s': %s" %
14354 (self.op.iallocator, ial.info),
14355 errors.ECODE_NORES)
14357 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14359 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14360 len(jobs), self.op.group_name)
14362 return ResultWithJobs(jobs)
14365 class TagsLU(NoHooksLU): # pylint: disable=W0223
14366 """Generic tags LU.
14368 This is an abstract class which is the parent of all the other tags LUs.
14371 def ExpandNames(self):
14372 self.group_uuid = None
14373 self.needed_locks = {}
14375 if self.op.kind == constants.TAG_NODE:
14376 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14377 lock_level = locking.LEVEL_NODE
14378 lock_name = self.op.name
14379 elif self.op.kind == constants.TAG_INSTANCE:
14380 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14381 lock_level = locking.LEVEL_INSTANCE
14382 lock_name = self.op.name
14383 elif self.op.kind == constants.TAG_NODEGROUP:
14384 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14385 lock_level = locking.LEVEL_NODEGROUP
14386 lock_name = self.group_uuid
14387 else:
14388 lock_level = None
14389 lock_name = None
14391 if lock_level and getattr(self.op, "use_locking", True):
14392 self.needed_locks[lock_level] = lock_name
14394 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14395 # not possible to acquire the BGL based on opcode parameters)
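# CheckPrereq below resolves self.target to the cluster, node, instance or
# node group object whose tags the concrete tag LUs then read or modify.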
14397 def CheckPrereq(self):
14398 """Check prerequisites.
14401 if self.op.kind == constants.TAG_CLUSTER:
14402 self.target = self.cfg.GetClusterInfo()
14403 elif self.op.kind == constants.TAG_NODE:
14404 self.target = self.cfg.GetNodeInfo(self.op.name)
14405 elif self.op.kind == constants.TAG_INSTANCE:
14406 self.target = self.cfg.GetInstanceInfo(self.op.name)
14407 elif self.op.kind == constants.TAG_NODEGROUP:
14408 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14409 else:
14410 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14411 str(self.op.kind), errors.ECODE_INVAL)
14414 class LUTagsGet(TagsLU):
14415 """Returns the tags of a given object.
14420 def ExpandNames(self):
14421 TagsLU.ExpandNames(self)
14423 # Share locks as this is only a read operation
14424 self.share_locks = _ShareAll()
14426 def Exec(self, feedback_fn):
14427 """Returns the tag list.
14430 return list(self.target.GetTags())
14433 class LUTagsSearch(NoHooksLU):
14434 """Searches the tags for a given pattern.
14439 def ExpandNames(self):
14440 self.needed_locks = {}
14442 def CheckPrereq(self):
14443 """Check prerequisites.
14445 This checks the pattern passed for validity by compiling it.
14448 try:
14449 self.re = re.compile(self.op.pattern)
14450 except re.error, err:
14451 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14452 (self.op.pattern, err), errors.ECODE_INVAL)
14454 def Exec(self, feedback_fn):
14455 """Returns the tag list.
14458 cfg = self.cfg
14459 tgts = [("/cluster", cfg.GetClusterInfo())]
14460 ilist = cfg.GetAllInstancesInfo().values()
14461 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14462 nlist = cfg.GetAllNodesInfo().values()
14463 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14464 tgts.extend(("/nodegroup/%s" % n.name, n)
14465 for n in cfg.GetAllNodeGroupsInfo().values())
14466 results = []
14467 for path, target in tgts:
14468 for tag in target.GetTags():
14469 if self.re.search(tag):
14470 results.append((path, tag))
14472 return results
14474 class LUTagsSet(TagsLU):
14475 """Sets a tag on a given object.
14480 def CheckPrereq(self):
14481 """Check prerequisites.
14483 This checks the type and length of the tag name and value.
14486 TagsLU.CheckPrereq(self)
14487 for tag in self.op.tags:
14488 objects.TaggableObject.ValidateTag(tag)
14490 def Exec(self, feedback_fn):
14494 try:
14495 for tag in self.op.tags:
14496 self.target.AddTag(tag)
14497 except errors.TagError, err:
14498 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14499 self.cfg.Update(self.target, feedback_fn)
14502 class LUTagsDel(TagsLU):
14503 """Delete a list of tags from a given object.
14508 def CheckPrereq(self):
14509 """Check prerequisites.
14511 This checks that we have the given tag.
14514 TagsLU.CheckPrereq(self)
14515 for tag in self.op.tags:
14516 objects.TaggableObject.ValidateTag(tag)
14517 del_tags = frozenset(self.op.tags)
14518 cur_tags = self.target.GetTags()
14520 diff_tags = del_tags - cur_tags
14521 if diff_tags:
14522 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14523 raise errors.OpPrereqError("Tag(s) %s not found" %
14524 (utils.CommaJoin(diff_names), ),
14525 errors.ECODE_NOENT)
14527 def Exec(self, feedback_fn):
14528 """Remove the tag from the object.
14531 for tag in self.op.tags:
14532 self.target.RemoveTag(tag)
14533 self.cfg.Update(self.target, feedback_fn)
14536 class LUTestDelay(NoHooksLU):
14537 """Sleep for a specified amount of time.
14539 This LU sleeps on the master and/or nodes for a specified amount of
14545 def ExpandNames(self):
14546 """Expand names and set required locks.
14548 This expands the node list, if any.
14551 self.needed_locks = {}
14552 if self.op.on_nodes:
14553 # _GetWantedNodes can be used here, but is not always appropriate to use
14554 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14555 # more information.
14556 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14557 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
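# _TestDelay sleeps on the master via utils.TestDelay and/or, for on_nodes,
# through the test_delay RPC on each of the requested nodes.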
14559 def _TestDelay(self):
14560 """Do the actual sleep.
14563 if self.op.on_master:
14564 if not utils.TestDelay(self.op.duration):
14565 raise errors.OpExecError("Error during master delay test")
14566 if self.op.on_nodes:
14567 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14568 for node, node_result in result.items():
14569 node_result.Raise("Failure during rpc call to node %s" % node)
14571 def Exec(self, feedback_fn):
14572 """Execute the test delay opcode, with the wanted repetitions.
14575 if self.op.repeat == 0:
14576 self._TestDelay()
14577 else:
14578 top_value = self.op.repeat - 1
14579 for i in range(self.op.repeat):
14580 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14581 self._TestDelay()
14584 class LUTestJqueue(NoHooksLU):
14585 """Utility LU to test some aspects of the job queue.
14590 # Must be lower than default timeout for WaitForJobChange to see whether it
14591 # notices changed jobs
14592 _CLIENT_CONNECT_TIMEOUT = 20.0
14593 _CLIENT_CONFIRM_TIMEOUT = 60.0
14596 def _NotifyUsingSocket(cls, cb, errcls):
14597 """Opens a Unix socket and waits for another program to connect.
14600 @param cb: Callback to send socket name to client
14601 @type errcls: class
14602 @param errcls: Exception class to use for errors
14605 # Using a temporary directory as there's no easy way to create temporary
14606 # sockets without writing a custom loop around tempfile.mktemp and
14607 # socket.bind
14608 tmpdir = tempfile.mkdtemp()
14609 try:
14610 tmpsock = utils.PathJoin(tmpdir, "sock")
14612 logging.debug("Creating temporary socket at %s", tmpsock)
14613 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14614 try:
14615 sock.bind(tmpsock)
14616 sock.listen(1)
14618 # Send details to client
14619 cb(tmpsock)
14621 # Wait for client to connect before continuing
14622 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14623 try:
14624 (conn, _) = sock.accept()
14625 except socket.error, err:
14626 raise errcls("Client didn't connect in time (%s)" % err)
14627 finally:
14628 sock.close()
14629 finally:
14630 # Remove as soon as client is connected
14631 shutil.rmtree(tmpdir)
14633 # Wait for client to close
14634 try:
14635 try:
14636 # pylint: disable=E1101
14637 # Instance of '_socketobject' has no ... member
14638 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14639 conn.recv(1)
14640 except socket.error, err:
14641 raise errcls("Client failed to confirm notification (%s)" % err)
14642 finally:
14643 conn.close()
14645 def _SendNotification(self, test, arg, sockname):
14646 """Sends a notification to the client.
14649 @param test: Test name
14650 @param arg: Test argument (depends on test)
14651 @type sockname: string
14652 @param sockname: Socket path
14655 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14657 def _Notify(self, prereq, test, arg):
14658 """Notifies the client of a test.
14661 @param prereq: Whether this is a prereq-phase test
14663 @param test: Test name
14664 @param arg: Test argument (depends on test)
14667 if prereq:
14668 errcls = errors.OpPrereqError
14669 else:
14670 errcls = errors.OpExecError
14672 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14673 test, arg),
14674 errcls)
14676 def CheckArguments(self):
14677 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14678 self.expandnames_calls = 0
14680 def ExpandNames(self):
14681 checkargs_calls = getattr(self, "checkargs_calls", 0)
14682 if checkargs_calls < 1:
14683 raise errors.ProgrammerError("CheckArguments was not called")
14685 self.expandnames_calls += 1
14687 if self.op.notify_waitlock:
14688 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14690 self.LogInfo("Expanding names")
14692 # Get lock on master node (just to get a lock, not for a particular reason)
14693 self.needed_locks = {
14694 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14695 }
14697 def Exec(self, feedback_fn):
14698 if self.expandnames_calls < 1:
14699 raise errors.ProgrammerError("ExpandNames was not called")
14701 if self.op.notify_exec:
14702 self._Notify(False, constants.JQT_EXEC, None)
14704 self.LogInfo("Executing")
14706 if self.op.log_messages:
14707 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14708 for idx, msg in enumerate(self.op.log_messages):
14709 self.LogInfo("Sending log message %s", idx + 1)
14710 feedback_fn(constants.JQT_MSGPREFIX + msg)
14711 # Report how many test messages have been sent
14712 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14714 if self.op.fail:
14715 raise errors.OpExecError("Opcode failure was requested")
14717 return True
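# Editorial note: a hedged sketch of the client side of the notification
# handshake used by LUTestJqueue._NotifyUsingSocket above. The LU advertises
# a Unix socket path through the job-queue test log message, waits for a
# connect, and then waits for the connection to be confirmed/closed. How the
# client learns "sockname" depends on the test harness; this is illustrative.
def _ExampleConfirmJqueueNotification(sockname):
  """Connect to a LUTestJqueue notification socket and confirm receipt."""
  client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  try:
    client.connect(sockname)  # unblocks sock.accept() on the LU side
  finally:
    client.close()            # conn.recv(1) on the LU side then returns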
14720 class IAllocator(object):
14721 """IAllocator framework.
14723 An IAllocator instance has four sets of attributes:
14724 - cfg that is needed to query the cluster
14725 - input data (all members of the _KEYS class attribute are required)
14726 - four buffer attributes (in|out_data|text), that represent the
14727 input (to the external script) in text and data structure format,
14728 and the output from it, again in two formats
14729 - the result variables from the script (success, info, nodes) for
14730 easy usage
14733 # pylint: disable=R0902
14734 # lots of instance attributes
14736 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14737 self.cfg = cfg
14738 self.rpc = rpc_runner
14739 # init buffer variables
14740 self.in_text = self.out_text = self.in_data = self.out_data = None
14741 # init all input fields so that pylint is happy
14742 self.mode = mode
14743 self.memory = self.disks = self.disk_template = self.spindle_use = None
14744 self.os = self.tags = self.nics = self.vcpus = None
14745 self.hypervisor = None
14746 self.relocate_from = None
14747 self.name = None
14748 self.instances = None
14749 self.evac_mode = None
14750 self.target_groups = []
14752 self.required_nodes = None
14753 # init result fields
14754 self.success = self.info = self.result = None
14756 try:
14757 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14758 except KeyError:
14759 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14760 " IAllocator" % self.mode)
14762 keyset = [n for (n, _) in keydata]
14764 for key in kwargs:
14765 if key not in keyset:
14766 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14767 " IAllocator" % key)
14768 setattr(self, key, kwargs[key])
14770 for key in keyset:
14771 if key not in kwargs:
14772 raise errors.ProgrammerError("Missing input parameter '%s' to"
14773 " IAllocator" % key)
14774 self._BuildInputData(compat.partial(fn, self), keydata)
14776 def _ComputeClusterData(self):
14777 """Compute the generic allocator input data.
14779 This is the data that is independent of the actual operation.
14782 cfg = self.cfg
14783 cluster_info = cfg.GetClusterInfo()
14785 data = {
14786 "version": constants.IALLOCATOR_VERSION,
14787 "cluster_name": cfg.GetClusterName(),
14788 "cluster_tags": list(cluster_info.GetTags()),
14789 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14790 "ipolicy": cluster_info.ipolicy,
14791 }
14792 ninfo = cfg.GetAllNodesInfo()
14793 iinfo = cfg.GetAllInstancesInfo().values()
14794 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14797 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14799 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14800 hypervisor_name = self.hypervisor
14801 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14802 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14803 else:
14804 hypervisor_name = cluster_info.primary_hypervisor
14806 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14807 [hypervisor_name])
14808 node_iinfo = \
14809 self.rpc.call_all_instances_info(node_list,
14810 cluster_info.enabled_hypervisors)
14812 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14814 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14815 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14816 i_list, config_ndata)
14817 assert len(data["nodes"]) == len(ninfo), \
14818 "Incomplete node data computed"
14820 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14822 self.in_data = data
14824 @staticmethod
14825 def _ComputeNodeGroupData(cfg):
14826 """Compute node groups data.
14829 cluster = cfg.GetClusterInfo()
14830 ng = dict((guuid, {
14831 "name": gdata.name,
14832 "alloc_policy": gdata.alloc_policy,
14833 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14835 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14840 def _ComputeBasicNodeData(cfg, node_cfg):
14841 """Compute global node data.
14844 @returns: a dict of name: (node dict, node config)
14847 # fill in static (config-based) values
14848 node_results = dict((ninfo.name, {
14849 "tags": list(ninfo.GetTags()),
14850 "primary_ip": ninfo.primary_ip,
14851 "secondary_ip": ninfo.secondary_ip,
14852 "offline": ninfo.offline,
14853 "drained": ninfo.drained,
14854 "master_candidate": ninfo.master_candidate,
14855 "group": ninfo.group,
14856 "master_capable": ninfo.master_capable,
14857 "vm_capable": ninfo.vm_capable,
14858 "ndparams": cfg.GetNdParams(ninfo),
14860 for ninfo in node_cfg.values())
14862 return node_results
14865 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14867 """Compute global node data.
14869 @param node_results: the basic node structures as filled from the config
14872 #TODO(dynmem): compute the right data on MAX and MIN memory
14873 # make a copy of the current dict
14874 node_results = dict(node_results)
14875 for nname, nresult in node_data.items():
14876 assert nname in node_results, "Missing basic data for node %s" % nname
14877 ninfo = node_cfg[nname]
14879 if not (ninfo.offline or ninfo.drained):
14880 nresult.Raise("Can't get data for node %s" % nname)
14881 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14882 nname)
14883 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14885 for attr in ["memory_total", "memory_free", "memory_dom0",
14886 "vg_size", "vg_free", "cpu_total"]:
14887 if attr not in remote_info:
14888 raise errors.OpExecError("Node '%s' didn't return attribute"
14889 " '%s'" % (nname, attr))
14890 if not isinstance(remote_info[attr], int):
14891 raise errors.OpExecError("Node '%s' returned invalid value"
14893 (nname, attr, remote_info[attr]))
14894 # compute memory used by primary instances
14895 i_p_mem = i_p_up_mem = 0
14896 for iinfo, beinfo in i_list:
14897 if iinfo.primary_node == nname:
14898 i_p_mem += beinfo[constants.BE_MAXMEM]
14899 if iinfo.name not in node_iinfo[nname].payload:
14900 i_used_mem = 0
14901 else:
14902 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14903 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14904 remote_info["memory_free"] -= max(0, i_mem_diff)
14906 if iinfo.admin_state == constants.ADMINST_UP:
14907 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14909 # compute memory used by instances
14911 "total_memory": remote_info["memory_total"],
14912 "reserved_memory": remote_info["memory_dom0"],
14913 "free_memory": remote_info["memory_free"],
14914 "total_disk": remote_info["vg_size"],
14915 "free_disk": remote_info["vg_free"],
14916 "total_cpus": remote_info["cpu_total"],
14917 "i_pri_memory": i_p_mem,
14918 "i_pri_up_memory": i_p_up_mem,
14919 }
14920 pnr_dyn.update(node_results[nname])
14921 node_results[nname] = pnr_dyn
14923 return node_results
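# Editorial note: for orientation, a node entry produced by the method above
# roughly looks like the following (values and units are illustrative only;
# the static keys from _ComputeBasicNodeData are merged in as well):
#
#   {"total_memory": 16384, "reserved_memory": 1024, "free_memory": 8192,
#    "total_disk": 512000, "free_disk": 256000, "total_cpus": 8,
#    "i_pri_memory": 4096, "i_pri_up_memory": 2048,
#    # ...plus "tags", "primary_ip", "offline", "group", "ndparams", ...
#    }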
14925 @staticmethod
14926 def _ComputeInstanceData(cluster_info, i_list):
14927 """Compute global instance data.
14930 instance_data = {}
14931 for iinfo, beinfo in i_list:
14932 nic_data = []
14933 for nic in iinfo.nics:
14934 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14935 nic_dict = {
14936 "mac": nic.mac,
14937 "ip": nic.ip,
14938 "mode": filled_params[constants.NIC_MODE],
14939 "link": filled_params[constants.NIC_LINK],
14940 }
14941 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14942 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14943 nic_data.append(nic_dict)
14945 "tags": list(iinfo.GetTags()),
14946 "admin_state": iinfo.admin_state,
14947 "vcpus": beinfo[constants.BE_VCPUS],
14948 "memory": beinfo[constants.BE_MAXMEM],
14949 "spindle_use": beinfo[constants.BE_SPINDLE_USE],
14951 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14953 "disks": [{constants.IDISK_SIZE: dsk.size,
14954 constants.IDISK_MODE: dsk.mode}
14955 for dsk in iinfo.disks],
14956 "disk_template": iinfo.disk_template,
14957 "hypervisor": iinfo.hypervisor,
14959 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14961 instance_data[iinfo.name] = pir
14963 return instance_data
14965 def _AddNewInstance(self):
14966 """Add new instance data to allocator structure.
14968 This in combination with _AllocatorGetClusterData will create the
14969 correct structure needed as input for the allocator.
14971 The checks for the completeness of the opcode must have already been
14972 done.
14975 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14977 if self.disk_template in constants.DTS_INT_MIRROR:
14978 self.required_nodes = 2
14979 else:
14980 self.required_nodes = 1
14982 request = {
14983 "name": self.name,
14984 "disk_template": self.disk_template,
14985 "tags": self.tags,
14986 "os": self.os,
14987 "vcpus": self.vcpus,
14988 "memory": self.memory,
14989 "spindle_use": self.spindle_use,
14990 "disks": self.disks,
14991 "disk_space_total": disk_space,
14993 "required_nodes": self.required_nodes,
14994 "hypervisor": self.hypervisor,
14999 def _AddRelocateInstance(self):
15000 """Add relocate instance data to allocator structure.
15002 This in combination with _IAllocatorGetClusterData will create the
15003 correct structure needed as input for the allocator.
15005 The checks for the completeness of the opcode must have already been
15006 done.
15009 instance = self.cfg.GetInstanceInfo(self.name)
15010 if instance is None:
15011 raise errors.ProgrammerError("Unknown instance '%s' passed to"
15012 " IAllocator" % self.name)
15014 if instance.disk_template not in constants.DTS_MIRRORED:
15015 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
15016 errors.ECODE_INVAL)
15018 if instance.disk_template in constants.DTS_INT_MIRROR and \
15019 len(instance.secondary_nodes) != 1:
15020 raise errors.OpPrereqError("Instance has not exactly one secondary node",
15021 errors.ECODE_STATE)
15023 self.required_nodes = 1
15024 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
15025 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
15027 request = {
15028 "name": self.name,
15029 "disk_space_total": disk_space,
15030 "required_nodes": self.required_nodes,
15031 "relocate_from": self.relocate_from,
15035 def _AddNodeEvacuate(self):
15036 """Get data for node-evacuate requests.
15040 "instances": self.instances,
15041 "evac_mode": self.evac_mode,
15044 def _AddChangeGroup(self):
15045 """Get data for node-evacuate requests.
15049 "instances": self.instances,
15050 "target_groups": self.target_groups,
15053 def _BuildInputData(self, fn, keydata):
15054 """Build input data structures.
15057 self._ComputeClusterData()
15060 request["type"] = self.mode
15061 for keyname, keytype in keydata:
15062 if keyname not in request:
15063 raise errors.ProgrammerError("Request parameter %s is missing" %
15065 val = request[keyname]
15066 if not keytype(val):
15067 raise errors.ProgrammerError("Request parameter %s doesn't pass"
15068 " validation, value %s, expected"
15069 " type %s" % (keyname, val, keytype))
15070 self.in_data["request"] = request
15072 self.in_text = serializer.Dump(self.in_data)
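# Editorial note: a hedged sketch of what self.in_text looks like for an
# allocation request once _BuildInputData has run. The "request" keys are the
# ones listed for IALLOCATOR_MODE_ALLOC in _MODE_DATA below; the surrounding
# cluster/node/instance data computed above is elided and all values are
# illustrative.
#
#   {"version": ..., "cluster_name": "...", "cluster_tags": [...],
#    "enabled_hypervisors": [...], "ipolicy": {...},
#    "nodegroups": {...}, "nodes": {...}, "instances": {...},
#    "request": {"type": "allocate", "name": "inst1.example.com",
#                "memory": 1024, "vcpus": 1, "spindle_use": 1,
#                "disks": [...], "disk_template": "drbd",
#                "disk_space_total": ..., "os": "...", "tags": [],
#                "nics": [...], "hypervisor": "...", "required_nodes": 2}}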
15074 _STRING_LIST = ht.TListOf(ht.TString)
15075 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
15076 # pylint: disable=E1101
15077 # Class '...' has no 'OP_ID' member
15078 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
15079 opcodes.OpInstanceMigrate.OP_ID,
15080 opcodes.OpInstanceReplaceDisks.OP_ID])
15081 })))
15083 _NEVAC_MOVED = \
15084 ht.TListOf(ht.TAnd(ht.TIsLength(3),
15085 ht.TItems([ht.TNonEmptyString,
15086 ht.TNonEmptyString,
15087 ht.TListOf(ht.TNonEmptyString),
15088 ])))
15089 _NEVAC_FAILED = \
15090 ht.TListOf(ht.TAnd(ht.TIsLength(2),
15091 ht.TItems([ht.TNonEmptyString,
15093 ])))
15094 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
15095 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
15097 _MODE_DATA = {
15098 constants.IALLOCATOR_MODE_ALLOC:
15099 (_AddNewInstance,
15100 [
15101 ("name", ht.TString),
15102 ("memory", ht.TInt),
15103 ("spindle_use", ht.TInt),
15104 ("disks", ht.TListOf(ht.TDict)),
15105 ("disk_template", ht.TString),
15106 ("os", ht.TString),
15107 ("tags", _STRING_LIST),
15108 ("nics", ht.TListOf(ht.TDict)),
15109 ("vcpus", ht.TInt),
15110 ("hypervisor", ht.TString),
15111 ], ht.TList),
15112 constants.IALLOCATOR_MODE_RELOC:
15113 (_AddRelocateInstance,
15114 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
15115 ht.TList),
15116 constants.IALLOCATOR_MODE_NODE_EVAC:
15117 (_AddNodeEvacuate, [
15118 ("instances", _STRING_LIST),
15119 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
15120 ], _NEVAC_RESULT),
15121 constants.IALLOCATOR_MODE_CHG_GROUP:
15122 (_AddChangeGroup, [
15123 ("instances", _STRING_LIST),
15124 ("target_groups", _STRING_LIST),
15128 def Run(self, name, validate=True, call_fn=None):
15129 """Run an instance allocator and return the results.
15132 if call_fn is None:
15133 call_fn = self.rpc.call_iallocator_runner
15135 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
15136 result.Raise("Failure while running the iallocator script")
15138 self.out_text = result.payload
15139 if validate:
15140 self._ValidateResult()
15142 def _ValidateResult(self):
15143 """Process the allocator results.
15145 This will process and if successful save the result in
15146 self.out_data and the other parameters.
15149 try:
15150 rdict = serializer.Load(self.out_text)
15151 except Exception, err:
15152 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
15154 if not isinstance(rdict, dict):
15155 raise errors.OpExecError("Can't parse iallocator results: not a dict")
15157 # TODO: remove backwards compatibility in later versions
15158 if "nodes" in rdict and "result" not in rdict:
15159 rdict["result"] = rdict["nodes"]
15160 del rdict["nodes"]
15162 for key in "success", "info", "result":
15163 if key not in rdict:
15164 raise errors.OpExecError("Can't parse iallocator results:"
15165 " missing key '%s'" % key)
15166 setattr(self, key, rdict[key])
15168 if not self._result_check(self.result):
15169 raise errors.OpExecError("Iallocator returned invalid result,"
15170 " expected %s, got %s" %
15171 (self._result_check, self.result),
15172 errors.ECODE_INVAL)
15174 if self.mode == constants.IALLOCATOR_MODE_RELOC:
15175 assert self.relocate_from is not None
15176 assert self.required_nodes == 1
15178 node2group = dict((name, ndata["group"])
15179 for (name, ndata) in self.in_data["nodes"].items())
15181 fn = compat.partial(self._NodesToGroups, node2group,
15182 self.in_data["nodegroups"])
15184 instance = self.cfg.GetInstanceInfo(self.name)
15185 request_groups = fn(self.relocate_from + [instance.primary_node])
15186 result_groups = fn(rdict["result"] + [instance.primary_node])
15188 if self.success and not set(result_groups).issubset(request_groups):
15189 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15190 " differ from original groups (%s)" %
15191 (utils.CommaJoin(result_groups),
15192 utils.CommaJoin(request_groups)))
15194 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15195 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15197 self.out_data = rdict
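# Editorial note: a hedged sketch of the minimal payload an iallocator script
# has to emit to pass the checks above: a JSON object with at least the
# "success", "info" and "result" keys, where "result" also satisfies the
# per-mode _result_check from _MODE_DATA (e.g. a node-name list for an
# allocation). The legacy "nodes" key is still accepted and renamed to
# "result" above. Values below are illustrative.
#
#   {"success": true,
#    "info": "allocation successful",
#    "result": ["node2.example.com", "node3.example.com"]}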
15199 @staticmethod
15200 def _NodesToGroups(node2group, groups, nodes):
15201 """Returns a list of unique group names for a list of nodes.
15203 @type node2group: dict
15204 @param node2group: Map from node name to group UUID
15206 @param groups: Group information
15208 @param nodes: Node names
15211 result = set()
15213 for node in nodes:
15214 try:
15215 group_uuid = node2group[node]
15216 except KeyError:
15217 # Ignore unknown node
15218 pass
15219 else:
15220 try:
15221 group = groups[group_uuid]
15222 except KeyError:
15223 # Can't find group, let's use UUID
15224 group_name = group_uuid
15225 else:
15226 group_name = group["name"]
15228 result.add(group_name)
15230 return sorted(result)
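# Editorial note: a hedged construction sketch for the class above. The
# keyword arguments accepted per mode are exactly the key names listed in
# _MODE_DATA (for relocation: "name" and "relocate_from"); "cfg" and
# "rpc_runner" stand for the live configuration and RPC runner an LU has at
# hand, and the instance/node names are illustrative.
def _ExampleRunRelocationAllocator(cfg, rpc_runner):
  """Build and run an IAllocator relocation request (illustrative)."""
  ial = IAllocator(cfg, rpc_runner,
                   mode=constants.IALLOCATOR_MODE_RELOC,
                   name="inst1.example.com",
                   relocate_from=["node2.example.com"])
  # "hail" is the usual htools allocator; any installed script name works
  ial.Run("hail")
  if not ial.success:
    raise errors.OpExecError("Relocation request failed: %s" % ial.info)
  return ial.result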
15233 class LUTestAllocator(NoHooksLU):
15234 """Run allocator tests.
15236 This LU runs the allocator tests
15239 def CheckPrereq(self):
15240 """Check prerequisites.
15242 This checks the opcode parameters depending on the test direction and mode.
15245 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15246 for attr in ["memory", "disks", "disk_template",
15247 "os", "tags", "nics", "vcpus"]:
15248 if not hasattr(self.op, attr):
15249 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15250 attr, errors.ECODE_INVAL)
15251 iname = self.cfg.ExpandInstanceName(self.op.name)
15252 if iname is not None:
15253 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15254 iname, errors.ECODE_EXISTS)
15255 if not isinstance(self.op.nics, list):
15256 raise errors.OpPrereqError("Invalid parameter 'nics'",
15257 errors.ECODE_INVAL)
15258 if not isinstance(self.op.disks, list):
15259 raise errors.OpPrereqError("Invalid parameter 'disks'",
15260 errors.ECODE_INVAL)
15261 for row in self.op.disks:
15262 if (not isinstance(row, dict) or
15263 constants.IDISK_SIZE not in row or
15264 not isinstance(row[constants.IDISK_SIZE], int) or
15265 constants.IDISK_MODE not in row or
15266 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15267 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15268 " parameter", errors.ECODE_INVAL)
15269 if self.op.hypervisor is None:
15270 self.op.hypervisor = self.cfg.GetHypervisorType()
15271 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15272 fname = _ExpandInstanceName(self.cfg, self.op.name)
15273 self.op.name = fname
15274 self.relocate_from = \
15275 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15276 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15277 constants.IALLOCATOR_MODE_NODE_EVAC):
15278 if not self.op.instances:
15279 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15280 self.op.instances = _GetWantedInstances(self, self.op.instances)
15282 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15283 self.op.mode, errors.ECODE_INVAL)
15285 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15286 if self.op.allocator is None:
15287 raise errors.OpPrereqError("Missing allocator name",
15288 errors.ECODE_INVAL)
15289 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15290 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15291 self.op.direction, errors.ECODE_INVAL)
15293 def Exec(self, feedback_fn):
15294 """Run the allocator test.
15297 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15298 ial = IAllocator(self.cfg, self.rpc,
15299 mode=self.op.mode,
15300 name=self.op.name,
15301 memory=self.op.memory,
15302 disks=self.op.disks,
15303 disk_template=self.op.disk_template,
15304 os=self.op.os,
15305 tags=self.op.tags,
15306 nics=self.op.nics,
15307 vcpus=self.op.vcpus,
15308 hypervisor=self.op.hypervisor,
15309 spindle_use=self.op.spindle_use,
15310 )
15311 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15312 ial = IAllocator(self.cfg, self.rpc,
15313 mode=self.op.mode,
15314 name=self.op.name,
15315 relocate_from=list(self.relocate_from),
15316 )
15317 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15318 ial = IAllocator(self.cfg, self.rpc,
15319 mode=self.op.mode,
15320 instances=self.op.instances,
15321 target_groups=self.op.target_groups)
15322 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15323 ial = IAllocator(self.cfg, self.rpc,
15324 mode=self.op.mode,
15325 instances=self.op.instances,
15326 evac_mode=self.op.evac_mode)
15327 else:
15328 raise errors.ProgrammerError("Unhandled mode %s in"
15329 " LUTestAllocator.Exec", self.op.mode)
15331 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15332 result = ial.in_text
15333 else:
15334 ial.Run(self.op.allocator, validate=False)
15335 result = ial.out_text
15337 return result
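# Editorial note: a hedged sketch of driving the LU above. With direction
# IALLOCATOR_DIR_IN it only returns the generated input text; with
# IALLOCATOR_DIR_OUT it also runs the named allocator script. The opcode
# field names mirror the self.op attributes used above and are otherwise
# illustrative.
#
#   op = opcodes.OpTestAllocator(direction=constants.IALLOCATOR_DIR_IN,
#                                mode=constants.IALLOCATOR_MODE_RELOC,
#                                name="inst1.example.com",
#                                allocator="hail")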
15339 #: Query type implementations
15340 _QUERY_IMPL = {
15341 constants.QR_CLUSTER: _ClusterQuery,
15342 constants.QR_INSTANCE: _InstanceQuery,
15343 constants.QR_NODE: _NodeQuery,
15344 constants.QR_GROUP: _GroupQuery,
15345 constants.QR_OS: _OsQuery,
15346 constants.QR_EXPORT: _ExportQuery,
15347 }
15349 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15352 def _GetQueryImplementation(name):
15353 """Returns the implemtnation for a query type.
15355 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15358 try:
15359 return _QUERY_IMPL[name]
15360 except KeyError:
15361 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15362 errors.ECODE_INVAL)
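# Editorial note: a small illustrative helper showing how the mapping above
# is meant to be used; unknown resource names surface as OpPrereqError rather
# than KeyError, which is what opcode-level callers expect.
def _ExampleResolveNodeQuery():
  """Resolve the query implementation for the node resource (illustrative)."""
  return _GetQueryImplementation(constants.QR_NODE)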