# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module

# Standard library and third-party modules used further below
import copy
import itertools
import logging
import re

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import rpc
from ganeti import runtime

import ganeti.masterd.instance # pylint: disable=W0611


#: Size of DRBD meta block device
DRBD_META_SIZE = 128

INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcode.OpCode}
96 @param jobs: A list of lists of opcode objects
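

# Illustrative sketch (not part of the original module): an LU that wants
# follow-up jobs submitted on its behalf can return ResultWithJobs from its
# Exec method, e.g. one job containing two verification opcodes plus an
# extra return value ("other_value" is a made-up keyword argument):
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpClusterVerifyConfig(),
#              opcodes.OpClusterVerifyGroup(group_name="default")]]
#     return ResultWithJobs(jobs, other_value=42)
#
# mcpu._ProcessResult then submits the jobs and merges their job IDs into
# the opcode result.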


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods need not worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this
      function will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this
      function will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hooks results

    """
    # API must be kept, thus we ignore the unused argument and the
    # 'could be a function' warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been
    done before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU,
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
463 """Check prerequisites for this tasklets.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or
486 raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
499 def __init__(self, qfilter, fields, use_locking):
500 """Initializes this class.
503 self.use_locking = use_locking
505 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
506 namefield=self.SORT_FIELD)
507 self.requested_data = self.query.RequestedData()
508 self.names = self.query.RequestedNames()
510 # Sort only if no names were requested
511 self.sort_by_name = not self.names
513 self.do_locking = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)
582 """Returns a dict declaring all lock levels shared.
585 return dict.fromkeys(locking.LEVELS, 1)


def _MakeLegacyNodeInfo(data):
  """Formats the data returned by L{rpc.RpcRunner.call_node_info}.

  Converts the data into a single dictionary. This is fine for most use cases,
  but some require information from more than one volume group or hypervisor.

  """
  (bootid, (vg_info, ), (hv_info, )) = data

  return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
    "bootid": bootid,
    })


def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of its children!)
  @param cfg: The config object
  @return: The annotated disk copies
  @see L{rpc.AnnotateDiskParams}

  """
  return rpc.AnnotateDiskParams(instance.disk_template, devs,
                                cfg.GetInstanceDiskParams(instance))


def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
                              cur_group_uuid):
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  """
  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _CopyLockList(names):
  """Makes a copy of a list of lock names.

  Handles L{locking.ALL_SET} correctly.

  """
  if names == locking.ALL_SET:
    return locking.ALL_SET
  else:
    return names[:]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())

  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
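

# Illustrative sketch (not part of the original module) of how
# _GetUpdatedParams merges updates; the keys and values are made up:
#
#   old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/sda1"}
#   _GetUpdatedParams(old, {"root_path": constants.VALUE_DEFAULT,
#                           "serial_console": True})
#   # -> {"kernel_path": "/boot/vmlinuz", "serial_console": True}
#
# "root_path" is dropped because VALUE_DEFAULT marks it as 'to be deleted';
# "serial_console" is simply added.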


def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  """
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
                                 errors.ECODE_INVAL)
    if key in constants.IPOLICY_ISPECS:
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_none=use_none,
                                       use_default=use_default)
    else:
      if (not value or value == [constants.VALUE_DEFAULT] or
          value == constants.VALUE_DEFAULT):
        if group_policy:
          del ipolicy[key]
        else:
          raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster" % key,
                                     errors.ECODE_INVAL)
      elif key in constants.IPOLICY_PARAMETERS:
        # FIXME: we assume all such values are float
        try:
          ipolicy[key] = float(value)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid value for attribute"
                                     " '%s': '%s', error: %s" %
                                     (key, value, err), errors.ECODE_INVAL)
      else:
        # FIXME: we assume all others are lists; this should be redone
        ipolicy[key] = list(value)
  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
  except errors.ConfigurationError, err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                               errors.ECODE_INVAL)
  return ipolicy


def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @return: A new dict with updated and verified values

  """
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret
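

# Illustrative sketch (not part of the original module): merging
# per-hypervisor sub-dicts, where the outer keys are hypervisor names and
# each inner dict goes through _GetUpdatedParams and ForceDictType. The
# key/value names below are made up:
#
#   base = {"xen-pvm": {"mem_total": 1024}}
#   updates = {"xen-pvm": {"mem_total": 2048}, "kvm": {"mem_total": 512}}
#   _UpdateAndVerifySubDict(base, updates, type_check)
#   # -> {"xen-pvm": {"mem_total": 2048}, "kvm": {"mem_total": 512}}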


def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
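

# Illustrative sketch (not part of the original module): a typical caller,
# after narrowing down the set of nodes an LU actually touches, might do
# something like the following from inside the LU (attribute names made up):
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=[self.op.node_name])      # drop all but one node lock
#   _ReleaseLocks(self, locking.LEVEL_INSTANCE)  # drop all instance locks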


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key,
    instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
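

# Illustrative sketch (not part of the original module): for an instance
# "inst1" whose LVs map as {"node1": ["xenvg/disk0"]} (names made up), the
# result is {("node1", "xenvg/disk0"): "inst1"}, i.e. a reverse index from
# physical volume location to owning instance.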


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except Exception, err: # pylint: disable=W0703
    lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    if not lu.cfg.GetNodeInfo(pnode).offline:
      ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
      ins_l.Raise("Can't contact node %s for instance information" % pnode,
                  prereq=True, ecode=errors.ECODE_ENVIRON)
      if instance.name in ins_l.payload:
        raise errors.OpPrereqError("Instance %s is running, %s" %
                                   (instance.name, msg), errors.ECODE_STATE)
    else:
      lu.LogWarning("Primary node offline, ignoring check that instance"
                    " is down")


def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
      not just 'disk')
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or element not meeting the criteria

  """
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    if qualifier:
      fqn = "%s/%s" % (name, qualifier)
    else:
      fqn = name
    return ("%s value %s is not in range [%s, %s]" %
            (fqn, value, min_v, max_v))
  return None
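

# Illustrative sketch (not part of the original module): with a hypothetical
# policy allowing 128..32768 MB of memory, out-of-range values yield a
# human-readable violation, in-range values (or VALUE_AUTO) yield None:
#
#   ipolicy = {constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
#              constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 32768}}
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, ipolicy, 256)
#   # -> None
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, ipolicy, 65536)
#   # -> "<mem-size-spec-name> value 65536 is not in range [128, 32768]"
#
# (the exact string depends on the value of constants.ISPEC_MEM_SIZE)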


def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]

  return filter(None,
                (_compute_fn(name, qualifier, ipolicy, value)
                 for (name, qualifier, value) in test_settings))
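

# Illustrative sketch (not part of the original module): checking a
# hypothetical 2-disk spec against a policy; each violating setting produces
# one entry in the returned list (the disk size below is deliberately huge):
#
#   _ComputeIPolicySpecViolation(ipolicy, 1024, 2, 2, 1, [10240, 99999999], 2)
#   # -> e.g. ["<disk-size-spec-name>/1 value 99999999 is not in range [...]"]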


def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
                                         _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)


def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)


def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violates the new ipolicy but
      did not before

  """
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
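

# Illustrative sketch (not part of the original module): both wrappers turn a
# shortened name into the canonical one stored in the configuration (names
# made up):
#
#   _ExpandNodeName(self.cfg, "node1")
#   # -> "node1.example.com", or raises OpPrereqError(ECODE_NOENT)
#   #    if no configured node matches "node1"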


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
    }
  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
    }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
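

# Illustrative sketch (not part of the original module): with a candidate
# pool size of 10 and 3 current candidates out of 3 desired, adding a node
# raises the desired count to min(3 + 1, 10) = 4 > 3, so the new node
# promotes itself; once mc_now has caught up with the pool size, it no
# longer does.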


def _CalculateGroupIPolicy(cluster, group):
  """Calculate instance policy for group.

  """
  return cluster.SimpleFillIPolicy(group.ipolicy)


def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances that violate the given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: iterable of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
                                                                instance))
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)


def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator


def _CheckHostnameSane(lu, name):
  """Ensures that a given hostname resolves to a 'sane' name.

  The given name is required to be a prefix of the resolved hostname,
  to prevent accidental mismatches.

  @param lu: the logical unit on behalf of which we're checking
  @param name: the name we should resolve and check
  @return: the resolved hostname object

  """
  hostname = netutils.GetHostname(name=name)
  if hostname.name != name:
    lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
  if not utils.MatchNameComponent(name, [hostname.name]):
    raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                " same as given hostname '%s'") %
                               (hostname.name, name), errors.ECODE_INVAL)
  return hostname
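

# Illustrative sketch (not part of the original module): "web1" resolving to
# "web1.example.com" is accepted (and logged), because the given name is a
# prefix component of the resolved one; "web1" resolving to
# "mail.example.com" raises OpPrereqError instead.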


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Error disabling the master IP address: %s", msg)

    return master_params.name


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors: # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if cond:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
1929 class LUClusterVerify(NoHooksLU):
1930 """Submits all jobs necessary to verify the cluster.
1935 def ExpandNames(self):
1936 self.needed_locks = {}
1938 def Exec(self, feedback_fn):
1939 jobs = []
1941 if self.op.group_name:
1942 groups = [self.op.group_name]
1943 depends_fn = lambda: None
1944 else:
1945 groups = self.cfg.GetNodeGroupList()
1947 # Verify global configuration
1948 jobs.append([
1949 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1950 ])
1952 # Always depend on global verification
1953 depends_fn = lambda: [(-len(jobs), [])]
1955 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1956 ignore_errors=self.op.ignore_errors,
1957 depends=depends_fn())]
1958 for group in groups)
1960 # Fix up all parameters
1961 for op in itertools.chain(*jobs): # pylint: disable=W0142
1962 op.debug_simulate_errors = self.op.debug_simulate_errors
1963 op.verbose = self.op.verbose
1964 op.error_codes = self.op.error_codes
1965 try:
1966 op.skip_checks = self.op.skip_checks
1967 except AttributeError:
1968 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1970 return ResultWithJobs(jobs)
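# Illustrative sketch: for a cluster with node groups "g1" and "g2" and no
# group_name in the opcode, Exec() above returns jobs shaped like
#   [[OpClusterVerifyConfig], [OpClusterVerifyGroup(group_name="g1", ...)],
#    [OpClusterVerifyGroup(group_name="g2", ...)]]
# where each group job's "depends" entry points back, by a negative relative
# offset, to the configuration-verification job submitted first in the same
# batch (group names invented).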
1973 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1974 """Verifies the cluster config.
1979 def _VerifyHVP(self, hvp_data):
1980 """Verifies locally the syntax of the hypervisor parameters.
1983 for item, hv_name, hv_params in hvp_data:
1984 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1987 hv_class = hypervisor.GetHypervisor(hv_name)
1988 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1989 hv_class.CheckParameterSyntax(hv_params)
1990 except errors.GenericError, err:
1991 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1993 def ExpandNames(self):
1994 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1995 self.share_locks = _ShareAll()
1997 def CheckPrereq(self):
1998 """Check prerequisites.
2001 # Retrieve all information
2002 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2003 self.all_node_info = self.cfg.GetAllNodesInfo()
2004 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2006 def Exec(self, feedback_fn):
2007 """Verify integrity of cluster, performing various test on nodes.
2011 self._feedback_fn = feedback_fn
2013 feedback_fn("* Verifying cluster config")
2015 for msg in self.cfg.VerifyConfig():
2016 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2018 feedback_fn("* Verifying cluster certificate files")
2020 for cert_filename in constants.ALL_CERT_FILES:
2021 (errcode, msg) = _VerifyCertificate(cert_filename)
2022 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2024 feedback_fn("* Verifying hypervisor parameters")
2026 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2027 self.all_inst_info.values()))
2029 feedback_fn("* Verifying all nodes belong to an existing group")
2031 # We do this verification here because, should this bogus circumstance
2032 # occur, it would never be caught by VerifyGroup, which only acts on
2033 # nodes/instances reachable from existing node groups.
2035 dangling_nodes = set(node.name for node in self.all_node_info.values()
2036 if node.group not in self.all_group_info)
2038 dangling_instances = {}
2039 no_node_instances = []
2041 for inst in self.all_inst_info.values():
2042 if inst.primary_node in dangling_nodes:
2043 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2044 elif inst.primary_node not in self.all_node_info:
2045 no_node_instances.append(inst.name)
2047 pretty_dangling = [
2048 "%s (%s)" %
2049 (node.name,
2050 utils.CommaJoin(dangling_instances.get(node.name,
2051 ["no instances"])))
2052 for node in dangling_nodes]
2054 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2055 None,
2056 "the following nodes (and their instances) belong to a non"
2057 " existing group: %s", utils.CommaJoin(pretty_dangling))
2059 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2060 None,
2061 "the following instances have a non-existing primary-node:"
2062 " %s", utils.CommaJoin(no_node_instances))
2067 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2068 """Verifies the status of a node group.
2071 HPATH = "cluster-verify"
2072 HTYPE = constants.HTYPE_CLUSTER
2073 REQ_BGL = False
2075 _HOOKS_INDENT_RE = re.compile("^", re.M)
2077 class NodeImage(object):
2078 """A class representing the logical and physical status of a node.
2081 @ivar name: the node name to which this object refers
2082 @ivar volumes: a structure as returned from
2083 L{ganeti.backend.GetVolumeList} (runtime)
2084 @ivar instances: a list of running instances (runtime)
2085 @ivar pinst: list of configured primary instances (config)
2086 @ivar sinst: list of configured secondary instances (config)
2087 @ivar sbp: dictionary of {primary-node: list of instances} for all
2088 instances for which this node is secondary (config)
2089 @ivar mfree: free memory, as reported by hypervisor (runtime)
2090 @ivar dfree: free disk, as reported by the node (runtime)
2091 @ivar offline: the offline status (config)
2092 @type rpc_fail: boolean
2093 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2094 not whether the individual keys were correct) (runtime)
2095 @type lvm_fail: boolean
2096 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2097 @type hyp_fail: boolean
2098 @ivar hyp_fail: whether the RPC call didn't return the instance list
2099 @type ghost: boolean
2100 @ivar ghost: whether this is a known node or not (config)
2101 @type os_fail: boolean
2102 @ivar os_fail: whether the RPC call didn't return valid OS data
2103 @type oslist: list
2104 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2105 @type vm_capable: boolean
2106 @ivar vm_capable: whether the node can host instances
2109 def __init__(self, offline=False, name=None, vm_capable=True):
2110 self.name = name
2111 self.volumes = {}
2112 self.instances = []
2113 self.pinst = []
2114 self.sinst = []
2115 self.sbp = {}
2116 self.mfree = 0
2117 self.dfree = 0
2118 self.offline = offline
2119 self.vm_capable = vm_capable
2120 self.rpc_fail = False
2121 self.lvm_fail = False
2122 self.hyp_fail = False
2123 self.ghost = False
2124 self.os_fail = False
2125 self.oslist = {}
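# Illustrative sketch: Exec() below keeps one NodeImage per node, e.g.
#   node_image["n1"] = NodeImage(offline=False, name="n1", vm_capable=True)
# first filling the config-derived fields (pinst/sinst/sbp) and only then
# merging in the runtime results of the node-verify RPC ("n1" is invented).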
2127 def ExpandNames(self):
2128 # This raises errors.OpPrereqError on its own:
2129 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2131 # Get instances in node group; this is unsafe and needs verification later
2132 inst_names = \
2133 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2135 self.needed_locks = {
2136 locking.LEVEL_INSTANCE: inst_names,
2137 locking.LEVEL_NODEGROUP: [self.group_uuid],
2138 locking.LEVEL_NODE: [],
2139 }
2141 self.share_locks = _ShareAll()
2143 def DeclareLocks(self, level):
2144 if level == locking.LEVEL_NODE:
2145 # Get members of node group; this is unsafe and needs verification later
2146 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2148 all_inst_info = self.cfg.GetAllInstancesInfo()
2150 # In Exec(), we warn about mirrored instances that have primary and
2151 # secondary living in separate node groups. To fully verify that
2152 # volumes for these instances are healthy, we will need to do an
2153 # extra call to their secondaries. We ensure here those nodes will
2154 # be locked.
2155 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2156 # Important: access only the instances whose lock is owned
2157 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2158 nodes.update(all_inst_info[inst].secondary_nodes)
2160 self.needed_locks[locking.LEVEL_NODE] = nodes
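# Illustrative sketch: when verifying group "g1" with members {n1, n2} and a
# DRBD instance whose secondary node n9 lives in another group, the node
# level above ends up requesting locks for {n1, n2, n9}, which makes the
# extra LV check on n9 possible later (all names invented).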
2162 def CheckPrereq(self):
2163 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2164 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2166 group_nodes = set(self.group_info.members)
2167 group_instances = \
2168 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2170 unlocked_nodes = \
2171 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2173 unlocked_instances = \
2174 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2177 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2178 utils.CommaJoin(unlocked_nodes),
2181 if unlocked_instances:
2182 raise errors.OpPrereqError("Missing lock for instances: %s" %
2183 utils.CommaJoin(unlocked_instances),
2184 errors.ECODE_STATE)
2186 self.all_node_info = self.cfg.GetAllNodesInfo()
2187 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2189 self.my_node_names = utils.NiceSort(group_nodes)
2190 self.my_inst_names = utils.NiceSort(group_instances)
2192 self.my_node_info = dict((name, self.all_node_info[name])
2193 for name in self.my_node_names)
2195 self.my_inst_info = dict((name, self.all_inst_info[name])
2196 for name in self.my_inst_names)
2198 # We detect here the nodes that will need the extra RPC calls for verifying
2199 # split LV volumes; they should be locked.
2200 extra_lv_nodes = set()
2202 for inst in self.my_inst_info.values():
2203 if inst.disk_template in constants.DTS_INT_MIRROR:
2204 for nname in inst.all_nodes:
2205 if self.all_node_info[nname].group != self.group_uuid:
2206 extra_lv_nodes.add(nname)
2208 unlocked_lv_nodes = \
2209 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2211 if unlocked_lv_nodes:
2212 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2213 utils.CommaJoin(unlocked_lv_nodes),
2214 errors.ECODE_STATE)
2215 self.extra_lv_nodes = list(extra_lv_nodes)
2217 def _VerifyNode(self, ninfo, nresult):
2218 """Perform some basic validation on data returned from a node.
2220 - check the result data structure is well formed and has all the
2221 mandatory fields
2222 - check ganeti version
2224 @type ninfo: L{objects.Node}
2225 @param ninfo: the node to check
2226 @param nresult: the results from the node
2227 @rtype: boolean
2228 @return: whether overall this call was successful (and we can expect
2229 reasonable values in the response)
2233 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2234 node = ninfo.name
2235 # main result, nresult should be a non-empty dict
2236 test = not nresult or not isinstance(nresult, dict)
2237 _ErrorIf(test, constants.CV_ENODERPC, node,
2238 "unable to verify node: no data returned")
2242 # compares ganeti version
2243 local_version = constants.PROTOCOL_VERSION
2244 remote_version = nresult.get("version", None)
2245 test = not (remote_version and
2246 isinstance(remote_version, (list, tuple)) and
2247 len(remote_version) == 2)
2248 _ErrorIf(test, constants.CV_ENODERPC, node,
2249 "connection to node returned invalid data")
2253 test = local_version != remote_version[0]
2254 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2255 "incompatible protocol versions: master %s,"
2256 " node %s", local_version, remote_version[0])
2260 # node seems compatible, we can actually try to look into its results
2262 # full package version
2263 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2264 constants.CV_ENODEVERSION, node,
2265 "software version mismatch: master %s, node %s",
2266 constants.RELEASE_VERSION, remote_version[1],
2267 code=self.ETYPE_WARNING)
2269 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2270 if ninfo.vm_capable and isinstance(hyp_result, dict):
2271 for hv_name, hv_result in hyp_result.iteritems():
2272 test = hv_result is not None
2273 _ErrorIf(test, constants.CV_ENODEHV, node,
2274 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2276 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2277 if ninfo.vm_capable and isinstance(hvp_result, list):
2278 for item, hv_name, hv_result in hvp_result:
2279 _ErrorIf(True, constants.CV_ENODEHV, node,
2280 "hypervisor %s parameter verify failure (source %s): %s",
2281 hv_name, item, hv_result)
2283 test = nresult.get(constants.NV_NODESETUP,
2284 ["Missing NODESETUP results"])
2285 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2286 "; ".join(test))
2288 return True
2290 def _VerifyNodeTime(self, ninfo, nresult,
2291 nvinfo_starttime, nvinfo_endtime):
2292 """Check the node time.
2294 @type ninfo: L{objects.Node}
2295 @param ninfo: the node to check
2296 @param nresult: the remote results for the node
2297 @param nvinfo_starttime: the start time of the RPC call
2298 @param nvinfo_endtime: the end time of the RPC call
2301 node = ninfo.name
2302 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2304 ntime = nresult.get(constants.NV_TIME, None)
2305 try:
2306 ntime_merged = utils.MergeTime(ntime)
2307 except (ValueError, TypeError):
2308 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2309 return
2311 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2312 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2313 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2314 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2318 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2319 "Node time diverges by at least %s from master node time",
2322 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2323 """Check the node LVM results.
2325 @type ninfo: L{objects.Node}
2326 @param ninfo: the node to check
2327 @param nresult: the remote results for the node
2328 @param vg_name: the configured VG name
2331 if vg_name is None:
2332 return
2334 node = ninfo.name
2335 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2337 # checks vg existence and size > 20G
2338 vglist = nresult.get(constants.NV_VGLIST, None)
2339 test = not vglist
2340 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2341 if not test:
2342 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2343 constants.MIN_VG_SIZE)
2344 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2346 # check pvs (and possibly all pvs)
2347 pvlist = nresult.get(constants.NV_PVLIST, None)
2348 test = pvlist is None
2349 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2350 if not test:
2351 # check that ':' is not present in PV names, since it's a
2352 # special character for lvcreate (denotes the range of PEs to
2353 # use on the PV)
2354 for _, pvname, owner_vg in pvlist:
2355 test = ":" in pvname
2356 _ErrorIf(test, constants.CV_ENODELVM, node,
2357 "Invalid character ':' in PV '%s' of VG '%s'",
2360 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2361 """Check the node bridges.
2363 @type ninfo: L{objects.Node}
2364 @param ninfo: the node to check
2365 @param nresult: the remote results for the node
2366 @param bridges: the expected list of bridges
2369 if not bridges:
2370 return
2372 node = ninfo.name
2373 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2375 missing = nresult.get(constants.NV_BRIDGES, None)
2376 test = not isinstance(missing, list)
2377 _ErrorIf(test, constants.CV_ENODENET, node,
2378 "did not return valid bridge information")
2379 if not test:
2380 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2381 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2383 def _VerifyNodeUserScripts(self, ninfo, nresult):
2384 """Check the results of user scripts presence and executability on the node
2386 @type ninfo: L{objects.Node}
2387 @param ninfo: the node to check
2388 @param nresult: the remote results for the node
2392 node = ninfo.name
2393 test = constants.NV_USERSCRIPTS not in nresult
2394 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2395 "did not return user scripts information")
2397 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2399 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2400 "user scripts not present or not executable: %s" %
2401 utils.CommaJoin(sorted(broken_scripts)))
2403 def _VerifyNodeNetwork(self, ninfo, nresult):
2404 """Check the node network connectivity results.
2406 @type ninfo: L{objects.Node}
2407 @param ninfo: the node to check
2408 @param nresult: the remote results for the node
2411 node = ninfo.name
2412 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2414 test = constants.NV_NODELIST not in nresult
2415 _ErrorIf(test, constants.CV_ENODESSH, node,
2416 "node hasn't returned node ssh connectivity data")
2417 if not test:
2418 if nresult[constants.NV_NODELIST]:
2419 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2420 _ErrorIf(True, constants.CV_ENODESSH, node,
2421 "ssh communication with node '%s': %s", a_node, a_msg)
2423 test = constants.NV_NODENETTEST not in nresult
2424 _ErrorIf(test, constants.CV_ENODENET, node,
2425 "node hasn't returned node tcp connectivity data")
2426 if not test:
2427 if nresult[constants.NV_NODENETTEST]:
2428 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2429 for anode in nlist:
2430 _ErrorIf(True, constants.CV_ENODENET, node,
2431 "tcp communication with node '%s': %s",
2432 anode, nresult[constants.NV_NODENETTEST][anode])
2434 test = constants.NV_MASTERIP not in nresult
2435 _ErrorIf(test, constants.CV_ENODENET, node,
2436 "node hasn't returned node master IP reachability data")
2437 if not test:
2438 if not nresult[constants.NV_MASTERIP]:
2439 if node == self.master_node:
2440 msg = "the master node cannot reach the master IP (not configured?)"
2442 msg = "cannot reach the master IP"
2443 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2445 def _VerifyInstance(self, instance, instanceconfig, node_image,
2446 diskstatus):
2447 """Verify an instance.
2449 This function checks to see if the required block devices are
2450 available on the instance's node.
2453 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2454 node_current = instanceconfig.primary_node
2456 node_vol_should = {}
2457 instanceconfig.MapLVsByNode(node_vol_should)
2459 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2460 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2461 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2463 for node in node_vol_should:
2464 n_img = node_image[node]
2465 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2466 # ignore missing volumes on offline or broken nodes
2467 continue
2468 for volume in node_vol_should[node]:
2469 test = volume not in n_img.volumes
2470 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2471 "volume %s missing on node %s", volume, node)
2473 if instanceconfig.admin_state == constants.ADMINST_UP:
2474 pri_img = node_image[node_current]
2475 test = instance not in pri_img.instances and not pri_img.offline
2476 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2477 "instance not running on its primary node %s",
2480 diskdata = [(nname, success, status, idx)
2481 for (nname, disks) in diskstatus.items()
2482 for idx, (success, status) in enumerate(disks)]
2484 for nname, success, bdev_status, idx in diskdata:
2485 # the 'ghost node' construction in Exec() ensures that we have a
2486 # node here
2487 snode = node_image[nname]
2488 bad_snode = snode.ghost or snode.offline
2489 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2490 not success and not bad_snode,
2491 constants.CV_EINSTANCEFAULTYDISK, instance,
2492 "couldn't retrieve status for disk/%s on %s: %s",
2493 idx, nname, bdev_status)
2494 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2495 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2496 constants.CV_EINSTANCEFAULTYDISK, instance,
2497 "disk/%s on %s is faulty", idx, nname)
2499 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2500 """Verify if there are any unknown volumes in the cluster.
2502 The .os, .swap and backup volumes are ignored. All other volumes are
2503 reported as unknown.
2505 @type reserved: L{ganeti.utils.FieldSet}
2506 @param reserved: a FieldSet of reserved volume names
2509 for node, n_img in node_image.items():
2510 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2511 self.all_node_info[node].group != self.group_uuid):
2512 # skip non-healthy nodes
2513 continue
2514 for volume in n_img.volumes:
2515 test = ((node not in node_vol_should or
2516 volume not in node_vol_should[node]) and
2517 not reserved.Matches(volume))
2518 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2519 "volume %s is unknown", volume)
2521 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2522 """Verify N+1 Memory Resilience.
2524 Check that if one single node dies we can still start all the
2525 instances it was primary for.
2528 cluster_info = self.cfg.GetClusterInfo()
2529 for node, n_img in node_image.items():
2530 # This code checks that every node which is now listed as
2531 # secondary has enough memory to host all instances it is
2532 # supposed to, should a single other node in the cluster fail.
2533 # FIXME: not ready for failover to an arbitrary node
2534 # FIXME: does not support file-backed instances
2535 # WARNING: we currently take into account down instances as well
2536 # as up ones, considering that even if they're down someone
2537 # might want to start them even in the event of a node failure.
2538 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2539 # we're skipping nodes marked offline and nodes in other groups from
2540 # the N+1 warning, since most likely we don't have good memory
2541 # information from them; we already list instances living on such
2542 # nodes, and that's enough warning
2543 continue
2544 #TODO(dynmem): also consider ballooning out other instances
2545 for prinode, instances in n_img.sbp.items():
2546 needed_mem = 0
2547 for instance in instances:
2548 bep = cluster_info.FillBE(instance_cfg[instance])
2549 if bep[constants.BE_AUTO_BALANCE]:
2550 needed_mem += bep[constants.BE_MINMEM]
2551 test = n_img.mfree < needed_mem
2552 self._ErrorIf(test, constants.CV_ENODEN1, node,
2553 "not enough memory to accomodate instance failovers"
2554 " should node %s fail (%dMiB needed, %dMiB available)",
2555 prinode, needed_mem, n_img.mfree)
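# Illustrative sketch: if node A is secondary for two auto-balanced
# instances with BE_MINMEM of 1024 and 2048 MiB whose primary is node P, a
# failure of P requires needed_mem = 3072 MiB on A; the N+1 error fires when
# A's reported mfree falls below that (numbers invented).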
2557 @classmethod
2558 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2559 (files_all, files_opt, files_mc, files_vm)):
2560 """Verifies file checksums collected from all nodes.
2562 @param errorif: Callback for reporting errors
2563 @param nodeinfo: List of L{objects.Node} objects
2564 @param master_node: Name of master node
2565 @param all_nvinfo: RPC results
2568 # Define functions determining which nodes to consider for a file
2569 files2nodefn = [
2570 (files_all, None),
2571 (files_mc, lambda node: (node.master_candidate or
2572 node.name == master_node)),
2573 (files_vm, lambda node: node.vm_capable),
2574 ]
2576 # Build mapping from filename to list of nodes which should have the file
2577 nodefiles = {}
2578 for (files, fn) in files2nodefn:
2579 if fn is None:
2580 filenodes = nodeinfo
2581 else:
2582 filenodes = filter(fn, nodeinfo)
2583 nodefiles.update((filename,
2584 frozenset(map(operator.attrgetter("name"), filenodes)))
2585 for filename in files)
2587 assert set(nodefiles) == (files_all | files_mc | files_vm)
2589 fileinfo = dict((filename, {}) for filename in nodefiles)
2590 ignore_nodes = set()
2592 for node in nodeinfo:
2593 if node.offline:
2594 ignore_nodes.add(node.name)
2595 continue
2597 nresult = all_nvinfo[node.name]
2599 if nresult.fail_msg or not nresult.payload:
2600 node_files = None
2601 else:
2602 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2604 test = not (node_files and isinstance(node_files, dict))
2605 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2606 "Node did not return file checksum data")
2608 ignore_nodes.add(node.name)
2611 # Build per-checksum mapping from filename to nodes having it
2612 for (filename, checksum) in node_files.items():
2613 assert filename in nodefiles
2614 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2616 for (filename, checksums) in fileinfo.items():
2617 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2619 # Nodes having the file
2620 with_file = frozenset(node_name
2621 for nodes in fileinfo[filename].values()
2622 for node_name in nodes) - ignore_nodes
2624 expected_nodes = nodefiles[filename] - ignore_nodes
2626 # Nodes missing file
2627 missing_file = expected_nodes - with_file
2629 if filename in files_opt:
2630 # All or no nodes
2631 errorif(missing_file and missing_file != expected_nodes,
2632 constants.CV_ECLUSTERFILECHECK, None,
2633 "File %s is optional, but it must exist on all or no"
2634 " nodes (not found on %s)",
2635 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2636 else:
2637 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2638 "File %s is missing from node(s) %s", filename,
2639 utils.CommaJoin(utils.NiceSort(missing_file)))
2641 # Warn if a node has a file it shouldn't
2642 unexpected = with_file - expected_nodes
2643 errorif(unexpected,
2644 constants.CV_ECLUSTERFILECHECK, None,
2645 "File %s should not exist on node(s) %s",
2646 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2648 # See if there are multiple versions of the file
2649 test = len(checksums) > 1
2650 if test:
2651 variants = ["variant %s on %s" %
2652 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2653 for (idx, (checksum, nodes)) in
2654 enumerate(sorted(checksums.items()))]
2655 else:
2656 variants = []
2658 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2659 "File %s found with %s different checksums (%s)",
2660 filename, len(checksums), "; ".join(variants))
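# Illustrative sketch: fileinfo maps each filename to {checksum: set(nodes)},
# so a config file seen as {"abc...": set(["n1", "n2"]), "def...": set(["n3"])}
# is reported as found with 2 different checksums (names and hashes invented).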
2662 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2663 drbd_map):
2664 """Verifies the node DRBD status.
2666 @type ninfo: L{objects.Node}
2667 @param ninfo: the node to check
2668 @param nresult: the remote results for the node
2669 @param instanceinfo: the dict of instances
2670 @param drbd_helper: the configured DRBD usermode helper
2671 @param drbd_map: the DRBD map as returned by
2672 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2675 node = ninfo.name
2676 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2678 if drbd_helper:
2679 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2680 test = (helper_result is None)
2681 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2682 "no drbd usermode helper returned")
2683 if helper_result:
2684 status, payload = helper_result
2685 test = not status
2686 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2687 "drbd usermode helper check unsuccessful: %s", payload)
2688 test = status and (payload != drbd_helper)
2689 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2690 "wrong drbd usermode helper: %s", payload)
2692 # compute the DRBD minors
2693 node_drbd = {}
2694 for minor, instance in drbd_map[node].items():
2695 test = instance not in instanceinfo
2696 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2697 "ghost instance '%s' in temporary DRBD map", instance)
2698 # ghost instance should not be running, but otherwise we
2699 # don't give double warnings (both ghost instance and
2700 # unallocated minor in use)
2701 if test:
2702 node_drbd[minor] = (instance, False)
2703 else:
2704 instance = instanceinfo[instance]
2705 node_drbd[minor] = (instance.name,
2706 instance.admin_state == constants.ADMINST_UP)
2708 # and now check them
2709 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2710 test = not isinstance(used_minors, (tuple, list))
2711 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2712 "cannot parse drbd status file: %s", str(used_minors))
2714 # we cannot check drbd status
2717 for minor, (iname, must_exist) in node_drbd.items():
2718 test = minor not in used_minors and must_exist
2719 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2720 "drbd minor %d of instance %s is not active", minor, iname)
2721 for minor in used_minors:
2722 test = minor not in node_drbd
2723 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2724 "unallocated drbd minor %d is in use", minor)
2726 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2727 """Builds the node OS structures.
2729 @type ninfo: L{objects.Node}
2730 @param ninfo: the node to check
2731 @param nresult: the remote results for the node
2732 @param nimg: the node image object
2735 node = ninfo.name
2736 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2738 remote_os = nresult.get(constants.NV_OSLIST, None)
2739 test = (not isinstance(remote_os, list) or
2740 not compat.all(isinstance(v, list) and len(v) == 7
2741 for v in remote_os))
2743 _ErrorIf(test, constants.CV_ENODEOS, node,
2744 "node hasn't returned valid OS data")
2753 for (name, os_path, status, diagnose,
2754 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2756 if name not in os_dict:
2757 os_dict[name] = []
2759 # parameters is a list of lists instead of list of tuples due to
2760 # JSON lacking a real tuple type, fix it:
2761 parameters = [tuple(v) for v in parameters]
2762 os_dict[name].append((os_path, status, diagnose,
2763 set(variants), set(parameters), set(api_ver)))
2765 nimg.oslist = os_dict
2767 def _VerifyNodeOS(self, ninfo, nimg, base):
2768 """Verifies the node OS list.
2770 @type ninfo: L{objects.Node}
2771 @param ninfo: the node to check
2772 @param nimg: the node image object
2773 @param base: the 'template' node we match against (e.g. from the master)
2776 node = ninfo.name
2777 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2779 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2781 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2782 for os_name, os_data in nimg.oslist.items():
2783 assert os_data, "Empty OS status for OS %s?!" % os_name
2784 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2785 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2786 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2787 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2788 "OS '%s' has multiple entries (first one shadows the rest): %s",
2789 os_name, utils.CommaJoin([v[0] for v in os_data]))
2790 # comparisons with the 'base' image
2791 test = os_name not in base.oslist
2792 _ErrorIf(test, constants.CV_ENODEOS, node,
2793 "Extra OS %s not present on reference node (%s)",
2797 assert base.oslist[os_name], "Base node has empty OS status?"
2798 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2799 if not b_status:
2800 # base OS is invalid, skipping
2801 continue
2802 for kind, a, b in [("API version", f_api, b_api),
2803 ("variants list", f_var, b_var),
2804 ("parameters", beautify_params(f_param),
2805 beautify_params(b_param))]:
2806 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2807 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2808 kind, os_name, base.name,
2809 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2811 # check any missing OSes
2812 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2813 _ErrorIf(missing, constants.CV_ENODEOS, node,
2814 "OSes present on reference node %s but missing on this node: %s",
2815 base.name, utils.CommaJoin(missing))
2817 def _VerifyOob(self, ninfo, nresult):
2818 """Verifies out of band functionality of a node.
2820 @type ninfo: L{objects.Node}
2821 @param ninfo: the node to check
2822 @param nresult: the remote results for the node
2825 node = ninfo.name
2826 # We just have to verify the paths on master and/or master candidates
2827 # as the oob helper is invoked on the master
2828 if ((ninfo.master_candidate or ninfo.master_capable) and
2829 constants.NV_OOB_PATHS in nresult):
2830 for path_result in nresult[constants.NV_OOB_PATHS]:
2831 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2833 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2834 """Verifies and updates the node volume data.
2836 This function will update a L{NodeImage}'s internal structures
2837 with data from the remote call.
2839 @type ninfo: L{objects.Node}
2840 @param ninfo: the node to check
2841 @param nresult: the remote results for the node
2842 @param nimg: the node image object
2843 @param vg_name: the configured VG name
2846 node = ninfo.name
2847 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2849 nimg.lvm_fail = True
2850 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2851 if vg_name is None:
2852 pass
2853 elif isinstance(lvdata, basestring):
2854 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2855 utils.SafeEncode(lvdata))
2856 elif not isinstance(lvdata, dict):
2857 _ErrorIf(True, constants.CV_ENODELVM, node,
2858 "rpc call to node failed (lvlist)")
2859 else:
2860 nimg.volumes = lvdata
2861 nimg.lvm_fail = False
2863 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2864 """Verifies and updates the node instance list.
2866 If the listing was successful, then updates this node's instance
2867 list. Otherwise, it marks the RPC call as failed for the instance
2870 @type ninfo: L{objects.Node}
2871 @param ninfo: the node to check
2872 @param nresult: the remote results for the node
2873 @param nimg: the node image object
2876 idata = nresult.get(constants.NV_INSTANCELIST, None)
2877 test = not isinstance(idata, list)
2878 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2879 "rpc call to node failed (instancelist): %s",
2880 utils.SafeEncode(str(idata)))
2881 if test:
2882 nimg.hyp_fail = True
2883 else:
2884 nimg.instances = idata
2886 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2887 """Verifies and computes a node information map
2889 @type ninfo: L{objects.Node}
2890 @param ninfo: the node to check
2891 @param nresult: the remote results for the node
2892 @param nimg: the node image object
2893 @param vg_name: the configured VG name
2896 node = ninfo.name
2897 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2899 # try to read free memory (from the hypervisor)
2900 hv_info = nresult.get(constants.NV_HVINFO, None)
2901 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2902 _ErrorIf(test, constants.CV_ENODEHV, node,
2903 "rpc call to node failed (hvinfo)")
2906 nimg.mfree = int(hv_info["memory_free"])
2907 except (ValueError, TypeError):
2908 _ErrorIf(True, constants.CV_ENODERPC, node,
2909 "node returned invalid nodeinfo, check hypervisor")
2911 # FIXME: devise a free space model for file based instances as well
2912 if vg_name is not None:
2913 test = (constants.NV_VGLIST not in nresult or
2914 vg_name not in nresult[constants.NV_VGLIST])
2915 _ErrorIf(test, constants.CV_ENODELVM, node,
2916 "node didn't return data for the volume group '%s'"
2917 " - it is either missing or broken", vg_name)
2918 if not test:
2919 try:
2920 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2921 except (ValueError, TypeError):
2922 _ErrorIf(True, constants.CV_ENODERPC, node,
2923 "node returned invalid LVM info, check LVM status")
2925 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2926 """Gets per-disk status information for all instances.
2928 @type nodelist: list of strings
2929 @param nodelist: Node names
2930 @type node_image: dict of (name, L{objects.Node})
2931 @param node_image: Node objects
2932 @type instanceinfo: dict of (name, L{objects.Instance})
2933 @param instanceinfo: Instance objects
2934 @rtype: {instance: {node: [(success, payload)]}}
2935 @return: a dictionary of per-instance dictionaries with nodes as
2936 keys and disk information as values; the disk information is a
2937 list of tuples (success, payload)
2940 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2942 node_disks = {}
2943 node_disks_devonly = {}
2944 diskless_instances = set()
2945 diskless = constants.DT_DISKLESS
2947 for nname in nodelist:
2948 node_instances = list(itertools.chain(node_image[nname].pinst,
2949 node_image[nname].sinst))
2950 diskless_instances.update(inst for inst in node_instances
2951 if instanceinfo[inst].disk_template == diskless)
2952 disks = [(inst, disk)
2953 for inst in node_instances
2954 for disk in instanceinfo[inst].disks]
2956 if not disks:
2957 # No need to collect data
2958 continue
2960 node_disks[nname] = disks
2962 # _AnnotateDiskParams already makes copies of the disks
2963 devonly = []
2964 for (inst, dev) in disks:
2965 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2966 self.cfg.SetDiskID(anno_disk, nname)
2967 devonly.append(anno_disk)
2969 node_disks_devonly[nname] = devonly
2971 assert len(node_disks) == len(node_disks_devonly)
2973 # Collect data from all nodes with disks
2974 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2975 node_disks_devonly)
2977 assert len(result) == len(node_disks)
2979 instdisk = {}
2981 for (nname, nres) in result.items():
2982 disks = node_disks[nname]
2984 if nres.offline:
2985 # No data from this node
2986 data = len(disks) * [(False, "node offline")]
2987 else:
2988 msg = nres.fail_msg
2989 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2990 "while getting disk information: %s", msg)
2991 if msg:
2992 # No data from this node
2993 data = len(disks) * [(False, msg)]
2994 else:
2995 data = []
2996 for idx, i in enumerate(nres.payload):
2997 if isinstance(i, (tuple, list)) and len(i) == 2:
2998 data.append(i)
2999 else:
3000 logging.warning("Invalid result from node %s, entry %d: %s",
3001 nname, idx, i)
3002 data.append((False, "Invalid result from the remote node"))
3004 for ((inst, _), status) in zip(disks, data):
3005 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3007 # Add empty entries for diskless instances.
3008 for inst in diskless_instances:
3009 assert inst not in instdisk
3010 instdisk[inst] = {}
3012 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3013 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3014 compat.all(isinstance(s, (tuple, list)) and
3015 len(s) == 2 for s in statuses)
3016 for inst, nnames in instdisk.items()
3017 for nname, statuses in nnames.items())
3018 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
3020 return instdisk
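# Illustrative sketch of the returned mapping for a one-disk DRBD instance
# "web1" on nodes n1/n2:
#   {"web1": {"n1": [(True, status)], "n2": [(True, status)]}}
# with an empty dict for a diskless instance (all names invented).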
3022 @staticmethod
3023 def _SshNodeSelector(group_uuid, all_nodes):
3024 """Create endless iterators for all potential SSH check hosts.
3027 nodes = [node for node in all_nodes
3028 if (node.group != group_uuid and
3029 not node.offline)]
3030 keyfunc = operator.attrgetter("group")
3032 return map(itertools.cycle,
3033 [sorted(map(operator.attrgetter("name"), names))
3034 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3035 keyfunc)])
3037 @classmethod
3038 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3039 """Choose which nodes should talk to which other nodes.
3041 We will make nodes contact all nodes in their group, and one node from
3042 every other group.
3044 @warning: This algorithm has a known issue if one node group is much
3045 smaller than others (e.g. just one node). In such a case all other
3046 nodes will talk to the single node.
3049 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3050 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3052 return (online_nodes,
3053 dict((name, sorted([i.next() for i in sel]))
3054 for name in online_nodes))
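# Illustrative sketch: with groups g1 = {n1, n2} (being verified) and
# g2 = {n3, n4}, the selector above cycles through g2 so that n1 might be
# told to SSH-check n3 and n2 to check n4, spreading the cross-group checks
# while every node still checks all nodes of its own group (names invented).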
3056 def BuildHooksEnv(self):
3057 """Build hooks env.
3059 Cluster-Verify hooks run only in the post phase; their failure is
3060 logged in the verify output and makes the verification fail.
3063 env = {
3064 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3065 }
3067 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3068 for node in self.my_node_info.values())
3070 return env
3072 def BuildHooksNodes(self):
3073 """Build hooks nodes.
3076 return ([], self.my_node_names)
3078 def Exec(self, feedback_fn):
3079 """Verify integrity of the node group, performing various test on nodes.
3082 # This method has too many local variables. pylint: disable=R0914
3083 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3085 if not self.my_node_names:
3086 # empty node group
3087 feedback_fn("* Empty node group, skipping verification")
3088 return True
3090 self.bad = False
3091 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3092 verbose = self.op.verbose
3093 self._feedback_fn = feedback_fn
3095 vg_name = self.cfg.GetVGName()
3096 drbd_helper = self.cfg.GetDRBDHelper()
3097 cluster = self.cfg.GetClusterInfo()
3098 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3099 hypervisors = cluster.enabled_hypervisors
3100 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3102 i_non_redundant = [] # Non redundant instances
3103 i_non_a_balanced = [] # Non auto-balanced instances
3104 i_offline = 0 # Count of offline instances
3105 n_offline = 0 # Count of offline nodes
3106 n_drained = 0 # Count of nodes being drained
3107 node_vol_should = {}
3109 # FIXME: verify OS list
3111 # File verification
3112 filemap = _ComputeAncillaryFiles(cluster, False)
3114 # do local checksums
3115 master_node = self.master_node = self.cfg.GetMasterNode()
3116 master_ip = self.cfg.GetMasterIP()
3118 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3120 user_scripts = []
3121 if self.cfg.GetUseExternalMipScript():
3122 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3124 node_verify_param = {
3125 constants.NV_FILELIST:
3126 utils.UniqueSequence(filename
3127 for files in filemap
3128 for filename in files),
3129 constants.NV_NODELIST:
3130 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3131 self.all_node_info.values()),
3132 constants.NV_HYPERVISOR: hypervisors,
3133 constants.NV_HVPARAMS:
3134 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3135 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3136 for node in node_data_list
3137 if not node.offline],
3138 constants.NV_INSTANCELIST: hypervisors,
3139 constants.NV_VERSION: None,
3140 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3141 constants.NV_NODESETUP: None,
3142 constants.NV_TIME: None,
3143 constants.NV_MASTERIP: (master_node, master_ip),
3144 constants.NV_OSLIST: None,
3145 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3146 constants.NV_USERSCRIPTS: user_scripts,
3147 }
3149 if vg_name is not None:
3150 node_verify_param[constants.NV_VGLIST] = None
3151 node_verify_param[constants.NV_LVLIST] = vg_name
3152 node_verify_param[constants.NV_PVLIST] = [vg_name]
3154 if drbd_helper:
3155 node_verify_param[constants.NV_DRBDLIST] = None
3156 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3159 # FIXME: this needs to be changed per node-group, not cluster-wide
3160 bridges = set()
3161 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3162 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3163 bridges.add(default_nicpp[constants.NIC_LINK])
3164 for instance in self.my_inst_info.values():
3165 for nic in instance.nics:
3166 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3167 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3168 bridges.add(full_nic[constants.NIC_LINK])
3170 if bridges:
3171 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3173 # Build our expected cluster state
3174 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3175 name=node.name,
3176 vm_capable=node.vm_capable))
3177 for node in node_data_list)
3180 oob_paths = []
3181 for node in self.all_node_info.values():
3182 path = _SupportsOob(self.cfg, node)
3183 if path and path not in oob_paths:
3184 oob_paths.append(path)
3186 if oob_paths:
3187 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3189 for instance in self.my_inst_names:
3190 inst_config = self.my_inst_info[instance]
3191 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3192 i_offline += 1
3194 for nname in inst_config.all_nodes:
3195 if nname not in node_image:
3196 gnode = self.NodeImage(name=nname)
3197 gnode.ghost = (nname not in self.all_node_info)
3198 node_image[nname] = gnode
3200 inst_config.MapLVsByNode(node_vol_should)
3202 pnode = inst_config.primary_node
3203 node_image[pnode].pinst.append(instance)
3205 for snode in inst_config.secondary_nodes:
3206 nimg = node_image[snode]
3207 nimg.sinst.append(instance)
3208 if pnode not in nimg.sbp:
3209 nimg.sbp[pnode] = []
3210 nimg.sbp[pnode].append(instance)
3212 # At this point, we have the in-memory data structures complete,
3213 # except for the runtime information, which we'll gather next
3215 # Due to the way our RPC system works, exact response times cannot be
3216 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3217 # time before and after executing the request, we can at least have a time
3218 # window.
3219 nvinfo_starttime = time.time()
3220 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3221 node_verify_param,
3222 self.cfg.GetClusterName())
3223 nvinfo_endtime = time.time()
3225 if self.extra_lv_nodes and vg_name is not None:
3226 extra_lv_nvinfo = \
3227 self.rpc.call_node_verify(self.extra_lv_nodes,
3228 {constants.NV_LVLIST: vg_name},
3229 self.cfg.GetClusterName())
3230 else:
3231 extra_lv_nvinfo = {}
3233 all_drbd_map = self.cfg.ComputeDRBDMap()
3235 feedback_fn("* Gathering disk information (%s nodes)" %
3236 len(self.my_node_names))
3237 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3238 self.my_inst_info)
3240 feedback_fn("* Verifying configuration file consistency")
3242 # If not all nodes are being checked, we need to make sure the master node
3243 # and a non-checked vm_capable node are in the list.
3244 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3245 if absent_nodes:
3246 vf_nvinfo = all_nvinfo.copy()
3247 vf_node_info = list(self.my_node_info.values())
3248 additional_nodes = []
3249 if master_node not in self.my_node_info:
3250 additional_nodes.append(master_node)
3251 vf_node_info.append(self.all_node_info[master_node])
3252 # Add the first vm_capable node we find which is not included,
3253 # excluding the master node (which we already have)
3254 for node in absent_nodes:
3255 nodeinfo = self.all_node_info[node]
3256 if (nodeinfo.vm_capable and not nodeinfo.offline and
3257 node != master_node):
3258 additional_nodes.append(node)
3259 vf_node_info.append(self.all_node_info[node])
3261 key = constants.NV_FILELIST
3262 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3263 {key: node_verify_param[key]},
3264 self.cfg.GetClusterName()))
3265 else:
3266 vf_nvinfo = all_nvinfo
3267 vf_node_info = self.my_node_info.values()
3269 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3271 feedback_fn("* Verifying node status")
3275 for node_i in node_data_list:
3277 nimg = node_image[node]
3281 feedback_fn("* Skipping offline node %s" % (node,))
3285 if node == master_node:
3287 elif node_i.master_candidate:
3288 ntype = "master candidate"
3289 elif node_i.drained:
3295 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3297 msg = all_nvinfo[node].fail_msg
3298 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3299 msg)
3300 if msg:
3301 nimg.rpc_fail = True
3302 continue
3304 nresult = all_nvinfo[node].payload
3306 nimg.call_ok = self._VerifyNode(node_i, nresult)
3307 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3308 self._VerifyNodeNetwork(node_i, nresult)
3309 self._VerifyNodeUserScripts(node_i, nresult)
3310 self._VerifyOob(node_i, nresult)
3312 if nimg.vm_capable:
3313 self._VerifyNodeLVM(node_i, nresult, vg_name)
3314 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3315 all_drbd_map)
3317 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3318 self._UpdateNodeInstances(node_i, nresult, nimg)
3319 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3320 self._UpdateNodeOS(node_i, nresult, nimg)
3322 if not nimg.os_fail:
3323 if refos_img is None:
3324 refos_img = nimg
3325 self._VerifyNodeOS(node_i, nimg, refos_img)
3326 self._VerifyNodeBridges(node_i, nresult, bridges)
3328 # Check whether all running instances are primary for the node. (This
3329 # can no longer be done from _VerifyInstance below, since some of the
3330 # wrong instances could be from other node groups.)
3331 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3333 for inst in non_primary_inst:
3334 test = inst in self.all_inst_info
3335 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3336 "instance should not run on node %s", node_i.name)
3337 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3338 "node is running unknown instance %s", inst)
3340 for node, result in extra_lv_nvinfo.items():
3341 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3342 node_image[node], vg_name)
3344 feedback_fn("* Verifying instance status")
3345 for instance in self.my_inst_names:
3347 feedback_fn("* Verifying instance %s" % instance)
3348 inst_config = self.my_inst_info[instance]
3349 self._VerifyInstance(instance, inst_config, node_image,
3350 instdisk[instance])
3351 inst_nodes_offline = []
3353 pnode = inst_config.primary_node
3354 pnode_img = node_image[pnode]
3355 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3356 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3357 " primary node failed", instance)
3359 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3360 pnode_img.offline,
3361 constants.CV_EINSTANCEBADNODE, instance,
3362 "instance is marked as running and lives on offline node %s",
3363 inst_config.primary_node)
3365 # If the instance is non-redundant we cannot survive losing its primary
3366 # node, so we are not N+1 compliant.
3367 if inst_config.disk_template not in constants.DTS_MIRRORED:
3368 i_non_redundant.append(instance)
3370 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3371 constants.CV_EINSTANCELAYOUT,
3372 instance, "instance has multiple secondary nodes: %s",
3373 utils.CommaJoin(inst_config.secondary_nodes),
3374 code=self.ETYPE_WARNING)
3376 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3377 pnode = inst_config.primary_node
3378 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3379 instance_groups = {}
3381 for node in instance_nodes:
3382 instance_groups.setdefault(self.all_node_info[node].group,
3383 []).append(node)
3385 pretty_list = [
3386 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3387 # Sort so that we always list the primary node first.
3388 for group, nodes in sorted(instance_groups.items(),
3389 key=lambda (_, nodes): pnode in nodes,
3390 reverse=True)]
3392 self._ErrorIf(len(instance_groups) > 1,
3393 constants.CV_EINSTANCESPLITGROUPS,
3394 instance, "instance has primary and secondary nodes in"
3395 " different groups: %s", utils.CommaJoin(pretty_list),
3396 code=self.ETYPE_WARNING)
3398 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3399 i_non_a_balanced.append(instance)
3401 for snode in inst_config.secondary_nodes:
3402 s_img = node_image[snode]
3403 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3404 snode, "instance %s, connection to secondary node failed",
3408 inst_nodes_offline.append(snode)
3410 # warn that the instance lives on offline nodes
3411 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3412 "instance has offline secondary node(s) %s",
3413 utils.CommaJoin(inst_nodes_offline))
3414 # ... or ghost/non-vm_capable nodes
3415 for node in inst_config.all_nodes:
3416 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3417 instance, "instance lives on ghost node %s", node)
3418 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3419 instance, "instance lives on non-vm_capable node %s", node)
3421 feedback_fn("* Verifying orphan volumes")
3422 reserved = utils.FieldSet(*cluster.reserved_lvs)
3424 # We will get spurious "unknown volume" warnings if any node of this group
3425 # is secondary for an instance whose primary is in another group. To avoid
3426 # them, we find these instances and add their volumes to node_vol_should.
3427 for inst in self.all_inst_info.values():
3428 for secondary in inst.secondary_nodes:
3429 if (secondary in self.my_node_info
3430 and inst.name not in self.my_inst_info):
3431 inst.MapLVsByNode(node_vol_should)
3432 break
3434 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3436 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3437 feedback_fn("* Verifying N+1 Memory redundancy")
3438 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3440 feedback_fn("* Other Notes")
3442 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3443 % len(i_non_redundant))
3445 if i_non_a_balanced:
3446 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3447 % len(i_non_a_balanced))
3450 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3453 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3456 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3460 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3461 """Analyze the post-hooks' result
3463 This method analyses the hook result, handles it, and sends some
3464 nicely-formatted feedback back to the user.
3466 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3467 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3468 @param hooks_results: the results of the multi-node hooks rpc call
3469 @param feedback_fn: function used to send feedback back to the caller
3470 @param lu_result: previous Exec result
3471 @return: the new Exec result, based on the previous result
3475 # We only really run POST phase hooks, only for non-empty groups,
3476 # and are only interested in their results
3477 if not self.my_node_names:
3478 # empty node group
3479 pass
3480 elif phase == constants.HOOKS_PHASE_POST:
3481 # Used to change hooks' output to proper indentation
3482 feedback_fn("* Hooks Results")
3483 assert hooks_results, "invalid result from hooks"
3485 for node_name in hooks_results:
3486 res = hooks_results[node_name]
3487 msg = res.fail_msg
3488 test = msg and not res.offline
3489 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3490 "Communication failure in hooks execution: %s", msg)
3491 if res.offline or msg:
3492 # No need to investigate payload if node is offline or gave
3493 # an error
3494 continue
3495 for script, hkr, output in res.payload:
3496 test = hkr == constants.HKR_FAIL
3497 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3498 "Script %s failed, output:", script)
3500 output = self._HOOKS_INDENT_RE.sub(" ", output)
3501 feedback_fn("%s" % output)
3507 class LUClusterVerifyDisks(NoHooksLU):
3508 """Verifies the cluster disks status.
3513 def ExpandNames(self):
3514 self.share_locks = _ShareAll()
3515 self.needed_locks = {
3516 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3517 }
3519 def Exec(self, feedback_fn):
3520 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3522 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3523 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3524 for group in group_names])
3527 class LUGroupVerifyDisks(NoHooksLU):
3528 """Verifies the status of all disks in a node group.
3533 def ExpandNames(self):
3534 # Raises errors.OpPrereqError on its own if group can't be found
3535 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3537 self.share_locks = _ShareAll()
3538 self.needed_locks = {
3539 locking.LEVEL_INSTANCE: [],
3540 locking.LEVEL_NODEGROUP: [],
3541 locking.LEVEL_NODE: [],
3542 }
3544 def DeclareLocks(self, level):
3545 if level == locking.LEVEL_INSTANCE:
3546 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3548 # Lock instances optimistically, needs verification once node and group
3549 # locks have been acquired
3550 self.needed_locks[locking.LEVEL_INSTANCE] = \
3551 self.cfg.GetNodeGroupInstances(self.group_uuid)
3553 elif level == locking.LEVEL_NODEGROUP:
3554 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3556 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3557 set([self.group_uuid] +
3558 # Lock all groups used by instances optimistically; this requires
3559 # going via the node before it's locked, requiring verification
3560 # later on
3561 [group_uuid
3562 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3563 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3565 elif level == locking.LEVEL_NODE:
3566 # This will only lock the nodes in the group to be verified which contain
3567 # actual instances
3568 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3569 self._LockInstancesNodes()
3571 # Lock all nodes in group to be verified
3572 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3573 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3574 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3576 def CheckPrereq(self):
3577 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3578 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3579 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3581 assert self.group_uuid in owned_groups
3583 # Check if locked instances are still correct
3584 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3586 # Get instance information
3587 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3589 # Check if node groups for locked instances are still correct
3590 _CheckInstancesNodeGroups(self.cfg, self.instances,
3591 owned_groups, owned_nodes, self.group_uuid)
3593 def Exec(self, feedback_fn):
3594 """Verify integrity of cluster disks.
3596 @rtype: tuple of three items
3597 @return: a tuple of (dict of node-to-node_error, list of instances
3598 which need activate-disks, dict of instance: (node, volume) for
3599 missing volumes
3602 res_nodes = {}
3603 res_instances = set()
3604 res_missing = {}
3606 nv_dict = _MapInstanceDisksToNodes([inst
3607 for inst in self.instances.values()
3608 if inst.admin_state == constants.ADMINST_UP])
3610 if nv_dict:
3611 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3612 set(self.cfg.GetVmCapableNodeList()))
3614 node_lvs = self.rpc.call_lv_list(nodes, [])
3616 for (node, node_res) in node_lvs.items():
3617 if node_res.offline:
3618 continue
3620 msg = node_res.fail_msg
3621 if msg:
3622 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3623 res_nodes[node] = msg
3624 continue
3626 for lv_name, (_, _, lv_online) in node_res.payload.items():
3627 inst = nv_dict.pop((node, lv_name), None)
3628 if not (lv_online or inst is None):
3629 res_instances.add(inst)
3631 # any leftover items in nv_dict are missing LVs, let's arrange the data
3632 # better
3633 for key, inst in nv_dict.iteritems():
3634 res_missing.setdefault(inst, []).append(list(key))
3636 return (res_nodes, list(res_instances), res_missing)
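# Illustrative sketch of the Exec() result above, with invented names: one
# node that failed LV enumeration, one instance needing activate-disks and
# one instance with a missing volume would yield
#   ({"n3": "Connection refused"}, ["web1"], {"db1": [["n2", "xenvg/disk0"]]})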
3639 class LUClusterRepairDiskSizes(NoHooksLU):
3640 """Verifies the cluster disks sizes.
3645 def ExpandNames(self):
3646 if self.op.instances:
3647 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3648 self.needed_locks = {
3649 locking.LEVEL_NODE_RES: [],
3650 locking.LEVEL_INSTANCE: self.wanted_names,
3651 }
3652 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3653 else:
3654 self.wanted_names = None
3655 self.needed_locks = {
3656 locking.LEVEL_NODE_RES: locking.ALL_SET,
3657 locking.LEVEL_INSTANCE: locking.ALL_SET,
3658 }
3659 self.share_locks = {
3660 locking.LEVEL_NODE_RES: 1,
3661 locking.LEVEL_INSTANCE: 0,
3662 }
3664 def DeclareLocks(self, level):
3665 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3666 self._LockInstancesNodes(primary_only=True, level=level)
3668 def CheckPrereq(self):
3669 """Check prerequisites.
3671 This only checks the optional instance list against the existing names.
3674 if self.wanted_names is None:
3675 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3677 self.wanted_instances = \
3678 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3680 def _EnsureChildSizes(self, disk):
3681 """Ensure children of the disk have the needed disk size.
3683 This is valid mainly for DRBD8 and fixes an issue where the
3684 children have smaller disk size.
3686 @param disk: an L{ganeti.objects.Disk} object
3689 if disk.dev_type == constants.LD_DRBD8:
3690 assert disk.children, "Empty children for DRBD8?"
3691 fchild = disk.children[0]
3692 mismatch = fchild.size < disk.size
3694 self.LogInfo("Child disk has size %d, parent %d, fixing",
3695 fchild.size, disk.size)
3696 fchild.size = disk.size
3698 # and we recurse on this child only, not on the metadev
3699 return self._EnsureChildSizes(fchild) or mismatch
3700 else:
3701 return False
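# Illustrative sketch: for a DRBD8 disk recorded at 10240 MiB whose data
# child was recorded at 10236 MiB, _EnsureChildSizes bumps the child to
# 10240 MiB and returns True, so Exec() below writes the configuration back
# (sizes invented).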
3703 def Exec(self, feedback_fn):
3704 """Verify the size of cluster disks.
3707 # TODO: check child disks too
3708 # TODO: check differences in size between primary/secondary nodes
3709 per_node_disks = {}
3710 for instance in self.wanted_instances:
3711 pnode = instance.primary_node
3712 if pnode not in per_node_disks:
3713 per_node_disks[pnode] = []
3714 for idx, disk in enumerate(instance.disks):
3715 per_node_disks[pnode].append((instance, idx, disk))
3717 assert not (frozenset(per_node_disks.keys()) -
3718 self.owned_locks(locking.LEVEL_NODE_RES)), \
3719 "Not owning correct locks"
3720     assert not self.owned_locks(locking.LEVEL_NODE)
3722     changed = []
3723     for node, dskl in per_node_disks.items():
3724       newl = [v[2].Copy() for v in dskl]
3725       for dsk in newl:
3726         self.cfg.SetDiskID(dsk, node)
3727       result = self.rpc.call_blockdev_getsize(node, newl)
3728       if result.fail_msg:
3729         self.LogWarning("Failure in blockdev_getsize call to node"
3730                         " %s, ignoring", node)
3731         continue
3732       if len(result.payload) != len(dskl):
3733 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3734 " result.payload=%s", node, len(dskl), result.payload)
3735         self.LogWarning("Invalid result from node %s, ignoring node results",
3736                         node)
3737         continue
3738       for ((instance, idx, disk), size) in zip(dskl, result.payload):
3739         if size is None:
3740           self.LogWarning("Disk %d of instance %s did not return size"
3741                           " information, ignoring", idx, instance.name)
3742           continue
3743         if not isinstance(size, (int, long)):
3744           self.LogWarning("Disk %d of instance %s did not return valid"
3745                           " size information, ignoring", idx, instance.name)
3746           continue
3747         size = size >> 20  # result is in bytes, configuration stores MiB
3748 if size != disk.size:
3749 self.LogInfo("Disk %d of instance %s has mismatched size,"
3750 " correcting: recorded %d, actual %d", idx,
3751                        instance.name, disk.size, size)
3752           disk.size = size
3753           self.cfg.Update(instance, feedback_fn)
3754 changed.append((instance.name, idx, size))
3755 if self._EnsureChildSizes(disk):
3756 self.cfg.Update(instance, feedback_fn)
3757             changed.append((instance.name, idx, disk.size))
3758     return changed
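# The list returned above names every disk whose recorded size was fixed,
# as (instance_name, disk_index, new_size_in_MiB) tuples; a possible
# (made-up) result would be [("instance-web", 0, 10240)].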
3761 class LUClusterRename(LogicalUnit):
3762   """Rename the cluster.
3764   """
3765 HPATH = "cluster-rename"
3766 HTYPE = constants.HTYPE_CLUSTER
3768   def BuildHooksEnv(self):
3769     """Build hooks env.
3771     """
3772     return {
3773 "OP_TARGET": self.cfg.GetClusterName(),
3774       "NEW_NAME": self.op.name,
3775       }
3777 def BuildHooksNodes(self):
3778     """Build hooks nodes.
3780     """
3781 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3783 def CheckPrereq(self):
3784     """Verify that the passed name is a valid one.
3786     """
3787 hostname = netutils.GetHostname(name=self.op.name,
3788 family=self.cfg.GetPrimaryIPFamily())
3790 new_name = hostname.name
3791 self.ip = new_ip = hostname.ip
3792 old_name = self.cfg.GetClusterName()
3793 old_ip = self.cfg.GetMasterIP()
3794 if new_name == old_name and new_ip == old_ip:
3795 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3796                                  " cluster has changed",
3797                                  errors.ECODE_INVAL)
3798 if new_ip != old_ip:
3799 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3800 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3801 " reachable on the network" %
3802 new_ip, errors.ECODE_NOTUNIQUE)
3804 self.op.name = new_name
3806 def Exec(self, feedback_fn):
3807     """Rename the cluster.
3809     """
3810     clustername = self.op.name
3811     new_ip = self.ip
3813 # shutdown the master IP
3814 master_params = self.cfg.GetMasterNetworkParameters()
3815 ems = self.cfg.GetUseExternalMipScript()
3816     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3817                                                      master_params, ems)
3818 result.Raise("Could not disable the master role")
3820     try:
3821       cluster = self.cfg.GetClusterInfo()
3822 cluster.cluster_name = clustername
3823 cluster.master_ip = new_ip
3824 self.cfg.Update(cluster, feedback_fn)
3826 # update the known hosts file
3827 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3828 node_list = self.cfg.GetOnlineNodeList()
3829       try:
3830         node_list.remove(master_params.name)
3831       except ValueError:
3832         pass
3833       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3834     finally:
3835       master_params.ip = new_ip
3836       result = self.rpc.call_node_activate_master_ip(master_params.name,
3837                                                      master_params, ems)
3838       msg = result.fail_msg
3839       if msg:
3840         self.LogWarning("Could not re-enable the master role on"
3841                         " the master, please restart manually: %s", msg)
3843     return clustername
3846 def _ValidateNetmask(cfg, netmask):
3847 """Checks if a netmask is valid.
3849 @type cfg: L{config.ConfigWriter}
3850   @param cfg: The cluster configuration
3851   @type netmask: int
3852   @param netmask: the netmask to be verified
3853   @raise errors.OpPrereqError: if the validation fails
3855   """
3856   ip_family = cfg.GetPrimaryIPFamily()
3857   try:
3858     ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3859 except errors.ProgrammerError:
3860     raise errors.OpPrereqError("Invalid primary ip family: %s." %
3861                                ip_family, errors.ECODE_INVAL)
3862   if not ipcls.ValidateNetmask(netmask):
3863     raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3864                                (netmask), errors.ECODE_INVAL)
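# Illustrative use of _ValidateNetmask (assuming "cfg" is the usual
# L{config.ConfigWriter} instance an LU holds): on an IPv4 cluster the
# netmask is a CIDR prefix length, so
#
#   _ValidateNetmask(cfg, 24)   # accepted, returns None
#   _ValidateNetmask(cfg, 33)   # raises errors.OpPrereqError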
3867 class LUClusterSetParams(LogicalUnit):
3868   """Change the parameters of the cluster.
3870   """
3871 HPATH = "cluster-modify"
3872   HTYPE = constants.HTYPE_CLUSTER
3873   REQ_BGL = False
3875   def CheckArguments(self):
3876     """Check parameters
3878     """
3879 if self.op.uid_pool:
3880 uidpool.CheckUidPool(self.op.uid_pool)
3882 if self.op.add_uids:
3883 uidpool.CheckUidPool(self.op.add_uids)
3885 if self.op.remove_uids:
3886 uidpool.CheckUidPool(self.op.remove_uids)
3888 if self.op.master_netmask is not None:
3889 _ValidateNetmask(self.cfg, self.op.master_netmask)
3891 if self.op.diskparams:
3892 for dt_params in self.op.diskparams.values():
3893         utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3894       try:
3895         utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3896       except errors.OpPrereqError, err:
3897         raise errors.OpPrereqError("While verifying diskparams options: %s" %
3898                                    err, errors.ECODE_INVAL)
3900 def ExpandNames(self):
3901 # FIXME: in the future maybe other cluster params won't require checking on
3902 # all nodes to be modified.
3903 self.needed_locks = {
3904 locking.LEVEL_NODE: locking.ALL_SET,
3905 locking.LEVEL_INSTANCE: locking.ALL_SET,
3906       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3907     }
3908 self.share_locks = {
3909 locking.LEVEL_NODE: 1,
3910 locking.LEVEL_INSTANCE: 1,
3911       locking.LEVEL_NODEGROUP: 1,
3912     }
3914   def BuildHooksEnv(self):
3915     """Build hooks env.
3917     """
3918     return {
3919 "OP_TARGET": self.cfg.GetClusterName(),
3920       "NEW_VG_NAME": self.op.vg_name,
3921       }
3923 def BuildHooksNodes(self):
3924     """Build hooks nodes.
3926     """
3927     mn = self.cfg.GetMasterNode()
3928     return ([mn], [mn])
3930 def CheckPrereq(self):
3931 """Check prerequisites.
3933 This checks whether the given params don't conflict and
3934     if the given volume group is valid.
3936     """
3937 if self.op.vg_name is not None and not self.op.vg_name:
3938 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3939 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3940 " instances exist", errors.ECODE_INVAL)
3942 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3943 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3944 raise errors.OpPrereqError("Cannot disable drbd helper while"
3945                                    " drbd-based instances exist",
3946                                    errors.ECODE_INVAL)
3948 node_list = self.owned_locks(locking.LEVEL_NODE)
3950     # if vg_name not None, checks given volume group on all nodes
3951     if self.op.vg_name:
3952       vglist = self.rpc.call_vg_list(node_list)
3953       for node in node_list:
3954         msg = vglist[node].fail_msg
3955         if msg:
3956           # ignoring down node
3957           self.LogWarning("Error while gathering data on node %s"
3958                           " (ignoring node): %s", node, msg)
3959           continue
3960         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3961                                               self.op.vg_name,
3962                                               constants.MIN_VG_SIZE)
3963         if vgstatus:
3964           raise errors.OpPrereqError("Error on node '%s': %s" %
3965                                      (node, vgstatus), errors.ECODE_ENVIRON)
3967 if self.op.drbd_helper:
3968 # checks given drbd helper on all nodes
3969 helpers = self.rpc.call_drbd_helper(node_list)
3970       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3971         if ninfo.offline:
3972           self.LogInfo("Not checking drbd helper on offline node %s", node)
3973           continue
3974         msg = helpers[node].fail_msg
3975         if msg:
3976           raise errors.OpPrereqError("Error checking drbd helper on node"
3977 " '%s': %s" % (node, msg),
3978 errors.ECODE_ENVIRON)
3979 node_helper = helpers[node].payload
3980 if node_helper != self.op.drbd_helper:
3981 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3982 (node, node_helper), errors.ECODE_ENVIRON)
3984 self.cluster = cluster = self.cfg.GetClusterInfo()
3985 # validate params changes
3986 if self.op.beparams:
3987 objects.UpgradeBeParams(self.op.beparams)
3988 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3989 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3991 if self.op.ndparams:
3992 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3993 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3995 # TODO: we need a more general way to handle resetting
3996 # cluster-level parameters to default values
3997 if self.new_ndparams["oob_program"] == "":
3998 self.new_ndparams["oob_program"] = \
3999 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4001 if self.op.hv_state:
4002 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4003 self.cluster.hv_state_static)
4004 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4005 for hv, values in new_hv_state.items())
4007 if self.op.disk_state:
4008 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4009 self.cluster.disk_state_static)
4010 self.new_disk_state = \
4011 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4012 for name, values in svalues.items()))
4013 for storage, svalues in new_disk_state.items())
4015     if self.op.ipolicy:
4016       self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4017                                             group_policy=False)
4019       all_instances = self.cfg.GetAllInstancesInfo().values()
4020       violations = set()
4021 for group in self.cfg.GetAllNodeGroupsInfo().values():
4022 instances = frozenset([inst for inst in all_instances
4023 if compat.any(node in group.members
4024 for node in inst.all_nodes)])
4025 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4026         new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
4027                                                                    group),
4028                                             new_ipolicy, instances)
4029         if new:
4030           violations.update(new)
4032       if violations:
4033         self.LogWarning("After the ipolicy change the following instances"
4034 " violate them: %s",
4035 utils.CommaJoin(utils.NiceSort(violations)))
4037 if self.op.nicparams:
4038 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4039 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4040       objects.NIC.CheckParameterSyntax(self.new_nicparams)
4041       nic_errors = []
4043 # check all instances for consistency
4044 for instance in self.cfg.GetAllInstancesInfo().values():
4045 for nic_idx, nic in enumerate(instance.nics):
4046 params_copy = copy.deepcopy(nic.nicparams)
4047 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4049 # check parameter syntax
4050         try:
4051           objects.NIC.CheckParameterSyntax(params_filled)
4052 except errors.ConfigurationError, err:
4053 nic_errors.append("Instance %s, nic/%d: %s" %
4054 (instance.name, nic_idx, err))
4056 # if we're moving instances to routed, check that they have an ip
4057 target_mode = params_filled[constants.NIC_MODE]
4058 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4059 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4060 " address" % (instance.name, nic_idx))
4061       if nic_errors:
4062         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4063 "\n".join(nic_errors))
4065 # hypervisor list/parameters
4066 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4067 if self.op.hvparams:
4068 for hv_name, hv_dict in self.op.hvparams.items():
4069 if hv_name not in self.new_hvparams:
4070           self.new_hvparams[hv_name] = hv_dict
4071         else:
4072           self.new_hvparams[hv_name].update(hv_dict)
4074 # disk template parameters
4075 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4076 if self.op.diskparams:
4077 for dt_name, dt_params in self.op.diskparams.items():
4078         if dt_name not in self.new_diskparams:
4079           self.new_diskparams[dt_name] = dt_params
4080         else:
4081           self.new_diskparams[dt_name].update(dt_params)
4083 # os hypervisor parameters
4084     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4085     if self.op.os_hvp:
4086       for os_name, hvs in self.op.os_hvp.items():
4087         if os_name not in self.new_os_hvp:
4088           self.new_os_hvp[os_name] = hvs
4089         else:
4090           for hv_name, hv_dict in hvs.items():
4091             if hv_name not in self.new_os_hvp[os_name]:
4092               self.new_os_hvp[os_name][hv_name] = hv_dict
4093             else:
4094               self.new_os_hvp[os_name][hv_name].update(hv_dict)
4096     # os parameters
4097     self.new_osp = objects.FillDict(cluster.osparams, {})
4098 if self.op.osparams:
4099 for os_name, osp in self.op.osparams.items():
4100 if os_name not in self.new_osp:
4101 self.new_osp[os_name] = {}
4103         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4104                                                   use_none=True)
4106 if not self.new_osp[os_name]:
4107 # we removed all parameters
4108           del self.new_osp[os_name]
4109         else:
4110           # check the parameter validity (remote check)
4111 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4112 os_name, self.new_osp[os_name])
4114 # changes to the hypervisor list
4115 if self.op.enabled_hypervisors is not None:
4116 self.hv_list = self.op.enabled_hypervisors
4117 for hv in self.hv_list:
4118 # if the hypervisor doesn't already exist in the cluster
4119 # hvparams, we initialize it to empty, and then (in both
4120 # cases) we make sure to fill the defaults, as we might not
4121         # have a complete defaults list if the hypervisor wasn't
4122         # enabled before
4123         if hv not in new_hvp:
4124           new_hvp[hv] = {}
4125         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4126 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4127     else:
4128       self.hv_list = cluster.enabled_hypervisors
4130 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4131 # either the enabled list has changed, or the parameters have, validate
4132 for hv_name, hv_params in self.new_hvparams.items():
4133 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4134 (self.op.enabled_hypervisors and
4135 hv_name in self.op.enabled_hypervisors)):
4136 # either this is a new hypervisor, or its parameters have changed
4137 hv_class = hypervisor.GetHypervisor(hv_name)
4138 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4139 hv_class.CheckParameterSyntax(hv_params)
4140 _CheckHVParams(self, node_list, hv_name, hv_params)
4142     if self.op.os_hvp:
4143       # no need to check any newly-enabled hypervisors, since the
4144 # defaults have already been checked in the above code-block
4145 for os_name, os_hvp in self.new_os_hvp.items():
4146 for hv_name, hv_params in os_hvp.items():
4147 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4148 # we need to fill in the new os_hvp on top of the actual hv_p
4149 cluster_defaults = self.new_hvparams.get(hv_name, {})
4150 new_osp = objects.FillDict(cluster_defaults, hv_params)
4151 hv_class = hypervisor.GetHypervisor(hv_name)
4152 hv_class.CheckParameterSyntax(new_osp)
4153 _CheckHVParams(self, node_list, hv_name, new_osp)
4155 if self.op.default_iallocator:
4156 alloc_script = utils.FindFile(self.op.default_iallocator,
4157                                     constants.IALLOCATOR_SEARCH_PATH,
4158                                     os.path.isfile)
4159 if alloc_script is None:
4160 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4161                                    " specified" % self.op.default_iallocator,
4162                                    errors.ECODE_INVAL)
4164 def Exec(self, feedback_fn):
4165     """Change the parameters of the cluster.
4167     """
4168 if self.op.vg_name is not None:
4169       new_volume = self.op.vg_name
4170       if not new_volume:
4171         new_volume = None
4172       if new_volume != self.cfg.GetVGName():
4173         self.cfg.SetVGName(new_volume)
4174       else:
4175         feedback_fn("Cluster LVM configuration already in desired"
4176 " state, not changing")
4177 if self.op.drbd_helper is not None:
4178       new_helper = self.op.drbd_helper
4179       if not new_helper:
4180         new_helper = None
4181       if new_helper != self.cfg.GetDRBDHelper():
4182         self.cfg.SetDRBDHelper(new_helper)
4183       else:
4184         feedback_fn("Cluster DRBD helper already in desired state,"
4185                     " not changing")
4186 if self.op.hvparams:
4187 self.cluster.hvparams = self.new_hvparams
4188     if self.op.os_hvp:
4189       self.cluster.os_hvp = self.new_os_hvp
4190 if self.op.enabled_hypervisors is not None:
4191 self.cluster.hvparams = self.new_hvparams
4192 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4193 if self.op.beparams:
4194 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4195 if self.op.nicparams:
4196 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4197     if self.op.ipolicy:
4198       self.cluster.ipolicy = self.new_ipolicy
4199 if self.op.osparams:
4200 self.cluster.osparams = self.new_osp
4201 if self.op.ndparams:
4202 self.cluster.ndparams = self.new_ndparams
4203 if self.op.diskparams:
4204 self.cluster.diskparams = self.new_diskparams
4205 if self.op.hv_state:
4206 self.cluster.hv_state_static = self.new_hv_state
4207 if self.op.disk_state:
4208 self.cluster.disk_state_static = self.new_disk_state
4210 if self.op.candidate_pool_size is not None:
4211 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4212 # we need to update the pool size here, otherwise the save will fail
4213 _AdjustCandidatePool(self, [])
4215 if self.op.maintain_node_health is not None:
4216 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4217 feedback_fn("Note: CONFD was disabled at build time, node health"
4218 " maintenance is not useful (still enabling it)")
4219 self.cluster.maintain_node_health = self.op.maintain_node_health
4221 if self.op.prealloc_wipe_disks is not None:
4222 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4224 if self.op.add_uids is not None:
4225 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4227 if self.op.remove_uids is not None:
4228 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4230 if self.op.uid_pool is not None:
4231 self.cluster.uid_pool = self.op.uid_pool
4233 if self.op.default_iallocator is not None:
4234 self.cluster.default_iallocator = self.op.default_iallocator
4236 if self.op.reserved_lvs is not None:
4237 self.cluster.reserved_lvs = self.op.reserved_lvs
4239 if self.op.use_external_mip_script is not None:
4240 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4242     def helper_os(aname, mods, desc):
4243       desc += " OS list"
4244       lst = getattr(self.cluster, aname)
4245       for key, val in mods:
4246         if key == constants.DDM_ADD:
4247           if val in lst:
4248             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4249           else:
4250             lst.append(val)
4251         elif key == constants.DDM_REMOVE:
4252           if val in lst:
4253             lst.remove(val)
4254           else:
4255             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4256         else:
4257           raise errors.ProgrammerError("Invalid modification '%s'" % key)
4259 if self.op.hidden_os:
4260 helper_os("hidden_os", self.op.hidden_os, "hidden")
4262 if self.op.blacklisted_os:
4263 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4265 if self.op.master_netdev:
4266 master_params = self.cfg.GetMasterNetworkParameters()
4267 ems = self.cfg.GetUseExternalMipScript()
4268 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4269 self.cluster.master_netdev)
4270       result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4271                                                        master_params, ems)
4272       result.Raise("Could not disable the master ip")
4273 feedback_fn("Changing master_netdev from %s to %s" %
4274 (master_params.netdev, self.op.master_netdev))
4275 self.cluster.master_netdev = self.op.master_netdev
4277 if self.op.master_netmask:
4278 master_params = self.cfg.GetMasterNetworkParameters()
4279 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4280 result = self.rpc.call_node_change_master_netmask(master_params.name,
4281 master_params.netmask,
4282                                                         self.op.master_netmask,
4283                                                         master_params.ip,
4284                                                         master_params.netdev)
4285       if result.fail_msg:
4286         msg = "Could not change the master IP netmask: %s" % result.fail_msg
4287         feedback_fn(msg)
4289       self.cluster.master_netmask = self.op.master_netmask
4291 self.cfg.Update(self.cluster, feedback_fn)
4293 if self.op.master_netdev:
4294 master_params = self.cfg.GetMasterNetworkParameters()
4295 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4296 self.op.master_netdev)
4297 ems = self.cfg.GetUseExternalMipScript()
4298       result = self.rpc.call_node_activate_master_ip(master_params.name,
4299                                                      master_params, ems)
4300       if result.fail_msg:
4301         self.LogWarning("Could not re-enable the master ip on"
4302                         " the master, please restart manually: %s",
4303                         result.fail_msg)
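# Sketch of how the merging in CheckPrereq behaves (the parameter values
# here are hypothetical): submitting
#
#   opcodes.OpClusterSetParams(
#     hvparams={constants.HT_KVM: {"kernel_path": ""}})
#
# overrides only the named key; every other KVM setting keeps its cluster
# default, because objects.FillDict() layers the new dict over the old one.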
4306 def _UploadHelper(lu, nodes, fname):
4307   """Helper for uploading a file and showing warnings.
4309   """
4310   if os.path.exists(fname):
4311 result = lu.rpc.call_upload_file(nodes, fname)
4312 for to_node, to_result in result.items():
4313       msg = to_result.fail_msg
4314       if msg:
4315         msg = ("Copy of file %s to node %s failed: %s" %
4316 (fname, to_node, msg))
4317 lu.proc.LogWarning(msg)
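# Illustrative call (node names made up): push the known_hosts file to a
# set of nodes, logging a warning per failed node instead of aborting:
#
#   _UploadHelper(self, ["node2.example.com", "node3.example.com"],
#                 constants.SSH_KNOWN_HOSTS_FILE)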
4320 def _ComputeAncillaryFiles(cluster, redist):
4321 """Compute files external to Ganeti which need to be consistent.
4323 @type redist: boolean
4324   @param redist: Whether to include files which need to be redistributed
4326   """
4327   # Compute files for all nodes
4328   files_all = set([
4329 constants.SSH_KNOWN_HOSTS_FILE,
4330 constants.CONFD_HMAC_KEY,
4331 constants.CLUSTER_DOMAIN_SECRET_FILE,
4332 constants.SPICE_CERT_FILE,
4333 constants.SPICE_CACERT_FILE,
4334     constants.RAPI_USERS_FILE,
4335     ])
4337   if not redist:
4338     files_all.update(constants.ALL_CERT_FILES)
4339 files_all.update(ssconf.SimpleStore().GetFileList())
4340   else:
4341     # we need to ship at least the RAPI certificate
4342 files_all.add(constants.RAPI_CERT_FILE)
4344 if cluster.modify_etc_hosts:
4345 files_all.add(constants.ETC_HOSTS)
4347 if cluster.use_external_mip_script:
4348 files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
4350 # Files which are optional, these must:
4351 # - be present in one other category as well
4352 # - either exist or not exist on all nodes of that category (mc, vm all)
4353   files_opt = set([
4354     constants.RAPI_USERS_FILE,
4355     ])
4357   # Files which should only be on master candidates
4358   files_mc = set()
4360   if not redist:
4361     files_mc.add(constants.CLUSTER_CONF_FILE)
4363 # Files which should only be on VM-capable nodes
4364 files_vm = set(filename
4365 for hv_name in cluster.enabled_hypervisors
4366 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4368 files_opt |= set(filename
4369 for hv_name in cluster.enabled_hypervisors
4370 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4372 # Filenames in each category must be unique
4373 all_files_set = files_all | files_mc | files_vm
4374 assert (len(all_files_set) ==
4375 sum(map(len, [files_all, files_mc, files_vm]))), \
4376 "Found file listed in more than one file list"
4378 # Optional files must be present in one other category
4379 assert all_files_set.issuperset(files_opt), \
4380 "Optional file not in a different required list"
4382 return (files_all, files_opt, files_mc, files_vm)
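# The four sets returned above unpack as (sketch):
#
#   (files_all, files_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(cluster, True)
#
# files_all must be consistent on every node, files_mc only on master
# candidates, files_vm only on vm-capable nodes, and files_opt marks
# entries of the other sets which may legitimately be missing.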
4385 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4386 """Distribute additional files which are part of the cluster configuration.
4388 ConfigWriter takes care of distributing the config and ssconf files, but
4389 there are more files which should be distributed to all nodes. This function
4390 makes sure those are copied.
4392 @param lu: calling logical unit
4393 @param additional_nodes: list of nodes not in the config to distribute to
4394 @type additional_vm: boolean
4395   @param additional_vm: whether the additional nodes are vm-capable or not
4397   """
4398 # Gather target nodes
4399 cluster = lu.cfg.GetClusterInfo()
4400 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4402 online_nodes = lu.cfg.GetOnlineNodeList()
4403 online_set = frozenset(online_nodes)
4404 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4406 if additional_nodes is not None:
4407 online_nodes.extend(additional_nodes)
4408     if additional_vm:
4409       vm_nodes.extend(additional_nodes)
4411 # Never distribute to master node
4412 for nodelist in [online_nodes, vm_nodes]:
4413 if master_info.name in nodelist:
4414 nodelist.remove(master_info.name)
4417 (files_all, _, files_mc, files_vm) = \
4418 _ComputeAncillaryFiles(cluster, True)
4420 # Never re-distribute configuration file from here
4421 assert not (constants.CLUSTER_CONF_FILE in files_all or
4422 constants.CLUSTER_CONF_FILE in files_vm)
4423 assert not files_mc, "Master candidates not handled in this function"
4425   filemap = [
4426     (online_nodes, files_all),
4427     (vm_nodes, files_vm),
4428     ]
4430   # Upload the files
4431   for (node_list, files) in filemap:
4432     for fname in files:
4433       _UploadHelper(lu, node_list, fname)
4436 class LUClusterRedistConf(NoHooksLU):
4437 """Force the redistribution of cluster configuration.
4439   This is a very simple LU.
4441   """
4442   REQ_BGL = False
4444 def ExpandNames(self):
4445 self.needed_locks = {
4446       locking.LEVEL_NODE: locking.ALL_SET,
4447     }
4448 self.share_locks[locking.LEVEL_NODE] = 1
4450 def Exec(self, feedback_fn):
4451     """Redistribute the configuration.
4453     """
4454 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4455 _RedistributeAncillaryFiles(self)
4458 class LUClusterActivateMasterIp(NoHooksLU):
4459   """Activate the master IP on the master node.
4461   """
4462 def Exec(self, feedback_fn):
4463     """Activate the master IP.
4465     """
4466 master_params = self.cfg.GetMasterNetworkParameters()
4467 ems = self.cfg.GetUseExternalMipScript()
4468     result = self.rpc.call_node_activate_master_ip(master_params.name,
4469                                                    master_params, ems)
4470 result.Raise("Could not activate the master IP")
4473 class LUClusterDeactivateMasterIp(NoHooksLU):
4474   """Deactivate the master IP on the master node.
4476   """
4477 def Exec(self, feedback_fn):
4478     """Deactivate the master IP.
4480     """
4481 master_params = self.cfg.GetMasterNetworkParameters()
4482 ems = self.cfg.GetUseExternalMipScript()
4483     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4484                                                      master_params, ems)
4485 result.Raise("Could not deactivate the master IP")
4488 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4489   """Sleep and poll for an instance's disk to sync.
4491   """
4492   if not instance.disks or disks is not None and not disks:
4493     return True
4495   disks = _ExpandCheckDisks(instance, disks)
4497   if not oneshot:
4498     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4500   node = instance.primary_node
4502   for dev in disks:
4503     lu.cfg.SetDiskID(dev, node)
4505   # TODO: Convert to utils.Retry
4507   retries = 0
4508   degr_retries = 10 # in seconds, as we sleep 1 second each time
4509   while True:
4510     max_time = 0
4511     done = True
4512     cumul_degraded = False
4513 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4514 msg = rstats.fail_msg
4515     if msg:
4516       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4517       retries += 1
4518       if retries >= 10:
4519         raise errors.RemoteError("Can't contact node %s for mirror data,"
4520                                  " aborting." % node)
4521       time.sleep(6)
4522       continue
4523     rstats = rstats.payload
4524     retries = 0
4525 for i, mstat in enumerate(rstats):
4526       if mstat is None:
4527         lu.LogWarning("Can't compute data for node %s/%s",
4528                       node, disks[i].iv_name)
4529         continue
4531       cumul_degraded = (cumul_degraded or
4532 (mstat.is_degraded and mstat.sync_percent is None))
4533 if mstat.sync_percent is not None:
4534         done = False
4535         if mstat.estimated_time is not None:
4536 rem_time = ("%s remaining (estimated)" %
4537 utils.FormatSeconds(mstat.estimated_time))
4538 max_time = mstat.estimated_time
4539         else:
4540           rem_time = "no time estimate"
4541 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4542 (disks[i].iv_name, mstat.sync_percent, rem_time))
4544 # if we're done but degraded, let's do a few small retries, to
4545 # make sure we see a stable and not transient situation; therefore
4546 # we force restart of the loop
4547 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4548       logging.info("Degraded disks found, %d retries left", degr_retries)
4549       degr_retries -= 1
4550       time.sleep(1)
4551       continue
4553     if done or oneshot:
4554       break
4556     time.sleep(min(60, max_time))
4558   if done:
4559     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4560 return not cumul_degraded
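# Typical use from an LU after disk creation (sketch; "self" is the LU and
# "instance" the objects.Instance being worked on):
#
#   if not _WaitForSync(self, instance):
#     raise errors.OpExecError("Disks are degraded after sync")
#
# A False return means at least one mirror was still degraded at the end.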
4563 def _BlockdevFind(lu, node, dev, instance):
4564 """Wrapper around call_blockdev_find to annotate diskparams.
4566 @param lu: A reference to the lu object
4567 @param node: The node to call out
4568 @param dev: The device to find
4569 @param instance: The instance object the device belongs to
4570   @returns The result of the rpc call
4572   """
4573   (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4574 return lu.rpc.call_blockdev_find(node, disk)
4577 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4578   """Wrapper around L{_CheckDiskConsistencyInner}.
4580   """
4581   (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4582   return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4583                                     ldisk=ldisk)
4586 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4587                                ldisk=False):
4588 """Check that mirrors are not degraded.
4590 @attention: The device has to be annotated already.
4592 The ldisk parameter, if True, will change the test from the
4593 is_degraded attribute (which represents overall non-ok status for
4594   the device(s)) to the ldisk (representing the local storage status).
4596   """
4597   lu.cfg.SetDiskID(dev, node)
4599   result = True
4601   if on_primary or dev.AssembleOnSecondary():
4602 rstats = lu.rpc.call_blockdev_find(node, dev)
4603     msg = rstats.fail_msg
4604     if msg:
4605       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4606       result = False
4607     elif not rstats.payload:
4608       lu.LogWarning("Can't find disk on node %s", node)
4609       result = False
4610     else:
4611       if ldisk:
4612         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4613       else:
4614         result = result and not rstats.payload.is_degraded
4616   if dev.children:
4617     for child in dev.children:
4618       result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4619                                                      on_primary)
4621   return result
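# Sketch of a caller (names assumed): verify the local storage status of
# each disk on the secondary node before acting on it:
#
#   for dev in instance.disks:
#     if not _CheckDiskConsistency(self, instance, dev, secondary_node,
#                                  False, ldisk=True):
#       raise errors.OpExecError("Disk %s is degraded" % dev.iv_name)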
4624 class LUOobCommand(NoHooksLU):
4625   """Logical unit for OOB handling.
4627   """
4628   REQ_BGL = False
4629 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4631 def ExpandNames(self):
4632     """Gather locks we need.
4634     """
4635 if self.op.node_names:
4636 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4637 lock_names = self.op.node_names
4638     else:
4639       lock_names = locking.ALL_SET
4641 self.needed_locks = {
4642       locking.LEVEL_NODE: lock_names,
4643       }
4645 def CheckPrereq(self):
4646 """Check prerequisites.
4648     This checks:
4649      - the node exists in the configuration
4650      - OOB is supported
4652     Any errors are signaled by raising errors.OpPrereqError.
4654     """
4655     self.nodes = []
4656     self.master_node = self.cfg.GetMasterNode()
4658 assert self.op.power_delay >= 0.0
4660 if self.op.node_names:
4661 if (self.op.command in self._SKIP_MASTER and
4662 self.master_node in self.op.node_names):
4663 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4664 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4666 if master_oob_handler:
4667 additional_text = ("run '%s %s %s' if you want to operate on the"
4668                            " master regardless") % (master_oob_handler,
4669                                                     self.op.command,
4670                                                     self.master_node)
4671       else:
4672         additional_text = "it does not support out-of-band operations"
4674 raise errors.OpPrereqError(("Operating on the master node %s is not"
4675 " allowed for %s; %s") %
4676 (self.master_node, self.op.command,
4677 additional_text), errors.ECODE_INVAL)
4678     else:
4679       self.op.node_names = self.cfg.GetNodeList()
4680 if self.op.command in self._SKIP_MASTER:
4681 self.op.node_names.remove(self.master_node)
4683 if self.op.command in self._SKIP_MASTER:
4684 assert self.master_node not in self.op.node_names
4686 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4687       if node is None:
4688         raise errors.OpPrereqError("Node %s not found" % node_name,
4689                                    errors.ECODE_NOENT)
4690       else:
4691         self.nodes.append(node)
4693 if (not self.op.ignore_status and
4694 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4695 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4696                                     " not marked offline") % node_name,
4697                                    errors.ECODE_STATE)
4699 def Exec(self, feedback_fn):
4700     """Execute OOB and return result if we expect any.
4702     """
4703     master_node = self.master_node
4704     ret = []
4706 for idx, node in enumerate(utils.NiceSort(self.nodes,
4707 key=lambda node: node.name)):
4708 node_entry = [(constants.RS_NORMAL, node.name)]
4709 ret.append(node_entry)
4711 oob_program = _SupportsOob(self.cfg, node)
4713       if not oob_program:
4714         node_entry.append((constants.RS_UNAVAIL, None))
4715         continue
4717 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4718 self.op.command, oob_program, node.name)
4719 result = self.rpc.call_run_oob(master_node, oob_program,
4720                                      self.op.command, node.name,
4721                                      self.op.timeout)
4723       if result.fail_msg:
4724         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4725 node.name, result.fail_msg)
4726 node_entry.append((constants.RS_NODATA, None))
4727       else:
4728         try:
4729           self._CheckPayload(result)
4730         except errors.OpExecError, err:
4731           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4732                           node.name, err)
4733           node_entry.append((constants.RS_NODATA, None))
4734         else:
4735           if self.op.command == constants.OOB_HEALTH:
4736 # For health we should log important events
4737 for item, status in result.payload:
4738 if status in [constants.OOB_STATUS_WARNING,
4739 constants.OOB_STATUS_CRITICAL]:
4740 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4741 item, node.name, status)
4743           if self.op.command == constants.OOB_POWER_ON:
4744             node.powered = True
4745 elif self.op.command == constants.OOB_POWER_OFF:
4746 node.powered = False
4747 elif self.op.command == constants.OOB_POWER_STATUS:
4748 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4749 if powered != node.powered:
4750 logging.warning(("Recorded power state (%s) of node '%s' does not"
4751                                " match actual power state (%s)"), node.powered,
4752                               node.name, powered)
4754 # For configuration changing commands we should update the node
4755 if self.op.command in (constants.OOB_POWER_ON,
4756 constants.OOB_POWER_OFF):
4757 self.cfg.Update(node, feedback_fn)
4759 node_entry.append((constants.RS_NORMAL, result.payload))
4761 if (self.op.command == constants.OOB_POWER_ON and
4762 idx < len(self.nodes) - 1):
4763             time.sleep(self.op.power_delay)
4765     return ret
4767 def _CheckPayload(self, result):
4768 """Checks if the payload is valid.
4770 @param result: RPC result
4771     @raises errors.OpExecError: If payload is not valid
4773     """
4774     errs = []
4775 if self.op.command == constants.OOB_HEALTH:
4776 if not isinstance(result.payload, list):
4777 errs.append("command 'health' is expected to return a list but got %s" %
4778                     type(result.payload))
4779       else:
4780         for item, status in result.payload:
4781 if status not in constants.OOB_STATUSES:
4782             errs.append("health item '%s' has invalid status '%s'" %
4783                         (item, status))
4785 if self.op.command == constants.OOB_POWER_STATUS:
4786 if not isinstance(result.payload, dict):
4787 errs.append("power-status is expected to return a dict but got %s" %
4788 type(result.payload))
4790 if self.op.command in [
4791 constants.OOB_POWER_ON,
4792 constants.OOB_POWER_OFF,
4793         constants.OOB_POWER_CYCLE,
4794         ]:
4795 if result.payload is not None:
4796 errs.append("%s is expected to not return payload but got '%s'" %
4797 (self.op.command, result.payload))
4799     if errs:
4800       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4801 utils.CommaJoin(errs))
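# Shape of the value returned by Exec above (illustrative): one entry per
# node, each a list of (status, data) tuples; e.g. for "power-status":
#
#   [[(constants.RS_NORMAL, "node1"),
#     (constants.RS_NORMAL, {constants.OOB_POWER_STATUS_POWERED: True})]]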
4804 class _OsQuery(_QueryBase):
4805 FIELDS = query.OS_FIELDS
4807 def ExpandNames(self, lu):
4808 # Lock all nodes in shared mode
4809 # Temporary removal of locks, should be reverted later
4810 # TODO: reintroduce locks when they are lighter-weight
4811 lu.needed_locks = {}
4812 #self.share_locks[locking.LEVEL_NODE] = 1
4813 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4815 # The following variables interact with _QueryBase._GetNames
4816     if self.names:
4817       self.wanted = self.names
4818     else:
4819       self.wanted = locking.ALL_SET
4821 self.do_locking = self.use_locking
4823   def DeclareLocks(self, lu, level):
4824     pass
4826   @staticmethod
4827 def _DiagnoseByOS(rlist):
4828     """Remaps a per-node return list into a per-os per-node dictionary
4830 @param rlist: a map with node names as keys and OS objects as values
4832     @rtype: dict
4833     @return: a dictionary with osnames as keys and as value another
4834 map, with nodes as keys and tuples of (path, status, diagnose,
4835 variants, parameters, api_versions) as values, eg::
4837 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4838 (/srv/..., False, "invalid api")],
4839            "node2": [(/srv/..., True, "", [], [])]}
4842     """
4843     all_os = {}
4844 # we build here the list of nodes that didn't fail the RPC (at RPC
4845 # level), so that nodes with a non-responding node daemon don't
4846 # make all OSes invalid
4847 good_nodes = [node_name for node_name in rlist
4848 if not rlist[node_name].fail_msg]
4849 for node_name, nr in rlist.items():
4850       if nr.fail_msg or not nr.payload:
4851         continue
4852 for (name, path, status, diagnose, variants,
4853 params, api_versions) in nr.payload:
4854 if name not in all_os:
4855 # build a list of nodes for this os containing empty lists
4856           # for each node in node_list
4857           all_os[name] = {}
4858           for nname in good_nodes:
4859 all_os[name][nname] = []
4860 # convert params from [name, help] to (name, help)
4861 params = [tuple(v) for v in params]
4862 all_os[name][node_name].append((path, status, diagnose,
4863                                         variants, params, api_versions))
4864     return all_os
4866 def _GetQueryData(self, lu):
4867     """Computes the list of nodes and their attributes.
4869     """
4870 # Locking is not used
4871 assert not (compat.any(lu.glm.is_owned(level)
4872 for level in locking.LEVELS
4873 if level != locking.LEVEL_CLUSTER) or
4874 self.do_locking or self.use_locking)
4876 valid_nodes = [node.name
4877 for node in lu.cfg.GetAllNodesInfo().values()
4878 if not node.offline and node.vm_capable]
4879 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4880     cluster = lu.cfg.GetClusterInfo()
4882     data = {}
4884 for (os_name, os_data) in pol.items():
4885 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4886 hidden=(os_name in cluster.hidden_os),
4887                           blacklisted=(os_name in cluster.blacklisted_os))
4889       variants = set()
4890       parameters = set()
4891       api_versions = set()
4893 for idx, osl in enumerate(os_data.values()):
4894         info.valid = bool(info.valid and osl and osl[0][1])
4895         if not info.valid:
4896           break
4898         (node_variants, node_params, node_api) = osl[0][3:6]
4899         if idx == 0:
4900           # First entry
4901           variants.update(node_variants)
4902 parameters.update(node_params)
4903 api_versions.update(node_api)
4904         else:
4905           # Filter out inconsistent values
4906 variants.intersection_update(node_variants)
4907 parameters.intersection_update(node_params)
4908 api_versions.intersection_update(node_api)
4910 info.variants = list(variants)
4911 info.parameters = list(parameters)
4912 info.api_versions = list(api_versions)
4914 data[os_name] = info
4916 # Prepare data in requested order
4917     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4918             if name in data]
4921 class LUOsDiagnose(NoHooksLU):
4922   """Logical unit for OS diagnose/query.
4924   """
4925   REQ_BGL = False
4927   @staticmethod
4928 def _BuildFilter(fields, names):
4929     """Builds a filter for querying OSes.
4931     """
4932 name_filter = qlang.MakeSimpleFilter("name", names)
4934 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4935 # respective field is not requested
4936 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4937 for fname in ["hidden", "blacklisted"]
4938 if fname not in fields]
4939 if "valid" not in fields:
4940 status_filter.append([qlang.OP_TRUE, "valid"])
4942     if status_filter:
4943       status_filter.insert(0, qlang.OP_AND)
4944     else:
4945       status_filter = None
4947 if name_filter and status_filter:
4948       return [qlang.OP_AND, name_filter, status_filter]
4949     elif name_filter:
4950       return name_filter
4951     else:
4952       return status_filter
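# Rough shape of the filter built above for, say,
# _BuildFilter(["name", "valid"], ["debian-edgy"]):
#
#   [qlang.OP_AND,
#    [qlang.OP_OR, [qlang.OP_EQUAL, "name", "debian-edgy"]],
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]]]]
#
# ("valid" is requested here, so no [qlang.OP_TRUE, "valid"] clause is added.)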
4954 def CheckArguments(self):
4955 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4956 self.op.output_fields, False)
4958 def ExpandNames(self):
4959 self.oq.ExpandNames(self)
4961 def Exec(self, feedback_fn):
4962 return self.oq.OldStyleQuery(self)
4965 class LUNodeRemove(LogicalUnit):
4966   """Logical unit for removing a node.
4968   """
4969 HPATH = "node-remove"
4970 HTYPE = constants.HTYPE_NODE
4972   def BuildHooksEnv(self):
4973     """Build hooks env.
4975     """
4976     return {
4977 "OP_TARGET": self.op.node_name,
4978       "NODE_NAME": self.op.node_name,
4979       }
4981 def BuildHooksNodes(self):
4982 """Build hooks nodes.
4984 This doesn't run on the target node in the pre phase as a failed
4985     node would then be impossible to remove.
4987     """
4988 all_nodes = self.cfg.GetNodeList()
4989     try:
4990       all_nodes.remove(self.op.node_name)
4991     except ValueError:
4992       pass
4993 return (all_nodes, all_nodes)
4995 def CheckPrereq(self):
4996     """Check prerequisites.
4998     This checks:
4999      - the node exists in the configuration
5000 - it does not have primary or secondary instances
5001 - it's not the master
5003     Any errors are signaled by raising errors.OpPrereqError.
5005     """
5006 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5007 node = self.cfg.GetNodeInfo(self.op.node_name)
5008 assert node is not None
5010 masternode = self.cfg.GetMasterNode()
5011 if node.name == masternode:
5012 raise errors.OpPrereqError("Node is the master node, failover to another"
5013 " node is required", errors.ECODE_INVAL)
5015 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5016 if node.name in instance.all_nodes:
5017 raise errors.OpPrereqError("Instance %s is still running on the node,"
5018                                    " please remove first" % instance_name,
5019                                    errors.ECODE_INVAL)
5020     self.op.node_name = node.name
5021     self.node = node
5023 def Exec(self, feedback_fn):
5024     """Removes the node from the cluster.
5026     """
5027     node = self.node
5028     logging.info("Stopping the node daemon and removing configs from node %s",
5029                  node.name)
5031 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5033     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5034       "Not owning BGL"
5036 # Promote nodes to master candidate as needed
5037 _AdjustCandidatePool(self, exceptions=[node.name])
5038 self.context.RemoveNode(node.name)
5040 # Run post hooks on the node before it's removed
5041 _RunPostHook(self, node.name)
5043 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5044 msg = result.fail_msg
5046 self.LogWarning("Errors encountered on the remote node while leaving"
5047 " the cluster: %s", msg)
5049 # Remove node from our /etc/hosts
5050 if self.cfg.GetClusterInfo().modify_etc_hosts:
5051 master_node = self.cfg.GetMasterNode()
5052 result = self.rpc.call_etc_hosts_modify(master_node,
5053                                               constants.ETC_HOSTS_REMOVE,
5054                                               node.name, None)
5055 result.Raise("Can't update hosts file with new host data")
5056 _RedistributeAncillaryFiles(self)
5059 class _NodeQuery(_QueryBase):
5060 FIELDS = query.NODE_FIELDS
5062 def ExpandNames(self, lu):
5063 lu.needed_locks = {}
5064 lu.share_locks = _ShareAll()
5066     if self.names:
5067       self.wanted = _GetWantedNodes(lu, self.names)
5068     else:
5069       self.wanted = locking.ALL_SET
5071 self.do_locking = (self.use_locking and
5072 query.NQ_LIVE in self.requested_data)
5074     if self.do_locking:
5075       # If any non-static field is requested we need to lock the nodes
5076 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5078   def DeclareLocks(self, lu, level):
5079     pass
5081 def _GetQueryData(self, lu):
5082     """Computes the list of nodes and their attributes.
5084     """
5085 all_info = lu.cfg.GetAllNodesInfo()
5087 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5089 # Gather data as requested
5090 if query.NQ_LIVE in self.requested_data:
5091 # filter out non-vm_capable nodes
5092 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5094 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5095 [lu.cfg.GetHypervisorType()])
5096 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5097 for (name, nresult) in node_data.items()
5098                        if not nresult.fail_msg and nresult.payload)
5099     else:
5100       live_data = None
5102 if query.NQ_INST in self.requested_data:
5103 node_to_primary = dict([(name, set()) for name in nodenames])
5104 node_to_secondary = dict([(name, set()) for name in nodenames])
5106 inst_data = lu.cfg.GetAllInstancesInfo()
5108 for inst in inst_data.values():
5109 if inst.primary_node in node_to_primary:
5110 node_to_primary[inst.primary_node].add(inst.name)
5111 for secnode in inst.secondary_nodes:
5112 if secnode in node_to_secondary:
5113 node_to_secondary[secnode].add(inst.name)
5114     else:
5115       node_to_primary = None
5116 node_to_secondary = None
5118 if query.NQ_OOB in self.requested_data:
5119 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5120                          for name, node in all_info.iteritems())
5121     else:
5122       oob_support = None
5124 if query.NQ_GROUP in self.requested_data:
5125       groups = lu.cfg.GetAllNodeGroupsInfo()
5126     else:
5127       groups = {}
5129 return query.NodeQueryData([all_info[name] for name in nodenames],
5130 live_data, lu.cfg.GetMasterNode(),
5131 node_to_primary, node_to_secondary, groups,
5132 oob_support, lu.cfg.GetClusterInfo())
5135 class LUNodeQuery(NoHooksLU):
5136   """Logical unit for querying nodes.
5138   """
5139   # pylint: disable=W0142
5140   REQ_BGL = False
5142 def CheckArguments(self):
5143 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5144 self.op.output_fields, self.op.use_locking)
5146 def ExpandNames(self):
5147 self.nq.ExpandNames(self)
5149 def DeclareLocks(self, level):
5150 self.nq.DeclareLocks(self, level)
5152 def Exec(self, feedback_fn):
5153 return self.nq.OldStyleQuery(self)
5156 class LUNodeQueryvols(NoHooksLU):
5157   """Logical unit for getting volumes on node(s).
5159   """
5160   REQ_BGL = False
5161 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5162 _FIELDS_STATIC = utils.FieldSet("node")
5164 def CheckArguments(self):
5165 _CheckOutputFields(static=self._FIELDS_STATIC,
5166 dynamic=self._FIELDS_DYNAMIC,
5167 selected=self.op.output_fields)
5169 def ExpandNames(self):
5170 self.share_locks = _ShareAll()
5171 self.needed_locks = {}
5173 if not self.op.nodes:
5174 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5176 self.needed_locks[locking.LEVEL_NODE] = \
5177 _GetWantedNodes(self, self.op.nodes)
5179 def Exec(self, feedback_fn):
5180     """Computes the list of nodes and their attributes.
5182     """
5183 nodenames = self.owned_locks(locking.LEVEL_NODE)
5184 volumes = self.rpc.call_node_volumes(nodenames)
5186 ilist = self.cfg.GetAllInstancesInfo()
5187 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5189     output = []
5190     for node in nodenames:
5191       nresult = volumes[node]
5192       if nresult.offline:
5193         continue
5194       msg = nresult.fail_msg
5195       if msg:
5196         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5197         continue
5199 node_vols = sorted(nresult.payload,
5200 key=operator.itemgetter("dev"))
5202       for vol in node_vols:
5203         node_output = []
5204         for field in self.op.output_fields:
5205           if field == "node":
5206             val = node
5207           elif field == "phys":
5208             val = vol["dev"]
5209           elif field == "vg":
5210             val = vol["vg"]
5211           elif field == "name":
5212             val = vol["name"]
5213           elif field == "size":
5214             val = int(float(vol["size"]))
5215 elif field == "instance":
5216             val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5217           else:
5218 raise errors.ParameterError(field)
5219 node_output.append(str(val))
5221         output.append(node_output)
5223     return output
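# Each row of the returned list follows self.op.output_fields; e.g. with
# output_fields=["node", "name", "size"] a single row could look like
# (made-up values):
#
#   ["node1.example.com", "instance-web-disk0", "10240"]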
5226 class LUNodeQueryStorage(NoHooksLU):
5227   """Logical unit for getting information on storage units on node(s).
5229   """
5230   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5231   REQ_BGL = False
5233 def CheckArguments(self):
5234 _CheckOutputFields(static=self._FIELDS_STATIC,
5235 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5236 selected=self.op.output_fields)
5238 def ExpandNames(self):
5239 self.share_locks = _ShareAll()
5240 self.needed_locks = {}
5242     if self.op.nodes:
5243       self.needed_locks[locking.LEVEL_NODE] = \
5244         _GetWantedNodes(self, self.op.nodes)
5245     else:
5246       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5248 def Exec(self, feedback_fn):
5249     """Computes the list of nodes and their attributes.
5251     """
5252 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5254 # Always get name to sort by
5255 if constants.SF_NAME in self.op.output_fields:
5256 fields = self.op.output_fields[:]
5257     else:
5258       fields = [constants.SF_NAME] + self.op.output_fields
5260 # Never ask for node or type as it's only known to the LU
5261 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5262 while extra in fields:
5263 fields.remove(extra)
5265 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5266 name_idx = field_idx[constants.SF_NAME]
5268 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5269 data = self.rpc.call_storage_list(self.nodes,
5270 self.op.storage_type, st_args,
5271 self.op.name, fields)
5273     result = []
5275     for node in utils.NiceSort(self.nodes):
5276       nresult = data[node]
5277       if nresult.offline:
5278         continue
5280 msg = nresult.fail_msg
5281       if msg:
5282         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5283         continue
5285 rows = dict([(row[name_idx], row) for row in nresult.payload])
5287       for name in utils.NiceSort(rows.keys()):
5288         row = rows[name]
5290         out = []
5292 for field in self.op.output_fields:
5293 if field == constants.SF_NODE:
5294             val = node
5295           elif field == constants.SF_TYPE:
5296 val = self.op.storage_type
5297 elif field in field_idx:
5298 val = row[field_idx[field]]
5299           else:
5300             raise errors.ParameterError(field)
5302           out.append(str(val))
5304         result.append(out)
5306     return result
5309 class _InstanceQuery(_QueryBase):
5310 FIELDS = query.INSTANCE_FIELDS
5312 def ExpandNames(self, lu):
5313 lu.needed_locks = {}
5314 lu.share_locks = _ShareAll()
5316     if self.names:
5317       self.wanted = _GetWantedInstances(lu, self.names)
5318     else:
5319       self.wanted = locking.ALL_SET
5321 self.do_locking = (self.use_locking and
5322 query.IQ_LIVE in self.requested_data)
5323     if self.do_locking:
5324       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5325 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5326 lu.needed_locks[locking.LEVEL_NODE] = []
5327 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5329 self.do_grouplocks = (self.do_locking and
5330 query.IQ_NODES in self.requested_data)
5332 def DeclareLocks(self, lu, level):
5333     if self.do_locking:
5334       if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5335 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5337 # Lock all groups used by instances optimistically; this requires going
5338 # via the node before it's locked, requiring verification later on
5339 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5340           set(group_uuid
5341               for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5342               for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5343 elif level == locking.LEVEL_NODE:
5344 lu._LockInstancesNodes() # pylint: disable=W0212
5346   @staticmethod
5347   def _CheckGroupLocks(lu):
5348 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5349 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5351 # Check if node groups for locked instances are still correct
5352 for instance_name in owned_instances:
5353 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5355 def _GetQueryData(self, lu):
5356     """Computes the list of instances and their attributes.
5358     """
5359 if self.do_grouplocks:
5360 self._CheckGroupLocks(lu)
5362 cluster = lu.cfg.GetClusterInfo()
5363 all_info = lu.cfg.GetAllInstancesInfo()
5365 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5367 instance_list = [all_info[name] for name in instance_names]
5368 nodes = frozenset(itertools.chain(*(inst.all_nodes
5369 for inst in instance_list)))
5370 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5371     bad_nodes = []
5372     offline_nodes = []
5373     wrongnode_inst = set()
5375 # Gather data as requested
5376 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5377       live_data = {}
5378       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5379       for name in nodes:
5380         result = node_data[name]
5381         if result.offline:
5382           # offline nodes will be in both lists
5383 assert result.fail_msg
5384 offline_nodes.append(name)
5385         if result.fail_msg:
5386           bad_nodes.append(name)
5387 elif result.payload:
5388 for inst in result.payload:
5389 if inst in all_info:
5390 if all_info[inst].primary_node == name:
5391 live_data.update(result.payload)
5392               else:
5393                 wrongnode_inst.add(inst)
5394             else:
5395               # orphan instance; we don't list it here as we don't
5396 # handle this case yet in the output of instance listing
5397               logging.warning("Orphan instance '%s' found on node %s",
5398                               inst, name)
5399         # else no instance is alive
5400     else:
5401       live_data = {}
5403 if query.IQ_DISKUSAGE in self.requested_data:
5404 disk_usage = dict((inst.name,
5405 _ComputeDiskSize(inst.disk_template,
5406 [{constants.IDISK_SIZE: disk.size}
5407 for disk in inst.disks]))
5408                         for inst in instance_list)
5409     else:
5410       disk_usage = None
5412 if query.IQ_CONSOLE in self.requested_data:
5413       consinfo = {}
5414       for inst in instance_list:
5415 if inst.name in live_data:
5416 # Instance is running
5417 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5418         else:
5419           consinfo[inst.name] = None
5420       assert set(consinfo.keys()) == set(instance_names)
5421     else:
5422       consinfo = None
5424 if query.IQ_NODES in self.requested_data:
5425       node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5426                                             instance_list)))
5427 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5428 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5429                     for uuid in set(map(operator.attrgetter("group"),
5430                                         nodes.values())))
5431     else:
5432       nodes = None
5433       groups = None
5435 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5436 disk_usage, offline_nodes, bad_nodes,
5437                                    live_data, wrongnode_inst, consinfo,
5438                                    nodes, groups)
5441 class LUQuery(NoHooksLU):
5442   """Query for resources/items of a certain kind.
5444   """
5445   # pylint: disable=W0142
5446   REQ_BGL = False
5448 def CheckArguments(self):
5449 qcls = _GetQueryImplementation(self.op.what)
5451 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5453 def ExpandNames(self):
5454 self.impl.ExpandNames(self)
5456 def DeclareLocks(self, level):
5457 self.impl.DeclareLocks(self, level)
5459 def Exec(self, feedback_fn):
5460 return self.impl.NewStyleQuery(self)
5463 class LUQueryFields(NoHooksLU):
5464   """Query for resources/items of a certain kind.
5466   """
5467   # pylint: disable=W0142
5468   REQ_BGL = False
5470 def CheckArguments(self):
5471 self.qcls = _GetQueryImplementation(self.op.what)
5473 def ExpandNames(self):
5474 self.needed_locks = {}
5476 def Exec(self, feedback_fn):
5477 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5480 class LUNodeModifyStorage(NoHooksLU):
5481   """Logical unit for modifying a storage volume on a node.
5483   """
5484   REQ_BGL = False
5486 def CheckArguments(self):
5487 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5489 storage_type = self.op.storage_type
5491     try:
5492       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5493     except KeyError:
5494       raise errors.OpPrereqError("Storage units of type '%s' can not be"
5495                                  " modified" % storage_type,
5496                                  errors.ECODE_INVAL)
5498 diff = set(self.op.changes.keys()) - modifiable
5499     if diff:
5500       raise errors.OpPrereqError("The following fields can not be modified for"
5501                                  " storage units of type '%s': %r" %
5502                                  (storage_type, list(diff)),
5503                                  errors.ECODE_INVAL)
5505 def ExpandNames(self):
5506 self.needed_locks = {
5507       locking.LEVEL_NODE: self.op.node_name,
5508       }
5510 def Exec(self, feedback_fn):
5511     """Modifies a storage unit on the given node.
5513     """
5514 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5515 result = self.rpc.call_storage_modify(self.op.node_name,
5516 self.op.storage_type, st_args,
5517 self.op.name, self.op.changes)
5518 result.Raise("Failed to modify storage unit '%s' on %s" %
5519 (self.op.name, self.op.node_name))
5522 class LUNodeAdd(LogicalUnit):
5523   """Logical unit for adding node to the cluster.
5525   """
5526   HPATH = "node-add"
5527   HTYPE = constants.HTYPE_NODE
5528 _NFLAGS = ["master_capable", "vm_capable"]
5530 def CheckArguments(self):
5531 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5532 # validate/normalize the node name
5533 self.hostname = netutils.GetHostname(name=self.op.node_name,
5534 family=self.primary_ip_family)
5535 self.op.node_name = self.hostname.name
5537 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5538       raise errors.OpPrereqError("Cannot readd the master node",
5539                                  errors.ECODE_STATE)
5541 if self.op.readd and self.op.group:
5542 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5543 " being readded", errors.ECODE_INVAL)
5545   def BuildHooksEnv(self):
5546     """Build hooks env.
5548     This will run on all nodes before, and on all nodes + the new node after.
5550     """
5551     return {
5552 "OP_TARGET": self.op.node_name,
5553 "NODE_NAME": self.op.node_name,
5554 "NODE_PIP": self.op.primary_ip,
5555 "NODE_SIP": self.op.secondary_ip,
5556 "MASTER_CAPABLE": str(self.op.master_capable),
5557       "VM_CAPABLE": str(self.op.vm_capable),
5558       }
5560 def BuildHooksNodes(self):
5561     """Build hooks nodes.
5563     """
5564 # Exclude added node
5565 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5566 post_nodes = pre_nodes + [self.op.node_name, ]
5568 return (pre_nodes, post_nodes)
5570 def CheckPrereq(self):
5571     """Check prerequisites.
5573     This checks:
5574      - the new node is not already in the config
5575      - it is resolvable
5576      - its parameters (single/dual homed) matches the cluster
5578     Any errors are signaled by raising errors.OpPrereqError.
5580     """
5581     cfg = self.cfg
5582     hostname = self.hostname
5583 node = hostname.name
5584 primary_ip = self.op.primary_ip = hostname.ip
5585 if self.op.secondary_ip is None:
5586 if self.primary_ip_family == netutils.IP6Address.family:
5587         raise errors.OpPrereqError("When using an IPv6 primary address, a"
5588                                    " valid IPv4 address must be given as"
5589                                    " secondary", errors.ECODE_INVAL)
5590 self.op.secondary_ip = primary_ip
5592 secondary_ip = self.op.secondary_ip
5593 if not netutils.IP4Address.IsValid(secondary_ip):
5594 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5595 " address" % secondary_ip, errors.ECODE_INVAL)
5597 node_list = cfg.GetNodeList()
5598 if not self.op.readd and node in node_list:
5599 raise errors.OpPrereqError("Node %s is already in the configuration" %
5600 node, errors.ECODE_EXISTS)
5601 elif self.op.readd and node not in node_list:
5602       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5603                                  errors.ECODE_NOENT)
5605 self.changed_primary_ip = False
5607 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5608 if self.op.readd and node == existing_node_name:
5609 if existing_node.secondary_ip != secondary_ip:
5610 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5611                                    " address configuration as before",
5612                                    errors.ECODE_INVAL)
5613 if existing_node.primary_ip != primary_ip:
5614         self.changed_primary_ip = True
5616         continue
5618 if (existing_node.primary_ip == primary_ip or
5619 existing_node.secondary_ip == primary_ip or
5620 existing_node.primary_ip == secondary_ip or
5621 existing_node.secondary_ip == secondary_ip):
5622 raise errors.OpPrereqError("New node ip address(es) conflict with"
5623 " existing node %s" % existing_node.name,
5624 errors.ECODE_NOTUNIQUE)
5626 # After this 'if' block, None is no longer a valid value for the
5627     # _capable op attributes
5628     if self.op.readd:
5629       old_node = self.cfg.GetNodeInfo(node)
5630 assert old_node is not None, "Can't retrieve locked node %s" % node
5631 for attr in self._NFLAGS:
5632 if getattr(self.op, attr) is None:
5633 setattr(self.op, attr, getattr(old_node, attr))
5634     else:
5635       for attr in self._NFLAGS:
5636 if getattr(self.op, attr) is None:
5637 setattr(self.op, attr, True)
5639 if self.op.readd and not self.op.vm_capable:
5640 pri, sec = cfg.GetNodeInstances(node)
5641       if pri or sec:
5642         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5643 " flag set to false, but it already holds"
5644                                    " instances" % node,
5645                                    errors.ECODE_STATE)
5647 # check that the type of the node (single versus dual homed) is the
5648 # same as for the master
5649 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5650 master_singlehomed = myself.secondary_ip == myself.primary_ip
5651 newbie_singlehomed = secondary_ip == primary_ip
5652 if master_singlehomed != newbie_singlehomed:
5653 if master_singlehomed:
5654 raise errors.OpPrereqError("The master has no secondary ip but the"
5655                                    " new node has one",
5656                                    errors.ECODE_INVAL)
5657       else:
5658         raise errors.OpPrereqError("The master has a secondary ip but the"
5659                                    " new node doesn't have one",
5660                                    errors.ECODE_INVAL)
5662 # checks reachability
5663 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5664 raise errors.OpPrereqError("Node not reachable by ping",
5665 errors.ECODE_ENVIRON)
5667 if not newbie_singlehomed:
5668 # check reachability from my secondary ip to newbie's secondary ip
5669 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5670 source=myself.secondary_ip):
5671 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5672 " based ping to node daemon port",
5673 errors.ECODE_ENVIRON)
5675 if self.op.readd:
5676 exceptions = [node]
5677 else:
5678 exceptions = []
5680 if self.op.master_capable:
5681 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5682 else:
5683 self.master_candidate = False
5685 if self.op.readd:
5686 self.new_node = old_node
5687 else:
5688 node_group = cfg.LookupNodeGroup(self.op.group)
5689 self.new_node = objects.Node(name=node,
5690 primary_ip=primary_ip,
5691 secondary_ip=secondary_ip,
5692 master_candidate=self.master_candidate,
5693 offline=False, drained=False,
5694 group=node_group)
5696 if self.op.ndparams:
5697 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5699 if self.op.hv_state:
5700 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5702 if self.op.disk_state:
5703 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5705 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5706 # it a property on the base class.
5707 result = rpc.DnsOnlyRunner().call_version([node])[node]
5708 result.Raise("Can't get version information from node %s" % node)
5709 if constants.PROTOCOL_VERSION == result.payload:
5710 logging.info("Communication to node %s fine, sw version %s match",
5711 node, result.payload)
5712 else:
5713 raise errors.OpPrereqError("Version mismatch master version %s,"
5714 " node version %s" %
5715 (constants.PROTOCOL_VERSION, result.payload),
5716 errors.ECODE_ENVIRON)
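# Note: only an exact PROTOCOL_VERSION match is accepted above, so a node
# daemon that is newer than the master is rejected just like an older one.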
5718 def Exec(self, feedback_fn):
5719 """Adds the new node to the cluster.
5722 new_node = self.new_node
5723 node = new_node.name
5725 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5726 "Not owning BGL"
5728 # We are adding a new node, so we assume it is powered
5729 new_node.powered = True
5731 # for re-adds, reset the offline/drained/master-candidate flags;
5732 # we need to reset here, otherwise offline would prevent RPC calls
5733 # later in the procedure; this also means that if the re-add
5734 # fails, we are left with a non-offlined, broken node
5735 if self.op.readd:
5736 new_node.drained = new_node.offline = False # pylint: disable=W0201
5737 self.LogInfo("Readding a node, the offline/drained flags were reset")
5738 # if we demote the node, we do cleanup later in the procedure
5739 new_node.master_candidate = self.master_candidate
5740 if self.changed_primary_ip:
5741 new_node.primary_ip = self.op.primary_ip
5743 # copy the master/vm_capable flags
5744 for attr in self._NFLAGS:
5745 setattr(new_node, attr, getattr(self.op, attr))
5747 # notify the user about any possible mc promotion
5748 if new_node.master_candidate:
5749 self.LogInfo("Node will be a master candidate")
5751 if self.op.ndparams:
5752 new_node.ndparams = self.op.ndparams
5753 else:
5754 new_node.ndparams = {}
5756 if self.op.hv_state:
5757 new_node.hv_state_static = self.new_hv_state
5759 if self.op.disk_state:
5760 new_node.disk_state_static = self.new_disk_state
5762 # Add node to our /etc/hosts, and add key to known_hosts
5763 if self.cfg.GetClusterInfo().modify_etc_hosts:
5764 master_node = self.cfg.GetMasterNode()
5765 result = self.rpc.call_etc_hosts_modify(master_node,
5766 constants.ETC_HOSTS_ADD,
5767 self.hostname.name,
5768 self.hostname.ip)
5769 result.Raise("Can't update hosts file with new host data")
5771 if new_node.secondary_ip != new_node.primary_ip:
5772 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5773 False)
5775 node_verify_list = [self.cfg.GetMasterNode()]
5776 node_verify_param = {
5777 constants.NV_NODELIST: ([node], {}),
5778 # TODO: do a node-net-test as well?
5779 }
5781 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5782 self.cfg.GetClusterName())
5783 for verifier in node_verify_list:
5784 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5785 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5786 if nl_payload:
5787 for failed in nl_payload:
5788 feedback_fn("ssh/hostname verification failed"
5789 " (checking from %s): %s" %
5790 (verifier, nl_payload[failed]))
5791 raise errors.OpExecError("ssh/hostname verification failed")
5793 if self.op.readd:
5794 _RedistributeAncillaryFiles(self)
5795 self.context.ReaddNode(new_node)
5796 # make sure we redistribute the config
5797 self.cfg.Update(new_node, feedback_fn)
5798 # and make sure the new node will not have old files around
5799 if not new_node.master_candidate:
5800 result = self.rpc.call_node_demote_from_mc(new_node.name)
5801 msg = result.fail_msg
5802 if msg:
5803 self.LogWarning("Node failed to demote itself from master"
5804 " candidate status: %s" % msg)
5805 else:
5806 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5807 additional_vm=self.op.vm_capable)
5808 self.context.AddNode(new_node, self.proc.GetECId())
5811 class LUNodeSetParams(LogicalUnit):
5812 """Modifies the parameters of a node.
5814 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5815 to the node role (as _ROLE_*)
5816 @cvar _R2F: a dictionary from node role to tuples of flags
5817 @cvar _FLAGS: a list of attribute names corresponding to the flags
5820 HPATH = "node-modify"
5821 HTYPE = constants.HTYPE_NODE
5823 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5824 _F2R = {
5825 (True, False, False): _ROLE_CANDIDATE,
5826 (False, True, False): _ROLE_DRAINED,
5827 (False, False, True): _ROLE_OFFLINE,
5828 (False, False, False): _ROLE_REGULAR,
5829 }
5830 _R2F = dict((v, k) for k, v in _F2R.items())
5831 _FLAGS = ["master_candidate", "drained", "offline"]
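# Illustrative example of the mapping: a drained node carries the flags
# (master_candidate=False, drained=True, offline=False), so
# _F2R[(False, True, False)] == _ROLE_DRAINED and, inversely,
# _R2F[_ROLE_DRAINED] == (False, True, False).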
5833 def CheckArguments(self):
5834 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5835 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5836 self.op.master_capable, self.op.vm_capable,
5837 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5838 self.op.disk_state]
5839 if all_mods.count(None) == len(all_mods):
5840 raise errors.OpPrereqError("Please pass at least one modification",
5841 errors.ECODE_INVAL)
5842 if all_mods.count(True) > 1:
5843 raise errors.OpPrereqError("Can't set the node into more than one"
5844 " state at the same time",
5845 errors.ECODE_INVAL)
5847 # Boolean value that tells us whether we might be demoting from MC
5848 self.might_demote = (self.op.master_candidate == False or
5849 self.op.offline == True or
5850 self.op.drained == True or
5851 self.op.master_capable == False)
5853 if self.op.secondary_ip:
5854 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5855 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5856 " address" % self.op.secondary_ip,
5857 errors.ECODE_INVAL)
5859 self.lock_all = self.op.auto_promote and self.might_demote
5860 self.lock_instances = self.op.secondary_ip is not None
5862 def _InstanceFilter(self, instance):
5863 """Filter for getting affected instances.
5866 return (instance.disk_template in constants.DTS_INT_MIRROR and
5867 self.op.node_name in instance.all_nodes)
5869 def ExpandNames(self):
5870 if self.lock_all:
5871 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5872 else:
5873 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5875 # Since modifying a node can have severe effects on currently running
5876 # operations, the resource lock is at least acquired in shared mode
5877 self.needed_locks[locking.LEVEL_NODE_RES] = \
5878 self.needed_locks[locking.LEVEL_NODE]
5880 # Get node resource and instance locks in shared mode; they are not used
5881 # for anything but read-only access
5882 self.share_locks[locking.LEVEL_NODE_RES] = 1
5883 self.share_locks[locking.LEVEL_INSTANCE] = 1
5885 if self.lock_instances:
5886 self.needed_locks[locking.LEVEL_INSTANCE] = \
5887 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5889 def BuildHooksEnv(self):
5890 """Build hooks env.
5892 This runs on the master node.
5894 """
5895 return {
5896 "OP_TARGET": self.op.node_name,
5897 "MASTER_CANDIDATE": str(self.op.master_candidate),
5898 "OFFLINE": str(self.op.offline),
5899 "DRAINED": str(self.op.drained),
5900 "MASTER_CAPABLE": str(self.op.master_capable),
5901 "VM_CAPABLE": str(self.op.vm_capable),
5902 }
5904 def BuildHooksNodes(self):
5905 """Build hooks nodes.
5907 """
5908 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5909 return (nl, nl)
5911 def CheckPrereq(self):
5912 """Check prerequisites.
5914 This only checks the instance list against the existing names.
5917 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5919 if self.lock_instances:
5920 affected_instances = \
5921 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5923 # Verify instance locks
5924 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5925 wanted_instances = frozenset(affected_instances.keys())
5926 if wanted_instances - owned_instances:
5927 raise errors.OpPrereqError("Instances affected by changing node %s's"
5928 " secondary IP address have changed since"
5929 " locks were acquired, wanted '%s', have"
5930 " '%s'; retry the operation" %
5931 (self.op.node_name,
5932 utils.CommaJoin(wanted_instances),
5933 utils.CommaJoin(owned_instances)),
5934 errors.ECODE_STATE)
5935 else:
5936 affected_instances = None
5938 if (self.op.master_candidate is not None or
5939 self.op.drained is not None or
5940 self.op.offline is not None):
5941 # we can't change the master's node flags
5942 if self.op.node_name == self.cfg.GetMasterNode():
5943 raise errors.OpPrereqError("The master role can be changed"
5944 " only via master-failover",
5945 errors.ECODE_INVAL)
5947 if self.op.master_candidate and not node.master_capable:
5948 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5949 " it a master candidate" % node.name,
5950 errors.ECODE_STATE)
5952 if self.op.vm_capable == False:
5953 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5954 if ipri or isec:
5955 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5956 " the vm_capable flag" % node.name,
5957 errors.ECODE_STATE)
5959 if node.master_candidate and self.might_demote and not self.lock_all:
5960 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5961 # check if after removing the current node, we're missing master
5962 # candidates
5963 (mc_remaining, mc_should, _) = \
5964 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5965 if mc_remaining < mc_should:
5966 raise errors.OpPrereqError("Not enough master candidates, please"
5967 " pass auto promote option to allow"
5968 " promotion (--auto-promote or RAPI"
5969 " auto_promote=True)", errors.ECODE_STATE)
5971 self.old_flags = old_flags = (node.master_candidate,
5972 node.drained, node.offline)
5973 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5974 self.old_role = old_role = self._F2R[old_flags]
5976 # Check for ineffective changes
5977 for attr in self._FLAGS:
5978 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5979 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5980 setattr(self.op, attr, None)
5982 # Past this point, any flag change to False means a transition
5983 # away from the respective state, as only real changes are kept
5985 # TODO: We might query the real power state if it supports OOB
5986 if _SupportsOob(self.cfg, node):
5987 if self.op.offline is False and not (node.powered or
5988 self.op.powered == True):
5989 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5990 " offline status can be reset") %
5991 self.op.node_name, errors.ECODE_STATE)
5992 elif self.op.powered is not None:
5993 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5994 " as it does not support out-of-band"
5995 " handling") % self.op.node_name)
5997 # If we're being deofflined/drained, we'll MC ourself if needed
5998 if (self.op.drained == False or self.op.offline == False or
5999 (self.op.master_capable and not node.master_capable)):
6000 if _DecideSelfPromotion(self):
6001 self.op.master_candidate = True
6002 self.LogInfo("Auto-promoting node to master candidate")
6004 # If we're no longer master capable, we'll demote ourselves from MC
6005 if self.op.master_capable == False and node.master_candidate:
6006 self.LogInfo("Demoting from master candidate")
6007 self.op.master_candidate = False
6009 # Compute new role
6010 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6011 if self.op.master_candidate:
6012 new_role = self._ROLE_CANDIDATE
6013 elif self.op.drained:
6014 new_role = self._ROLE_DRAINED
6015 elif self.op.offline:
6016 new_role = self._ROLE_OFFLINE
6017 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6018 # False is still in new flags, which means we're un-setting (the
6019 # offline/drained/mc) flag(s)
6020 new_role = self._ROLE_REGULAR
6021 else: # no new flags, nothing, keep old role
6022 new_role = old_role
6024 self.new_role = new_role
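# Illustrative example of the precedence above: offline=True yields
# _ROLE_OFFLINE; an opcode that merely clears a flag (e.g. drained=False)
# falls through to _ROLE_REGULAR; an opcode touching no flags keeps the
# old role.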
6026 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6027 # Trying to transition out of offline status
6028 result = self.rpc.call_version([node.name])[node.name]
6029 if result.fail_msg:
6030 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6031 " to report its version: %s" %
6032 (node.name, result.fail_msg),
6033 errors.ECODE_ENVIRON)
6034 else:
6035 self.LogWarning("Transitioning node from offline to online state"
6036 " without using re-add. Please make sure the node"
6037 " is healthy!")
6039 if self.op.secondary_ip:
6040 # Ok even without locking, because this can't be changed by any LU
6041 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6042 master_singlehomed = master.secondary_ip == master.primary_ip
6043 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6044 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
6045 " homed cluster", errors.ECODE_INVAL)
6047 assert not (frozenset(affected_instances) -
6048 self.owned_locks(locking.LEVEL_INSTANCE))
6050 if node.offline:
6051 if affected_instances:
6052 raise errors.OpPrereqError("Cannot change secondary IP address:"
6053 " offline node has instances (%s)"
6054 " configured to use it" %
6055 utils.CommaJoin(affected_instances.keys()))
6056 else:
6057 # On online nodes, check that no instances are running, and that
6058 # the node has the new ip and we can reach it.
6058 # the node has the new ip and we can reach it.
6059 for instance in affected_instances.values():
6060 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6061 msg="cannot change secondary ip")
6063 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6064 if master.name != node.name:
6065 # check reachability from master secondary ip to new secondary ip
6066 if not netutils.TcpPing(self.op.secondary_ip,
6067 constants.DEFAULT_NODED_PORT,
6068 source=master.secondary_ip):
6069 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6070 " based ping to node daemon port",
6071 errors.ECODE_ENVIRON)
6073 if self.op.ndparams:
6074 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6075 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6076 self.new_ndparams = new_ndparams
6078 if self.op.hv_state:
6079 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6080 self.node.hv_state_static)
6082 if self.op.disk_state:
6083 self.new_disk_state = \
6084 _MergeAndVerifyDiskState(self.op.disk_state,
6085 self.node.disk_state_static)
6087 def Exec(self, feedback_fn):
6088 """Modifies a node.
6090 """
6091 node = self.node
6092 old_role = self.old_role
6093 new_role = self.new_role
6095 result = []
6097 if self.op.ndparams:
6098 node.ndparams = self.new_ndparams
6100 if self.op.powered is not None:
6101 node.powered = self.op.powered
6103 if self.op.hv_state:
6104 node.hv_state_static = self.new_hv_state
6106 if self.op.disk_state:
6107 node.disk_state_static = self.new_disk_state
6109 for attr in ["master_capable", "vm_capable"]:
6110 val = getattr(self.op, attr)
6111 if val is not None:
6112 setattr(node, attr, val)
6113 result.append((attr, str(val)))
6115 if new_role != old_role:
6116 # Tell the node to demote itself, if no longer MC and not offline
6117 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6118 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6119 if msg:
6120 self.LogWarning("Node failed to demote itself: %s", msg)
6122 new_flags = self._R2F[new_role]
6123 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6124 if of != nf:
6125 result.append((desc, str(nf)))
6126 (node.master_candidate, node.drained, node.offline) = new_flags
6128 # we locked all nodes, we adjust the CP before updating this node
6129 if self.lock_all:
6130 _AdjustCandidatePool(self, [node.name])
6132 if self.op.secondary_ip:
6133 node.secondary_ip = self.op.secondary_ip
6134 result.append(("secondary_ip", self.op.secondary_ip))
6136 # this will trigger configuration file update, if needed
6137 self.cfg.Update(node, feedback_fn)
6139 # this will trigger job queue propagation or cleanup if the mc
6140 # flag changed
6141 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6142 self.context.ReaddNode(node)
6144 return result
6147 class LUNodePowercycle(NoHooksLU):
6148 """Powercycles a node.
6153 def CheckArguments(self):
6154 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6155 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6156 raise errors.OpPrereqError("The node is the master and the force"
6157 " parameter was not set",
6158 errors.ECODE_INVAL)
6160 def ExpandNames(self):
6161 """Locking for PowercycleNode.
6163 This is a last-resort option and shouldn't block on other
6164 jobs. Therefore, we grab no locks.
6167 self.needed_locks = {}
6169 def Exec(self, feedback_fn):
6173 result = self.rpc.call_node_powercycle(self.op.node_name,
6174 self.cfg.GetHypervisorType())
6175 result.Raise("Failed to schedule the reboot")
6176 return result.payload
6179 class LUClusterQuery(NoHooksLU):
6180 """Query cluster configuration.
6185 def ExpandNames(self):
6186 self.needed_locks = {}
6188 def Exec(self, feedback_fn):
6189 """Return cluster config.
6192 cluster = self.cfg.GetClusterInfo()
6193 os_hvp = {}
6195 # Filter just for enabled hypervisors
6196 for os_name, hv_dict in cluster.os_hvp.items():
6197 os_hvp[os_name] = {}
6198 for hv_name, hv_params in hv_dict.items():
6199 if hv_name in cluster.enabled_hypervisors:
6200 os_hvp[os_name][hv_name] = hv_params
6202 # Convert ip_family to ip_version
6203 primary_ip_version = constants.IP4_VERSION
6204 if cluster.primary_ip_family == netutils.IP6Address.family:
6205 primary_ip_version = constants.IP6_VERSION
6207 result = {
6208 "software_version": constants.RELEASE_VERSION,
6209 "protocol_version": constants.PROTOCOL_VERSION,
6210 "config_version": constants.CONFIG_VERSION,
6211 "os_api_version": max(constants.OS_API_VERSIONS),
6212 "export_version": constants.EXPORT_VERSION,
6213 "architecture": runtime.GetArchInfo(),
6214 "name": cluster.cluster_name,
6215 "master": cluster.master_node,
6216 "default_hypervisor": cluster.primary_hypervisor,
6217 "enabled_hypervisors": cluster.enabled_hypervisors,
6218 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6219 for hypervisor_name in cluster.enabled_hypervisors]),
6220 "os_hvp": os_hvp,
6221 "beparams": cluster.beparams,
6222 "osparams": cluster.osparams,
6223 "ipolicy": cluster.ipolicy,
6224 "nicparams": cluster.nicparams,
6225 "ndparams": cluster.ndparams,
6226 "diskparams": cluster.diskparams,
6227 "candidate_pool_size": cluster.candidate_pool_size,
6228 "master_netdev": cluster.master_netdev,
6229 "master_netmask": cluster.master_netmask,
6230 "use_external_mip_script": cluster.use_external_mip_script,
6231 "volume_group_name": cluster.volume_group_name,
6232 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6233 "file_storage_dir": cluster.file_storage_dir,
6234 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6235 "maintain_node_health": cluster.maintain_node_health,
6236 "ctime": cluster.ctime,
6237 "mtime": cluster.mtime,
6238 "uuid": cluster.uuid,
6239 "tags": list(cluster.GetTags()),
6240 "uid_pool": cluster.uid_pool,
6241 "default_iallocator": cluster.default_iallocator,
6242 "reserved_lvs": cluster.reserved_lvs,
6243 "primary_ip_version": primary_ip_version,
6244 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6245 "hidden_os": cluster.hidden_os,
6246 "blacklisted_os": cluster.blacklisted_os,
6247 }
6249 return result
6252 class LUClusterConfigQuery(NoHooksLU):
6253 """Return configuration values.
6258 def CheckArguments(self):
6259 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6261 def ExpandNames(self):
6262 self.cq.ExpandNames(self)
6264 def DeclareLocks(self, level):
6265 self.cq.DeclareLocks(self, level)
6267 def Exec(self, feedback_fn):
6268 result = self.cq.OldStyleQuery(self)
6270 assert len(result) == 1
6272 return result[0]
6275 class _ClusterQuery(_QueryBase):
6276 FIELDS = query.CLUSTER_FIELDS
6278 #: Do not sort (there is only one item)
6279 SORT_FIELD = None
6281 def ExpandNames(self, lu):
6282 lu.needed_locks = {}
6284 # The following variables interact with _QueryBase._GetNames
6285 self.wanted = locking.ALL_SET
6286 self.do_locking = self.use_locking
6288 if self.do_locking:
6289 raise errors.OpPrereqError("Can not use locking for cluster queries",
6290 errors.ECODE_INVAL)
6292 def DeclareLocks(self, lu, level):
6293 pass
6295 def _GetQueryData(self, lu):
6296 """Computes the list of nodes and their attributes.
6299 # Locking is not used
6300 assert not (compat.any(lu.glm.is_owned(level)
6301 for level in locking.LEVELS
6302 if level != locking.LEVEL_CLUSTER) or
6303 self.do_locking or self.use_locking)
6305 if query.CQ_CONFIG in self.requested_data:
6306 cluster = lu.cfg.GetClusterInfo()
6307 else:
6308 cluster = NotImplemented
6310 if query.CQ_QUEUE_DRAINED in self.requested_data:
6311 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6312 else:
6313 drain_flag = NotImplemented
6315 if query.CQ_WATCHER_PAUSE in self.requested_data:
6316 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6317 else:
6318 watcher_pause = NotImplemented
6320 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
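# Note: NotImplemented appears to act as a "field not requested" sentinel
# here; consumers of query.ClusterQueryData are expected to skip such
# values rather than treat them as real data.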
6323 class LUInstanceActivateDisks(NoHooksLU):
6324 """Bring up an instance's disks.
6329 def ExpandNames(self):
6330 self._ExpandAndLockInstance()
6331 self.needed_locks[locking.LEVEL_NODE] = []
6332 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6334 def DeclareLocks(self, level):
6335 if level == locking.LEVEL_NODE:
6336 self._LockInstancesNodes()
6338 def CheckPrereq(self):
6339 """Check prerequisites.
6341 This checks that the instance is in the cluster.
6344 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6345 assert self.instance is not None, \
6346 "Cannot retrieve locked instance %s" % self.op.instance_name
6347 _CheckNodeOnline(self, self.instance.primary_node)
6349 def Exec(self, feedback_fn):
6350 """Activate the disks.
6353 disks_ok, disks_info = \
6354 _AssembleInstanceDisks(self, self.instance,
6355 ignore_size=self.op.ignore_size)
6356 if not disks_ok:
6357 raise errors.OpExecError("Cannot activate block devices")
6359 return disks_info
6362 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6363 ignore_size=False):
6364 """Prepare the block devices for an instance.
6366 This sets up the block devices on all nodes.
6368 @type lu: L{LogicalUnit}
6369 @param lu: the logical unit on whose behalf we execute
6370 @type instance: L{objects.Instance}
6371 @param instance: the instance for whose disks we assemble
6372 @type disks: list of L{objects.Disk} or None
6373 @param disks: which disks to assemble (or all, if None)
6374 @type ignore_secondaries: boolean
6375 @param ignore_secondaries: if true, errors on secondary nodes
6376 won't result in an error return from the function
6377 @type ignore_size: boolean
6378 @param ignore_size: if true, the current known size of the disk
6379 will not be used during the disk activation, useful for cases
6380 when the size is wrong
6381 @return: False if the operation failed, otherwise a list of
6382 (host, instance_visible_name, node_visible_name)
6383 with the mapping from node devices to instance devices
6385 """
6386 device_info = []
6387 disks_ok = True
6388 iname = instance.name
6389 disks = _ExpandCheckDisks(instance, disks)
6391 # With the two-pass mechanism we try to reduce the window of
6392 # opportunity for the race condition of switching DRBD to primary
6393 # before handshaking occurred, but we do not eliminate it
6395 # The proper fix would be to wait (with some limits) until the
6396 # connection has been made and drbd transitions from WFConnection
6397 # into any other network-connected state (Connected, SyncTarget,
6398 # SyncSource, etc.)
6400 # 1st pass, assemble on all nodes in secondary mode
6401 for idx, inst_disk in enumerate(disks):
6402 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6403 if ignore_size:
6404 node_disk = node_disk.Copy()
6405 node_disk.UnsetSize()
6406 lu.cfg.SetDiskID(node_disk, node)
6407 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6408 False, idx)
6409 msg = result.fail_msg
6410 if msg:
6411 is_offline_secondary = (node in instance.secondary_nodes and
6412 result.offline)
6413 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6414 " (is_primary=False, pass=1): %s",
6415 inst_disk.iv_name, node, msg)
6416 if not (ignore_secondaries or is_offline_secondary):
6417 disks_ok = False
6419 # FIXME: race condition on drbd migration to primary
6421 # 2nd pass, do only the primary node
6422 for idx, inst_disk in enumerate(disks):
6423 dev_path = None
6425 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6426 if node != instance.primary_node:
6427 continue
6428 if ignore_size:
6429 node_disk = node_disk.Copy()
6430 node_disk.UnsetSize()
6431 lu.cfg.SetDiskID(node_disk, node)
6432 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6433 True, idx)
6434 msg = result.fail_msg
6435 if msg:
6436 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6437 " (is_primary=True, pass=2): %s",
6438 inst_disk.iv_name, node, msg)
6439 disks_ok = False
6440 else:
6441 dev_path = result.payload
6443 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6445 # leave the disks configured for the primary node
6446 # this is a workaround that would be fixed better by
6447 # improving the logical/physical id handling
6448 for disk in disks:
6449 lu.cfg.SetDiskID(disk, instance.primary_node)
6451 return disks_ok, device_info
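# Illustrative use of the return value (cf. LUInstanceActivateDisks.Exec
# above); callers test the boolean and read the list entries as
# (node_name, iv_name, device_path) tuples:
#   disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")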
6454 def _StartInstanceDisks(lu, instance, force):
6455 """Start the disks of an instance.
6458 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6459 ignore_secondaries=force)
6460 if not disks_ok:
6461 _ShutdownInstanceDisks(lu, instance)
6462 if force is not None and not force:
6463 lu.proc.LogWarning("", hint="If the message above refers to a"
6464 " secondary node,"
6465 " you can retry the operation using '--force'.")
6466 raise errors.OpExecError("Disk consistency error")
6469 class LUInstanceDeactivateDisks(NoHooksLU):
6470 """Shutdown an instance's disks.
6475 def ExpandNames(self):
6476 self._ExpandAndLockInstance()
6477 self.needed_locks[locking.LEVEL_NODE] = []
6478 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6480 def DeclareLocks(self, level):
6481 if level == locking.LEVEL_NODE:
6482 self._LockInstancesNodes()
6484 def CheckPrereq(self):
6485 """Check prerequisites.
6487 This checks that the instance is in the cluster.
6490 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6491 assert self.instance is not None, \
6492 "Cannot retrieve locked instance %s" % self.op.instance_name
6494 def Exec(self, feedback_fn):
6495 """Deactivate the disks
6498 instance = self.instance
6499 if self.op.force:
6500 _ShutdownInstanceDisks(self, instance)
6501 else:
6502 _SafeShutdownInstanceDisks(self, instance)
6505 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6506 """Shutdown block devices of an instance.
6508 This function checks if an instance is running, before calling
6509 _ShutdownInstanceDisks.
6512 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6513 _ShutdownInstanceDisks(lu, instance, disks=disks)
6516 def _ExpandCheckDisks(instance, disks):
6517 """Return the instance disks selected by the disks list
6519 @type disks: list of L{objects.Disk} or None
6520 @param disks: selected disks
6521 @rtype: list of L{objects.Disk}
6522 @return: selected instance disks to act on
6524 """
6525 if disks is None:
6526 return instance.disks
6527 else:
6528 if not set(disks).issubset(instance.disks):
6529 raise errors.ProgrammerError("Can only act on disks belonging to the"
6530 " target instance")
6531 return disks
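# Illustrative behaviour: _ExpandCheckDisks(instance, None) acts on all of
# instance.disks, a subset such as [instance.disks[0]] is passed through
# unchanged, and a disk not owned by the instance raises ProgrammerError.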
6534 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6535 """Shutdown block devices of an instance.
6537 This does the shutdown on all nodes of the instance.
6539 If ignore_primary is false, errors on the primary node are
6540 ignored.
6542 """
6543 all_result = True
6544 disks = _ExpandCheckDisks(instance, disks)
6546 for disk in disks:
6547 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6548 lu.cfg.SetDiskID(top_disk, node)
6549 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6550 msg = result.fail_msg
6551 if msg:
6552 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6553 disk.iv_name, node, msg)
6554 if ((node == instance.primary_node and not ignore_primary) or
6555 (node != instance.primary_node and not result.offline)):
6556 all_result = False
6558 return all_result
6560 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6561 """Checks if a node has enough free memory.
6563 This function checks if a given node has the needed amount of free
6564 memory. In case the node has less memory or we cannot get the
6565 information from the node, this function raises an OpPrereqError
6566 exception.
6568 @type lu: C{LogicalUnit}
6569 @param lu: a logical unit from which we get configuration data
6570 @type node: C{str}
6571 @param node: the node to check
6572 @type reason: C{str}
6573 @param reason: string to use in the error message
6574 @type requested: C{int}
6575 @param requested: the amount of memory in MiB to check for
6576 @type hypervisor_name: C{str}
6577 @param hypervisor_name: the hypervisor to ask for memory stats
6578 @rtype: C{int}
6579 @return: node current free memory
6580 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6581 we cannot check the node
6584 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6585 nodeinfo[node].Raise("Can't get data from node %s" % node,
6586 prereq=True, ecode=errors.ECODE_ENVIRON)
6587 (_, _, (hv_info, )) = nodeinfo[node].payload
6589 free_mem = hv_info.get("memory_free", None)
6590 if not isinstance(free_mem, int):
6591 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6592 " was '%s'" % (node, free_mem),
6593 errors.ECODE_ENVIRON)
6594 if requested > free_mem:
6595 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6596 " needed %s MiB, available %s MiB" %
6597 (node, reason, requested, free_mem),
6598 errors.ECODE_NORES)
6599 return free_mem
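# Illustrative call, mirroring LUInstanceStartup.CheckPrereq below (where
# bep is the instance's filled beparams dict):
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MINMEM], instance.hypervisor)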
6602 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6603 """Checks if nodes have enough free disk space in all the VGs.
6605 This function checks if all given nodes have the needed amount of
6606 free disk. In case any node has less disk or we cannot get the
6607 information from the node, this function raises an OpPrereqError
6608 exception.
6610 @type lu: C{LogicalUnit}
6611 @param lu: a logical unit from which we get configuration data
6612 @type nodenames: C{list}
6613 @param nodenames: the list of node names to check
6614 @type req_sizes: C{dict}
6615 @param req_sizes: the hash of vg and corresponding amount of disk in
6616 MiB (like a dict)
6617 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6618 or we cannot check the node
6621 for vg, req_size in req_sizes.items():
6622 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
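# Illustrative req_sizes value (the VG name is hypothetical):
#   {"xenvg": 10240} checks every node in nodenames for 10 GiB of free
# space in volume group "xenvg".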
6625 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6626 """Checks if nodes have enough free disk space in the specified VG.
6628 This function checks if all given nodes have the needed amount of
6629 free disk. In case any node has less disk or we cannot get the
6630 information from the node, this function raises an OpPrereqError
6631 exception.
6633 @type lu: C{LogicalUnit}
6634 @param lu: a logical unit from which we get configuration data
6635 @type nodenames: C{list}
6636 @param nodenames: the list of node names to check
6637 @type vg: C{str}
6638 @param vg: the volume group to check
6639 @type requested: C{int}
6640 @param requested: the amount of disk in MiB to check for
6641 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6642 or we cannot check the node
6645 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6646 for node in nodenames:
6647 info = nodeinfo[node]
6648 info.Raise("Cannot get current information from node %s" % node,
6649 prereq=True, ecode=errors.ECODE_ENVIRON)
6650 (_, (vg_info, ), _) = info.payload
6651 vg_free = vg_info.get("vg_free", None)
6652 if not isinstance(vg_free, int):
6653 raise errors.OpPrereqError("Can't compute free disk space on node"
6654 " %s for vg %s, result was '%s'" %
6655 (node, vg, vg_free), errors.ECODE_ENVIRON)
6656 if requested > vg_free:
6657 raise errors.OpPrereqError("Not enough disk space on target node %s"
6658 " vg %s: required %d MiB, available %d MiB" %
6659 (node, vg, requested, vg_free),
6660 errors.ECODE_NORES)
6663 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6664 """Checks if nodes have enough physical CPUs
6666 This function checks if all given nodes have the needed number of
6667 physical CPUs. In case any node has fewer CPUs or we cannot get the
6668 information from the node, this function raises an OpPrereqError
6669 exception.
6671 @type lu: C{LogicalUnit}
6672 @param lu: a logical unit from which we get configuration data
6673 @type nodenames: C{list}
6674 @param nodenames: the list of node names to check
6675 @type requested: C{int}
6676 @param requested: the minimum acceptable number of physical CPUs
6677 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6678 or we cannot check the node
6681 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6682 for node in nodenames:
6683 info = nodeinfo[node]
6684 info.Raise("Cannot get current information from node %s" % node,
6685 prereq=True, ecode=errors.ECODE_ENVIRON)
6686 (_, _, (hv_info, )) = info.payload
6687 num_cpus = hv_info.get("cpu_total", None)
6688 if not isinstance(num_cpus, int):
6689 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6690 " on node %s, result was '%s'" %
6691 (node, num_cpus), errors.ECODE_ENVIRON)
6692 if requested > num_cpus:
6693 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6694 "required" % (node, num_cpus, requested),
6695 errors.ECODE_NORES)
6698 class LUInstanceStartup(LogicalUnit):
6699 """Starts an instance.
6702 HPATH = "instance-start"
6703 HTYPE = constants.HTYPE_INSTANCE
6706 def CheckArguments(self):
6708 if self.op.beparams:
6709 # fill the beparams dict
6710 objects.UpgradeBeParams(self.op.beparams)
6711 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6713 def ExpandNames(self):
6714 self._ExpandAndLockInstance()
6715 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6717 def DeclareLocks(self, level):
6718 if level == locking.LEVEL_NODE_RES:
6719 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6721 def BuildHooksEnv(self):
6724 This runs on master, primary and secondary nodes of the instance.
6727 env = {
6728 "FORCE": self.op.force,
6729 }
6731 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6733 return env
6735 def BuildHooksNodes(self):
6736 """Build hooks nodes.
6739 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6740 return (nl, nl)
6742 def CheckPrereq(self):
6743 """Check prerequisites.
6745 This checks that the instance is in the cluster.
6748 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6749 assert self.instance is not None, \
6750 "Cannot retrieve locked instance %s" % self.op.instance_name
6753 if self.op.hvparams:
6754 # check hypervisor parameter syntax (locally)
6755 cluster = self.cfg.GetClusterInfo()
6756 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6757 filled_hvp = cluster.FillHV(instance)
6758 filled_hvp.update(self.op.hvparams)
6759 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6760 hv_type.CheckParameterSyntax(filled_hvp)
6761 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6763 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6765 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6767 if self.primary_offline and self.op.ignore_offline_nodes:
6768 self.proc.LogWarning("Ignoring offline primary node")
6770 if self.op.hvparams or self.op.beparams:
6771 self.proc.LogWarning("Overridden parameters are ignored")
6772 else:
6773 _CheckNodeOnline(self, instance.primary_node)
6775 bep = self.cfg.GetClusterInfo().FillBE(instance)
6776 bep.update(self.op.beparams)
6778 # check bridges existence
6779 _CheckInstanceBridgesExist(self, instance)
6781 remote_info = self.rpc.call_instance_info(instance.primary_node,
6782 instance.name,
6783 instance.hypervisor)
6784 remote_info.Raise("Error checking node %s" % instance.primary_node,
6785 prereq=True, ecode=errors.ECODE_ENVIRON)
6786 if not remote_info.payload: # not running already
6787 _CheckNodeFreeMemory(self, instance.primary_node,
6788 "starting instance %s" % instance.name,
6789 bep[constants.BE_MINMEM], instance.hypervisor)
6791 def Exec(self, feedback_fn):
6792 """Start the instance.
6795 instance = self.instance
6796 force = self.op.force
6798 if not self.op.no_remember:
6799 self.cfg.MarkInstanceUp(instance.name)
6801 if self.primary_offline:
6802 assert self.op.ignore_offline_nodes
6803 self.proc.LogInfo("Primary node offline, marked instance as started")
6804 else:
6805 node_current = instance.primary_node
6807 _StartInstanceDisks(self, instance, force)
6809 result = \
6810 self.rpc.call_instance_start(node_current,
6811 (instance, self.op.hvparams,
6812 self.op.beparams),
6813 self.op.startup_paused)
6814 msg = result.fail_msg
6815 if msg:
6816 _ShutdownInstanceDisks(self, instance)
6817 raise errors.OpExecError("Could not start instance: %s" % msg)
6820 class LUInstanceReboot(LogicalUnit):
6821 """Reboot an instance.
6824 HPATH = "instance-reboot"
6825 HTYPE = constants.HTYPE_INSTANCE
6828 def ExpandNames(self):
6829 self._ExpandAndLockInstance()
6831 def BuildHooksEnv(self):
6834 This runs on master, primary and secondary nodes of the instance.
6837 env = {
6838 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6839 "REBOOT_TYPE": self.op.reboot_type,
6840 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6841 }
6843 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6845 return env
6847 def BuildHooksNodes(self):
6848 """Build hooks nodes.
6851 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6852 return (nl, nl)
6854 def CheckPrereq(self):
6855 """Check prerequisites.
6857 This checks that the instance is in the cluster.
6860 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6861 assert self.instance is not None, \
6862 "Cannot retrieve locked instance %s" % self.op.instance_name
6863 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6864 _CheckNodeOnline(self, instance.primary_node)
6866 # check bridges existence
6867 _CheckInstanceBridgesExist(self, instance)
6869 def Exec(self, feedback_fn):
6870 """Reboot the instance.
6873 instance = self.instance
6874 ignore_secondaries = self.op.ignore_secondaries
6875 reboot_type = self.op.reboot_type
6877 remote_info = self.rpc.call_instance_info(instance.primary_node,
6878 instance.name,
6879 instance.hypervisor)
6880 remote_info.Raise("Error checking node %s" % instance.primary_node)
6881 instance_running = bool(remote_info.payload)
6883 node_current = instance.primary_node
6885 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6886 constants.INSTANCE_REBOOT_HARD]:
6887 for disk in instance.disks:
6888 self.cfg.SetDiskID(disk, node_current)
6889 result = self.rpc.call_instance_reboot(node_current, instance,
6890 reboot_type,
6891 self.op.shutdown_timeout)
6892 result.Raise("Could not reboot instance")
6893 else:
6894 if instance_running:
6895 result = self.rpc.call_instance_shutdown(node_current, instance,
6896 self.op.shutdown_timeout)
6897 result.Raise("Could not shutdown instance for full reboot")
6898 _ShutdownInstanceDisks(self, instance)
6899 else:
6900 self.LogInfo("Instance %s was already stopped, starting now",
6901 instance.name)
6902 _StartInstanceDisks(self, instance, ignore_secondaries)
6903 result = self.rpc.call_instance_start(node_current,
6904 (instance, None, None), False)
6905 msg = result.fail_msg
6906 if msg:
6907 _ShutdownInstanceDisks(self, instance)
6908 raise errors.OpExecError("Could not start instance for"
6909 " full reboot: %s" % msg)
6911 self.cfg.MarkInstanceUp(instance.name)
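# Summary of the branches above: soft/hard reboots are delegated to the
# node daemon in a single call_instance_reboot RPC, while a full reboot is
# emulated as shutdown, disk deactivation/activation and a fresh start.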
6914 class LUInstanceShutdown(LogicalUnit):
6915 """Shutdown an instance.
6918 HPATH = "instance-stop"
6919 HTYPE = constants.HTYPE_INSTANCE
6922 def ExpandNames(self):
6923 self._ExpandAndLockInstance()
6925 def BuildHooksEnv(self):
6928 This runs on master, primary and secondary nodes of the instance.
6931 env = _BuildInstanceHookEnvByObject(self, self.instance)
6932 env["TIMEOUT"] = self.op.timeout
6933 return env
6935 def BuildHooksNodes(self):
6936 """Build hooks nodes.
6939 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6940 return (nl, nl)
6942 def CheckPrereq(self):
6943 """Check prerequisites.
6945 This checks that the instance is in the cluster.
6948 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6949 assert self.instance is not None, \
6950 "Cannot retrieve locked instance %s" % self.op.instance_name
6952 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6954 self.primary_offline = \
6955 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6957 if self.primary_offline and self.op.ignore_offline_nodes:
6958 self.proc.LogWarning("Ignoring offline primary node")
6960 _CheckNodeOnline(self, self.instance.primary_node)
6962 def Exec(self, feedback_fn):
6963 """Shutdown the instance.
6966 instance = self.instance
6967 node_current = instance.primary_node
6968 timeout = self.op.timeout
6970 if not self.op.no_remember:
6971 self.cfg.MarkInstanceDown(instance.name)
6973 if self.primary_offline:
6974 assert self.op.ignore_offline_nodes
6975 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6976 else:
6977 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6978 msg = result.fail_msg
6979 if msg:
6980 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6982 _ShutdownInstanceDisks(self, instance)
6985 class LUInstanceReinstall(LogicalUnit):
6986 """Reinstall an instance.
6989 HPATH = "instance-reinstall"
6990 HTYPE = constants.HTYPE_INSTANCE
6993 def ExpandNames(self):
6994 self._ExpandAndLockInstance()
6996 def BuildHooksEnv(self):
6999 This runs on master, primary and secondary nodes of the instance.
7002 return _BuildInstanceHookEnvByObject(self, self.instance)
7004 def BuildHooksNodes(self):
7005 """Build hooks nodes.
7008 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7009 return (nl, nl)
7011 def CheckPrereq(self):
7012 """Check prerequisites.
7014 This checks that the instance is in the cluster and is not running.
7017 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7018 assert instance is not None, \
7019 "Cannot retrieve locked instance %s" % self.op.instance_name
7020 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7021 " offline, cannot reinstall")
7023 if instance.disk_template == constants.DT_DISKLESS:
7024 raise errors.OpPrereqError("Instance '%s' has no disks" %
7025 self.op.instance_name,
7026 errors.ECODE_INVAL)
7027 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7029 if self.op.os_type is not None:
7030 # OS verification
7031 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7032 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7033 instance_os = self.op.os_type
7034 else:
7035 instance_os = instance.os
7037 nodelist = list(instance.all_nodes)
7039 if self.op.osparams:
7040 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7041 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7042 self.os_inst = i_osdict # the new dict (without defaults)
7043 else:
7044 self.os_inst = {}
7046 self.instance = instance
7048 def Exec(self, feedback_fn):
7049 """Reinstall the instance.
7052 inst = self.instance
7054 if self.op.os_type is not None:
7055 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7056 inst.os = self.op.os_type
7057 # Write to configuration
7058 self.cfg.Update(inst, feedback_fn)
7060 _StartInstanceDisks(self, inst, None)
7061 try:
7062 feedback_fn("Running the instance OS create scripts...")
7063 # FIXME: pass debug option from opcode to backend
7064 result = self.rpc.call_instance_os_add(inst.primary_node,
7065 (inst, self.os_inst), True,
7066 self.op.debug_level)
7067 result.Raise("Could not install OS for instance %s on node %s" %
7068 (inst.name, inst.primary_node))
7069 finally:
7070 _ShutdownInstanceDisks(self, inst)
7073 class LUInstanceRecreateDisks(LogicalUnit):
7074 """Recreate an instance's missing disks.
7077 HPATH = "instance-recreate-disks"
7078 HTYPE = constants.HTYPE_INSTANCE
7081 _MODIFYABLE = frozenset([
7082 constants.IDISK_SIZE,
7083 constants.IDISK_MODE,
7084 ])
7086 # New or changed disk parameters may have different semantics
7087 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7088 constants.IDISK_ADOPT,
7090 # TODO: Implement support changing VG while recreating
7091 constants.IDISK_VG,
7092 constants.IDISK_METAVG,
7093 ]))
7095 def CheckArguments(self):
7096 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7097 # Normalize and convert deprecated list of disk indices
7098 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7100 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7101 if duplicates:
7102 raise errors.OpPrereqError("Some disks have been specified more than"
7103 " once: %s" % utils.CommaJoin(duplicates),
7104 errors.ECODE_INVAL)
7106 for (idx, params) in self.op.disks:
7107 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7108 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7109 if unsupported:
7110 raise errors.OpPrereqError("Parameters for disk %s try to change"
7111 " unmodifiable parameter(s): %s" %
7112 (idx, utils.CommaJoin(unsupported)),
7113 errors.ECODE_INVAL)
7115 def ExpandNames(self):
7116 self._ExpandAndLockInstance()
7117 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7118 if self.op.nodes:
7119 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7120 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7121 else:
7122 self.needed_locks[locking.LEVEL_NODE] = []
7123 self.needed_locks[locking.LEVEL_NODE_RES] = []
7125 def DeclareLocks(self, level):
7126 if level == locking.LEVEL_NODE:
7127 # if we replace the nodes, we only need to lock the old primary,
7128 # otherwise we need to lock all nodes for disk re-creation
7129 primary_only = bool(self.op.nodes)
7130 self._LockInstancesNodes(primary_only=primary_only)
7131 elif level == locking.LEVEL_NODE_RES:
7133 self.needed_locks[locking.LEVEL_NODE_RES] = \
7134 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7136 def BuildHooksEnv(self):
7139 This runs on master, primary and secondary nodes of the instance.
7142 return _BuildInstanceHookEnvByObject(self, self.instance)
7144 def BuildHooksNodes(self):
7145 """Build hooks nodes.
7148 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7151 def CheckPrereq(self):
7152 """Check prerequisites.
7154 This checks that the instance is in the cluster and is not running.
7157 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7158 assert instance is not None, \
7159 "Cannot retrieve locked instance %s" % self.op.instance_name
7160 if self.op.nodes:
7161 if len(self.op.nodes) != len(instance.all_nodes):
7162 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7163 " %d replacement nodes were specified" %
7164 (instance.name, len(instance.all_nodes),
7165 len(self.op.nodes)),
7166 errors.ECODE_INVAL)
7167 assert instance.disk_template != constants.DT_DRBD8 or \
7168 len(self.op.nodes) == 2
7169 assert instance.disk_template != constants.DT_PLAIN or \
7170 len(self.op.nodes) == 1
7171 primary_node = self.op.nodes[0]
7172 else:
7173 primary_node = instance.primary_node
7174 _CheckNodeOnline(self, primary_node)
7176 if instance.disk_template == constants.DT_DISKLESS:
7177 raise errors.OpPrereqError("Instance '%s' has no disks" %
7178 self.op.instance_name, errors.ECODE_INVAL)
7180 # if we replace nodes *and* the old primary is offline, we don't
7181 # care if the instance is up
7182 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7183 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7184 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7185 if not (self.op.nodes and old_pnode.offline):
7186 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7187 msg="cannot recreate disks")
7189 if self.op.disks:
7190 self.disks = dict(self.op.disks)
7191 else:
7192 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7194 maxidx = max(self.disks.keys())
7195 if maxidx >= len(instance.disks):
7196 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7197 errors.ECODE_PARAMS)
7199 if (self.op.nodes and
7200 sorted(self.disks.keys()) != range(len(instance.disks))):
7201 raise errors.OpPrereqError("Can't recreate disks partially and"
7202 " change the nodes at the same time",
7203 errors.ECODE_INVAL)
7205 self.instance = instance
7207 def Exec(self, feedback_fn):
7208 """Recreate the disks.
7211 instance = self.instance
7213 assert (self.owned_locks(locking.LEVEL_NODE) ==
7214 self.owned_locks(locking.LEVEL_NODE_RES))
7216 to_skip = []
7217 mods = [] # keeps track of needed changes
7219 for idx, disk in enumerate(instance.disks):
7220 try:
7221 changes = self.disks[idx]
7222 except KeyError:
7223 # Disk should not be recreated
7224 to_skip.append(idx)
7225 continue
7227 # update secondaries for disks, if needed
7228 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7229 # need to update the nodes and minors
7230 assert len(self.op.nodes) == 2
7231 assert len(disk.logical_id) == 6 # otherwise disk internals
7233 (_, _, old_port, _, _, old_secret) = disk.logical_id
7234 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7235 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7236 new_minors[0], new_minors[1], old_secret)
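# Assumed DRBD8 logical_id layout, as unpacked above:
#   (node_a, node_b, port, minor_a, minor_b, secret)
# Recreating on new nodes swaps the node names and minors but keeps the
# existing port and shared secret.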
7237 assert len(disk.logical_id) == len(new_id)
7238 else:
7239 new_id = None
7241 mods.append((idx, new_id, changes))
7243 # now that we have passed all asserts above, we can apply the mods
7244 # in a single run (to avoid partial changes)
7245 for idx, new_id, changes in mods:
7246 disk = instance.disks[idx]
7247 if new_id is not None:
7248 assert disk.dev_type == constants.LD_DRBD8
7249 disk.logical_id = new_id
7250 if changes:
7251 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7252 mode=changes.get(constants.IDISK_MODE, None))
7254 # change primary node, if needed
7255 if self.op.nodes:
7256 instance.primary_node = self.op.nodes[0]
7257 self.LogWarning("Changing the instance's nodes, you will have to"
7258 " remove any disks left on the older nodes manually")
7260 if self.op.nodes:
7261 self.cfg.Update(instance, feedback_fn)
7263 _CreateDisks(self, instance, to_skip=to_skip)
7266 class LUInstanceRename(LogicalUnit):
7267 """Rename an instance.
7270 HPATH = "instance-rename"
7271 HTYPE = constants.HTYPE_INSTANCE
7273 def CheckArguments(self):
7277 if self.op.ip_check and not self.op.name_check:
7278 # TODO: make the ip check more flexible and not depend on the name check
7279 raise errors.OpPrereqError("IP address check requires a name check",
7280 errors.ECODE_INVAL)
7282 def BuildHooksEnv(self):
7285 This runs on master, primary and secondary nodes of the instance.
7288 env = _BuildInstanceHookEnvByObject(self, self.instance)
7289 env["INSTANCE_NEW_NAME"] = self.op.new_name
7290 return env
7292 def BuildHooksNodes(self):
7293 """Build hooks nodes.
7296 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7297 return (nl, nl)
7299 def CheckPrereq(self):
7300 """Check prerequisites.
7302 This checks that the instance is in the cluster and is not running.
7305 self.op.instance_name = _ExpandInstanceName(self.cfg,
7306 self.op.instance_name)
7307 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7308 assert instance is not None
7309 _CheckNodeOnline(self, instance.primary_node)
7310 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7311 msg="cannot rename")
7312 self.instance = instance
7314 new_name = self.op.new_name
7315 if self.op.name_check:
7316 hostname = _CheckHostnameSane(self, new_name)
7317 new_name = self.op.new_name = hostname.name
7318 if (self.op.ip_check and
7319 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7320 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7321 (hostname.ip, new_name),
7322 errors.ECODE_NOTUNIQUE)
7324 instance_list = self.cfg.GetInstanceList()
7325 if new_name in instance_list and new_name != instance.name:
7326 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7327 new_name, errors.ECODE_EXISTS)
7329 def Exec(self, feedback_fn):
7330 """Rename the instance.
7333 inst = self.instance
7334 old_name = inst.name
7336 rename_file_storage = False
7337 if (inst.disk_template in constants.DTS_FILEBASED and
7338 self.op.new_name != inst.name):
7339 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7340 rename_file_storage = True
7342 self.cfg.RenameInstance(inst.name, self.op.new_name)
7343 # Change the instance lock. This is definitely safe while we hold the BGL.
7344 # Otherwise the new lock would have to be added in acquired mode.
7345 assert self.REQ_BGL, "Wrong BGL settings"
7346 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7347 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7349 # re-read the instance from the configuration after rename
7350 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7352 if rename_file_storage:
7353 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7354 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7355 old_file_storage_dir,
7356 new_file_storage_dir)
7357 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7358 " (but the instance has been renamed in Ganeti)" %
7359 (inst.primary_node, old_file_storage_dir,
7360 new_file_storage_dir))
7362 _StartInstanceDisks(self, inst, None)
7363 try:
7364 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7365 old_name, self.op.debug_level)
7366 msg = result.fail_msg
7367 if msg:
7368 msg = ("Could not run OS rename script for instance %s on node %s"
7369 " (but the instance has been renamed in Ganeti): %s" %
7370 (inst.name, inst.primary_node, msg))
7371 self.proc.LogWarning(msg)
7372 finally:
7373 _ShutdownInstanceDisks(self, inst)
7375 return inst.name
7378 class LUInstanceRemove(LogicalUnit):
7379 """Remove an instance.
7382 HPATH = "instance-remove"
7383 HTYPE = constants.HTYPE_INSTANCE
7386 def ExpandNames(self):
7387 self._ExpandAndLockInstance()
7388 self.needed_locks[locking.LEVEL_NODE] = []
7389 self.needed_locks[locking.LEVEL_NODE_RES] = []
7390 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7392 def DeclareLocks(self, level):
7393 if level == locking.LEVEL_NODE:
7394 self._LockInstancesNodes()
7395 elif level == locking.LEVEL_NODE_RES:
7397 self.needed_locks[locking.LEVEL_NODE_RES] = \
7398 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7400 def BuildHooksEnv(self):
7403 This runs on master, primary and secondary nodes of the instance.
7406 env = _BuildInstanceHookEnvByObject(self, self.instance)
7407 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7410 def BuildHooksNodes(self):
7411 """Build hooks nodes.
7414 nl = [self.cfg.GetMasterNode()]
7415 nl_post = list(self.instance.all_nodes) + nl
7416 return (nl, nl_post)
7418 def CheckPrereq(self):
7419 """Check prerequisites.
7421 This checks that the instance is in the cluster.
7424 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7425 assert self.instance is not None, \
7426 "Cannot retrieve locked instance %s" % self.op.instance_name
7428 def Exec(self, feedback_fn):
7429 """Remove the instance.
7432 instance = self.instance
7433 logging.info("Shutting down instance %s on node %s",
7434 instance.name, instance.primary_node)
7436 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7437 self.op.shutdown_timeout)
7438 msg = result.fail_msg
7439 if msg:
7440 if self.op.ignore_failures:
7441 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7442 else:
7443 raise errors.OpExecError("Could not shutdown instance %s on"
7444 " node %s: %s" %
7445 (instance.name, instance.primary_node, msg))
7447 assert (self.owned_locks(locking.LEVEL_NODE) ==
7448 self.owned_locks(locking.LEVEL_NODE_RES))
7449 assert not (set(instance.all_nodes) -
7450 self.owned_locks(locking.LEVEL_NODE)), \
7451 "Not owning correct locks"
7453 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7456 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7457 """Utility function to remove an instance.
7460 logging.info("Removing block devices for instance %s", instance.name)
7462 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7463 if not ignore_failures:
7464 raise errors.OpExecError("Can't remove instance's disks")
7465 feedback_fn("Warning: can't remove instance's disks")
7467 logging.info("Removing instance %s out of cluster config", instance.name)
7469 lu.cfg.RemoveInstance(instance.name)
7471 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7472 "Instance lock removal conflict"
7474 # Remove lock for the instance
7475 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
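# Ordering note: block devices are removed first, then the configuration
# entry, and only then is the instance lock scheduled for removal,
# presumably so no concurrent LU can observe a lockable instance name
# without a configuration entry behind it.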
7478 class LUInstanceQuery(NoHooksLU):
7479 """Logical unit for querying instances.
7482 # pylint: disable=W0142
7485 def CheckArguments(self):
7486 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7487 self.op.output_fields, self.op.use_locking)
7489 def ExpandNames(self):
7490 self.iq.ExpandNames(self)
7492 def DeclareLocks(self, level):
7493 self.iq.DeclareLocks(self, level)
7495 def Exec(self, feedback_fn):
7496 return self.iq.OldStyleQuery(self)
7499 class LUInstanceFailover(LogicalUnit):
7500 """Failover an instance.
7503 HPATH = "instance-failover"
7504 HTYPE = constants.HTYPE_INSTANCE
7507 def CheckArguments(self):
7508 """Check the arguments.
7511 self.iallocator = getattr(self.op, "iallocator", None)
7512 self.target_node = getattr(self.op, "target_node", None)
7514 def ExpandNames(self):
7515 self._ExpandAndLockInstance()
7517 if self.op.target_node is not None:
7518 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7520 self.needed_locks[locking.LEVEL_NODE] = []
7521 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7523 self.needed_locks[locking.LEVEL_NODE_RES] = []
7524 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7526 ignore_consistency = self.op.ignore_consistency
7527 shutdown_timeout = self.op.shutdown_timeout
7528 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7531 ignore_consistency=ignore_consistency,
7532 shutdown_timeout=shutdown_timeout,
7533 ignore_ipolicy=self.op.ignore_ipolicy)
7534 self.tasklets = [self._migrater]
7536 def DeclareLocks(self, level):
7537 if level == locking.LEVEL_NODE:
7538 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7539 if instance.disk_template in constants.DTS_EXT_MIRROR:
7540 if self.op.target_node is None:
7541 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7543 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7544 self.op.target_node]
7545 del self.recalculate_locks[locking.LEVEL_NODE]
7547 self._LockInstancesNodes()
7548 elif level == locking.LEVEL_NODE_RES:
7550 self.needed_locks[locking.LEVEL_NODE_RES] = \
7551 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7553 def BuildHooksEnv(self):
7556 This runs on master, primary and secondary nodes of the instance.
7559 instance = self._migrater.instance
7560 source_node = instance.primary_node
7561 target_node = self.op.target_node
7563 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7564 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7565 "OLD_PRIMARY": source_node,
7566 "NEW_PRIMARY": target_node,
7569 if instance.disk_template in constants.DTS_INT_MIRROR:
7570 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7571 env["NEW_SECONDARY"] = source_node
7573 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7575 env.update(_BuildInstanceHookEnvByObject(self, instance))
7579 def BuildHooksNodes(self):
7580 """Build hooks nodes.
7583 instance = self._migrater.instance
7584 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7585 return (nl, nl + [instance.primary_node])
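# Example hook environment produced above for a DRBD-backed instance failing
# over from "node1" to its secondary "node2" (hypothetical names and values):
#
#   {"IGNORE_CONSISTENCY": False, "SHUTDOWN_TIMEOUT": 120,
#    "OLD_PRIMARY": "node1", "NEW_PRIMARY": "node2",
#    "OLD_SECONDARY": "node2", "NEW_SECONDARY": "node1", ...}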
7588 class LUInstanceMigrate(LogicalUnit):
7589 """Migrate an instance.
7591 This is migration without shutting down, as opposed to failover,
7592 which is done with a shutdown.
7595 HPATH = "instance-migrate"
7596 HTYPE = constants.HTYPE_INSTANCE
7599 def ExpandNames(self):
7600 self._ExpandAndLockInstance()
7602 if self.op.target_node is not None:
7603 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7605 self.needed_locks[locking.LEVEL_NODE] = []
7606 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7608 self.needed_locks[locking.LEVEL_NODE] = []
7609 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7612 TLMigrateInstance(self, self.op.instance_name,
7613 cleanup=self.op.cleanup,
7615 fallback=self.op.allow_failover,
7616 allow_runtime_changes=self.op.allow_runtime_changes,
7617 ignore_ipolicy=self.op.ignore_ipolicy)
7618 self.tasklets = [self._migrater]
7620 def DeclareLocks(self, level):
7621 if level == locking.LEVEL_NODE:
7622 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7623 if instance.disk_template in constants.DTS_EXT_MIRROR:
7624 if self.op.target_node is None:
7625 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7627 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7628 self.op.target_node]
7629 del self.recalculate_locks[locking.LEVEL_NODE]
7631 self._LockInstancesNodes()
7632 elif level == locking.LEVEL_NODE_RES:
7634 self.needed_locks[locking.LEVEL_NODE_RES] = \
7635 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7637 def BuildHooksEnv(self):
7640 This runs on master, primary and secondary nodes of the instance.
7643 instance = self._migrater.instance
7644 source_node = instance.primary_node
7645 target_node = self.op.target_node
7646 env = _BuildInstanceHookEnvByObject(self, instance)
7648 "MIGRATE_LIVE": self._migrater.live,
7649 "MIGRATE_CLEANUP": self.op.cleanup,
7650 "OLD_PRIMARY": source_node,
7651 "NEW_PRIMARY": target_node,
7652 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7655 if instance.disk_template in constants.DTS_INT_MIRROR:
7656 env["OLD_SECONDARY"] = target_node
7657 env["NEW_SECONDARY"] = source_node
7659 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7663 def BuildHooksNodes(self):
7664 """Build hooks nodes.
7667 instance = self._migrater.instance
7668 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7669 return (nl, nl + [instance.primary_node])
7672 class LUInstanceMove(LogicalUnit):
7673 """Move an instance by data-copying.
7676 HPATH = "instance-move"
7677 HTYPE = constants.HTYPE_INSTANCE
7680 def ExpandNames(self):
7681 self._ExpandAndLockInstance()
7682 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7683 self.op.target_node = target_node
7684 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7685 self.needed_locks[locking.LEVEL_NODE_RES] = []
7686 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7688 def DeclareLocks(self, level):
7689 if level == locking.LEVEL_NODE:
7690 self._LockInstancesNodes(primary_only=True)
7691 elif level == locking.LEVEL_NODE_RES:
7693 self.needed_locks[locking.LEVEL_NODE_RES] = \
7694 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7696 def BuildHooksEnv(self):
7699 This runs on master, primary and secondary nodes of the instance.
7703 "TARGET_NODE": self.op.target_node,
7704 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7706 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7709 def BuildHooksNodes(self):
7710 """Build hooks nodes.
7714 self.cfg.GetMasterNode(),
7715 self.instance.primary_node,
7716 self.op.target_node,
7720 def CheckPrereq(self):
7721 """Check prerequisites.
7723 This checks that the instance is in the cluster.
7726 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7727 assert self.instance is not None, \
7728 "Cannot retrieve locked instance %s" % self.op.instance_name
7730 node = self.cfg.GetNodeInfo(self.op.target_node)
7731 assert node is not None, \
7732 "Cannot retrieve locked node %s" % self.op.target_node
7734 self.target_node = target_node = node.name
7736 if target_node == instance.primary_node:
7737 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7738 (instance.name, target_node),
7741 bep = self.cfg.GetClusterInfo().FillBE(instance)
7743 for idx, dsk in enumerate(instance.disks):
7744 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7745 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7746 " cannot copy" % idx, errors.ECODE_STATE)
7748 _CheckNodeOnline(self, target_node)
7749 _CheckNodeNotDrained(self, target_node)
7750 _CheckNodeVmCapable(self, target_node)
7751 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7752 self.cfg.GetNodeGroup(node.group))
7753 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7754 ignore=self.op.ignore_ipolicy)
7756 if instance.admin_state == constants.ADMINST_UP:
7757 # check memory requirements on the target node
7758 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7759 instance.name, bep[constants.BE_MAXMEM],
7760 instance.hypervisor)
7762 self.LogInfo("Not checking memory on the target node as"
7763 " instance will not be started")
7765 # check bridge existence
7766 _CheckInstanceBridgesExist(self, instance, node=target_node)
7768 def Exec(self, feedback_fn):
7769 """Move an instance.
7771 The move is done by shutting it down on its present node, copying
7772 the data over (slow) and starting it on the new node.
7775 instance = self.instance
7777 source_node = instance.primary_node
7778 target_node = self.target_node
7780 self.LogInfo("Shutting down instance %s on source node %s",
7781 instance.name, source_node)
7783 assert (self.owned_locks(locking.LEVEL_NODE) ==
7784 self.owned_locks(locking.LEVEL_NODE_RES))
7786 result = self.rpc.call_instance_shutdown(source_node, instance,
7787 self.op.shutdown_timeout)
7788 msg = result.fail_msg
7790 if self.op.ignore_consistency:
7791 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7792 " Proceeding anyway. Please make sure node"
7793 " %s is down. Error details: %s",
7794 instance.name, source_node, source_node, msg)
7796 raise errors.OpExecError("Could not shutdown instance %s on"
7798 (instance.name, source_node, msg))
7800 # create the target disks
7802 _CreateDisks(self, instance, target_node=target_node)
7803 except errors.OpExecError:
7804 self.LogWarning("Device creation failed, reverting...")
7806 _RemoveDisks(self, instance, target_node=target_node)
7808 self.cfg.ReleaseDRBDMinors(instance.name)
7811 cluster_name = self.cfg.GetClusterInfo().cluster_name
7814 # activate, get path, copy the data over
7815 for idx, disk in enumerate(instance.disks):
7816 self.LogInfo("Copying data for disk %d", idx)
7817 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7818 instance.name, True, idx)
7820 self.LogWarning("Can't assemble newly created disk %d: %s",
7821 idx, result.fail_msg)
7822 errs.append(result.fail_msg)
7824 dev_path = result.payload
7825 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7826 target_node, dev_path,
7829 self.LogWarning("Can't copy data over for disk %d: %s",
7830 idx, result.fail_msg)
7831 errs.append(result.fail_msg)
7835 self.LogWarning("Some disks failed to copy, aborting")
7837 _RemoveDisks(self, instance, target_node=target_node)
7839 self.cfg.ReleaseDRBDMinors(instance.name)
7840 raise errors.OpExecError("Errors during disk copy: %s" %
7843 instance.primary_node = target_node
7844 self.cfg.Update(instance, feedback_fn)
7846 self.LogInfo("Removing the disks on the original node")
7847 _RemoveDisks(self, instance, target_node=source_node)
7849 # Only start the instance if it's marked as up
7850 if instance.admin_state == constants.ADMINST_UP:
7851 self.LogInfo("Starting instance %s on node %s",
7852 instance.name, target_node)
7854 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7855 ignore_secondaries=True)
7857 _ShutdownInstanceDisks(self, instance)
7858 raise errors.OpExecError("Can't activate the instance's disks")
7860 result = self.rpc.call_instance_start(target_node,
7861 (instance, None, None), False)
7862 msg = result.fail_msg
7864 _ShutdownInstanceDisks(self, instance)
7865 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7866 (instance.name, target_node, msg))
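# Client-side sketch (hypothetical names): a move is requested by submitting
# the corresponding opcode, which is then executed by this LU:
#
#   op = opcodes.OpInstanceMove(instance_name="inst1", target_node="node2",
#                               shutdown_timeout=120)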
7869 class LUNodeMigrate(LogicalUnit):
7870 """Migrate all instances from a node.
7873 HPATH = "node-migrate"
7874 HTYPE = constants.HTYPE_NODE
7877 def CheckArguments(self):
7880 def ExpandNames(self):
7881 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7883 self.share_locks = _ShareAll()
7884 self.needed_locks = {
7885 locking.LEVEL_NODE: [self.op.node_name],
7888 def BuildHooksEnv(self):
7891 This runs on the master, the primary and all the secondaries.
7895 "NODE_NAME": self.op.node_name,
7896 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7899 def BuildHooksNodes(self):
7900 """Build hooks nodes.
7903 nl = [self.cfg.GetMasterNode()]
7906 def CheckPrereq(self):
7909 def Exec(self, feedback_fn):
7910 # Prepare jobs for migrating instances
7911 allow_runtime_changes = self.op.allow_runtime_changes
7913 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7916 iallocator=self.op.iallocator,
7917 target_node=self.op.target_node,
7918 allow_runtime_changes=allow_runtime_changes,
7919 ignore_ipolicy=self.op.ignore_ipolicy)]
7920 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7923 # TODO: Run iallocator in this opcode and pass correct placement options to
7924 # OpInstanceMigrate. Since other jobs can modify the cluster between
7925 # running the iallocator and the actual migration, a good consistency model
7926 # will have to be found.
7928 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7929 frozenset([self.op.node_name]))
7931 return ResultWithJobs(jobs)
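# The jobs argument above is a list of single-opcode jobs, one per primary
# instance on the evacuated node, e.g. (hypothetical instance names):
#
#   jobs == [[opcodes.OpInstanceMigrate(instance_name="inst1", ...)],
#            [opcodes.OpInstanceMigrate(instance_name="inst2", ...)]]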
7934 class TLMigrateInstance(Tasklet):
7935 """Tasklet class for instance migration.
7938 @ivar live: whether the migration will be done live or non-live;
7939 this variable is initialized only after CheckPrereq has run
7940 @type cleanup: boolean
7941 @ivar cleanup: Whether we are cleaning up from a failed migration
7942 @type iallocator: string
7943 @ivar iallocator: The iallocator used to determine target_node
7944 @type target_node: string
7945 @ivar target_node: If given, the target_node to reallocate the instance to
7946 @type failover: boolean
7947 @ivar failover: Whether operation results in failover or migration
7948 @type fallback: boolean
7949 @ivar fallback: Whether fallback to failover is allowed if migration not
7951 @type ignore_consistency: boolean
7952 @ivar ignore_consistency: Whether we should ignore consistency between source
7954 @type shutdown_timeout: int
7955 @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
7956 @type ignore_ipolicy: bool
7957 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7962 _MIGRATION_POLL_INTERVAL = 1 # seconds
7963 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7965 def __init__(self, lu, instance_name, cleanup=False,
7966 failover=False, fallback=False,
7967 ignore_consistency=False,
7968 allow_runtime_changes=True,
7969 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7970 ignore_ipolicy=False):
7971 """Initializes this class.
7974 Tasklet.__init__(self, lu)
7977 self.instance_name = instance_name
7978 self.cleanup = cleanup
7979 self.live = False # will be overridden later
7980 self.failover = failover
7981 self.fallback = fallback
7982 self.ignore_consistency = ignore_consistency
7983 self.shutdown_timeout = shutdown_timeout
7984 self.ignore_ipolicy = ignore_ipolicy
7985 self.allow_runtime_changes = allow_runtime_changes
7987 def CheckPrereq(self):
7988 """Check prerequisites.
7990 This checks that the instance is in the cluster.
7993 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7994 instance = self.cfg.GetInstanceInfo(instance_name)
7995 assert instance is not None
7996 self.instance = instance
7997 cluster = self.cfg.GetClusterInfo()
7999 if (not self.cleanup and
8000 instance.admin_state != constants.ADMINST_UP and
8001 not self.failover and self.fallback):
8002 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8003 " switching to failover")
8004 self.failover = True
8006 if instance.disk_template not in constants.DTS_MIRRORED:
8011 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8012 " %s" % (instance.disk_template, text),
8015 if instance.disk_template in constants.DTS_EXT_MIRROR:
8016 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8018 if self.lu.op.iallocator:
8019 self._RunAllocator()
8021 # We set self.target_node as it is required by
8023 self.target_node = self.lu.op.target_node
8025 # Check that the target node is correct in terms of instance policy
8026 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8027 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8028 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8029 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8030 ignore=self.ignore_ipolicy)
8032 # self.target_node is already populated, either directly or by the
8034 target_node = self.target_node
8035 if self.target_node == instance.primary_node:
8036 raise errors.OpPrereqError("Cannot migrate instance %s"
8037 " to its primary (%s)" %
8038 (instance.name, instance.primary_node), errors.ECODE_STATE)
8040 if len(self.lu.tasklets) == 1:
8041 # It is safe to release locks only when we're the only tasklet
8043 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8044 keep=[instance.primary_node, self.target_node])
8047 secondary_nodes = instance.secondary_nodes
8048 if not secondary_nodes:
8049 raise errors.ConfigurationError("No secondary node but using"
8050 " %s disk template" %
8051 instance.disk_template)
8052 target_node = secondary_nodes[0]
8053 if self.lu.op.iallocator or (self.lu.op.target_node and
8054 self.lu.op.target_node != target_node):
8056 text = "failed over"
8059 raise errors.OpPrereqError("Instances with disk template %s cannot"
8060 " be %s to arbitrary nodes"
8061 " (neither an iallocator nor a target"
8062 " node can be passed)" %
8063 (instance.disk_template, text),
8065 nodeinfo = self.cfg.GetNodeInfo(target_node)
8066 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8067 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8068 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8069 ignore=self.ignore_ipolicy)
8071 i_be = cluster.FillBE(instance)
8073 # check memory requirements on the target node
8074 if (not self.cleanup and
8075 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8076 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8077 "migrating instance %s" %
8079 i_be[constants.BE_MINMEM],
8080 instance.hypervisor)
8082 self.lu.LogInfo("Not checking memory on the target node as"
8083 " instance will not be started")
8085 # check if failover must be forced instead of migration
8086 if (not self.cleanup and not self.failover and
8087 i_be[constants.BE_ALWAYS_FAILOVER]):
8088 self.lu.LogInfo("Instance configured to always failover; fallback"
8090 self.failover = True
8092 # check bridge existence
8093 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8095 if not self.cleanup:
8096 _CheckNodeNotDrained(self.lu, target_node)
8097 if not self.failover:
8098 result = self.rpc.call_instance_migratable(instance.primary_node,
8100 if result.fail_msg and self.fallback:
8101 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8103 self.failover = True
8105 result.Raise("Can't migrate, please use failover",
8106 prereq=True, ecode=errors.ECODE_STATE)
8108 assert not (self.failover and self.cleanup)
8110 if not self.failover:
8111 if self.lu.op.live is not None and self.lu.op.mode is not None:
8112 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8113 " parameters are accepted",
8115 if self.lu.op.live is not None:
8117 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8119 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8120 # reset the 'live' parameter to None so that repeated
8121 # invocations of CheckPrereq do not raise an exception
8122 self.lu.op.live = None
8123 elif self.lu.op.mode is None:
8124 # read the default value from the hypervisor
8125 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8126 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8128 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8130 # Failover is never live
8133 if not (self.failover or self.cleanup):
8134 remote_info = self.rpc.call_instance_info(instance.primary_node,
8136 instance.hypervisor)
8137 remote_info.Raise("Error checking instance on node %s" %
8138 instance.primary_node)
8139 instance_running = bool(remote_info.payload)
8140 if instance_running:
8141 self.current_mem = int(remote_info.payload["memory"])
8143 def _RunAllocator(self):
8144 """Run the allocator based on input opcode.
8147 # FIXME: add a self.ignore_ipolicy option
8148 ial = IAllocator(self.cfg, self.rpc,
8149 mode=constants.IALLOCATOR_MODE_RELOC,
8150 name=self.instance_name,
8151 relocate_from=[self.instance.primary_node],
8154 ial.Run(self.lu.op.iallocator)
8157 raise errors.OpPrereqError("Can't compute nodes using"
8158 " iallocator '%s': %s" %
8159 (self.lu.op.iallocator, ial.info),
8161 if len(ial.result) != ial.required_nodes:
8162 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8163 " of nodes (%s), required %s" %
8164 (self.lu.op.iallocator, len(ial.result),
8165 ial.required_nodes), errors.ECODE_FAULT)
8166 self.target_node = ial.result[0]
8167 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8168 self.instance_name, self.lu.op.iallocator,
8169 utils.CommaJoin(ial.result))
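# Sketch of a successful relocation result (hypothetical values): for a
# single-instance relocation, ial.required_nodes is 1, so e.g.
#
#   ial.success == True and ial.result == ["node3"]
#
# makes "node3" the migration target.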
8171 def _WaitUntilSync(self):
8172 """Poll with custom rpc for disk sync.
8174 This uses our own step-based rpc call.
8177 self.feedback_fn("* wait until resync is done")
8181 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8183 (self.instance.disks,
8186 for node, nres in result.items():
8187 nres.Raise("Cannot resync disks on node %s" % node)
8188 node_done, node_percent = nres.payload
8189 all_done = all_done and node_done
8190 if node_percent is not None:
8191 min_percent = min(min_percent, node_percent)
8193 if min_percent < 100:
8194 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8197 def _EnsureSecondary(self, node):
8198 """Demote a node to secondary.
8201 self.feedback_fn("* switching node %s to secondary mode" % node)
8203 for dev in self.instance.disks:
8204 self.cfg.SetDiskID(dev, node)
8206 result = self.rpc.call_blockdev_close(node, self.instance.name,
8207 self.instance.disks)
8208 result.Raise("Cannot change disk to secondary on node %s" % node)
8210 def _GoStandalone(self):
8211 """Disconnect from the network.
8214 self.feedback_fn("* changing into standalone mode")
8215 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8216 self.instance.disks)
8217 for node, nres in result.items():
8218 nres.Raise("Cannot disconnect disks on node %s" % node)
8220 def _GoReconnect(self, multimaster):
8221 """Reconnect to the network.
8227 msg = "single-master"
8228 self.feedback_fn("* changing disks into %s mode" % msg)
8229 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8230 (self.instance.disks, self.instance),
8231 self.instance.name, multimaster)
8232 for node, nres in result.items():
8233 nres.Raise("Cannot change disks config on node %s" % node)
8235 def _ExecCleanup(self):
8236 """Try to cleanup after a failed migration.
8238 The cleanup is done by:
8239 - check that the instance is running only on one node
8240 (and update the config if needed)
8241 - change disks on its secondary node to secondary
8242 - wait until disks are fully synchronized
8243 - disconnect from the network
8244 - change disks into single-master mode
8245 - wait again until disks are fully synchronized
8248 instance = self.instance
8249 target_node = self.target_node
8250 source_node = self.source_node
8252 # check running on only one node
8253 self.feedback_fn("* checking where the instance actually runs"
8254 " (if this hangs, the hypervisor might be in"
8256 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8257 for node, result in ins_l.items():
8258 result.Raise("Can't contact node %s" % node)
8260 runningon_source = instance.name in ins_l[source_node].payload
8261 runningon_target = instance.name in ins_l[target_node].payload
8263 if runningon_source and runningon_target:
8264 raise errors.OpExecError("Instance seems to be running on two nodes,"
8265 " or the hypervisor is confused; you will have"
8266 " to ensure manually that it runs only on one"
8267 " and restart this operation")
8269 if not (runningon_source or runningon_target):
8270 raise errors.OpExecError("Instance does not seem to be running at all;"
8271 " in this case it's safer to repair by"
8272 " running 'gnt-instance stop' to ensure disk"
8273 " shutdown, and then restarting it")
8275 if runningon_target:
8276 # the migration has actually succeeded, we need to update the config
8277 self.feedback_fn("* instance running on secondary node (%s),"
8278 " updating config" % target_node)
8279 instance.primary_node = target_node
8280 self.cfg.Update(instance, self.feedback_fn)
8281 demoted_node = source_node
8283 self.feedback_fn("* instance confirmed to be running on its"
8284 " primary node (%s)" % source_node)
8285 demoted_node = target_node
8287 if instance.disk_template in constants.DTS_INT_MIRROR:
8288 self._EnsureSecondary(demoted_node)
8290 self._WaitUntilSync()
8291 except errors.OpExecError:
8292 # we ignore here errors, since if the device is standalone, it
8293 # won't be able to sync
8295 self._GoStandalone()
8296 self._GoReconnect(False)
8297 self._WaitUntilSync()
8299 self.feedback_fn("* done")
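# Decision table for the consistency check above (sketch): with S/T meaning
# "instance reported running on the source/target node":
#
#   S and T     -> OpExecError, manual intervention required
#   neither     -> OpExecError, stop the instance and retry
#   T only      -> adopt the target as primary, demote the source node
#   S only      -> config already correct, demote the target node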
8301 def _RevertDiskStatus(self):
8302 """Try to revert the disk status after a failed migration.
8305 target_node = self.target_node
8306 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8310 self._EnsureSecondary(target_node)
8311 self._GoStandalone()
8312 self._GoReconnect(False)
8313 self._WaitUntilSync()
8314 except errors.OpExecError, err:
8315 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8316 " please try to recover the instance manually;"
8317 " error '%s'" % str(err))
8319 def _AbortMigration(self):
8320 """Call the hypervisor code to abort a started migration.
8323 instance = self.instance
8324 target_node = self.target_node
8325 source_node = self.source_node
8326 migration_info = self.migration_info
8328 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8332 abort_msg = abort_result.fail_msg
8334 logging.error("Aborting migration failed on target node %s: %s",
8335 target_node, abort_msg)
8336 # Don't raise an exception here, as we still have to try to revert the
8337 # disk status, even if this step failed.
8339 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8340 instance, False, self.live)
8341 abort_msg = abort_result.fail_msg
8343 logging.error("Aborting migration failed on source node %s: %s",
8344 source_node, abort_msg)
8346 def _ExecMigration(self):
8347 """Migrate an instance.
8349 The migrate is done by:
8350 - change the disks into dual-master mode
8351 - wait until disks are fully synchronized again
8352 - migrate the instance
8353 - change disks on the new secondary node (the old primary) to secondary
8354 - wait until disks are fully synchronized
8355 - change disks into single-master mode
8358 instance = self.instance
8359 target_node = self.target_node
8360 source_node = self.source_node
8362 # Check for hypervisor version mismatch and warn the user.
8363 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8364 None, [self.instance.hypervisor])
8365 for ninfo in nodeinfo.values():
8366 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8368 (_, _, (src_info, )) = nodeinfo[source_node].payload
8369 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8371 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8372 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8373 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8374 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8375 if src_version != dst_version:
8376 self.feedback_fn("* warning: hypervisor version mismatch between"
8377 " source (%s) and target (%s) node" %
8378 (src_version, dst_version))
8380 self.feedback_fn("* checking disk consistency between source and target")
8381 for (idx, dev) in enumerate(instance.disks):
8382 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8383 raise errors.OpExecError("Disk %s is degraded or not fully"
8384 " synchronized on target node,"
8385 " aborting migration" % idx)
8387 if self.current_mem > self.tgt_free_mem:
8388 if not self.allow_runtime_changes:
8389 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8390 " free memory to fit instance %s on target"
8391 " node %s (have %dMB, need %dMB)" %
8392 (instance.name, target_node,
8393 self.tgt_free_mem, self.current_mem))
8394 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8395 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8398 rpcres.Raise("Cannot modify instance runtime memory")
8400 # First get the migration information from the remote node
8401 result = self.rpc.call_migration_info(source_node, instance)
8402 msg = result.fail_msg
8404 log_err = ("Failed fetching source migration information from %s: %s" %
8406 logging.error(log_err)
8407 raise errors.OpExecError(log_err)
8409 self.migration_info = migration_info = result.payload
8411 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8412 # Then switch the disks to master/master mode
8413 self._EnsureSecondary(target_node)
8414 self._GoStandalone()
8415 self._GoReconnect(True)
8416 self._WaitUntilSync()
8418 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8419 result = self.rpc.call_accept_instance(target_node,
8422 self.nodes_ip[target_node])
8424 msg = result.fail_msg
8426 logging.error("Instance pre-migration failed, trying to revert"
8427 " disk status: %s", msg)
8428 self.feedback_fn("Pre-migration failed, aborting")
8429 self._AbortMigration()
8430 self._RevertDiskStatus()
8431 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8432 (instance.name, msg))
8434 self.feedback_fn("* migrating instance to %s" % target_node)
8435 result = self.rpc.call_instance_migrate(source_node, instance,
8436 self.nodes_ip[target_node],
8438 msg = result.fail_msg
8440 logging.error("Instance migration failed, trying to revert"
8441 " disk status: %s", msg)
8442 self.feedback_fn("Migration failed, aborting")
8443 self._AbortMigration()
8444 self._RevertDiskStatus()
8445 raise errors.OpExecError("Could not migrate instance %s: %s" %
8446 (instance.name, msg))
8448 self.feedback_fn("* starting memory transfer")
8449 last_feedback = time.time()
8451 result = self.rpc.call_instance_get_migration_status(source_node,
8453 msg = result.fail_msg
8454 ms = result.payload # MigrationStatus instance
8455 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8456 logging.error("Instance migration failed, trying to revert"
8457 " disk status: %s", msg)
8458 self.feedback_fn("Migration failed, aborting")
8459 self._AbortMigration()
8460 self._RevertDiskStatus()
8462 msg = "hypervisor returned failure"
8463 raise errors.OpExecError("Could not migrate instance %s: %s" %
8464 (instance.name, msg))
8466 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8467 self.feedback_fn("* memory transfer complete")
8470 if (utils.TimeoutExpired(last_feedback,
8471 self._MIGRATION_FEEDBACK_INTERVAL) and
8472 ms.transferred_ram is not None):
8473 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8474 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8475 last_feedback = time.time()
8477 time.sleep(self._MIGRATION_POLL_INTERVAL)
8479 result = self.rpc.call_instance_finalize_migration_src(source_node,
8483 msg = result.fail_msg
8485 logging.error("Instance migration succeeded, but finalization failed"
8486 " on the source node: %s", msg)
8487 raise errors.OpExecError("Could not finalize instance migration: %s" %
8490 instance.primary_node = target_node
8492 # distribute new instance config to the other nodes
8493 self.cfg.Update(instance, self.feedback_fn)
8495 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8499 msg = result.fail_msg
8501 logging.error("Instance migration succeeded, but finalization failed"
8502 " on the target node: %s", msg)
8503 raise errors.OpExecError("Could not finalize instance migration: %s" %
8506 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8507 self._EnsureSecondary(source_node)
8508 self._WaitUntilSync()
8509 self._GoStandalone()
8510 self._GoReconnect(False)
8511 self._WaitUntilSync()
8513 # If the instance's disk template is `rbd' and there was a successful
8514 # migration, unmap the device from the source node.
8515 if self.instance.disk_template == constants.DT_RBD:
8516 disks = _ExpandCheckDisks(instance, instance.disks)
8517 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8519 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8520 msg = result.fail_msg
8522 logging.error("Migration was successful, but couldn't unmap the"
8523 " block device %s on source node %s: %s",
8524 disk.iv_name, source_node, msg)
8525 logging.error("You need to unmap the device %s manually on %s",
8526 disk.iv_name, source_node)
8528 self.feedback_fn("* done")
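# For DTS_INT_MIRROR templates the disk state transitions above are, in
# order (sketch):
#
#   before migration: _EnsureSecondary(target), _GoStandalone(),
#                     _GoReconnect(True)   # dual-master
#                     _WaitUntilSync()
#   after migration:  _EnsureSecondary(source), _WaitUntilSync(),
#                     _GoStandalone(), _GoReconnect(False), _WaitUntilSync()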
8530 def _ExecFailover(self):
8531 """Failover an instance.
8533 The failover is done by shutting it down on its present node and
8534 starting it on the secondary.
8537 instance = self.instance
8538 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8540 source_node = instance.primary_node
8541 target_node = self.target_node
8543 if instance.admin_state == constants.ADMINST_UP:
8544 self.feedback_fn("* checking disk consistency between source and target")
8545 for (idx, dev) in enumerate(instance.disks):
8546 # for drbd, these are drbd over lvm
8547 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8549 if primary_node.offline:
8550 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8552 (primary_node.name, idx, target_node))
8553 elif not self.ignore_consistency:
8554 raise errors.OpExecError("Disk %s is degraded on target node,"
8555 " aborting failover" % idx)
8557 self.feedback_fn("* not checking disk consistency as instance is not"
8560 self.feedback_fn("* shutting down instance on source node")
8561 logging.info("Shutting down instance %s on node %s",
8562 instance.name, source_node)
8564 result = self.rpc.call_instance_shutdown(source_node, instance,
8565 self.shutdown_timeout)
8566 msg = result.fail_msg
8568 if self.ignore_consistency or primary_node.offline:
8569 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8570 " proceeding anyway; please make sure node"
8571 " %s is down; error details: %s",
8572 instance.name, source_node, source_node, msg)
8574 raise errors.OpExecError("Could not shutdown instance %s on"
8576 (instance.name, source_node, msg))
8578 self.feedback_fn("* deactivating the instance's disks on source node")
8579 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8580 raise errors.OpExecError("Can't shut down the instance's disks")
8582 instance.primary_node = target_node
8583 # distribute new instance config to the other nodes
8584 self.cfg.Update(instance, self.feedback_fn)
8586 # Only start the instance if it's marked as up
8587 if instance.admin_state == constants.ADMINST_UP:
8588 self.feedback_fn("* activating the instance's disks on target node %s" %
8590 logging.info("Starting instance %s on node %s",
8591 instance.name, target_node)
8593 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8594 ignore_secondaries=True)
8596 _ShutdownInstanceDisks(self.lu, instance)
8597 raise errors.OpExecError("Can't activate the instance's disks")
8599 self.feedback_fn("* starting the instance on the target node %s" %
8601 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8603 msg = result.fail_msg
8605 _ShutdownInstanceDisks(self.lu, instance)
8606 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8607 (instance.name, target_node, msg))
8609 def Exec(self, feedback_fn):
8610 """Perform the migration.
8613 self.feedback_fn = feedback_fn
8614 self.source_node = self.instance.primary_node
8616 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8617 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8618 self.target_node = self.instance.secondary_nodes[0]
8619 # Otherwise self.target_node has been populated either
8620 # directly, or through an iallocator.
8622 self.all_nodes = [self.source_node, self.target_node]
8623 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8624 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8627 feedback_fn("Failover instance %s" % self.instance.name)
8628 self._ExecFailover()
8630 feedback_fn("Migrating instance %s" % self.instance.name)
8633 return self._ExecCleanup()
8635 return self._ExecMigration()
8638 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8640 """Wrapper around L{_CreateBlockDevInner}.
8642 This method annotates the root device first.
8645 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8646 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8650 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8652 """Create a tree of block devices on a given node.
8654 If this device type has to be created on secondaries, create it and
8657 If not, just recurse to children keeping the same 'force' value.
8659 @attention: The device has to be annotated already.
8661 @param lu: the lu on whose behalf we execute
8662 @param node: the node on which to create the device
8663 @type instance: L{objects.Instance}
8664 @param instance: the instance which owns the device
8665 @type device: L{objects.Disk}
8666 @param device: the device to create
8667 @type force_create: boolean
8668 @param force_create: whether to force creation of this device; this
8669 will be changed to True whenever we find a device which has the
8670 CreateOnSecondary() attribute
8671 @param info: the extra 'metadata' we should attach to the device
8672 (this will be represented as a LVM tag)
8673 @type force_open: boolean
8674 @param force_open: this parameter will be passed to the
8675 L{backend.BlockdevCreate} function where it specifies
8676 whether we run on primary or not, and it affects both
8677 the child assembly and the device's own Open() execution
8680 if device.CreateOnSecondary():
8684 for child in device.children:
8685 _CreateBlockDevInner(lu, node, instance, child, force_create,
8688 if not force_create:
8691 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8694 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8695 """Create a single block device on a given node.
8697 This will not recurse over children of the device, so they must be
8700 @param lu: the lu on whose behalf we execute
8701 @param node: the node on which to create the device
8702 @type instance: L{objects.Instance}
8703 @param instance: the instance which owns the device
8704 @type device: L{objects.Disk}
8705 @param device: the device to create
8706 @param info: the extra 'metadata' we should attach to the device
8707 (this will be represented as a LVM tag)
8708 @type force_open: boolean
8709 @param force_open: this parameter will be passed to the
8710 L{backend.BlockdevCreate} function where it specifies
8711 whether we run on primary or not, and it affects both
8712 the child assembly and the device's own Open() execution
8715 lu.cfg.SetDiskID(device, node)
8716 result = lu.rpc.call_blockdev_create(node, device, device.size,
8717 instance.name, force_open, info)
8718 result.Raise("Can't create block device %s on"
8719 " node %s for instance %s" % (device, node, instance.name))
8720 if device.physical_id is None:
8721 device.physical_id = result.payload
8724 def _GenerateUniqueNames(lu, exts):
8725 """Generate a suitable LV name.
8727 This will generate a logical volume name for the given instance.
8732 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8733 results.append("%s%s" % (new_id, val))
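# Example (hypothetical IDs): each extension gets its own freshly generated
# unique ID, so
#
#   _GenerateUniqueNames(lu, [".disk0", ".disk1"])
#
# might return ["<uuid1>.disk0", "<uuid2>.disk1"].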
8737 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8738 iv_name, p_minor, s_minor):
8739 """Generate a drbd8 device complete with its children.
8742 assert len(vgnames) == len(names) == 2
8743 port = lu.cfg.AllocatePort()
8744 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8746 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8747 logical_id=(vgnames[0], names[0]),
8749 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8750 logical_id=(vgnames[1], names[1]),
8752 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8753 logical_id=(primary, secondary, port,
8756 children=[dev_data, dev_meta],
8757 iv_name=iv_name, params={})
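# The resulting device tree is (sketch; the logical_id tail with the DRBD
# minors and shared secret is assumed from the allocations above):
#
#   LD_DRBD8, logical_id=(primary, secondary, port, p_minor, s_minor, secret)
#     +- LD_LV "data", logical_id=(vgnames[0], names[0]), size=size
#     +- LD_LV "meta", logical_id=(vgnames[1], names[1]), size=DRBD_META_SIZE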
8761 _DISK_TEMPLATE_NAME_PREFIX = {
8762 constants.DT_PLAIN: "",
8763 constants.DT_RBD: ".rbd",
8767 _DISK_TEMPLATE_DEVICE_TYPE = {
8768 constants.DT_PLAIN: constants.LD_LV,
8769 constants.DT_FILE: constants.LD_FILE,
8770 constants.DT_SHARED_FILE: constants.LD_FILE,
8771 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8772 constants.DT_RBD: constants.LD_RBD,
8776 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8777 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8778 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8779 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8780 """Generate the entire disk layout for a given template type.
8783 # TODO: compute space requirements
8785 vgname = lu.cfg.GetVGName()
8786 disk_count = len(disk_info)
8789 if template_name == constants.DT_DISKLESS:
8791 elif template_name == constants.DT_DRBD8:
8792 if len(secondary_nodes) != 1:
8793 raise errors.ProgrammerError("Wrong template configuration")
8794 remote_node = secondary_nodes[0]
8795 minors = lu.cfg.AllocateDRBDMinor(
8796 [primary_node, remote_node] * len(disk_info), instance_name)
8798 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8800 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8803 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8804 for i in range(disk_count)]):
8805 names.append(lv_prefix + "_data")
8806 names.append(lv_prefix + "_meta")
8807 for idx, disk in enumerate(disk_info):
8808 disk_index = idx + base_index
8809 data_vg = disk.get(constants.IDISK_VG, vgname)
8810 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8811 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8812 disk[constants.IDISK_SIZE],
8814 names[idx * 2:idx * 2 + 2],
8815 "disk/%d" % disk_index,
8816 minors[idx * 2], minors[idx * 2 + 1])
8817 disk_dev.mode = disk[constants.IDISK_MODE]
8818 disks.append(disk_dev)
8821 raise errors.ProgrammerError("Wrong template configuration")
8823 if template_name == constants.DT_FILE:
8825 elif template_name == constants.DT_SHARED_FILE:
8826 _req_shr_file_storage()
8828 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8829 if name_prefix is None:
8832 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8833 (name_prefix, base_index + i)
8834 for i in range(disk_count)])
8836 if template_name == constants.DT_PLAIN:
8837 def logical_id_fn(idx, _, disk):
8838 vg = disk.get(constants.IDISK_VG, vgname)
8839 return (vg, names[idx])
8840 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8842 lambda _, disk_index, disk: (file_driver,
8843 "%s/disk%d" % (file_storage_dir,
8845 elif template_name == constants.DT_BLOCK:
8847 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8848 disk[constants.IDISK_ADOPT])
8849 elif template_name == constants.DT_RBD:
8850 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8852 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8854 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8856 for idx, disk in enumerate(disk_info):
8857 disk_index = idx + base_index
8858 size = disk[constants.IDISK_SIZE]
8859 feedback_fn("* disk %s, size %s" %
8860 (disk_index, utils.FormatUnit(size, "h")))
8861 disks.append(objects.Disk(dev_type=dev_type, size=size,
8862 logical_id=logical_id_fn(idx, disk_index, disk),
8863 iv_name="disk/%d" % disk_index,
8864 mode=disk[constants.IDISK_MODE],
8870 def _GetInstanceInfoText(instance):
8871 """Compute that text that should be added to the disk's metadata.
8874 return "originstname+%s" % instance.name
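# E.g. for a hypothetical instance named "web1.example.com" this yields the
# tag "originstname+web1.example.com".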
8877 def _CalcEta(time_taken, written, total_size):
8878 """Calculates the ETA based on size written and total size.
8880 @param time_taken: The time taken so far
8881 @param written: amount written so far
8882 @param total_size: The total size of data to be written
8883 @return: The remaining time in seconds
8886 avg_time = time_taken / float(written)
8887 return (total_size - written) * avg_time
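# Worked example (hypothetical figures): after 30s with 256 of 1024 units
# written, avg_time == 30 / 256.0, so the ETA is
# (1024 - 256) * (30 / 256.0) == 90.0 seconds.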
8890 def _WipeDisks(lu, instance):
8891 """Wipes instance disks.
8893 @type lu: L{LogicalUnit}
8894 @param lu: the logical unit on whose behalf we execute
8895 @type instance: L{objects.Instance}
8896 @param instance: the instance whose disks we should create
8897 @return: the success of the wipe
8900 node = instance.primary_node
8902 for device in instance.disks:
8903 lu.cfg.SetDiskID(device, node)
8905 logging.info("Pause sync of instance %s disks", instance.name)
8906 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8907 (instance.disks, instance),
8909 result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)
8911 for idx, success in enumerate(result.payload):
8913 logging.warn("pause-sync of instance %s for disk %d failed",
8917 for idx, device in enumerate(instance.disks):
8918 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk,
8919 # but at most MAX_WIPE_CHUNK
8920 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8921 constants.MIN_WIPE_CHUNK_PERCENT)
8922 # we _must_ make this an int, otherwise rounding errors will
8924 wipe_chunk_size = int(wipe_chunk_size)
8926 lu.LogInfo("* Wiping disk %d", idx)
8927 logging.info("Wiping disk %d for instance %s, node %s using"
8928 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8933 start_time = time.time()
8935 while offset < size:
8936 wipe_size = min(wipe_chunk_size, size - offset)
8937 logging.debug("Wiping disk %d, offset %s, chunk %s",
8938 idx, offset, wipe_size)
8939 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
8941 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8942 (idx, offset, wipe_size))
8945 if now - last_output >= 60:
8946 eta = _CalcEta(now - start_time, offset, size)
8947 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8948 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8951 logging.info("Resume sync of instance %s disks", instance.name)
8953 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8954 (instance.disks, instance),
8958 lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
8959 " please have a look at the status and troubleshoot"
8960 " the issue: %s", node, result.fail_msg)
8962 for idx, success in enumerate(result.payload):
8964 lu.LogWarning("Resume sync of disk %d failed, please have a"
8965 " look at the status and troubleshoot the issue", idx)
8966 logging.warn("resume-sync of instance %s for disk %d failed",
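# Chunk size example (sketch; assuming MIN_WIPE_CHUNK_PERCENT == 10 and
# MAX_WIPE_CHUNK == 1024 MiB): a 2048 MiB disk is wiped in
# int(min(1024, 2048 / 100.0 * 10)) == 204 MiB chunks, while a 100 GiB disk
# would be capped at 1024 MiB per call_blockdev_wipe request.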
8970 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8971 """Create all disks for an instance.
8973 This abstracts away some work from AddInstance.
8975 @type lu: L{LogicalUnit}
8976 @param lu: the logical unit on whose behalf we execute
8977 @type instance: L{objects.Instance}
8978 @param instance: the instance whose disks we should create
8980 @param to_skip: list of indices to skip
8981 @type target_node: string
8982 @param target_node: if passed, overrides the target node for creation
8984 @return: the success of the creation
8987 info = _GetInstanceInfoText(instance)
8988 if target_node is None:
8989 pnode = instance.primary_node
8990 all_nodes = instance.all_nodes
8995 if instance.disk_template in constants.DTS_FILEBASED:
8996 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8997 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8999 result.Raise("Failed to create directory '%s' on"
9000 " node %s" % (file_storage_dir, pnode))
9002 # Note: this needs to be kept in sync with adding of disks in
9003 # LUInstanceSetParams
9004 for idx, device in enumerate(instance.disks):
9005 if to_skip and idx in to_skip:
9007 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9009 for node in all_nodes:
9010 f_create = node == pnode
9011 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9014 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9015 """Remove all disks for an instance.
9017 This abstracts away some work from `AddInstance()` and
9018 `RemoveInstance()`. Note that in case some of the devices couldn't
9019 be removed, the removal will continue with the other ones (compare
9020 with `_CreateDisks()`).
9022 @type lu: L{LogicalUnit}
9023 @param lu: the logical unit on whose behalf we execute
9024 @type instance: L{objects.Instance}
9025 @param instance: the instance whose disks we should remove
9026 @type target_node: string
9027 @param target_node: used to override the node on which to remove the disks
9029 @return: the success of the removal
9032 logging.info("Removing block devices for instance %s", instance.name)
9035 ports_to_release = set()
9036 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9037 for (idx, device) in enumerate(anno_disks):
9039 edata = [(target_node, device)]
9041 edata = device.ComputeNodeTree(instance.primary_node)
9042 for node, disk in edata:
9043 lu.cfg.SetDiskID(disk, node)
9044 result = lu.rpc.call_blockdev_remove(node, disk)
9046 lu.LogWarning("Could not remove disk %s on node %s,"
9047 " continuing anyway: %s", idx, node, result.fail_msg)
9048 if not (result.offline and node != instance.primary_node):
9051 # if this is a DRBD disk, return its port to the pool
9052 if device.dev_type in constants.LDS_DRBD:
9053 ports_to_release.add(device.logical_id[2])
9055 if all_result or ignore_failures:
9056 for port in ports_to_release:
9057 lu.cfg.AddTcpUdpPort(port)
9059 if instance.disk_template in constants.DTS_FILEBASED:
9060 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9064 tgt = instance.primary_node
9065 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9067 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9068 file_storage_dir, instance.primary_node, result.fail_msg)
9074 def _ComputeDiskSizePerVG(disk_template, disks):
9075 """Compute disk size requirements in the volume group
9078 def _compute(disks, payload):
9079 """Universal algorithm.
9084 vgs[disk[constants.IDISK_VG]] = \
9085 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9089 # Required free disk space as a function of the disk template and disk sizes
9091 constants.DT_DISKLESS: {},
9092 constants.DT_PLAIN: _compute(disks, 0),
9093 # 128 MB are added for drbd metadata for each disk
9094 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9095 constants.DT_FILE: {},
9096 constants.DT_SHARED_FILE: {},
9099 if disk_template not in req_size_dict:
9100 raise errors.ProgrammerError("Disk template '%s' size requirement"
9101 " is unknown" % disk_template)
9103 return req_size_dict[disk_template]
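# Example (hypothetical disks, assuming DRBD_META_SIZE == 128):
#
#   _ComputeDiskSizePerVG(constants.DT_DRBD8,
#                         [{constants.IDISK_VG: "xenvg",
#                           constants.IDISK_SIZE: 1024},
#                          {constants.IDISK_VG: "xenvg",
#                           constants.IDISK_SIZE: 2048}])
#   == {"xenvg": 1024 + 128 + 2048 + 128} == {"xenvg": 3328}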
9106 def _ComputeDiskSize(disk_template, disks):
9107 """Compute disk size requirements according to disk template
9110 # Required free disk space as a function of the disk template and disk sizes
9112 constants.DT_DISKLESS: None,
9113 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9114 # 128 MB are added for drbd metadata for each disk
9116 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9117 constants.DT_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9118 constants.DT_SHARED_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9119 constants.DT_BLOCK: 0,
9120 constants.DT_RBD: sum(d[constants.IDISK_SIZE] for d in disks),
9123 if disk_template not in req_size_dict:
9124 raise errors.ProgrammerError("Disk template '%s' size requirement"
9125 " is unknown" % disk_template)
9127 return req_size_dict[disk_template]
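# Example: two 512 MiB disks need 1024 MiB under DT_PLAIN, but
# 512 + 128 + 512 + 128 == 1280 MiB under DT_DRBD8 (again assuming
# DRBD_META_SIZE == 128).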
9130 def _FilterVmNodes(lu, nodenames):
9131 """Filters out non-vm_capable nodes from a list.
9133 @type lu: L{LogicalUnit}
9134 @param lu: the logical unit for which we check
9135 @type nodenames: list
9136 @param nodenames: the list of nodes on which we should check
9138 @return: the list of vm-capable nodes
9141 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9142 return [name for name in nodenames if name not in non_vm_nodes]
9145 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9146 """Hypervisor parameter validation.
9148 This function abstracts the hypervisor parameter validation to be
9149 used in both instance create and instance modify.
9151 @type lu: L{LogicalUnit}
9152 @param lu: the logical unit for which we check
9153 @type nodenames: list
9154 @param nodenames: the list of nodes on which we should check
9155 @type hvname: string
9156 @param hvname: the name of the hypervisor we should use
9157 @type hvparams: dict
9158 @param hvparams: the parameters which we need to check
9159 @raise errors.OpPrereqError: if the parameters are not valid
9162 nodenames = _FilterVmNodes(lu, nodenames)
9164 cluster = lu.cfg.GetClusterInfo()
9165 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9167 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9168 for node in nodenames:
9172 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9175 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9176 """OS parameters validation.
9178 @type lu: L{LogicalUnit}
9179 @param lu: the logical unit for which we check
9180 @type required: boolean
9181 @param required: whether the validation should fail if the OS is not
9183 @type nodenames: list
9184 @param nodenames: the list of nodes on which we should check
9185 @type osname: string
9186 @param osname: the name of the OS we should use
9187 @type osparams: dict
9188 @param osparams: the parameters which we need to check
9189 @raise errors.OpPrereqError: if the parameters are not valid
9192 nodenames = _FilterVmNodes(lu, nodenames)
9193 result = lu.rpc.call_os_validate(nodenames, required, osname,
9194 [constants.OS_VALIDATE_PARAMETERS],
9196 for node, nres in result.items():
9197 # we don't check for offline cases since this should be run only
9198 # against the master node and/or an instance's nodes
9199 nres.Raise("OS Parameters validation failed on node %s" % node)
9200 if not nres.payload:
9201 lu.LogInfo("OS %s not found on node %s, validation skipped",
9205 class LUInstanceCreate(LogicalUnit):
9206 """Create an instance.
9209 HPATH = "instance-add"
9210 HTYPE = constants.HTYPE_INSTANCE
9213 def CheckArguments(self):
9217 # do not require name_check to ease forward/backward compatibility
9219 if self.op.no_install and self.op.start:
9220 self.LogInfo("No-installation mode selected, disabling startup")
9221 self.op.start = False
9222 # validate/normalize the instance name
9223 self.op.instance_name = \
9224 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9226 if self.op.ip_check and not self.op.name_check:
9227 # TODO: make the ip check more flexible and not depend on the name check
9228 raise errors.OpPrereqError("Cannot do IP address check without a name"
9229 " check", errors.ECODE_INVAL)
9231 # check nics' parameter names
9232 for nic in self.op.nics:
9233 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9235 # check disks: parameter names and consistent adopt/no-adopt strategy
9236 has_adopt = has_no_adopt = False
9237 for disk in self.op.disks:
9238 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9239 if constants.IDISK_ADOPT in disk:
9243 if has_adopt and has_no_adopt:
9244 raise errors.OpPrereqError("Either all disks are adopted or none is",
9247 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9248 raise errors.OpPrereqError("Disk adoption is not supported for the"
9249 " '%s' disk template" %
9250 self.op.disk_template,
9252 if self.op.iallocator is not None:
9253 raise errors.OpPrereqError("Disk adoption not allowed with an"
9254 " iallocator script", errors.ECODE_INVAL)
9255 if self.op.mode == constants.INSTANCE_IMPORT:
9256 raise errors.OpPrereqError("Disk adoption not allowed for"
9257 " instance import", errors.ECODE_INVAL)
9259 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9260 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9261 " but no 'adopt' parameter given" %
9262 self.op.disk_template,
9265 self.adopt_disks = has_adopt
9267 # instance name verification
9268 if self.op.name_check:
9269 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
9270 self.op.instance_name = self.hostname1.name
9271 # used in CheckPrereq for ip ping check
9272 self.check_ip = self.hostname1.ip
9274 self.check_ip = None
9276 # file storage checks
9277 if (self.op.file_driver and
9278 not self.op.file_driver in constants.FILE_DRIVER):
9279 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9280 self.op.file_driver, errors.ECODE_INVAL)
9282 if self.op.disk_template == constants.DT_FILE:
9283 opcodes.RequireFileStorage()
9284 elif self.op.disk_template == constants.DT_SHARED_FILE:
9285 opcodes.RequireSharedFileStorage()
9287 ### Node/iallocator related checks
9288 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9290 if self.op.pnode is not None:
9291 if self.op.disk_template in constants.DTS_INT_MIRROR:
9292 if self.op.snode is None:
9293 raise errors.OpPrereqError("The networked disk templates need"
9294 " a mirror node", errors.ECODE_INVAL)
9296 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9298 self.op.snode = None
9300 self._cds = _GetClusterDomainSecret()
9302 if self.op.mode == constants.INSTANCE_IMPORT:
9303 # On import force_variant must be True, because if we forced it at
9304 # initial install, our only chance when importing it back is that it
9306 self.op.force_variant = True
9308 if self.op.no_install:
9309 self.LogInfo("No-installation mode has no effect during import")
    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_INVAL)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)
9323 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9324 # Check handshake to ensure both clusters have the same domain secret
9325 src_handshake = self.op.source_handshake
9326 if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)
9336 # Load and check source CA
9337 self.source_x509_ca_pem = self.op.source_x509_ca
9338 if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)
9354 self.source_x509_ca = cert
9356 src_instance_name = self.op.source_instance_name
9357 if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)
9361 self.source_instance_name = \
9362 netutils.GetHostname(name=src_instance_name).name
    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
9366 self.op.mode, errors.ECODE_INVAL)
9368 def ExpandNames(self):
9369 """ExpandNames for CreateInstance.
    Figure out the right locks for instance creation.

    """
9374 self.needed_locks = {}
9376 instance_name = self.op.instance_name
9377 # this is just a preventive check, but someone might still add this
9378 # instance in the meantime, and creation will fail at lock-add time
9379 if instance_name in self.cfg.GetInstanceList():
9380 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9381 instance_name, errors.ECODE_EXISTS)
9383 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9385 if self.op.iallocator:
9386 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
      # specifying a group on instance creation and then selecting nodes from
      # that group
9389 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9390 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9393 nodelist = [self.op.pnode]
9394 if self.op.snode is not None:
9395 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9396 nodelist.append(self.op.snode)
9397 self.needed_locks[locking.LEVEL_NODE] = nodelist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidental modification)
9400 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9402 # in case of import lock the source node too
9403 if self.op.mode == constants.INSTANCE_IMPORT:
9404 src_node = self.op.src_node
9405 src_path = self.op.src_path
9407 if src_path is None:
9408 self.op.src_path = src_path = self.op.instance_name
9410 if src_node is None:
9411 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9412 self.op.src_node = None
9413 if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9419 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9420 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9421 if not os.path.isabs(src_path):
9422 self.op.src_path = src_path = \
9423 utils.PathJoin(constants.EXPORT_DIR, src_path)
9425 def _RunAllocator(self):
9426 """Run the allocator based on input opcode.
9429 nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=self.op.tags,
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MAXMEM],
                     spindle_use=self.be_full[constants.BE_SPINDLE_USE],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
9451 if len(ial.result) != ial.required_nodes:
9452 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9453 " of nodes (%s), required %s" %
9454 (self.op.iallocator, len(ial.result),
9455 ial.required_nodes), errors.ECODE_FAULT)
9456 self.op.pnode = ial.result[0]
9457 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9458 self.op.instance_name, self.op.iallocator,
9459 utils.CommaJoin(ial.result))
9460 if ial.required_nodes == 2:
9461 self.op.snode = ial.result[1]
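    # Note: for mirrored disk templates the allocator is expected to return
    # exactly two nodes (enforced above via ial.required_nodes); the second
    # one becomes the DRBD secondary.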
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
9472 if self.op.mode == constants.INSTANCE_IMPORT:
9473 env["SRC_NODE"] = self.op.src_node
9474 env["SRC_PATH"] = self.op.src_path
9475 env["SRC_IMAGES"] = self.src_images
9477 env.update(_BuildInstanceHookEnv(
9478 name=self.op.instance_name,
9479 primary_node=self.op.pnode,
9480 secondary_nodes=self.secondaries,
9481 status=self.op.start,
9482 os_type=self.op.os_type,
9483 minmem=self.be_full[constants.BE_MINMEM],
9484 maxmem=self.be_full[constants.BE_MAXMEM],
9485 vcpus=self.be_full[constants.BE_VCPUS],
9486 nics=_NICListToTuple(self, self.nics),
9487 disk_template=self.op.disk_template,
9488 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
    ))

    return env
9498 def BuildHooksNodes(self):
9499 """Build hooks nodes.
9502 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9505 def _ReadExportInfo(self):
9506 """Reads the export information from disk.
9508 It will override the opcode source node and path with the actual
9509 information, if these two were not specified before.
    @return: the export information

    """
9514 assert self.op.mode == constants.INSTANCE_IMPORT
9516 src_node = self.op.src_node
9517 src_path = self.op.src_path
9519 if src_node is None:
9520 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9521 exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
9534 src_path, errors.ECODE_INVAL)
9536 _CheckNodeOnline(self, src_node)
9537 result = self.rpc.call_export_info(src_node, src_path)
9538 result.Raise("No export or invalid export found in dir %s" % src_path)
9540 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9541 if not export_info.has_section(constants.INISECT_EXP):
9542 raise errors.ProgrammerError("Corrupted export config",
9543 errors.ECODE_ENVIRON)
9545 ei_version = export_info.get(constants.INISECT_EXP, "version")
9546 if (int(ei_version) != constants.EXPORT_VERSION):
9547 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9548 (ei_version, constants.EXPORT_VERSION),
9549 errors.ECODE_ENVIRON)
9552 def _ReadExportParams(self, einfo):
9553 """Use export parameters as defaults.
9555 In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
9560 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9562 if self.op.disk_template is None:
9563 if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
        if self.op.disk_template not in constants.DISK_TEMPLATES:
          raise errors.OpPrereqError("Disk template specified in"
                                     " configuration file is not one of the"
                                     " allowed values: %s" %
                                     " ".join(constants.DISK_TEMPLATES),
                                     errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template"
                                   " information", errors.ECODE_INVAL)
    if not self.op.disks:
      disks = []
      # TODO: import the disk iv_name too
9578 for idx in range(constants.MAX_DISKS):
9579 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9580 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9581 disks.append({constants.IDISK_SIZE: disk_sz})
9582 self.op.disks = disks
9583 if not disks and self.op.disk_template != constants.DT_DISKLESS:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)
    if not self.op.nics:
      nics = []
      for idx in range(constants.MAX_NICS):
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
          ndict = {}
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
            ndict[name] = v
          nics.append(ndict)
        else:
          break
      self.op.nics = nics
9601 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9602 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9604 if (self.op.hypervisor is None and
9605 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9606 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9608 if einfo.has_section(constants.INISECT_HYP):
9609 # use the export parameters but do not override the ones
9610 # specified by the user
9611 for name, value in einfo.items(constants.INISECT_HYP):
9612 if name not in self.op.hvparams:
9613 self.op.hvparams[name] = value
9615 if einfo.has_section(constants.INISECT_BEP):
9616 # use the parameters, without overriding
9617 for name, value in einfo.items(constants.INISECT_BEP):
9618 if name not in self.op.beparams:
9619 self.op.beparams[name] = value
9620 # Compatibility for the old "memory" be param
9621 if name == constants.BE_MEMORY:
9622 if constants.BE_MAXMEM not in self.op.beparams:
9623 self.op.beparams[constants.BE_MAXMEM] = value
9624 if constants.BE_MINMEM not in self.op.beparams:
9625 self.op.beparams[constants.BE_MINMEM] = value
    else:
      # try to read the parameters old style, from the main section
9628 for name in constants.BES_PARAMETERS:
9629 if (name not in self.op.beparams and
9630 einfo.has_option(constants.INISECT_INS, name)):
9631 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
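    # Either way, self.op.beparams now carries the export's backend
    # parameters for everything the user did not specify explicitly.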
9633 if einfo.has_section(constants.INISECT_OSP):
9634 # use the parameters, without overriding
9635 for name, value in einfo.items(constants.INISECT_OSP):
9636 if name not in self.op.osparams:
9637 self.op.osparams[name] = value
9639 def _RevertToDefaults(self, cluster):
9640 """Revert the instance parameters to the default values.
9644 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9645 for name in self.op.hvparams.keys():
9646 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9647 del self.op.hvparams[name]
9649 be_defs = cluster.SimpleFillBE({})
9650 for name in self.op.beparams.keys():
9651 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9652 del self.op.beparams[name]
9654 nic_defs = cluster.SimpleFillNIC({})
9655 for nic in self.op.nics:
9656 for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
9660 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9661 for name in self.op.osparams.keys():
9662 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9663 del self.op.osparams[name]
9665 def _CalculateFileStorageDir(self):
9666 """Calculate final instance file storage dir.
9669 # file storage dir calculation/check
9670 self.instance_file_storage_dir = None
9671 if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir
9680 cfg_storagedir = get_fsd_fn()
9681 if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined",
                                   errors.ECODE_STATE)
9683 joinargs.append(cfg_storagedir)
9685 if self.op.file_storage_dir is not None:
9686 joinargs.append(self.op.file_storage_dir)
9688 joinargs.append(self.op.instance_name)
9690 # pylint: disable=W0142
9691 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9693 def CheckPrereq(self): # pylint: disable=R0914
9694 """Check prerequisites.
9697 self._CalculateFileStorageDir()
9699 if self.op.mode == constants.INSTANCE_IMPORT:
9700 export_info = self._ReadExportInfo()
9701 self._ReadExportParams(export_info)
9702 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
    else:
      self._old_instance_name = None
9706 if (not self.cfg.GetVGName() and
9707 self.op.disk_template not in constants.DTS_NOT_LVM):
9708 raise errors.OpPrereqError("Cluster does not support lvm-based"
9709 " instances", errors.ECODE_STATE)
9711 if (self.op.hypervisor is None or
9712 self.op.hypervisor == constants.VALUE_AUTO):
9713 self.op.hypervisor = self.cfg.GetHypervisorType()
9715 cluster = self.cfg.GetClusterInfo()
9716 enabled_hvs = cluster.enabled_hypervisors
9717 if self.op.hypervisor not in enabled_hvs:
9718 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9719 " cluster (%s)" % (self.op.hypervisor,
9720 ",".join(enabled_hvs)),
9723 # Check tag validity
9724 for tag in self.op.tags:
9725 objects.TaggableObject.ValidateTag(tag)
9727 # check hypervisor parameter syntax (locally)
9728 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
9731 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9732 hv_type.CheckParameterSyntax(filled_hvp)
9733 self.hv_full = filled_hvp
9734 # check that we don't specify global parameters on an instance
9735 _CheckGlobalHvParams(self.op.hvparams)
9737 # fill and remember the beparams dict
9738 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9739 for param, value in self.op.beparams.iteritems():
9740 if value == constants.VALUE_AUTO:
9741 self.op.beparams[param] = default_beparams[param]
9742 objects.UpgradeBeParams(self.op.beparams)
9743 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9744 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9746 # build os parameters
9747 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
9751 if self.op.identify_defaults:
9752 self._RevertToDefaults(cluster)
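    # With identify_defaults set, the parameter dicts above now contain only
    # values that differ from the cluster defaults, so the instance keeps
    # tracking future changes to those defaults.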
    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
9757 nic_mode_req = nic.get(constants.INIC_MODE, None)
9758 nic_mode = nic_mode_req
9759 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9760 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9762 # in routed mode, for the first nic, the default ip is 'auto'
9763 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9764 default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE
9768 # ip validity checks
9769 ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip
9784 # TODO: check the ip address for uniqueness
9785 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)
9789 # MAC address verification
9790 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9791 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9792 mac = utils.NormalizeAndValidateMac(mac)
        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
9796 except errors.ReservationError:
9797 raise errors.OpPrereqError("MAC address %s already in use"
9798 " in cluster" % mac,
9799 errors.ECODE_NOTUNIQUE)
9801 # Build nic parameters
9802 link = nic.get(constants.INIC_LINK, None)
9803 if link == constants.VALUE_AUTO:
9804 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode
      if link:
        nicparams[constants.NIC_LINK] = link
9811 check_params = cluster.SimpleFillNIC(nicparams)
9812 objects.NIC.CheckParameterSyntax(check_params)
9813 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
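    # self.nics now holds fully-built objects.NIC instances; any explicit MAC
    # has already been reserved against the cluster configuration.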
9815 # disk checks/pre-build
9816 default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
9819 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9820 if mode not in constants.DISK_ACCESS_SET:
9821 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9822 mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)
      data_vg = disk.get(constants.IDISK_VG, default_vg)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
        }
9838 if constants.IDISK_METAVG in disk:
9839 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9840 if constants.IDISK_ADOPT in disk:
9841 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9842 self.disks.append(new_disk)
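    # self.disks is now a list of normalized disk dictionaries (size, mode,
    # vg, plus the optional metavg/adopt keys).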
9844 if self.op.mode == constants.INSTANCE_IMPORT:
      disk_images = []
      for idx in range(len(self.disks)):
9847 option = "disk%d_dump" % idx
9848 if export_info.has_option(constants.INISECT_INS, option):
9849 # FIXME: are the old os-es, disk sizes, etc. useful?
9850 export_name = export_info.get(constants.INISECT_INS, option)
9851 image = utils.PathJoin(self.op.src_path, export_name)
9852 disk_images.append(image)
        else:
          disk_images.append(False)
9856 self.src_images = disk_images
9858 if self.op.instance_name == self._old_instance_name:
9859 for idx, nic in enumerate(self.nics):
9860 if nic.mac == constants.VALUE_AUTO:
9861 nic_mac_ini = "nic%d_mac" % idx
9862 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9864 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9866 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9867 if self.op.ip_check:
9868 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9869 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9870 (self.check_ip, self.op.instance_name),
9871 errors.ECODE_NOTUNIQUE)
9873 #### mac address generation
9874 # By generating here the mac address both the allocator and the hooks get
9875 # the real final mac address rather than the 'auto' or 'generate' value.
9876 # There is a race condition between the generation and the instance object
9877 # creation, which means that we know the mac is valid now, but we're not
9878 # sure it will be when we actually add the instance. If things go bad
9879 # adding the instance will abort because of a duplicate mac, and the
9880 # creation job will fail.
9881 for nic in self.nics:
9882 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9883 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9887 if self.op.iallocator is not None:
9888 self._RunAllocator()
9890 # Release all unneeded node locks
    _ReleaseLocks(self, locking.LEVEL_NODE,
                  keep=filter(None, [self.op.pnode, self.op.snode,
                                     self.op.src_node]))
    _ReleaseLocks(self, locking.LEVEL_NODE_RES,
                  keep=filter(None, [self.op.pnode, self.op.snode,
                                     self.op.src_node]))
9898 #### node related checks
9900 # check primary node
9901 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9902 assert self.pnode is not None, \
9903 "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
9910 if not pnode.vm_capable:
9911 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9912 " '%s'" % pnode.name, errors.ECODE_STATE)
9914 self.secondaries = []
9916 # mirror node verification
9917 if self.op.disk_template in constants.DTS_INT_MIRROR:
9918 if self.op.snode == pnode.name:
9919 raise errors.OpPrereqError("The secondary node cannot be the"
9920 " primary node", errors.ECODE_INVAL)
9921 _CheckNodeOnline(self, self.op.snode)
9922 _CheckNodeNotDrained(self, self.op.snode)
9923 _CheckNodeVmCapable(self, self.op.snode)
9924 self.secondaries.append(self.op.snode)
9926 snode = self.cfg.GetNodeInfo(self.op.snode)
9927 if pnode.group != snode.group:
9928 self.LogWarning("The primary and secondary nodes are in two"
9929 " different node groups; the disk parameters"
9930 " from the first disk's node group will be"
9933 nodenames = [pnode.name] + self.secondaries
9935 if not self.adopt_disks:
9936 if self.op.disk_template == constants.DT_RBD:
9937 # _CheckRADOSFreeSpace() is just a placeholder.
9938 # Any function that checks prerequisites can be placed here.
9939 # Check if there is enough space on the RADOS cluster.
9940 _CheckRADOSFreeSpace()
      else:
        # Check lv size requirements, if not adopting
        req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
        _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9946 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9947 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9948 disk[constants.IDISK_ADOPT])
9949 for disk in self.disks])
9950 if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
9955 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9956 # to ReserveLV uses the same syntax
9957 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9958 except errors.ReservationError:
9959 raise errors.OpPrereqError("LV named %s used by another instance" %
9960 lv_name, errors.ECODE_NOTUNIQUE)
9962 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9963 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9965 node_lvs = self.rpc.call_lv_list([pnode.name],
9966 vg_names.payload.keys())[pnode.name]
9967 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9968 node_lvs = node_lvs.payload
      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
9980 # update the size of disk based on what is found
9981 for dsk in self.disks:
9982 dsk[constants.IDISK_SIZE] = \
9983 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9984 dsk[constants.IDISK_ADOPT])][0]))
9986 elif self.op.disk_template == constants.DT_BLOCK:
9987 # Normalize and de-duplicate device paths
9988 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9989 for disk in self.disks])
9990 if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9997 " cannot be adopted" %
9998 (", ".join(baddisks),
9999 constants.ADOPTABLE_BLOCKDEV_ROOT),
10000 errors.ECODE_INVAL)
10002 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10003 list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
10010 utils.CommaJoin(delta),
10011 errors.ECODE_INVAL)
10012 for dsk in self.disks:
10013 dsk[constants.IDISK_SIZE] = \
10014 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10016 # Verify instance specs
    spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
    ispec = {
      constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
      constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
      constants.ISPEC_DISK_COUNT: len(self.disks),
      constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
                                  for disk in self.disks],
      constants.ISPEC_NIC_COUNT: len(self.nics),
      constants.ISPEC_SPINDLE_USE: spindle_use,
      }
10028 group_info = self.cfg.GetNodeGroup(pnode.group)
10029 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
10030 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10031 if not self.op.ignore_ipolicy and res:
10032 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10033 " policy: %s") % (pnode.group,
10034 utils.CommaJoin(res)),
10035 errors.ECODE_INVAL)
10037 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10039 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10040 # check OS parameters (remotely)
10041 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10043 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10045 # memory check on primary node
    # TODO(dynmem): use MINMEM for checking
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MAXMEM],
                           self.op.hypervisor)
10053 self.dry_run_result = list(nodenames)
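    # (in dry-run mode execution stops here: the caller gets back the chosen
    # node names without any changes having been made)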
10055 def Exec(self, feedback_fn):
10056 """Create and add the instance to the cluster.
10059 instance = self.op.instance_name
10060 pnode_name = self.pnode.name
10062 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10063 self.owned_locks(locking.LEVEL_NODE)), \
10064 "Node locks differ from node resource locks"
10066 ht_kind = self.op.hypervisor
10067 if ht_kind in constants.HTS_REQ_PORT:
10068 network_port = self.cfg.AllocatePort()
10070 network_port = None
10072 # This is ugly but we got a chicken-egg problem here
10073 # We can only take the group disk parameters, as the instance
10074 # has no disks yet (we are generating them right here).
10075 node = self.cfg.GetNodeInfo(pnode_name)
10076 nodegroup = self.cfg.GetNodeGroup(node.group)
    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn,
                                  self.cfg.GetGroupDiskParams(nodegroup))
10088 iobj = objects.Instance(name=instance, os=self.op.os_type,
10089 primary_node=pnode_name,
10090 nics=self.nics, disks=disks,
10091 disk_template=self.op.disk_template,
10092 admin_state=constants.ADMINST_DOWN,
10093 network_port=network_port,
10094 beparams=self.op.beparams,
10095 hvparams=self.op.hvparams,
10096 hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)
10104 if self.adopt_disks:
10105 if self.op.disk_template == constants.DT_PLAIN:
10106 # rename LVs to the newly-generated names; we need to construct
10107 # 'fake' LV disks with the old data, plus the new unique_id
10108 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10111 rename_to.append(t_dsk.logical_id)
10112 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10113 self.cfg.SetDiskID(t_dsk, pnode_name)
10114 result = self.rpc.call_blockdev_rename(pnode_name,
10115 zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise
10129 feedback_fn("adding instance %s to cluster config" % instance)
10131 self.cfg.AddInstance(iobj, self.proc.GetECId())
10133 # Declare that we don't want to remove the instance lock anymore, as we've
10134 # added the instance to the config
10135 del self.remove_locks[locking.LEVEL_INSTANCE]
10137 if self.op.mode == constants.INSTANCE_IMPORT:
10138 # Release unused nodes
10139 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10141 # Release all nodes
10142 _ReleaseLocks(self, locking.LEVEL_NODE)
    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
10158 disk_abort = not _WaitForSync(self, iobj)
10159 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10160 # make sure the disks are not degraded (still sync-ing is ok)
10161 feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
10168 self.cfg.RemoveInstance(iobj.name)
10169 # Make sure the instance lock gets removed
10170 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")
10174 # Release all node resource locks
10175 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10177 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10178 # we need to set the disks ID to the primary node, since the
10179 # preceding code might or might have not done it, depending on
10180 # disk template and other options
10181 for disk in iobj.disks:
10182 self.cfg.SetDiskID(disk, pnode_name)
10183 if self.op.mode == constants.INSTANCE_CREATE:
10184 if not self.op.no_install:
10185 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10186 not self.op.wait_for_sync)
        if pause_sync:
          feedback_fn("* pausing disk sync to install instance OS")
          result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                            (iobj.disks,
                                                             iobj), True)
          for idx, success in enumerate(result.payload):
            if not success:
              logging.warn("pause-sync of instance %s for disk %d failed",
                           instance, idx)

        feedback_fn("* running the instance OS create scripts...")
        # FIXME: pass debug option from opcode to backend
        os_add_result = \
          self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
                                        self.op.debug_level)
        if pause_sync:
          feedback_fn("* resuming disk sync")
          result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                            (iobj.disks,
                                                             iobj), False)
          for idx, success in enumerate(result.payload):
            if not success:
              logging.warn("resume-sync of instance %s for disk %d failed",
                           instance, idx)

        os_add_result.Raise("Could not add os for instance %s"
                            " on node %s" % (instance, pnode_name))
      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
10226 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10227 constants.IEIO_FILE, (image, ),
10228 constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)
        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
10239 self.LogWarning("Some disks for instance %s on node %s were not"
10240 " imported successfully" % (instance, pnode_name))
10242 rename_from = self._old_instance_name
10244 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10245 feedback_fn("* preparing remote import...")
10246 # The source cluster will stop the instance before attempting to make
10247 # a connection. In some cases stopping an instance can take a long
        # time, hence the shutdown timeout is added to the connection
        # timeout.
10250 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10251 self.op.source_shutdown_timeout)
10252 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10254 assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10257 self.source_x509_ca,
10258 self._cds, timeouts)
10259 if not compat.all(disk_results):
10260 # TODO: Should the instance still be started, even if some disks
10261 # failed to import (valid for local imports, too)?
10262 self.LogWarning("Some disks for instance %s on node %s were not"
10263 " imported successfully" % (instance, pnode_name))
10265 rename_from = self.source_instance_name
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

      # Run rename script on newly imported instance
      if self.op.mode != constants.INSTANCE_CREATE:
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   rename_from,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
10282 assert not self.owned_locks(locking.LEVEL_NODE_RES)
    if self.op.start:
      iobj.admin_state = constants.ADMINST_UP
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
                                            False)
      result.Raise("Could not start instance")
10293 return list(iobj.all_nodes)
10296 def _CheckRADOSFreeSpace():
10297 """Compute disk size requirements inside the RADOS cluster.
10300 # For the RADOS cluster we assume there is always enough space.
10304 class LUInstanceConsole(NoHooksLU):
10305 """Connect to an instance's console.
10307 This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False
10314 def ExpandNames(self):
10315 self.share_locks = _ShareAll()
10316 self._ExpandAndLockInstance()
10318 def CheckPrereq(self):
10319 """Check prerequisites.
    This checks that the instance is in the cluster.

    """
10324 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10325 assert self.instance is not None, \
10326 "Cannot retrieve locked instance %s" % self.op.instance_name
10327 _CheckNodeOnline(self, self.instance.primary_node)
10329 def Exec(self, feedback_fn):
10330 """Connect to the console of an instance
10333 instance = self.instance
10334 node = instance.primary_node
10336 node_insts = self.rpc.call_instance_list([node],
10337 [instance.hypervisor])[node]
10338 node_insts.Raise("Can't get node information from %s" % node)
10340 if instance.name not in node_insts.payload:
10341 if instance.admin_state == constants.ADMINST_UP:
10342 state = constants.INSTST_ERRORDOWN
10343 elif instance.admin_state == constants.ADMINST_DOWN:
10344 state = constants.INSTST_ADMINDOWN
10346 state = constants.INSTST_ADMINOFFLINE
10347 raise errors.OpExecError("Instance %s is not running (state %s)" %
10348 (instance.name, state))
10350 logging.debug("Connecting to console of %s on %s", instance.name, node)
10352 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10355 def _GetInstanceConsole(cluster, instance):
10356 """Returns console information for an instance.
10358 @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}

  """
10363 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10364 # beparams and hvparams are passed separately, to avoid editing the
10365 # instance and then saving the defaults in the instance itself.
10366 hvparams = cluster.FillHV(instance)
10367 beparams = cluster.FillBE(instance)
10368 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10370 assert console.instance == instance.name
10371 assert console.Validate()
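  # The dictionary form is what is returned to the caller; the client side
  # (e.g. "gnt-instance console") is expected to rebuild a console object
  # from this dict and to know how to handle each console kind.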
10373 return console.ToDict()
10376 class LUInstanceReplaceDisks(LogicalUnit):
10377 """Replace the disks of an instance.
10380 HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
10384 def CheckArguments(self):
10385 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10386 self.op.iallocator)
10388 def ExpandNames(self):
10389 self._ExpandAndLockInstance()
10391 assert locking.LEVEL_NODE not in self.needed_locks
10392 assert locking.LEVEL_NODE_RES not in self.needed_locks
10393 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10395 assert self.op.iallocator is None or self.op.remote_node is None, \
10396 "Conflicting options"
10398 if self.op.remote_node is not None:
10399 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10401 # Warning: do not remove the locking of the new secondary here
10402 # unless DRBD8.AddChildren is changed to work in parallel;
10403 # currently it doesn't since parallel invocations of
10404 # FindUnusedMinor will conflict
10405 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10406 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
10409 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10411 if self.op.iallocator is not None:
10412 # iallocator will select a new node in the same group
10413 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10415 self.needed_locks[locking.LEVEL_NODE_RES] = []
10417 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10418 self.op.iallocator, self.op.remote_node,
10419 self.op.disks, False, self.op.early_release,
10420 self.op.ignore_ipolicy)
10422 self.tasklets = [self.replacer]
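    # The heavy lifting is delegated to the tasklet; the LU framework runs
    # the tasklet's CheckPrereq/Exec in the course of processing this LU
    # (see the tasklets support in LogicalUnit).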
10424 def DeclareLocks(self, level):
10425 if level == locking.LEVEL_NODEGROUP:
10426 assert self.op.remote_node is None
10427 assert self.op.iallocator is not None
10428 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10430 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10431 # Lock all groups used by instance optimistically; this requires going
10432 # via the node before it's locked, requiring verification later on
10433 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10434 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10436 elif level == locking.LEVEL_NODE:
10437 if self.op.iallocator is not None:
10438 assert self.op.remote_node is None
10439 assert not self.needed_locks[locking.LEVEL_NODE]
10441 # Lock member nodes of all locked groups
10442 self.needed_locks[locking.LEVEL_NODE] = [node_name
10443 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10444 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()
10447 elif level == locking.LEVEL_NODE_RES:
      # Reuse node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
10450 self.needed_locks[locking.LEVEL_NODE]
10452 def BuildHooksEnv(self):
10453 """Build hooks env.
    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env
10467 def BuildHooksNodes(self):
10468 """Build hooks nodes.
10471 instance = self.replacer.instance
10473 self.cfg.GetMasterNode(),
10474 instance.primary_node,
10476 if self.op.remote_node is not None:
10477 nl.append(self.op.remote_node)
10480 def CheckPrereq(self):
10481 """Check prerequisites.
10484 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10485 self.op.iallocator is None)
10487 # Verify if node group locks are still correct
10488 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10492 return LogicalUnit.CheckPrereq(self)
10495 class TLReplaceDisks(Tasklet):
10496 """Replaces disks for an instance.
  Note: Locking is not within the scope of this class.

  """
10501 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10502 disks, delay_iallocator, early_release, ignore_ipolicy):
10503 """Initializes this class.
10506 Tasklet.__init__(self, lu)
    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
10515 self.early_release = early_release
10516 self.ignore_ipolicy = ignore_ipolicy
    # Runtime data
    self.instance = None
10520 self.new_node = None
10521 self.target_node = None
10522 self.other_node = None
10523 self.remote_node_info = None
10524 self.node_secondary_ip = None
  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
10531 # check for valid parameter combination
10532 if mode == constants.REPLACE_DISK_CHG:
10533 if remote_node is None and iallocator is None:
10534 raise errors.OpPrereqError("When changing the secondary either an"
10535 " iallocator script must be used or the"
10536 " new node given", errors.ECODE_INVAL)
10538 if remote_node is not None and iallocator is not None:
10539 raise errors.OpPrereqError("Give either the iallocator or the new"
10540 " secondary, not both", errors.ECODE_INVAL)
10542 elif remote_node is not None or iallocator is not None:
10543 # Not replacing the secondary
10544 raise errors.OpPrereqError("The iallocator and new node options can"
10545 " only be used when changing the"
10546 " secondary node", errors.ECODE_INVAL)
  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
10553 ial = IAllocator(lu.cfg, lu.rpc,
10554 mode=constants.IALLOCATOR_MODE_RELOC,
10555 name=instance_name,
10556 relocate_from=list(relocate_from))
10558 ial.Run(iallocator_name)
10560 if not ial.success:
10561 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10562 " %s" % (iallocator_name, ial.info),
10563 errors.ECODE_NORES)
10565 if len(ial.result) != ial.required_nodes:
10566 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10567 " of nodes (%s), required %s" %
10569 len(ial.result), ial.required_nodes),
10570 errors.ECODE_FAULT)
10572 remote_node_name = ial.result[0]
10574 lu.LogInfo("Selected new secondary for instance '%s': %s",
10575 instance_name, remote_node_name)
10577 return remote_node_name
10579 def _FindFaultyDisks(self, node_name):
10580 """Wrapper for L{_FindFaultyInstanceDisks}.
10583 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10586 def _CheckDisksActivated(self, instance):
10587 """Checks if the instance disks are activated.
10589 @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
10593 nodes = instance.all_nodes
    for idx, dev in enumerate(instance.disks):
      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = _BlockdevFind(self, node, dev, instance)

        if result.offline:
          continue
        elif result.fail_msg or not result.payload:
          return False

    return True
10609 def CheckPrereq(self):
10610 """Check prerequisites.
    This checks that the instance is in the cluster.

    """
10615 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10616 assert instance is not None, \
10617 "Cannot retrieve locked instance %s" % self.instance_name
10619 if instance.disk_template != constants.DT_DRBD8:
10620 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10621 " instances", errors.ECODE_INVAL)
10623 if len(instance.secondary_nodes) != 1:
10624 raise errors.OpPrereqError("The instance has a strange layout,"
10625 " expected one secondary but found %d" %
10626 len(instance.secondary_nodes),
10627 errors.ECODE_FAULT)
10629 if not self.delay_iallocator:
10630 self._CheckPrereq2()
10632 def _CheckPrereq2(self):
10633 """Check prerequisites, second part.
10635 This function should always be part of CheckPrereq. It was separated and is
10636 now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
10641 instance = self.instance
10642 secondary_node = instance.secondary_nodes[0]
10644 if self.iallocator_name is None:
10645 remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10648 instance.name, instance.secondary_nodes)
10650 if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
        "Remote node '%s' is not locked" % remote_node
10656 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10657 assert self.remote_node_info is not None, \
10658 "Cannot retrieve locked node %s" % remote_node
10660 if remote_node == self.instance.primary_node:
10661 raise errors.OpPrereqError("The specified node is the primary node of"
10662 " the instance", errors.ECODE_INVAL)
10664 if remote_node == secondary_node:
10665 raise errors.OpPrereqError("The specified node is already the"
10666 " secondary node of the instance",
10667 errors.ECODE_INVAL)
10669 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10670 constants.REPLACE_DISK_CHG):
10671 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10672 errors.ECODE_INVAL)
10674 if self.mode == constants.REPLACE_DISK_AUTO:
10675 if not self._CheckDisksActivated(instance):
10676 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10677 " first" % self.instance_name,
10678 errors.ECODE_STATE)
10679 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10680 faulty_secondary = self._FindFaultyDisks(secondary_node)
10682 if faulty_primary and faulty_secondary:
10683 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10684 " one node and can not be repaired"
10685 " automatically" % self.instance_name,
10686 errors.ECODE_STATE)
      if faulty_primary:
        self.disks = faulty_primary
10690 self.target_node = instance.primary_node
10691 self.other_node = secondary_node
10692 check_nodes = [self.target_node, self.other_node]
10693 elif faulty_secondary:
10694 self.disks = faulty_secondary
10695 self.target_node = secondary_node
10696 self.other_node = instance.primary_node
10697 check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
10705 self.target_node = instance.primary_node
10706 self.other_node = secondary_node
10707 check_nodes = [self.target_node, self.other_node]
10709 elif self.mode == constants.REPLACE_DISK_SEC:
10710 self.target_node = secondary_node
10711 self.other_node = instance.primary_node
10712 check_nodes = [self.target_node, self.other_node]
10714 elif self.mode == constants.REPLACE_DISK_CHG:
10715 self.new_node = remote_node
10716 self.other_node = instance.primary_node
10717 self.target_node = secondary_node
10718 check_nodes = [self.new_node, self.other_node]
10720 _CheckNodeNotDrained(self.lu, remote_node)
10721 _CheckNodeVmCapable(self.lu, remote_node)
10723 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10724 assert old_node_info is not None
10725 if old_node_info.offline and not self.early_release:
10726 # doesn't make sense to delay the release
10727 self.early_release = True
10728 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10729 " early-release mode", secondary_node)
      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)
10735 # If not specified all disks should be replaced
    if not self.disks:
      self.disks = range(len(self.instance.disks))
10739 # TODO: This is ugly, but right now we can't distinguish between internal
10740 # submitted opcode and external one. We should fix that.
10741 if self.remote_node_info:
10742 # We change the node, lets verify it still meets instance policy
10743 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
      ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
                                       new_group_info)
10746 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10747 ignore=self.ignore_ipolicy)
10749 for node in check_nodes:
10750 _CheckNodeOnline(self.lu, node)
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)
10757 # Release unneeded node and node resource locks
10758 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10759 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10761 # Release any owned node group
10762 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10763 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10765 # Check whether disks are valid
10766 for disk_idx in self.disks:
10767 instance.FindDisk(disk_idx)
10769 # Get secondary node IP addresses
10770 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10771 in self.cfg.GetMultiNodeInfo(touched_nodes))
10773 def Exec(self, feedback_fn):
10774 """Execute disk replacement.
    This dispatches the disk replacement to the appropriate handler.

    """
10779 if self.delay_iallocator:
10780 self._CheckPrereq2()
    if __debug__:
      # Verify owned locks before starting operation
10784 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10785 assert set(owned_nodes) == set(self.node_secondary_ip), \
10786 ("Incorrect node locks, owning %s, expected %s" %
10787 (owned_nodes, self.node_secondary_ip.keys()))
10788 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10789 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10791 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10792 assert list(owned_instances) == [self.instance_name], \
10793 "Instance '%s' not locked" % self.instance_name
10795 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10796 "Should not own any node group lock at this point"
    if not self.disks:
      feedback_fn("No disks need replacement for instance '%s'" %
                  self.instance.name)
      return
10803 feedback_fn("Replacing disk(s) %s for instance '%s'" %
10804 (utils.CommaJoin(self.disks), self.instance.name))
    feedback_fn("Current primary node: %s" % self.instance.primary_node)
    feedback_fn("Current secondary node: %s" %
                utils.CommaJoin(self.instance.secondary_nodes))
10809 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10811 # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)
    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)
10829 assert not self.lu.owned_locks(locking.LEVEL_NODE)
    if __debug__:
      # Verify owned locks
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
      nodes = frozenset(self.node_secondary_ip)
      assert ((self.early_release and not owned_nodes) or
              (not self.early_release and not (set(owned_nodes) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_nodes, nodes))

    return result
10842 def _CheckVolumeGroup(self, nodes):
10843 self.lu.LogInfo("Checking volume groups")
10845 vgname = self.cfg.GetVGName()
10847 # Make sure volume group exists on all involved nodes
10848 results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))
10859 def _CheckDisksExistence(self, nodes):
10860 # Check disk existence
10861 for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10867 self.cfg.SetDiskID(dev, node)
10869 result = _BlockdevFind(self, node, dev, self.instance)
10871 msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))
10878 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10879 for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))
10886 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10887 on_primary, ldisk=ldisk):
10888 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10889 " replace disks for instance %s" %
10890 (node_name, self.instance.name))
10892 def _CreateNewStorage(self, node_name):
10893 """Create new storage on the primary or secondary node.
10895 This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    iv_names = {}
10901 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10902 for idx, dev in enumerate(disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10908 self.cfg.SetDiskID(dev, node_name)
10910 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10911 names = _GenerateUniqueNames(self.lu, lv_names)
10913 (data_disk, meta_disk) = dev.children
10914 vg_data = data_disk.logical_id[0]
10915 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10916 logical_id=(vg_data, names[0]),
10917 params=data_disk.params)
10918 vg_meta = meta_disk.logical_id[0]
10919 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10920 logical_id=(vg_meta, names[1]),
10921 params=meta_disk.params)
10923 new_lvs = [lv_data, lv_meta]
10924 old_lvs = [child.Copy() for child in dev.children]
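      # Record iv_name -> (drbd device, LVs to detach and remove later, LVs
      # just created); _CheckDevices and _RemoveOldStorage consume this map.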
10925 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10927 # we pass force_create=True to force the LVM creation
10928 for new_lv in new_lvs:
        _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
                             _GetInstanceInfoText(self.instance), False)

    return iv_names
10934 def _CheckDevices(self, node_name, iv_names):
10935 for name, (dev, _, _) in iv_names.iteritems():
10936 self.cfg.SetDiskID(dev, node_name)
10938 result = _BlockdevFind(self, node_name, dev, self.instance)
10940 msg = result.fail_msg
10941 if msg or not result.payload:
10943 msg = "disk not found"
10944 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10947 if result.payload.is_degraded:
10948 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10950 def _RemoveOldStorage(self, node_name, iv_names):
10951 for name, (_, old_lvs, _) in iv_names.iteritems():
10952 self.lu.LogInfo("Remove logical volumes for %s" % name)
      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")
10962 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10963 """Replace a disk on the primary or secondary for DRBD 8.
10965 The algorithm for replace is quite complicated:
10967 1. for each disk to be replaced:
10969 1. create new LVs on the target node with unique names
10970 1. detach old LVs from the drbd device
10971 1. rename old LVs to name_replaced.<time_t>
10972 1. rename new LVs to old LVs
10973 1. attach the new LVs (with the old names now) to the drbd device
10975 1. wait for sync across all devices
10977 1. for each modified disk:
10979 1. remove old LVs (which have the name name_replaces.<time_t>)
    Failures are not very well handled.

    """
    steps_total = 6
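    # The LogStep calls below follow the numbered plan from the docstring;
    # whether old storage is removed before or after the resync (step 5 vs
    # step 6) depends on early_release.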
10986 # Step: check device activation
10987 self.lu.LogStep(1, steps_total, "Check device existence")
10988 self._CheckDisksExistence([self.other_node, self.target_node])
10989 self._CheckVolumeGroup([self.target_node, self.other_node])
10991 # Step: check other node consistency
10992 self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)
10997 # Step: create new storage
10998 self.lu.LogStep(3, steps_total, "Allocate new storage")
10999 iv_names = self._CreateNewStorage(self.target_node)
11001 # Step: for each lv, detach+rename*2+attach
11002 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11003 for dev, old_lvs, new_lvs in iv_names.itervalues():
11004 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
11009 " %s for device %s" % (self.target_node, dev.iv_name))
11011 #cfg.Update(instance)
11013 # ok, we created the new LVs, so now we know we have the needed
11014 # storage; as such, we proceed on the target node to rename
11015 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11016 # using the assumption that logical_id == physical_id (which in
11017 # turn is the unique_id on that node)
11019 # FIXME(iustin): use a better name for the replaced LVs
11020 temp_suffix = int(time.time())
11021 ren_fn = lambda d, suff: (d.physical_id[0],
11022 d.physical_id[1] + "_replaced-%s" % suff)
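# A sketch of what ren_fn produces, with hypothetical values: for a disk
# whose physical_id is ("xenvg", "disk0_data") and suffix 1400000000 it
# returns ("xenvg", "disk0_data_replaced-1400000000"); the VG component is
# kept, only the LV name gets the temporary suffix.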
11024 # Build the rename list based on what LVs exist on the node
11025 rename_old_to_new = []
11026 for to_ren in old_lvs:
11027 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11028 if not result.fail_msg and result.payload:
11030 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11032 self.lu.LogInfo("Renaming the old LVs on the target node")
11033 result = self.rpc.call_blockdev_rename(self.target_node,
11034 rename_old_to_new)
11035 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11037 # Now we rename the new LVs to the old LVs
11038 self.lu.LogInfo("Renaming the new LVs on the target node")
11039 rename_new_to_old = [(new, old.physical_id)
11040 for old, new in zip(old_lvs, new_lvs)]
11041 result = self.rpc.call_blockdev_rename(self.target_node,
11042 rename_new_to_old)
11043 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11045 # Intermediate steps of in memory modifications
11046 for old, new in zip(old_lvs, new_lvs):
11047 new.logical_id = old.logical_id
11048 self.cfg.SetDiskID(new, self.target_node)
11050 # We need to modify old_lvs so that removal later removes the
11051 # right LVs, not the newly added ones; note that old_lvs is a
11052 # copy here
11053 for disk in old_lvs:
11054 disk.logical_id = ren_fn(disk, temp_suffix)
11055 self.cfg.SetDiskID(disk, self.target_node)
11057 # Now that the new lvs have the old name, we can add them to the device
11058 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11059 result = self.rpc.call_blockdev_addchildren(self.target_node,
11060 (dev, self.instance), new_lvs)
11061 msg = result.fail_msg
11062 if msg:
11063 for new_lv in new_lvs:
11064 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11065 new_lv).fail_msg
11066 if msg2:
11067 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11068 hint=("cleanup manually the unused logical"
11069 " volumes"))
11070 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11072 cstep = itertools.count(5)
11074 if self.early_release:
11075 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11076 self._RemoveOldStorage(self.target_node, iv_names)
11077 # TODO: Check if releasing locks early still makes sense
11078 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11079 else:
11080 # Release all resource locks except those used by the instance
11081 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11082 keep=self.node_secondary_ip.keys())
11084 # Release all node locks while waiting for sync
11085 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11087 # TODO: Can the instance lock be downgraded here? Take the optional disk
11088 # shutdown in the caller into consideration.
11091 # This can fail as the old devices are degraded and _WaitForSync
11092 # does a combined result over all disks, so we don't check its return value
11093 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11094 _WaitForSync(self.lu, self.instance)
11096 # Check all devices manually
11097 self._CheckDevices(self.instance.primary_node, iv_names)
11099 # Step: remove old storage
11100 if not self.early_release:
11101 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11102 self._RemoveOldStorage(self.target_node, iv_names)
11104 def _ExecDrbd8Secondary(self, feedback_fn):
11105 """Replace the secondary node for DRBD 8.
11107 The algorithm for replace is quite complicated:
11108 - for all disks of the instance:
11109 - create new LVs on the new node with same names
11110 - shutdown the drbd device on the old secondary
11111 - disconnect the drbd network on the primary
11112 - create the drbd device on the new secondary
11113 - network attach the drbd on the primary, using an artifice:
11114 the drbd code for Attach() will connect to the network if it
11115 finds a device which is connected to the good local disks but
11116 not network enabled
11117 - wait for sync across all devices
11118 - remove all disks from the old secondary
11120 Failures are not very well handled.
11122 """
11123 steps_total = 6
11125 pnode = self.instance.primary_node
11127 # Step: check device activation
11128 self.lu.LogStep(1, steps_total, "Check device existence")
11129 self._CheckDisksExistence([self.instance.primary_node])
11130 self._CheckVolumeGroup([self.instance.primary_node])
11132 # Step: check other node consistency
11133 self.lu.LogStep(2, steps_total, "Check peer consistency")
11134 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11136 # Step: create new storage
11137 self.lu.LogStep(3, steps_total, "Allocate new storage")
11138 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11139 for idx, dev in enumerate(disks):
11140 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11141 (self.new_node, idx))
11142 # we pass force_create=True to force LVM creation
11143 for new_lv in dev.children:
11144 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11145 True, _GetInstanceInfoText(self.instance), False)
11147 # Step 4: drbd minors and drbd setup changes
11148 # after this, we must manually remove the drbd minors on both the
11149 # error and the success paths
11150 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11151 minors = self.cfg.AllocateDRBDMinor([self.new_node
11152 for dev in self.instance.disks],
11153 self.instance.name)
11154 logging.debug("Allocated minors %r", minors)
11157 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11158 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11159 (self.new_node, idx))
11160 # create new devices on new_node; note that we create two IDs:
11161 # one without port, so the drbd will be activated without
11162 # networking information on the new node at this stage, and one
11163 # with network, for the latter activation in step 4
11164 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11165 if self.instance.primary_node == o_node1:
11166 p_minor = o_minor1
11167 else:
11168 assert self.instance.primary_node == o_node2, "Three-node instance?"
11169 p_minor = o_minor2
11171 new_alone_id = (self.instance.primary_node, self.new_node, None,
11172 p_minor, new_minor, o_secret)
11173 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11174 p_minor, new_minor, o_secret)
11176 iv_names[idx] = (dev, dev.children, new_net_id)
11177 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11179 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11180 logical_id=new_alone_id,
11181 children=dev.children,
11184 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11187 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11189 _GetInstanceInfoText(self.instance), False)
11190 except errors.GenericError:
11191 self.cfg.ReleaseDRBDMinors(self.instance.name)
11194 # We have new devices, shutdown the drbd on the old secondary
11195 for idx, dev in enumerate(self.instance.disks):
11196 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11197 self.cfg.SetDiskID(dev, self.target_node)
11198 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11199 (dev, self.instance)).fail_msg
11200 if msg:
11201 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11202 " node: %s" % (idx, msg),
11203 hint=("Please cleanup this device manually as"
11204 " soon as possible"))
11206 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11207 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11208 self.instance.disks)[pnode]
11210 msg = result.fail_msg
11211 if msg:
11212 # detaches didn't succeed (unlikely)
11213 self.cfg.ReleaseDRBDMinors(self.instance.name)
11214 raise errors.OpExecError("Can't detach the disks from the network on"
11215 " old node: %s" % (msg,))
11217 # if we managed to detach at least one, we update all the disks of
11218 # the instance to point to the new secondary
11219 self.lu.LogInfo("Updating instance configuration")
11220 for dev, _, new_logical_id in iv_names.itervalues():
11221 dev.logical_id = new_logical_id
11222 self.cfg.SetDiskID(dev, self.instance.primary_node)
11224 self.cfg.Update(self.instance, feedback_fn)
11226 # Release all node locks (the configuration has been updated)
11227 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11229 # and now perform the drbd attach
11230 self.lu.LogInfo("Attaching primary drbds to new secondary"
11231 " (standalone => connected)")
11232 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11233 self.new_node],
11234 self.node_secondary_ip,
11235 (self.instance.disks, self.instance),
11236 self.instance.name,
11237 False)
11238 for to_node, to_result in result.items():
11239 msg = to_result.fail_msg
11240 if msg:
11241 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11242 to_node, msg,
11243 hint=("please do a gnt-instance info to see the"
11244 " status of disks"))
11246 cstep = itertools.count(5)
11248 if self.early_release:
11249 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11250 self._RemoveOldStorage(self.target_node, iv_names)
11251 # TODO: Check if releasing locks early still makes sense
11252 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11253 else:
11254 # Release all resource locks except those used by the instance
11255 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11256 keep=self.node_secondary_ip.keys())
11258 # TODO: Can the instance lock be downgraded here? Take the optional disk
11259 # shutdown in the caller into consideration.
11262 # This can fail as the old devices are degraded and _WaitForSync
11263 # does a combined result over all disks, so we don't check its return value
11264 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11265 _WaitForSync(self.lu, self.instance)
11267 # Check all devices manually
11268 self._CheckDevices(self.instance.primary_node, iv_names)
11270 # Step: remove old storage
11271 if not self.early_release:
11272 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11273 self._RemoveOldStorage(self.target_node, iv_names)
11276 class LURepairNodeStorage(NoHooksLU):
11277 """Repairs the volume group on a node.
11282 def CheckArguments(self):
11283 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11285 storage_type = self.op.storage_type
11287 if (constants.SO_FIX_CONSISTENCY not in
11288 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11289 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11290 " repaired" % storage_type,
11291 errors.ECODE_INVAL)
11293 def ExpandNames(self):
11294 self.needed_locks = {
11295 locking.LEVEL_NODE: [self.op.node_name],
11296 }
11298 def _CheckFaultyDisks(self, instance, node_name):
11299 """Ensure faulty disks abort the opcode or at least warn."""
11300 try:
11301 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11302 node_name, True):
11303 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11304 " node '%s'" % (instance.name, node_name),
11305 errors.ECODE_STATE)
11306 except errors.OpPrereqError, err:
11307 if self.op.ignore_consistency:
11308 self.proc.LogWarning(str(err.args[0]))
11309 else:
11310 raise
11312 def CheckPrereq(self):
11313 """Check prerequisites.
11316 # Check whether any instance on this node has faulty disks
11317 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11318 if inst.admin_state != constants.ADMINST_UP:
11319 continue
11320 check_nodes = set(inst.all_nodes)
11321 check_nodes.discard(self.op.node_name)
11322 for inst_node_name in check_nodes:
11323 self._CheckFaultyDisks(inst, inst_node_name)
11325 def Exec(self, feedback_fn):
11326 feedback_fn("Repairing storage unit '%s' on %s ..." %
11327 (self.op.name, self.op.node_name))
11329 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11330 result = self.rpc.call_storage_execute(self.op.node_name,
11331 self.op.storage_type, st_args,
11332 self.op.name,
11333 constants.SO_FIX_CONSISTENCY)
11334 result.Raise("Failed to repair storage unit '%s' on %s" %
11335 (self.op.name, self.op.node_name))
11338 class LUNodeEvacuate(NoHooksLU):
11339 """Evacuates instances off a list of nodes.
11344 _MODE2IALLOCATOR = {
11345 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11346 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11347 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11348 }
11349 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11350 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11351 constants.IALLOCATOR_NEVAC_MODES)
11353 def CheckArguments(self):
11354 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11356 def ExpandNames(self):
11357 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11359 if self.op.remote_node is not None:
11360 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11361 assert self.op.remote_node
11363 if self.op.remote_node == self.op.node_name:
11364 raise errors.OpPrereqError("Can not use evacuated node as a new"
11365 " secondary node", errors.ECODE_INVAL)
11367 if self.op.mode != constants.NODE_EVAC_SEC:
11368 raise errors.OpPrereqError("Without the use of an iallocator only"
11369 " secondary instances can be evacuated",
11370 errors.ECODE_INVAL)
11373 self.share_locks = _ShareAll()
11374 self.needed_locks = {
11375 locking.LEVEL_INSTANCE: [],
11376 locking.LEVEL_NODEGROUP: [],
11377 locking.LEVEL_NODE: [],
11378 }
11380 # Determine nodes (via group) optimistically, needs verification once locks
11381 # have been acquired
11382 self.lock_nodes = self._DetermineNodes()
11384 def _DetermineNodes(self):
11385 """Gets the list of nodes to operate on.
11388 if self.op.remote_node is None:
11389 # Iallocator will choose any node(s) in the same group
11390 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11391 else:
11392 group_nodes = frozenset([self.op.remote_node])
11394 # Determine nodes to be locked
11395 return set([self.op.node_name]) | group_nodes
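# Example with hypothetical node names: evacuating "node1" without a
# remote node locks its entire group, e.g. set(["node1", "node2",
# "node3"]); with remote_node="node9" only set(["node1", "node9"]) is
# locked.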
11397 def _DetermineInstances(self):
11398 """Builds list of instances to operate on.
11401 assert self.op.mode in constants.NODE_EVAC_MODES
11403 if self.op.mode == constants.NODE_EVAC_PRI:
11404 # Primary instances only
11405 inst_fn = _GetNodePrimaryInstances
11406 assert self.op.remote_node is None, \
11407 "Evacuating primary instances requires iallocator"
11408 elif self.op.mode == constants.NODE_EVAC_SEC:
11409 # Secondary instances only
11410 inst_fn = _GetNodeSecondaryInstances
11412 else:
11413 assert self.op.mode == constants.NODE_EVAC_ALL
11414 inst_fn = _GetNodeInstances
11415 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11417 raise errors.OpPrereqError("Due to an issue with the iallocator"
11418 " interface it is not possible to evacuate"
11419 " all instances at once; specify explicitly"
11420 " whether to evacuate primary or secondary"
11422 errors.ECODE_INVAL)
11424 return inst_fn(self.cfg, self.op.node_name)
11426 def DeclareLocks(self, level):
11427 if level == locking.LEVEL_INSTANCE:
11428 # Lock instances optimistically, needs verification once node and group
11429 # locks have been acquired
11430 self.needed_locks[locking.LEVEL_INSTANCE] = \
11431 set(i.name for i in self._DetermineInstances())
11433 elif level == locking.LEVEL_NODEGROUP:
11434 # Lock node groups for all potential target nodes optimistically, needs
11435 # verification once nodes have been acquired
11436 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11437 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11439 elif level == locking.LEVEL_NODE:
11440 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11442 def CheckPrereq(self):
11444 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11445 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11446 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11448 need_nodes = self._DetermineNodes()
11450 if not owned_nodes.issuperset(need_nodes):
11451 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11452 " locks were acquired, current nodes are"
11453 " are '%s', used to be '%s'; retry the"
11455 (self.op.node_name,
11456 utils.CommaJoin(need_nodes),
11457 utils.CommaJoin(owned_nodes)),
11458 errors.ECODE_STATE)
11460 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11461 if owned_groups != wanted_groups:
11462 raise errors.OpExecError("Node groups changed since locks were acquired,"
11463 " current groups are '%s', used to be '%s';"
11464 " retry the operation" %
11465 (utils.CommaJoin(wanted_groups),
11466 utils.CommaJoin(owned_groups)))
11468 # Determine affected instances
11469 self.instances = self._DetermineInstances()
11470 self.instance_names = [i.name for i in self.instances]
11472 if set(self.instance_names) != owned_instances:
11473 raise errors.OpExecError("Instances on node '%s' changed since locks"
11474 " were acquired, current instances are '%s',"
11475 " used to be '%s'; retry the operation" %
11476 (self.op.node_name,
11477 utils.CommaJoin(self.instance_names),
11478 utils.CommaJoin(owned_instances)))
11480 if self.instance_names:
11481 self.LogInfo("Evacuating instances from node '%s': %s",
11483 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11485 self.LogInfo("No instances to evacuate from node '%s'",
11488 if self.op.remote_node is not None:
11489 for i in self.instances:
11490 if i.primary_node == self.op.remote_node:
11491 raise errors.OpPrereqError("Node %s is the primary node of"
11492 " instance %s, cannot use it as"
11494 (self.op.remote_node, i.name),
11495 errors.ECODE_INVAL)
11497 def Exec(self, feedback_fn):
11498 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11500 if not self.instance_names:
11501 # No instances to evacuate
11502 jobs = []
11504 elif self.op.iallocator is not None:
11505 # TODO: Implement relocation to other group
11506 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11507 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11508 instances=list(self.instance_names))
11510 ial.Run(self.op.iallocator)
11512 if not ial.success:
11513 raise errors.OpPrereqError("Can't compute node evacuation using"
11514 " iallocator '%s': %s" %
11515 (self.op.iallocator, ial.info),
11516 errors.ECODE_NORES)
11518 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11520 elif self.op.remote_node is not None:
11521 assert self.op.mode == constants.NODE_EVAC_SEC
11522 jobs = [
11523 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11524 remote_node=self.op.remote_node,
11525 disks=[],
11526 mode=constants.REPLACE_DISK_CHG,
11527 early_release=self.op.early_release)]
11528 for instance_name in self.instance_names
11529 ]
11531 else:
11532 raise errors.ProgrammerError("No iallocator or remote node")
11534 return ResultWithJobs(jobs)
11537 def _SetOpEarlyRelease(early_release, op):
11538 """Sets C{early_release} flag on opcodes if available.
11542 op.early_release = early_release
11543 except AttributeError:
11544 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
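# Usage sketch: this is meant to be applied to whole opcode lists via
# compat.partial, e.g.
#   map(compat.partial(_SetOpEarlyRelease, True), ops)
# which sets early_release on every opcode exposing that attribute and
# returns the others unchanged.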
11549 def _NodeEvacDest(use_nodes, group, nodes):
11550 """Returns group or nodes depending on caller's choice.
11554 return utils.CommaJoin(nodes)
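# Example: _NodeEvacDest(True, "group1", ["node2", "node3"]) returns
# "node2, node3", while _NodeEvacDest(False, "group1", ["node2", "node3"])
# returns "group1".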
11559 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11560 """Unpacks the result of change-group and node-evacuate iallocator requests.
11562 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11563 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11565 @type lu: L{LogicalUnit}
11566 @param lu: Logical unit instance
11567 @type alloc_result: tuple/list
11568 @param alloc_result: Result from iallocator
11569 @type early_release: bool
11570 @param early_release: Whether to release locks early if possible
11571 @type use_nodes: bool
11572 @param use_nodes: Whether to display node names instead of groups
11574 """
11575 (moved, failed, jobs) = alloc_result
11577 if failed:
11578 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11579 for (name, reason) in failed)
11580 lu.LogWarning("Unable to evacuate instances %s", failreason)
11581 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11584 lu.LogInfo("Instances to be moved: %s",
11585 utils.CommaJoin("%s (to %s)" %
11586 (name, _NodeEvacDest(use_nodes, group, nodes))
11587 for (name, group, nodes) in moved))
11589 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11590 map(opcodes.OpCode.LoadOpCode, ops))
11591 for ops in jobs]
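# Shape of alloc_result being unpacked above, with hypothetical values:
#   ([("inst1", "group1", ["node2"])],   # moved: (name, group, nodes)
#    [("inst2", "some reason")],         # failed: (name, reason)
#    [[<serialized opcodes>]])           # jobs, re-loaded via LoadOpCode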
11594 class LUInstanceGrowDisk(LogicalUnit):
11595 """Grow a disk of an instance.
11598 HPATH = "disk-grow"
11599 HTYPE = constants.HTYPE_INSTANCE
11600 REQ_BGL = False
11602 def ExpandNames(self):
11603 self._ExpandAndLockInstance()
11604 self.needed_locks[locking.LEVEL_NODE] = []
11605 self.needed_locks[locking.LEVEL_NODE_RES] = []
11606 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11607 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11609 def DeclareLocks(self, level):
11610 if level == locking.LEVEL_NODE:
11611 self._LockInstancesNodes()
11612 elif level == locking.LEVEL_NODE_RES:
11614 self.needed_locks[locking.LEVEL_NODE_RES] = \
11615 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
11617 def BuildHooksEnv(self):
11618 """Build hooks env.
11620 This runs on the master, the primary and all the secondaries.
11624 "DISK": self.op.disk,
11625 "AMOUNT": self.op.amount,
11626 "ABSOLUTE": self.op.absolute,
11628 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11631 def BuildHooksNodes(self):
11632 """Build hooks nodes.
11635 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11638 def CheckPrereq(self):
11639 """Check prerequisites.
11641 This checks that the instance is in the cluster.
11643 """
11644 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11645 assert instance is not None, \
11646 "Cannot retrieve locked instance %s" % self.op.instance_name
11647 nodenames = list(instance.all_nodes)
11648 for node in nodenames:
11649 _CheckNodeOnline(self, node)
11651 self.instance = instance
11653 if instance.disk_template not in constants.DTS_GROWABLE:
11654 raise errors.OpPrereqError("Instance's disk layout does not support"
11655 " growing", errors.ECODE_INVAL)
11657 self.disk = instance.FindDisk(self.op.disk)
11659 if self.op.absolute:
11660 self.target = self.op.amount
11661 self.delta = self.target - self.disk.size
11662 if self.delta < 0:
11663 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11664 "current disk size (%s)" %
11665 (utils.FormatUnit(self.target, "h"),
11666 utils.FormatUnit(self.disk.size, "h")),
11667 errors.ECODE_STATE)
11668 else:
11669 self.delta = self.op.amount
11670 self.target = self.disk.size + self.delta
11671 if self.delta < 0:
11672 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11673 utils.FormatUnit(self.delta, "h"),
11674 errors.ECODE_INVAL)
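# Worked example with hypothetical sizes: growing a 1024 MB disk with
# amount=512 and absolute=False yields delta=512 and target=1536; with
# amount=2048 and absolute=True it yields target=2048 and delta=1024. A
# negative delta is rejected in both branches above.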
11676 if instance.disk_template not in (constants.DT_FILE,
11677 constants.DT_SHARED_FILE,
11678 constants.DT_RBD):
11679 # TODO: check the free disk space for file, when that feature will be
11680 # supported
11681 _CheckNodesFreeDiskPerVG(self, nodenames,
11682 self.disk.ComputeGrowth(self.delta))
11684 def Exec(self, feedback_fn):
11685 """Execute disk grow.
11688 instance = self.instance
11691 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11692 assert (self.owned_locks(locking.LEVEL_NODE) ==
11693 self.owned_locks(locking.LEVEL_NODE_RES))
11695 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11696 if not disks_ok:
11697 raise errors.OpExecError("Cannot activate block device to grow")
11699 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11700 (self.op.disk, instance.name,
11701 utils.FormatUnit(self.delta, "h"),
11702 utils.FormatUnit(self.target, "h")))
11704 # First run all grow ops in dry-run mode
11705 for node in instance.all_nodes:
11706 self.cfg.SetDiskID(disk, node)
11707 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11708 True)
11709 result.Raise("Grow request failed to node %s" % node)
11711 # We know that (as far as we can test) operations across different
11712 # nodes will succeed, time to run it for real
11713 for node in instance.all_nodes:
11714 self.cfg.SetDiskID(disk, node)
11715 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11716 False)
11717 result.Raise("Grow request failed to node %s" % node)
11719 # TODO: Rewrite code to work properly
11720 # DRBD goes into sync mode for a short amount of time after executing the
11721 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11722 # calling "resize" in sync mode fails. Sleeping for a short amount of
11723 # time is a work-around.
11726 disk.RecordGrow(self.delta)
11727 self.cfg.Update(instance, feedback_fn)
11729 # Changes have been recorded, release node lock
11730 _ReleaseLocks(self, locking.LEVEL_NODE)
11732 # Downgrade lock while waiting for sync
11733 self.glm.downgrade(locking.LEVEL_INSTANCE)
11735 if self.op.wait_for_sync:
11736 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11737 if disk_abort:
11738 self.proc.LogWarning("Disk sync-ing has not returned a good"
11739 " status; please check the instance")
11740 if instance.admin_state != constants.ADMINST_UP:
11741 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11742 elif instance.admin_state != constants.ADMINST_UP:
11743 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11744 " not supposed to be running because no wait for"
11745 " sync mode was requested")
11747 assert self.owned_locks(locking.LEVEL_NODE_RES)
11748 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11751 class LUInstanceQueryData(NoHooksLU):
11752 """Query runtime instance data.
11757 def ExpandNames(self):
11758 self.needed_locks = {}
11760 # Use locking if requested or when non-static information is wanted
11761 if not (self.op.static or self.op.use_locking):
11762 self.LogWarning("Non-static data requested, locks need to be acquired")
11763 self.op.use_locking = True
11765 if self.op.instances or not self.op.use_locking:
11766 # Expand instance names right here
11767 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11768 else:
11769 # Will use acquired locks
11770 self.wanted_names = None
11772 if self.op.use_locking:
11773 self.share_locks = _ShareAll()
11775 if self.wanted_names is None:
11776 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11777 else:
11778 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11780 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11781 self.needed_locks[locking.LEVEL_NODE] = []
11782 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11784 def DeclareLocks(self, level):
11785 if self.op.use_locking:
11786 if level == locking.LEVEL_NODEGROUP:
11787 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11789 # Lock all groups used by instances optimistically; this requires going
11790 # via the node before it's locked, requiring verification later on
11791 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11792 frozenset(group_uuid
11793 for instance_name in owned_instances
11794 for group_uuid in
11795 self.cfg.GetInstanceNodeGroups(instance_name))
11797 elif level == locking.LEVEL_NODE:
11798 self._LockInstancesNodes()
11800 def CheckPrereq(self):
11801 """Check prerequisites.
11803 This only checks the optional instance list against the existing names.
11805 """
11806 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11807 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11808 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11810 if self.wanted_names is None:
11811 assert self.op.use_locking, "Locking was not used"
11812 self.wanted_names = owned_instances
11814 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11816 if self.op.use_locking:
11817 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11818 None)
11819 else:
11820 assert not (owned_instances or owned_groups or owned_nodes)
11822 self.wanted_instances = instances.values()
11824 def _ComputeBlockdevStatus(self, node, instance, dev):
11825 """Returns the status of a block device
11828 if self.op.static or not node:
11831 self.cfg.SetDiskID(dev, node)
11833 result = self.rpc.call_blockdev_find(node, dev)
11837 result.Raise("Can't compute disk status for %s" % instance.name)
11839 status = result.payload
11843 return (status.dev_path, status.major, status.minor,
11844 status.sync_percent, status.estimated_time,
11845 status.is_degraded, status.ldisk_status)
11847 def _ComputeDiskStatus(self, instance, snode, dev):
11848 """Compute block device status.
11851 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11853 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11855 def _ComputeDiskStatusInner(self, instance, snode, dev):
11856 """Compute block device status.
11858 @attention: The device has to be annotated already.
11860 """
11861 if dev.dev_type in constants.LDS_DRBD:
11862 # we change the snode then (otherwise we use the one passed in)
11863 if dev.logical_id[0] == instance.primary_node:
11864 snode = dev.logical_id[1]
11865 else:
11866 snode = dev.logical_id[0]
11868 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11869 instance, dev)
11870 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11873 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11874 instance, snode),
11875 dev.children)
11880 "iv_name": dev.iv_name,
11881 "dev_type": dev.dev_type,
11882 "logical_id": dev.logical_id,
11883 "physical_id": dev.physical_id,
11884 "pstatus": dev_pstatus,
11885 "sstatus": dev_sstatus,
11886 "children": dev_children,
11891 def Exec(self, feedback_fn):
11892 """Gather and return data"""
11895 cluster = self.cfg.GetClusterInfo()
11897 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11898 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
11900 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11901 for node in nodes.values()))
11903 group2name_fn = lambda uuid: groups[uuid].name
11905 for instance in self.wanted_instances:
11906 pnode = nodes[instance.primary_node]
11908 if self.op.static or pnode.offline:
11909 remote_state = None
11911 self.LogWarning("Primary node %s is marked offline, returning static"
11912 " information only for instance %s" %
11913 (pnode.name, instance.name))
11915 remote_info = self.rpc.call_instance_info(instance.primary_node,
11916 instance.name,
11917 instance.hypervisor)
11918 remote_info.Raise("Error checking node %s" % instance.primary_node)
11919 remote_info = remote_info.payload
11920 if remote_info and "state" in remote_info:
11921 remote_state = "up"
11923 if instance.admin_state == constants.ADMINST_UP:
11924 remote_state = "down"
11926 remote_state = instance.admin_state
11928 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11929 instance.disks)
11931 snodes_group_uuids = [nodes[snode_name].group
11932 for snode_name in instance.secondary_nodes]
11934 result[instance.name] = {
11935 "name": instance.name,
11936 "config_state": instance.admin_state,
11937 "run_state": remote_state,
11938 "pnode": instance.primary_node,
11939 "pnode_group_uuid": pnode.group,
11940 "pnode_group_name": group2name_fn(pnode.group),
11941 "snodes": instance.secondary_nodes,
11942 "snodes_group_uuids": snodes_group_uuids,
11943 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
11945 # this happens to be the same format used for hooks
11946 "nics": _NICListToTuple(self, instance.nics),
11947 "disk_template": instance.disk_template,
11949 "hypervisor": instance.hypervisor,
11950 "network_port": instance.network_port,
11951 "hv_instance": instance.hvparams,
11952 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11953 "be_instance": instance.beparams,
11954 "be_actual": cluster.FillBE(instance),
11955 "os_instance": instance.osparams,
11956 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11957 "serial_no": instance.serial_no,
11958 "mtime": instance.mtime,
11959 "ctime": instance.ctime,
11960 "uuid": instance.uuid,
11966 def PrepareContainerMods(mods, private_fn):
11967 """Prepares a list of container modifications by adding a private data field.
11969 @type mods: list of tuples; (operation, index, parameters)
11970 @param mods: List of modifications
11971 @type private_fn: callable or None
11972 @param private_fn: Callable for constructing a private data field for a
11973 modification
11974 @rtype: list
11976 """
11977 if private_fn is None:
11978 fn = lambda: None
11979 else:
11980 fn = private_fn
11982 return [(op, idx, params, fn()) for (op, idx, params) in mods]
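# Example with a hypothetical modification list:
#   PrepareContainerMods([(constants.DDM_ADD, -1, {"size": 1024})], None)
# returns [(constants.DDM_ADD, -1, {"size": 1024}, None)]; passing
# _InstNicModPrivate instead of None attaches a fresh private object to
# each modification.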
11985 #: Type description for changes as returned by L{ApplyContainerMods}'s
11986 #: callbacks
11987 _TApplyContModsCbChanges = \
11988 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
11989 ht.TNonEmptyString,
11990 ht.TAny,
11991 ])))
11994 def ApplyContainerMods(kind, container, chgdesc, mods,
11995 create_fn, modify_fn, remove_fn):
11996 """Applies descriptions in C{mods} to C{container}.
11998 @type kind: string
11999 @param kind: One-word item description
12000 @type container: list
12001 @param container: Container to modify
12002 @type chgdesc: None or list
12003 @param chgdesc: List of applied changes
12004 @type mods: list
12005 @param mods: Modifications as returned by L{PrepareContainerMods}
12006 @type create_fn: callable
12007 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12008 receives absolute item index, parameters and private data object as added
12009 by L{PrepareContainerMods}, returns tuple containing new item and changes
12010 as list
12011 @type modify_fn: callable
12012 @param modify_fn: Callback for modifying an existing item
12013 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12014 and private data object as added by L{PrepareContainerMods}, returns
12015 changes as list
12016 @type remove_fn: callable
12017 @param remove_fn: Callback on removing item; receives absolute item index,
12018 item and private data object as added by L{PrepareContainerMods}
12020 """
12021 for (op, idx, params, private) in mods:
12022 if idx == -1:
12023 # Append
12024 absidx = len(container) - 1
12025 elif idx < 0:
12026 raise IndexError("Not accepting negative indices other than -1")
12027 elif idx > len(container):
12028 raise IndexError("Got %s index %s, but there are only %s" %
12029 (kind, idx, len(container)))
12030 else:
12031 absidx = idx
12033 changes = None
12035 if op == constants.DDM_ADD:
12036 # Calculate where item will be added
12037 if idx == -1:
12038 addidx = len(container)
12039 else:
12040 addidx = idx
12042 if create_fn is None:
12043 item = params
12044 else:
12045 (item, changes) = create_fn(addidx, params, private)
12047 if idx == -1:
12048 container.append(item)
12050 else:
12051 assert idx <= len(container)
12052 # list.insert does so before the specified index
12053 container.insert(idx, item)
12054 else:
12055 # Retrieve existing item
12056 try:
12057 item = container[absidx]
12058 except IndexError:
12059 raise IndexError("Invalid %s index %s" % (kind, idx))
12061 if op == constants.DDM_REMOVE:
12062 assert not params
12064 if remove_fn is not None:
12065 remove_fn(absidx, item, private)
12067 changes = [("%s/%s" % (kind, absidx), "remove")]
12069 assert container[absidx] == item
12070 del container[absidx]
12071 elif op == constants.DDM_MODIFY:
12072 if modify_fn is not None:
12073 changes = modify_fn(absidx, item, params, private)
12074 else:
12075 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12077 assert _TApplyContModsCbChanges(changes)
12079 if not (chgdesc is None or changes is None):
12080 chgdesc.extend(changes)
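# Behaviour sketch on a plain Python list, using hypothetical values:
#   container = ["a", "b"]
#   chgdesc = []
#   mods = PrepareContainerMods([(constants.DDM_REMOVE, 0, {})], None)
#   ApplyContainerMods("test", container, chgdesc, mods, None, None, None)
# leaves container == ["b"] and chgdesc == [("test/0", "remove")].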
12083 def _UpdateIvNames(base_index, disks):
12084 """Updates the C{iv_name} attribute of disks.
12086 @type disks: list of L{objects.Disk}
12088 """
12089 for (idx, disk) in enumerate(disks):
12090 disk.iv_name = "disk/%s" % (base_index + idx, )
12093 class _InstNicModPrivate:
12094 """Data structure for network interface modifications.
12096 Used by L{LUInstanceSetParams}.
12098 """
12099 def __init__(self):
12100 self.params = None
12101 self.filled = None
12104 class LUInstanceSetParams(LogicalUnit):
12105 """Modifies an instances's parameters.
12108 HPATH = "instance-modify"
12109 HTYPE = constants.HTYPE_INSTANCE
12112 @staticmethod
12113 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12114 assert ht.TList(mods)
12115 assert not mods or len(mods[0]) in (2, 3)
12117 if mods and len(mods[0]) == 2:
12118 result = []
12119 addremove = 0
12121 for op, params in mods:
12122 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12123 result.append((op, -1, params))
12124 addremove += 1
12126 if addremove > 1:
12127 raise errors.OpPrereqError("Only one %s add or remove operation is"
12128 " supported at a time" % kind,
12129 errors.ECODE_INVAL)
12130 else:
12131 result.append((constants.DDM_MODIFY, op, params))
12133 assert verify_fn(result)
12134 else:
12135 result = mods
12137 return result
12139 @staticmethod
12140 def _CheckMods(kind, mods, key_types, item_fn):
12141 """Ensures requested disk/NIC modifications are valid.
12143 """
12144 for (op, _, params) in mods:
12145 assert ht.TDict(params)
12147 utils.ForceDictType(params, key_types)
12149 if op == constants.DDM_REMOVE:
12150 if params:
12151 raise errors.OpPrereqError("No settings should be passed when"
12152 " removing a %s" % kind,
12153 errors.ECODE_INVAL)
12154 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12155 item_fn(op, params)
12156 else:
12157 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12159 @staticmethod
12160 def _VerifyDiskModification(op, params):
12161 """Verifies a disk modification.
12163 """
12164 if op == constants.DDM_ADD:
12165 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12166 if mode not in constants.DISK_ACCESS_SET:
12167 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12168 errors.ECODE_INVAL)
12170 size = params.get(constants.IDISK_SIZE, None)
12171 if size is None:
12172 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12173 constants.IDISK_SIZE, errors.ECODE_INVAL)
12175 try:
12176 size = int(size)
12177 except (TypeError, ValueError), err:
12178 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12179 errors.ECODE_INVAL)
12181 params[constants.IDISK_SIZE] = size
12183 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12184 raise errors.OpPrereqError("Disk size change not possible, use"
12185 " grow-disk", errors.ECODE_INVAL)
12187 @staticmethod
12188 def _VerifyNicModification(op, params):
12189 """Verifies a network interface modification.
12191 """
12192 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12193 ip = params.get(constants.INIC_IP, None)
12194 if ip is None:
12195 pass
12196 elif ip.lower() == constants.VALUE_NONE:
12197 params[constants.INIC_IP] = None
12198 elif not netutils.IPAddress.IsValid(ip):
12199 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12200 errors.ECODE_INVAL)
12202 bridge = params.get("bridge", None)
12203 link = params.get(constants.INIC_LINK, None)
12204 if bridge and link:
12205 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12206 " at the same time", errors.ECODE_INVAL)
12207 elif bridge and bridge.lower() == constants.VALUE_NONE:
12208 params["bridge"] = None
12209 elif link and link.lower() == constants.VALUE_NONE:
12210 params[constants.INIC_LINK] = None
12212 if op == constants.DDM_ADD:
12213 macaddr = params.get(constants.INIC_MAC, None)
12214 if macaddr is None:
12215 params[constants.INIC_MAC] = constants.VALUE_AUTO
12217 if constants.INIC_MAC in params:
12218 macaddr = params[constants.INIC_MAC]
12219 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12220 macaddr = utils.NormalizeAndValidateMac(macaddr)
12222 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12223 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12224 " modifying an existing NIC",
12225 errors.ECODE_INVAL)
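# Example of the normalization above, with hypothetical parameters: an
# add of {"ip": "None"} becomes {"ip": None, "mac": constants.VALUE_AUTO};
# passing both "bridge" and "link", or "auto" as the MAC of an existing
# NIC, is rejected.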
12227 def CheckArguments(self):
12228 if not (self.op.nics or self.op.disks or self.op.disk_template or
12229 self.op.hvparams or self.op.beparams or self.op.os_name or
12230 self.op.offline is not None or self.op.runtime_mem):
12231 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12233 if self.op.hvparams:
12234 _CheckGlobalHvParams(self.op.hvparams)
12237 self._UpgradeDiskNicMods("disk", self.op.disks,
12238 opcodes.OpInstanceSetParams.TestDiskModifications)
12240 self._UpgradeDiskNicMods("NIC", self.op.nics,
12241 opcodes.OpInstanceSetParams.TestNicModifications)
12243 # Check disk modifications
12244 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12245 self._VerifyDiskModification)
12247 if self.op.disks and self.op.disk_template is not None:
12248 raise errors.OpPrereqError("Disk template conversion and other disk"
12249 " changes not supported at the same time",
12250 errors.ECODE_INVAL)
12252 if (self.op.disk_template and
12253 self.op.disk_template in constants.DTS_INT_MIRROR and
12254 self.op.remote_node is None):
12255 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12256 " one requires specifying a secondary node",
12257 errors.ECODE_INVAL)
12259 # Check NIC modifications
12260 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12261 self._VerifyNicModification)
12263 def ExpandNames(self):
12264 self._ExpandAndLockInstance()
12265 # Can't even acquire node locks in shared mode as upcoming changes in
12266 # Ganeti 2.6 will start to modify the node object on disk conversion
12267 self.needed_locks[locking.LEVEL_NODE] = []
12268 self.needed_locks[locking.LEVEL_NODE_RES] = []
12269 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12271 def DeclareLocks(self, level):
12272 # TODO: Acquire group lock in shared mode (disk parameters)
12273 if level == locking.LEVEL_NODE:
12274 self._LockInstancesNodes()
12275 if self.op.disk_template and self.op.remote_node:
12276 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12277 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12278 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12280 self.needed_locks[locking.LEVEL_NODE_RES] = \
12281 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12283 def BuildHooksEnv(self):
12284 """Build hooks env.
12286 This runs on the master, primary and secondaries.
12290 if constants.BE_MINMEM in self.be_new:
12291 args["minmem"] = self.be_new[constants.BE_MINMEM]
12292 if constants.BE_MAXMEM in self.be_new:
12293 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12294 if constants.BE_VCPUS in self.be_new:
12295 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12296 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12297 # information at all.
12299 if self._new_nics is not None:
12300 nics = []
12302 for nic in self._new_nics:
12303 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12304 mode = nicparams[constants.NIC_MODE]
12305 link = nicparams[constants.NIC_LINK]
12306 nics.append((nic.ip, nic.mac, mode, link))
12308 args["nics"] = nics
12310 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12311 if self.op.disk_template:
12312 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12313 if self.op.runtime_mem:
12314 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12318 def BuildHooksNodes(self):
12319 """Build hooks nodes.
12322 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12325 def _PrepareNicModification(self, params, private, old_ip, old_params,
12326 cluster, pnode):
12327 update_params_dict = dict([(key, params[key])
12328 for key in constants.NICS_PARAMETERS
12329 if key in params])
12331 if "bridge" in params:
12332 update_params_dict[constants.NIC_LINK] = params["bridge"]
12334 new_params = _GetUpdatedParams(old_params, update_params_dict)
12335 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12337 new_filled_params = cluster.SimpleFillNIC(new_params)
12338 objects.NIC.CheckParameterSyntax(new_filled_params)
12340 new_mode = new_filled_params[constants.NIC_MODE]
12341 if new_mode == constants.NIC_MODE_BRIDGED:
12342 bridge = new_filled_params[constants.NIC_LINK]
12343 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12344 if msg:
12345 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12346 if self.op.force:
12347 self.warn.append(msg)
12348 else:
12349 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12351 elif new_mode == constants.NIC_MODE_ROUTED:
12352 ip = params.get(constants.INIC_IP, old_ip)
12353 if ip is None:
12354 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12355 " on a routed NIC", errors.ECODE_INVAL)
12357 if constants.INIC_MAC in params:
12358 mac = params[constants.INIC_MAC]
12359 if mac is None:
12360 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12361 errors.ECODE_INVAL)
12362 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12363 # otherwise generate the MAC address
12364 params[constants.INIC_MAC] = \
12365 self.cfg.GenerateMAC(self.proc.GetECId())
12366 else:
12367 # or validate/reserve the current one
12368 try:
12369 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12370 except errors.ReservationError:
12371 raise errors.OpPrereqError("MAC address '%s' already in use"
12372 " in cluster" % mac,
12373 errors.ECODE_NOTUNIQUE)
12375 private.params = new_params
12376 private.filled = new_filled_params
12378 def CheckPrereq(self):
12379 """Check prerequisites.
12381 This only checks the instance list against the existing names.
12384 # checking the new params on the primary/secondary nodes
12386 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12387 cluster = self.cluster = self.cfg.GetClusterInfo()
12388 assert self.instance is not None, \
12389 "Cannot retrieve locked instance %s" % self.op.instance_name
12390 pnode = instance.primary_node
12391 nodelist = list(instance.all_nodes)
12392 pnode_info = self.cfg.GetNodeInfo(pnode)
12393 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12395 # Prepare disk/NIC modifications
12396 self.diskmod = PrepareContainerMods(self.op.disks, None)
12397 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12400 if self.op.os_name and not self.op.force:
12401 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12402 self.op.force_variant)
12403 instance_os = self.op.os_name
12404 else:
12405 instance_os = instance.os
12407 assert not (self.op.disk_template and self.op.disks), \
12408 "Can't modify disk template and apply disk changes at the same time"
12410 if self.op.disk_template:
12411 if instance.disk_template == self.op.disk_template:
12412 raise errors.OpPrereqError("Instance already has disk template %s" %
12413 instance.disk_template, errors.ECODE_INVAL)
12415 if (instance.disk_template,
12416 self.op.disk_template) not in self._DISK_CONVERSIONS:
12417 raise errors.OpPrereqError("Unsupported disk template conversion from"
12418 " %s to %s" % (instance.disk_template,
12419 self.op.disk_template),
12420 errors.ECODE_INVAL)
12421 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12422 msg="cannot change disk template")
12423 if self.op.disk_template in constants.DTS_INT_MIRROR:
12424 if self.op.remote_node == pnode:
12425 raise errors.OpPrereqError("Given new secondary node %s is the same"
12426 " as the primary node of the instance" %
12427 self.op.remote_node, errors.ECODE_STATE)
12428 _CheckNodeOnline(self, self.op.remote_node)
12429 _CheckNodeNotDrained(self, self.op.remote_node)
12430 # FIXME: here we assume that the old instance type is DT_PLAIN
12431 assert instance.disk_template == constants.DT_PLAIN
12432 disks = [{constants.IDISK_SIZE: d.size,
12433 constants.IDISK_VG: d.logical_id[0]}
12434 for d in instance.disks]
12435 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12436 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12438 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12439 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12440 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12441 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12442 ignore=self.op.ignore_ipolicy)
12443 if pnode_info.group != snode_info.group:
12444 self.LogWarning("The primary and secondary nodes are in two"
12445 " different node groups; the disk parameters"
12446 " from the first disk's node group will be"
12449 # hvparams processing
12450 if self.op.hvparams:
12451 hv_type = instance.hypervisor
12452 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12453 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12454 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12457 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12458 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12459 self.hv_proposed = self.hv_new = hv_new # the new actual values
12460 self.hv_inst = i_hvdict # the new dict (without defaults)
12461 else:
12462 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12463 instance.hvparams)
12464 self.hv_new = self.hv_inst = {}
12466 # beparams processing
12467 if self.op.beparams:
12468 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12469 use_none=True)
12470 objects.UpgradeBeParams(i_bedict)
12471 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12472 be_new = cluster.SimpleFillBE(i_bedict)
12473 self.be_proposed = self.be_new = be_new # the new actual values
12474 self.be_inst = i_bedict # the new dict (without defaults)
12475 else:
12476 self.be_new = self.be_inst = {}
12477 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12478 be_old = cluster.FillBE(instance)
12480 # CPU param validation -- checking every time a parameter is
12481 # changed to cover all cases where either CPU mask or vcpus have
12482 # changed
12483 if (constants.BE_VCPUS in self.be_proposed and
12484 constants.HV_CPU_MASK in self.hv_proposed):
12485 cpu_list = \
12486 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12487 # Verify mask is consistent with number of vCPUs. Can skip this
12488 # test if only 1 entry in the CPU mask, which means same mask
12489 # is applied to all vCPUs.
12490 if (len(cpu_list) > 1 and
12491 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12492 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12494 (self.be_proposed[constants.BE_VCPUS],
12495 self.hv_proposed[constants.HV_CPU_MASK]),
12496 errors.ECODE_INVAL)
12498 # Only perform this test if a new CPU mask is given
12499 if constants.HV_CPU_MASK in self.hv_new:
12500 # Calculate the largest CPU number requested
12501 max_requested_cpu = max(map(max, cpu_list))
12502 # Check that all of the instance's nodes have enough physical CPUs to
12503 # satisfy the requested CPU mask
12504 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12505 max_requested_cpu + 1, instance.hypervisor)
12507 # osparams processing
12508 if self.op.osparams:
12509 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12510 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12511 self.os_inst = i_osdict # the new dict (without defaults)
12512 else:
12513 self.os_inst = {}
12515 self.warn = []
12517 #TODO(dynmem): do the appropriate check involving MINMEM
12518 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12519 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12520 mem_check_list = [pnode]
12521 if be_new[constants.BE_AUTO_BALANCE]:
12522 # either we changed auto_balance to yes or it was from before
12523 mem_check_list.extend(instance.secondary_nodes)
12524 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12525 instance.hypervisor)
12526 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12527 [instance.hypervisor])
12528 pninfo = nodeinfo[pnode]
12529 msg = pninfo.fail_msg
12530 if msg:
12531 # Assume the primary node is unreachable and go ahead
12532 self.warn.append("Can't get info from primary node %s: %s" %
12533 (pnode, msg))
12534 else:
12535 (_, _, (pnhvinfo, )) = pninfo.payload
12536 if not isinstance(pnhvinfo.get("memory_free", None), int):
12537 self.warn.append("Node data from primary node %s doesn't contain"
12538 " free memory information" % pnode)
12539 elif instance_info.fail_msg:
12540 self.warn.append("Can't get instance runtime information: %s" %
12541 instance_info.fail_msg)
12542 else:
12543 if instance_info.payload:
12544 current_mem = int(instance_info.payload["memory"])
12545 else:
12546 # Assume instance not running
12547 # (there is a slight race condition here, but it's not very
12548 # probable, and we have no other way to check)
12549 # TODO: Describe race condition
12550 current_mem = 0
12551 #TODO(dynmem): do the appropriate check involving MINMEM
12552 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12553 pnhvinfo["memory_free"])
12555 raise errors.OpPrereqError("This change will prevent the instance"
12556 " from starting, due to %d MB of memory"
12557 " missing on its primary node" %
12559 errors.ECODE_NORES)
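# Worked example with hypothetical numbers: raising maxmem to 4096 MB
# while the instance uses 2048 MB and the primary node reports 1024 MB
# memory_free gives miss_mem = 4096 - 2048 - 1024 = 1024 > 0, so the
# change is refused unless --force is given.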
12561 if be_new[constants.BE_AUTO_BALANCE]:
12562 for node, nres in nodeinfo.items():
12563 if node not in instance.secondary_nodes:
12564 continue
12565 nres.Raise("Can't get info from secondary node %s" % node,
12566 prereq=True, ecode=errors.ECODE_STATE)
12567 (_, _, (nhvinfo, )) = nres.payload
12568 if not isinstance(nhvinfo.get("memory_free", None), int):
12569 raise errors.OpPrereqError("Secondary node %s didn't return free"
12570 " memory information" % node,
12571 errors.ECODE_STATE)
12572 #TODO(dynmem): do the appropriate check involving MINMEM
12573 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12574 raise errors.OpPrereqError("This change will prevent the instance"
12575 " from failover to its secondary node"
12576 " %s, due to not enough memory" % node,
12577 errors.ECODE_STATE)
12579 if self.op.runtime_mem:
12580 remote_info = self.rpc.call_instance_info(instance.primary_node,
12581 instance.name,
12582 instance.hypervisor)
12583 remote_info.Raise("Error checking node %s" % instance.primary_node)
12584 if not remote_info.payload: # not running already
12585 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12586 errors.ECODE_STATE)
12588 current_memory = remote_info.payload["memory"]
12589 if (not self.op.force and
12590 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12591 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12592 raise errors.OpPrereqError("Instance %s must have memory between %d"
12593 " and %d MB of memory unless --force is"
12594 " given" % (instance.name,
12595 self.be_proposed[constants.BE_MINMEM],
12596 self.be_proposed[constants.BE_MAXMEM]),
12597 errors.ECODE_INVAL)
12599 delta = self.op.runtime_mem - current_memory
12600 if delta > 0:
12601 _CheckNodeFreeMemory(self, instance.primary_node,
12602 "ballooning memory for instance %s" %
12603 instance.name, delta, instance.hypervisor)
12605 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12606 raise errors.OpPrereqError("Disk operations not supported for"
12607 " diskless instances",
12608 errors.ECODE_INVAL)
12610 def _PrepareNicCreate(_, params, private):
12611 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12612 return (None, None)
12614 def _PrepareNicMod(_, nic, params, private):
12615 self._PrepareNicModification(params, private, nic.ip,
12616 nic.nicparams, cluster, pnode)
12617 return None
12619 # Verify NIC changes (operating on copy)
12620 nics = instance.nics[:]
12621 ApplyContainerMods("NIC", nics, None, self.nicmod,
12622 _PrepareNicCreate, _PrepareNicMod, None)
12623 if len(nics) > constants.MAX_NICS:
12624 raise errors.OpPrereqError("Instance has too many network interfaces"
12625 " (%d), cannot add more" % constants.MAX_NICS,
12626 errors.ECODE_STATE)
12628 # Verify disk changes (operating on a copy)
12629 disks = instance.disks[:]
12630 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12631 if len(disks) > constants.MAX_DISKS:
12632 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12633 " more" % constants.MAX_DISKS,
12634 errors.ECODE_STATE)
12636 if self.op.offline is not None:
12637 if self.op.offline:
12638 msg = "can't change to offline"
12640 msg = "can't change to online"
12641 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12643 # Pre-compute NIC changes (necessary to use result in hooks)
12644 self._nic_chgdesc = []
12645 if self.nicmod:
12646 # Operate on copies as this is still in prereq
12647 nics = [nic.Copy() for nic in instance.nics]
12648 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12649 self._CreateNewNic, self._ApplyNicMods, None)
12650 self._new_nics = nics
12651 else:
12652 self._new_nics = None
12654 def _ConvertPlainToDrbd(self, feedback_fn):
12655 """Converts an instance from plain to drbd.
12658 feedback_fn("Converting template to drbd")
12659 instance = self.instance
12660 pnode = instance.primary_node
12661 snode = self.op.remote_node
12663 assert instance.disk_template == constants.DT_PLAIN
12665 # create a fake disk info for _GenerateDiskTemplate
12666 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12667 constants.IDISK_VG: d.logical_id[0]}
12668 for d in instance.disks]
12669 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12670 instance.name, pnode, [snode],
12671 disk_info, None, None, 0, feedback_fn,
12672 self.diskparams)
12673 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12674 self.diskparams)
12675 info = _GetInstanceInfoText(instance)
12676 feedback_fn("Creating additional volumes...")
12677 # first, create the missing data and meta devices
12678 for disk in anno_disks:
12679 # unfortunately this is... not too nice
12680 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12681 info, True)
12682 for child in disk.children:
12683 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12684 # at this stage, all new LVs have been created, we can rename the
12685 # old ones
12686 feedback_fn("Renaming original volumes...")
12687 rename_list = [(o, n.children[0].logical_id)
12688 for (o, n) in zip(instance.disks, new_disks)]
12689 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12690 result.Raise("Failed to rename original LVs")
12692 feedback_fn("Initializing DRBD devices...")
12693 # all child devices are in place, we can now create the DRBD devices
12694 for disk in anno_disks:
12695 for node in [pnode, snode]:
12696 f_create = node == pnode
12697 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12699 # at this point, the instance has been modified
12700 instance.disk_template = constants.DT_DRBD8
12701 instance.disks = new_disks
12702 self.cfg.Update(instance, feedback_fn)
    # Release node locks while waiting for sync
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

    # Node resource locks will be released by caller

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance

    assert len(instance.secondary_nodes) == 1
    assert instance.disk_template == constants.DT_DRBD8

    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
    new_disks = [d.children[0] for d in instance.disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # this is a DRBD disk, return its port to the pool
    # NOTE: this must be done right before the call to cfg.Update!
    for disk in old_disks:
      tcp_port = disk.logical_id[2]
      self.cfg.AddTcpUdpPort(tcp_port)

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    # Release locks in case removing disks takes a while
    _ReleaseLocks(self, locking.LEVEL_NODE)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

  def _CreateNewDisk(self, idx, params, _):
    """Creates a new disk.

    """
    instance = self.instance

    # add a new disk
    if instance.disk_template in constants.DTS_FILEBASED:
      (file_driver, file_path) = instance.disks[0].logical_id
      file_path = os.path.dirname(file_path)
    else:
      file_driver = file_path = None

    disk = \
      _GenerateDiskTemplate(self, instance.disk_template, instance.name,
                            instance.primary_node, instance.secondary_nodes,
                            [params], file_path, file_driver, idx,
                            self.Log, self.diskparams)[0]

    info = _GetInstanceInfoText(instance)

    logging.info("Creating volume %s for instance %s",
                 disk.iv_name, instance.name)
    # Note: this needs to be kept in sync with _CreateDisks
    for node in instance.all_nodes:
      f_create = (node == instance.primary_node)
      try:
        _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
      except errors.OpExecError, err:
        self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
                        disk.iv_name, disk, node, err)

    return (disk, [
      ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
      ])

  @staticmethod
  def _ModifyDisk(idx, disk, params, _):
    """Modifies a disk.

    """
    disk.mode = params[constants.IDISK_MODE]

    return [
      ("disk.mode/%d" % idx, disk.mode),
      ]

  def _RemoveDisk(self, idx, root, _):
    """Removes a disk.

    """
    (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
    for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
      self.cfg.SetDiskID(disk, node)
      msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove disk/%d on node '%s': %s,"
                        " continuing anyway", idx, node, msg)

    # if this is a DRBD disk, return its port to the pool
    if root.dev_type in constants.LDS_DRBD:
      self.cfg.AddTcpUdpPort(root.logical_id[2])

  @staticmethod
  def _CreateNewNic(idx, params, private):
    """Creates data structure for a new network interface.

    """
    mac = params[constants.INIC_MAC]
    ip = params.get(constants.INIC_IP, None)
    nicparams = private.params

    return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
      ("nic.%d" % idx,
       "add:mac=%s,ip=%s,mode=%s,link=%s" %
       (mac, ip, private.filled[constants.NIC_MODE],
        private.filled[constants.NIC_LINK])),
      ])

  @staticmethod
  def _ApplyNicMods(idx, nic, params, private):
    """Modifies a network interface.

    """
    changes = []

    for key in [constants.INIC_MAC, constants.INIC_IP]:
      if key in params:
        changes.append(("nic.%s/%d" % (key, idx), params[key]))
        setattr(nic, key, params[key])

    if private.params:
      nic.nicparams = private.params

      for (key, val) in params.items():
        changes.append(("nic.%s/%d" % (key, idx), val))

    return changes

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    # TODO: Replace with self.LogWarning
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    assert ((self.op.disk_template is None) ^
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning any node resource locks"

    result = []
    instance = self.instance

    # runtime memory
    if self.op.runtime_mem:
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.op.runtime_mem)
      rpcres.Raise("Cannot modify instance runtime memory")
      result.append(("runtime_memory", self.op.runtime_mem))

    # Apply disk changes
    ApplyContainerMods("disk", instance.disks, result, self.diskmod,
                       self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
    _UpdateIvNames(0, instance.disks)

    if self.op.disk_template:
      if __debug__:
        check_nodes = set(instance.all_nodes)
        if self.op.remote_node:
          check_nodes.add(self.op.remote_node)
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
          owned = self.owned_locks(level)
          assert not (check_nodes - owned), \
            ("Not owning the correct locks, owning %r, expected at least %r" %
             (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # Apply NIC changes
    if self._new_nics is not None:
      instance.nics = self._new_nics
      result.extend(self._nic_chgdesc)

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    if self.op.offline is None:
      # Ignore
      pass
    elif self.op.offline:
      # Mark instance as offline
      self.cfg.MarkInstanceOffline(instance.name)
      result.append(("admin_state", constants.ADMINST_OFFLINE))
    else:
      # Mark instance as online, but stopped
      self.cfg.MarkInstanceDown(instance.name)
      result.append(("admin_state", constants.ADMINST_DOWN))

    self.cfg.Update(instance, feedback_fn)

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }

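  # A sketch of how the table above is used (mirroring the lookup in Exec;
  # template pairs not listed here are expected to have been rejected during
  # the prerequisite checks):
  #   mode = (constants.DT_PLAIN, constants.DT_DRBD8)
  #   self._DISK_CONVERSIONS[mode](self, feedback_fn)  # _ConvertPlainToDrbd
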
class LUInstanceChangeGroup(LogicalUnit):
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = frozenset(self.req_target_uuids)
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=list(self.target_uuids))

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)

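  # _LoadNodeEvacResult turns the iallocator's answer into a list of opcode
  # lists, one inner list per job to submit. The concrete opcodes depend on
  # what the allocator decided; a plausible shape (contents illustrative
  # only) would be:
  #   [[opcodes.OpInstanceMigrate(...)], [opcodes.OpInstanceFailover(...)]]
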
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
                             ["node", "export"], self.op.use_locking)

  def ExpandNames(self):
    self.expq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.expq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    result = {}

    for (node, expname) in self.expq.OldStyleQuery(self):
      if expname is None:
        result[node] = False
      else:
        result.setdefault(node, []).append(expname)

    return result

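  # The old-style result maps each node name either to False (the node could
  # not be queried) or to a list of export names, e.g. (names illustrative):
  #   {"node1.example.com": False,
  #    "node2.example.com": ["inst1.example.com"]}
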
class _ExportQuery(_QueryBase):
  FIELDS = query.EXPORT_FIELDS

  #: The node name is not a unique key for this query
  SORT_FIELD = "node"

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

    if self.do_locking:
      lu.share_locks = _ShareAll()
      lu.needed_locks = {
        locking.LEVEL_NODE: self.wanted,
        }

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)

    result = []

    for (node, nres) in lu.rpc.call_export_list(nodes).items():
      if nres.fail_msg:
        result.append((node, None))
      else:
        result.extend((node, expname) for expname in nres.payload)

    return result

class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None

class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the new
      #    one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if (self.op.remove_instance and
        self.instance.admin_state == constants.ADMINST_UP and
        not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down before", errors.ECODE_STATE)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (instance.admin_state != constants.ADMINST_UP)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and
            instance.admin_state == constants.ADMINST_UP and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node,
                                                (instance, None, None), False)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults

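  # Exec's return value pairs the finalization status with one boolean per
  # exported disk, e.g. (True, [True, True]) for a fully successful export
  # of a two-disk instance.
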
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")

class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
    else:
      self.new_hv_state = None

    if self.op.disk_state:
      self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
    else:
      self.new_disk_state = None

    if self.op.diskparams:
      for templ in constants.DISK_TEMPLATES:
        if templ in self.op.diskparams:
          utils.ForceDictType(self.op.diskparams[templ],
                              constants.DISK_DT_TYPES)
      self.new_diskparams = self.op.diskparams
      try:
        utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
      except errors.OpPrereqError, err:
        raise errors.OpPrereqError("While verifying diskparams options: %s" %
                                   err, errors.ECODE_INVAL)
    else:
      self.new_diskparams = {}

    if self.op.ipolicy:
      cluster = self.cfg.GetClusterInfo()
      full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
      try:
        objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
      except errors.ConfigurationError, err:
        raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams,
                                  diskparams=self.new_diskparams,
                                  ipolicy=self.op.ipolicy,
                                  hv_state_static=self.new_hv_state,
                                  disk_state_static=self.new_disk_state)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]

class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information later on.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1

      # Try to get all affected nodes' groups without having the group or node
      # lock yet. Needs verification later in the code flow.
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)

      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    expected_locks = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if actual_locks != expected_locks:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(expected_locks),
                                utils.CommaJoin(actual_locks)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]

    self.cfg.AssignGroupNodes(mods)

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and become split as
      a consequence of this change, and a list of instances that were
      previously split and this change does not fix.

    """
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))

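  # Worked example (hypothetical names): with node n1 in group A, node n2 in
  # group B and a DRBD instance on n1+n2, the instance is already split, so
  # with an empty change list it is reported in the second list only. With
  # changes=[("n2", "A")] both nodes end up in A and the instance appears in
  # neither list. Conversely, for an instance on two group-A nodes,
  # changes=[("n2", "B")] makes it show up in the first list as newly split.
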
class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    self._cluster = lu.cfg.GetClusterInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData(self._cluster,
                                [self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances,
                                query.GQ_DISKPARAMS in self.requested_data)

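  # The two mappings built above have the following shape (UUIDs and names
  # are illustrative), with instances keyed by the group of their primary
  # node:
  #   group_to_nodes     = {"uuid-1": ["node1", "node2"], "uuid-2": []}
  #   group_to_instances = {"uuid-1": ["inst1"], "uuid-2": []}
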
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)

class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.diskparams,
      self.op.alloc_policy,
      self.op.hv_state,
      self.op.disk_state,
      self.op.ipolicy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

  @staticmethod
  def _UpdateAndVerifyDiskParams(old, new):
    """Updates and verifies disk parameters.

    """
    new_params = _GetUpdatedParams(old, new)
    utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
    return new_params

  def CheckPrereq(self):
    """Check prerequisites.

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    cluster = self.cfg.GetClusterInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.diskparams:
      diskparams = self.group.diskparams
      uavdp = self._UpdateAndVerifyDiskParams
      # For each disktemplate subdict update and verify the values
      new_diskparams = dict((dt,
                             uavdp(diskparams.get(dt, {}),
                                   self.op.diskparams[dt]))
                            for dt in constants.DISK_TEMPLATES
                            if dt in self.op.diskparams)
      # Now that all subdicts of diskparams are ready, merge the actual
      # dict with all updated subdicts
      self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
      try:
        utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
      except errors.OpPrereqError, err:
        raise errors.OpPrereqError("While verifying diskparams options: %s" %
                                   err, errors.ECODE_INVAL)

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.group.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.group.disk_state_static)

    if self.op.ipolicy:
      self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
                                            self.op.ipolicy,
                                            group_policy=True)

      new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
      inst_filter = lambda inst: inst.name in owned_instances
      instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
      violations = \
          _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
                                                               self.group),
                                        new_ipolicy, instances)

      if violations:
        self.LogWarning("After the ipolicy change the following instances"
                        " violate them: %s",
                        utils.CommaJoin(violations))

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.diskparams:
      self.group.diskparams = self.new_diskparams
      result.append(("diskparams", str(self.group.diskparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    if self.op.hv_state:
      self.group.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      self.group.disk_state_static = self.new_disk_state

    if self.op.ipolicy:
      self.group.ipolicy = self.new_ipolicy

    self.cfg.Update(self.group, feedback_fn)
    return result

class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This will raise errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name

class LUGroupEvacuate(LogicalUnit):
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              owned_groups, owned_nodes, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)

class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}

    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      lock_level = locking.LEVEL_NODE
      lock_name = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      lock_level = locking.LEVEL_INSTANCE
      lock_name = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
      lock_level = locking.LEVEL_NODEGROUP
      lock_name = self.group_uuid
    else:
      lock_level = None
      lock_name = None

    if lock_level and getattr(self.op, "use_locking", True):
      self.needed_locks[lock_level] = lock_name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)

class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())

class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results

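  # Example (hypothetical data): with pattern "^db-" and an instance
  # "inst1.example.com" tagged "db-primary", Exec would return
  #   [("/instances/inst1.example.com", "db-primary")]
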
class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)

class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)

class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()

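  # Note the repeat semantics above: repeat=0 still performs a single delay,
  # while repeat=N performs N delays, logging "iteration i/N-1" before each.
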
class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

14611 def _NotifyUsingSocket(cls, cb, errcls):
14612 """Opens a Unix socket and waits for another program to connect.
14615 @param cb: Callback to send socket name to client
14616 @type errcls: class
14617 @param errcls: Exception class to use for errors
14620 # Using a temporary directory as there's no easy way to create temporary
14621 # sockets without writing a custom loop around tempfile.mktemp and
14623 tmpdir = tempfile.mkdtemp()
14625 tmpsock = utils.PathJoin(tmpdir, "sock")
14627 logging.debug("Creating temporary socket at %s", tmpsock)
14628 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14633 # Send details to client
14636 # Wait for client to connect before continuing
14637 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14639 (conn, _) = sock.accept()
14640 except socket.error, err:
14641 raise errcls("Client didn't connect in time (%s)" % err)
14645 # Remove as soon as client is connected
14646 shutil.rmtree(tmpdir)
14648 # Wait for client to close
14651 # pylint: disable=E1101
14652 # Instance of '_socketobject' has no ... member
14653 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14655 except socket.error, err:
14656 raise errcls("Client failed to confirm notification (%s)" % err)
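
  # A minimal sketch (not part of Ganeti) of the client side of the protocol
  # implemented by _NotifyUsingSocket: the client learns the socket path from
  # the job's log messages, connects, and then closes the connection, which
  # makes the conn.recv(1) above return and thereby confirms the notification.
  @staticmethod
  def _ExampleClientConfirm(sockname):
    """Sketch only, illustrating the expected client behaviour."""
    client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
      client.connect(sockname)
    finally:
      # Closing the connection is the confirmation the master waits for
      client.close()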

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc_runner, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc_runner
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = self.spindle_use = None
    self.name = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      "ipolicy": cluster_info.ipolicy,
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.primary_hypervisor

    node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
                                        [hypervisor_name])
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    cluster = cfg.GetClusterInfo()
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
      "tags": list(gdata.tags),
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng

  @staticmethod
  def _ComputeBasicNodeData(cfg, node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict of name: (node dict, node config)

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      "ndparams": cfg.GetNdParams(ninfo),
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    #TODO(dynmem): compute the right data on MAX and MIN memory
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = _MakeLegacyNodeInfo(nresult.payload)

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MAXMEM]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_state == constants.ADMINST_UP:
              i_p_up_mem += beinfo[constants.BE_MAXMEM]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_state": iinfo.admin_state,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MAXMEM],
        "spindle_use": beinfo[constants.BE_SPINDLE_USE],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _AllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "spindle_use": self.spindle_use,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _IAllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for group-change requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
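
  # For reference, the serialized input document built above has roughly this
  # shape (a sketch with values abbreviated; the exact keys are generated by
  # _ComputeClusterData and the _Add* methods):
  #
  #   {
  #     "version": constants.IALLOCATOR_VERSION,
  #     "cluster_name": "...",
  #     "cluster_tags": [...],
  #     "enabled_hypervisors": [...],
  #     "ipolicy": {...},
  #     "nodegroups": {group_uuid: {"name": ..., "alloc_policy": ..., ...}},
  #     "nodes": {node_name: {"total_memory": ..., "free_disk": ..., ...}},
  #     "instances": {instance_name: {"memory": ..., "disks": [...], ...}},
  #     "request": {"type": <mode>, <mode-specific keys>},
  #   }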

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                  ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                  ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("spindle_use", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict
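
  # For reference, a minimal well-formed allocator reply, as validated above
  # (a sketch; the shape of "result" depends on the mode's result check):
  #
  #   {
  #     "success": true,
  #     "info": "Request successful",
  #     "result": [...],
  #   }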

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        continue

      try:
        group = groups[group_uuid]
      except KeyError:
        # Can't find group, let's use UUID
        group_name = group_uuid
      else:
        group_name = group["name"]

      result.add(group_name)

    return sorted(result)
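

# A minimal sketch (not part of Ganeti) of how a logical unit might drive the
# IAllocator framework in allocation mode; LUTestAllocator.Exec below does the
# same with opcode-supplied values. The instance name, OS and sizes here are
# made-up illustration values, and "hail" is the standard htools allocator.
def _ExampleRunAllocator(lu):
  """Sketch only: request placement for a hypothetical new instance."""
  ial = IAllocator(lu.cfg, lu.rpc,
                   mode=constants.IALLOCATOR_MODE_ALLOC,
                   name="instance1.example.com",
                   memory=1024,
                   disks=[{constants.IDISK_SIZE: 10240,
                           constants.IDISK_MODE: constants.DISK_RDWR}],
                   disk_template=constants.DT_DRBD8,
                   os="debian-image",
                   tags=[],
                   nics=[{}],
                   vcpus=1,
                   hypervisor=constants.HT_XEN_PVM,
                   spindle_use=1)
  ial.Run("hail")
  if not ial.success:
    raise errors.OpPrereqError("Can't compute nodes: %s" % ial.info,
                               errors.ECODE_NORES)
  # For the allocation mode the result is the list of chosen node names
  return ial.result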


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       spindle_use=self.op.spindle_use)
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from))
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
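

# Usage sketch (not part of Ganeti): the opcode-level query machinery resolves
# a resource name to one of the classes registered above, e.g.
#
#   impl = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
#
# while unknown resource names surface as OpPrereqError rather than KeyError.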