4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
61 from ganeti import rpc
62 from ganeti import runtime
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcode.OpCode}
96 @param jobs: A list of lists of opcode objects
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
141 # Dicts used to declare locking needs to mcpu
142 self.needed_locks = None
143 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.remove_locks = {}
146 # Used to force good behavior when calling helper functions
147 self.recalculate_locks = {}
149 self.Log = processor.Log # pylint: disable=C0103
150 self.LogWarning = processor.LogWarning # pylint: disable=C0103
151 self.LogInfo = processor.LogInfo # pylint: disable=C0103
152 self.LogStep = processor.LogStep # pylint: disable=C0103
153 # support for dry-run
154 self.dry_run_result = None
155 # support for generic debug attribute
156 if (not hasattr(self.op, "debug_level") or
157 not isinstance(self.op.debug_level, int)):
158 self.op.debug_level = 0
163 # Validate opcode parameters and set defaults
164 self.op.Validate(True)
166 self.CheckArguments()
168 def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments.
171 This method is for doing a simple syntactic check and ensure
172 validity of opcode parameters, without any cluster-related
173 checks. While the same can be accomplished in ExpandNames and/or
174 CheckPrereq, doing these separate is better because:
176 - ExpandNames is left as as purely a lock-related function
177 - CheckPrereq is run after we have acquired locks (and possible
180 The function is allowed to change the self.op attribute so that
181 later methods can no longer worry about missing parameters.
186 def ExpandNames(self):
187 """Expand names for this LU.
189 This method is called before starting to execute the opcode, and it should
190 update all the parameters of the opcode to their canonical form (e.g. a
191 short node name must be fully expanded after this method has successfully
192 completed). This way locking, hooks, logging, etc. can work correctly.
194 LUs which implement this method must also populate the self.needed_locks
195 member, as a dict with lock levels as keys, and a list of needed lock names
198 - use an empty dict if you don't need any lock
199 - if you don't need any lock at a particular level omit that
200 level (note that in this case C{DeclareLocks} won't be called
201 at all for that level)
202 - if you need locks at a level, but you can't calculate it in
203 this function, initialise that level with an empty list and do
204 further processing in L{LogicalUnit.DeclareLocks} (see that
205 function's docstring)
206 - don't put anything for the BGL level
207 - if you want all locks at a level use L{locking.ALL_SET} as a value
209 If you need to share locks (rather than acquire them exclusively) at one
210 level you can modify self.share_locks, setting a true value (usually 1) for
211 that level. By default locks are not shared.
213 This function can also define a list of tasklets, which then will be
214 executed in order instead of the usual LU-level CheckPrereq and Exec
215 functions, if those are not defined by the LU.
219 # Acquire all nodes and one instance
220 self.needed_locks = {
221 locking.LEVEL_NODE: locking.ALL_SET,
222 locking.LEVEL_INSTANCE: ['instance1.example.com'],
224 # Acquire just two nodes
225 self.needed_locks = {
226 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
229 self.needed_locks = {} # No, you can't leave it to the default value None
232 # The implementation of this method is mandatory only if the new LU is
233 # concurrent, so that old LUs don't need to be changed all at the same
236 self.needed_locks = {} # Exclusive LUs don't need locks.
238 raise NotImplementedError
240 def DeclareLocks(self, level):
241 """Declare LU locking needs for a level
243 While most LUs can just declare their locking needs at ExpandNames time,
244 sometimes there's the need to calculate some locks after having acquired
245 the ones before. This function is called just before acquiring locks at a
246 particular level, but after acquiring the ones at lower levels, and permits
247 such calculations. It can be used to modify self.needed_locks, and by
248 default it does nothing.
250 This function is only called if you have something already set in
251 self.needed_locks for the level.
253 @param level: Locking level which is going to be locked
254 @type level: member of L{ganeti.locking.LEVELS}
258 def CheckPrereq(self):
259 """Check prerequisites for this LU.
261 This method should check that the prerequisites for the execution
262 of this LU are fulfilled. It can do internode communication, but
263 it should be idempotent - no cluster or system changes are
266 The method should raise errors.OpPrereqError in case something is
267 not fulfilled. Its return value is ignored.
269 This method should also update all the parameters of the opcode to
270 their canonical form if it hasn't been done by ExpandNames before.
273 if self.tasklets is not None:
274 for (idx, tl) in enumerate(self.tasklets):
275 logging.debug("Checking prerequisites for tasklet %s/%s",
276 idx + 1, len(self.tasklets))
281 def Exec(self, feedback_fn):
284 This method should implement the actual work. It should raise
285 errors.OpExecError for failures that are somewhat dealt with in
289 if self.tasklets is not None:
290 for (idx, tl) in enumerate(self.tasklets):
291 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
294 raise NotImplementedError
296 def BuildHooksEnv(self):
297 """Build hooks environment for this LU.
300 @return: Dictionary containing the environment that will be used for
301 running the hooks for this LU. The keys of the dict must not be prefixed
302 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
303 will extend the environment with additional variables. If no environment
304 should be defined, an empty dictionary should be returned (not C{None}).
305 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
309 raise NotImplementedError
311 def BuildHooksNodes(self):
312 """Build list of nodes to run LU's hooks.
314 @rtype: tuple; (list, list)
315 @return: Tuple containing a list of node names on which the hook
316 should run before the execution and a list of node names on which the
317 hook should run after the execution. No nodes should be returned as an
318 empty list (and not None).
319 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
323 raise NotImplementedError
325 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
326 """Notify the LU about the results of its hooks.
328 This method is called every time a hooks phase is executed, and notifies
329 the Logical Unit about the hooks' result. The LU can then use it to alter
330 its result based on the hooks. By default the method does nothing and the
331 previous result is passed back unchanged but any LU can define it if it
332 wants to use the local cluster hook-scripts somehow.
334 @param phase: one of L{constants.HOOKS_PHASE_POST} or
335 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
336 @param hook_results: the results of the multi-node hooks rpc call
337 @param feedback_fn: function used send feedback back to the caller
338 @param lu_result: the previous Exec result this LU had, or None
340 @return: the new Exec result, based on the previous result
344 # API must be kept, thus we ignore the unused argument and could
345 # be a function warnings
346 # pylint: disable=W0613,R0201
349 def _ExpandAndLockInstance(self):
350 """Helper function to expand and lock an instance.
352 Many LUs that work on an instance take its name in self.op.instance_name
353 and need to expand it and then declare the expanded name for locking. This
354 function does it, and then updates self.op.instance_name to the expanded
355 name. It also initializes needed_locks as a dict, if this hasn't been done
359 if self.needed_locks is None:
360 self.needed_locks = {}
362 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
363 "_ExpandAndLockInstance called with instance-level locks set"
364 self.op.instance_name = _ExpandInstanceName(self.cfg,
365 self.op.instance_name)
366 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
368 def _LockInstancesNodes(self, primary_only=False,
369 level=locking.LEVEL_NODE):
370 """Helper function to declare instances' nodes for locking.
372 This function should be called after locking one or more instances to lock
373 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
374 with all primary or secondary nodes for instances already locked and
375 present in self.needed_locks[locking.LEVEL_INSTANCE].
377 It should be called from DeclareLocks, and for safety only works if
378 self.recalculate_locks[locking.LEVEL_NODE] is set.
380 In the future it may grow parameters to just lock some instance's nodes, or
381 to just lock primaries or secondary nodes, if needed.
383 If should be called in DeclareLocks in a way similar to::
385 if level == locking.LEVEL_NODE:
386 self._LockInstancesNodes()
388 @type primary_only: boolean
389 @param primary_only: only lock primary nodes of locked instances
390 @param level: Which lock level to use for locking nodes
393 assert level in self.recalculate_locks, \
394 "_LockInstancesNodes helper function called with no nodes to recalculate"
396 # TODO: check if we're really been called with the instance locks held
398 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
399 # future we might want to have different behaviors depending on the value
400 # of self.recalculate_locks[locking.LEVEL_NODE]
402 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
403 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
404 wanted_nodes.append(instance.primary_node)
406 wanted_nodes.extend(instance.secondary_nodes)
408 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
409 self.needed_locks[level] = wanted_nodes
410 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
411 self.needed_locks[level].extend(wanted_nodes)
413 raise errors.ProgrammerError("Unknown recalculation mode")
415 del self.recalculate_locks[level]
418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
419 """Simple LU which runs no hooks.
421 This LU is intended as a parent for other LogicalUnits which will
422 run no hooks, in order to reduce duplicate code.
428 def BuildHooksEnv(self):
429 """Empty BuildHooksEnv for NoHooksLu.
431 This just raises an error.
434 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
436 def BuildHooksNodes(self):
437 """Empty BuildHooksNodes for NoHooksLU.
440 raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU,
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
463 """Check prerequisites for this tasklets.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or
486 raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
499 def __init__(self, qfilter, fields, use_locking):
500 """Initializes this class.
503 self.use_locking = use_locking
505 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
506 namefield=self.SORT_FIELD)
507 self.requested_data = self.query.RequestedData()
508 self.names = self.query.RequestedNames()
510 # Sort only if no names were requested
511 self.sort_by_name = not self.names
513 self.do_locking = None
516 def _GetNames(self, lu, all_names, lock_level):
517 """Helper function to determine names asked for in the query.
521 names = lu.owned_locks(lock_level)
525 if self.wanted == locking.ALL_SET:
526 assert not self.names
527 # caller didn't specify names, so ordering is not important
528 return utils.NiceSort(names)
530 # caller specified names and we must keep the same order
532 assert not self.do_locking or lu.glm.is_owned(lock_level)
534 missing = set(self.wanted).difference(names)
536 raise errors.OpExecError("Some items were removed before retrieving"
537 " their data: %s" % missing)
539 # Return expanded names
542 def ExpandNames(self, lu):
543 """Expand names for this query.
545 See L{LogicalUnit.ExpandNames}.
548 raise NotImplementedError()
550 def DeclareLocks(self, lu, level):
551 """Declare locks for this query.
553 See L{LogicalUnit.DeclareLocks}.
556 raise NotImplementedError()
558 def _GetQueryData(self, lu):
559 """Collects all data for this query.
561 @return: Query data object
564 raise NotImplementedError()
566 def NewStyleQuery(self, lu):
567 """Collect data and execute query.
570 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
571 sort_by_name=self.sort_by_name)
573 def OldStyleQuery(self, lu):
574 """Collect data and execute query.
577 return self.query.OldStyleQuery(self._GetQueryData(lu),
578 sort_by_name=self.sort_by_name)
582 """Returns a dict declaring all lock levels shared.
585 return dict.fromkeys(locking.LEVELS, 1)
588 def _MakeLegacyNodeInfo(data):
589 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
591 Converts the data into a single dictionary. This is fine for most use cases,
592 but some require information from more than one volume group or hypervisor.
595 (bootid, (vg_info, ), (hv_info, )) = data
597 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
602 def _AnnotateDiskParams(instance, devs, cfg):
603 """Little helper wrapper to the rpc annotation method.
605 @param instance: The instance object
606 @type devs: List of L{objects.Disk}
607 @param devs: The root devices (not any of its children!)
608 @param cfg: The config object
609 @returns The annotated disk copies
610 @see L{rpc.AnnotateDiskParams}
613 return rpc.AnnotateDiskParams(instance.disk_template, devs,
614 cfg.GetInstanceDiskParams(instance))
617 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
619 """Checks if node groups for locked instances are still correct.
621 @type cfg: L{config.ConfigWriter}
622 @param cfg: Cluster configuration
623 @type instances: dict; string as key, L{objects.Instance} as value
624 @param instances: Dictionary, instance name as key, instance object as value
625 @type owned_groups: iterable of string
626 @param owned_groups: List of owned groups
627 @type owned_nodes: iterable of string
628 @param owned_nodes: List of owned nodes
629 @type cur_group_uuid: string or None
630 @param cur_group_uuid: Optional group UUID to check against instance's groups
633 for (name, inst) in instances.items():
634 assert owned_nodes.issuperset(inst.all_nodes), \
635 "Instance %s's nodes changed while we kept the lock" % name
637 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
639 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
640 "Instance %s has no node in group %s" % (name, cur_group_uuid)
643 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
644 """Checks if the owned node groups are still correct for an instance.
646 @type cfg: L{config.ConfigWriter}
647 @param cfg: The cluster configuration
648 @type instance_name: string
649 @param instance_name: Instance name
650 @type owned_groups: set or frozenset
651 @param owned_groups: List of currently owned node groups
654 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
656 if not owned_groups.issuperset(inst_groups):
657 raise errors.OpPrereqError("Instance %s's node groups changed since"
658 " locks were acquired, current groups are"
659 " are '%s', owning groups '%s'; retry the"
662 utils.CommaJoin(inst_groups),
663 utils.CommaJoin(owned_groups)),
669 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
670 """Checks if the instances in a node group are still correct.
672 @type cfg: L{config.ConfigWriter}
673 @param cfg: The cluster configuration
674 @type group_uuid: string
675 @param group_uuid: Node group UUID
676 @type owned_instances: set or frozenset
677 @param owned_instances: List of currently owned instances
680 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
681 if owned_instances != wanted_instances:
682 raise errors.OpPrereqError("Instances in node group '%s' changed since"
683 " locks were acquired, wanted '%s', have '%s';"
684 " retry the operation" %
686 utils.CommaJoin(wanted_instances),
687 utils.CommaJoin(owned_instances)),
690 return wanted_instances
693 def _SupportsOob(cfg, node):
694 """Tells if node supports OOB.
696 @type cfg: L{config.ConfigWriter}
697 @param cfg: The cluster configuration
698 @type node: L{objects.Node}
699 @param node: The node
700 @return: The OOB script if supported or an empty string otherwise
703 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
706 def _GetWantedNodes(lu, nodes):
707 """Returns list of checked and expanded node names.
709 @type lu: L{LogicalUnit}
710 @param lu: the logical unit on whose behalf we execute
712 @param nodes: list of node names or None for all nodes
714 @return: the list of nodes, sorted
715 @raise errors.ProgrammerError: if the nodes parameter is wrong type
719 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
721 return utils.NiceSort(lu.cfg.GetNodeList())
724 def _GetWantedInstances(lu, instances):
725 """Returns list of checked and expanded instance names.
727 @type lu: L{LogicalUnit}
728 @param lu: the logical unit on whose behalf we execute
729 @type instances: list
730 @param instances: list of instance names or None for all instances
732 @return: the list of instances, sorted
733 @raise errors.OpPrereqError: if the instances parameter is wrong type
734 @raise errors.OpPrereqError: if any of the passed instances is not found
738 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
740 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
744 def _GetUpdatedParams(old_params, update_dict,
745 use_default=True, use_none=False):
746 """Return the new version of a parameter dictionary.
748 @type old_params: dict
749 @param old_params: old parameters
750 @type update_dict: dict
751 @param update_dict: dict containing new parameter values, or
752 constants.VALUE_DEFAULT to reset the parameter to its default
754 @param use_default: boolean
755 @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
756 values as 'to be deleted' values
757 @param use_none: boolean
758 @type use_none: whether to recognise C{None} values as 'to be
761 @return: the new parameter dictionary
764 params_copy = copy.deepcopy(old_params)
765 for key, val in update_dict.iteritems():
766 if ((use_default and val == constants.VALUE_DEFAULT) or
767 (use_none and val is None)):
773 params_copy[key] = val
777 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
778 """Return the new version of a instance policy.
780 @param group_policy: whether this policy applies to a group and thus
781 we should support removal of policy entries
784 use_none = use_default = group_policy
785 ipolicy = copy.deepcopy(old_ipolicy)
786 for key, value in new_ipolicy.items():
787 if key not in constants.IPOLICY_ALL_KEYS:
788 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
790 if key in constants.IPOLICY_ISPECS:
791 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
792 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
794 use_default=use_default)
796 if (not value or value == [constants.VALUE_DEFAULT] or
797 value == constants.VALUE_DEFAULT):
801 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
802 " on the cluster'" % key,
805 if key in constants.IPOLICY_PARAMETERS:
806 # FIXME: we assume all such values are float
808 ipolicy[key] = float(value)
809 except (TypeError, ValueError), err:
810 raise errors.OpPrereqError("Invalid value for attribute"
811 " '%s': '%s', error: %s" %
812 (key, value, err), errors.ECODE_INVAL)
814 # FIXME: we assume all others are lists; this should be redone
816 ipolicy[key] = list(value)
818 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
819 except errors.ConfigurationError, err:
820 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
825 def _UpdateAndVerifySubDict(base, updates, type_check):
826 """Updates and verifies a dict with sub dicts of the same type.
828 @param base: The dict with the old data
829 @param updates: The dict with the new data
830 @param type_check: Dict suitable to ForceDictType to verify correct types
831 @returns: A new dict with updated and verified values
835 new = _GetUpdatedParams(old, value)
836 utils.ForceDictType(new, type_check)
839 ret = copy.deepcopy(base)
840 ret.update(dict((key, fn(base.get(key, {}), value))
841 for key, value in updates.items()))
845 def _MergeAndVerifyHvState(op_input, obj_input):
846 """Combines the hv state from an opcode with the one of the object
848 @param op_input: The input dict from the opcode
849 @param obj_input: The input dict from the objects
850 @return: The verified and updated dict
854 invalid_hvs = set(op_input) - constants.HYPER_TYPES
856 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
857 " %s" % utils.CommaJoin(invalid_hvs),
859 if obj_input is None:
861 type_check = constants.HVSTS_PARAMETER_TYPES
862 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
867 def _MergeAndVerifyDiskState(op_input, obj_input):
868 """Combines the disk state from an opcode with the one of the object
870 @param op_input: The input dict from the opcode
871 @param obj_input: The input dict from the objects
872 @return: The verified and updated dict
875 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
877 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
878 utils.CommaJoin(invalid_dst),
880 type_check = constants.DSS_PARAMETER_TYPES
881 if obj_input is None:
883 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
885 for key, value in op_input.items())
890 def _ReleaseLocks(lu, level, names=None, keep=None):
891 """Releases locks owned by an LU.
893 @type lu: L{LogicalUnit}
894 @param level: Lock level
895 @type names: list or None
896 @param names: Names of locks to release
897 @type keep: list or None
898 @param keep: Names of locks to retain
901 assert not (keep is not None and names is not None), \
902 "Only one of the 'names' and the 'keep' parameters can be given"
904 if names is not None:
905 should_release = names.__contains__
907 should_release = lambda name: name not in keep
909 should_release = None
911 owned = lu.owned_locks(level)
913 # Not owning any lock at this level, do nothing
920 # Determine which locks to release
922 if should_release(name):
927 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
929 # Release just some locks
930 lu.glm.release(level, names=release)
932 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
935 lu.glm.release(level)
937 assert not lu.glm.is_owned(level), "No locks should be owned"
940 def _MapInstanceDisksToNodes(instances):
941 """Creates a map from (node, volume) to instance name.
943 @type instances: list of L{objects.Instance}
944 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
947 return dict(((node, vol), inst.name)
948 for inst in instances
949 for (node, vols) in inst.MapLVsByNode().items()
953 def _RunPostHook(lu, node_name):
954 """Runs the post-hook for an opcode on a single node.
957 hm = lu.proc.BuildHooksManager(lu)
959 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
961 # pylint: disable=W0702
962 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
965 def _CheckOutputFields(static, dynamic, selected):
966 """Checks whether all selected fields are valid.
968 @type static: L{utils.FieldSet}
969 @param static: static fields set
970 @type dynamic: L{utils.FieldSet}
971 @param dynamic: dynamic fields set
978 delta = f.NonMatching(selected)
980 raise errors.OpPrereqError("Unknown output fields selected: %s"
981 % ",".join(delta), errors.ECODE_INVAL)
984 def _CheckGlobalHvParams(params):
985 """Validates that given hypervisor params are not global ones.
987 This will ensure that instances don't get customised versions of
991 used_globals = constants.HVC_GLOBALS.intersection(params)
993 msg = ("The following hypervisor parameters are global and cannot"
994 " be customized at instance level, please modify them at"
995 " cluster level: %s" % utils.CommaJoin(used_globals))
996 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
999 def _CheckNodeOnline(lu, node, msg=None):
1000 """Ensure that a given node is online.
1002 @param lu: the LU on behalf of which we make the check
1003 @param node: the node to check
1004 @param msg: if passed, should be a message to replace the default one
1005 @raise errors.OpPrereqError: if the node is offline
1009 msg = "Can't use offline node"
1010 if lu.cfg.GetNodeInfo(node).offline:
1011 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1014 def _CheckNodeNotDrained(lu, node):
1015 """Ensure that a given node is not drained.
1017 @param lu: the LU on behalf of which we make the check
1018 @param node: the node to check
1019 @raise errors.OpPrereqError: if the node is drained
1022 if lu.cfg.GetNodeInfo(node).drained:
1023 raise errors.OpPrereqError("Can't use drained node %s" % node,
1027 def _CheckNodeVmCapable(lu, node):
1028 """Ensure that a given node is vm capable.
1030 @param lu: the LU on behalf of which we make the check
1031 @param node: the node to check
1032 @raise errors.OpPrereqError: if the node is not vm capable
1035 if not lu.cfg.GetNodeInfo(node).vm_capable:
1036 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1040 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1041 """Ensure that a node supports a given OS.
1043 @param lu: the LU on behalf of which we make the check
1044 @param node: the node to check
1045 @param os_name: the OS to query about
1046 @param force_variant: whether to ignore variant errors
1047 @raise errors.OpPrereqError: if the node is not supporting the OS
1050 result = lu.rpc.call_os_get(node, os_name)
1051 result.Raise("OS '%s' not in supported OS list for node %s" %
1053 prereq=True, ecode=errors.ECODE_INVAL)
1054 if not force_variant:
1055 _CheckOSVariant(result.payload, os_name)
1058 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1059 """Ensure that a node has the given secondary ip.
1061 @type lu: L{LogicalUnit}
1062 @param lu: the LU on behalf of which we make the check
1064 @param node: the node to check
1065 @type secondary_ip: string
1066 @param secondary_ip: the ip to check
1067 @type prereq: boolean
1068 @param prereq: whether to throw a prerequisite or an execute error
1069 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1070 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1073 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1074 result.Raise("Failure checking secondary ip on node %s" % node,
1075 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1076 if not result.payload:
1077 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1078 " please fix and re-run this command" % secondary_ip)
1080 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1082 raise errors.OpExecError(msg)
1085 def _GetClusterDomainSecret():
1086 """Reads the cluster domain secret.
1089 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1093 def _CheckInstanceState(lu, instance, req_states, msg=None):
1094 """Ensure that an instance is in one of the required states.
1096 @param lu: the LU on behalf of which we make the check
1097 @param instance: the instance to check
1098 @param msg: if passed, should be a message to replace the default one
1099 @raise errors.OpPrereqError: if the instance is not in the required state
1103 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1104 if instance.admin_state not in req_states:
1105 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1106 (instance.name, instance.admin_state, msg),
1109 if constants.ADMINST_UP not in req_states:
1110 pnode = instance.primary_node
1111 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1112 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1113 prereq=True, ecode=errors.ECODE_ENVIRON)
1115 if instance.name in ins_l.payload:
1116 raise errors.OpPrereqError("Instance %s is running, %s" %
1117 (instance.name, msg), errors.ECODE_STATE)
1120 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1121 """Computes if value is in the desired range.
1123 @param name: name of the parameter for which we perform the check
1124 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1126 @param ipolicy: dictionary containing min, max and std values
1127 @param value: actual value that we want to use
1128 @return: None or element not meeting the criteria
1132 if value in [None, constants.VALUE_AUTO]:
1134 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1135 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1136 if value > max_v or min_v > value:
1138 fqn = "%s/%s" % (name, qualifier)
1141 return ("%s value %s is not in range [%s, %s]" %
1142 (fqn, value, min_v, max_v))
1146 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1147 nic_count, disk_sizes, spindle_use,
1148 _compute_fn=_ComputeMinMaxSpec):
1149 """Verifies ipolicy against provided specs.
1152 @param ipolicy: The ipolicy
1154 @param mem_size: The memory size
1155 @type cpu_count: int
1156 @param cpu_count: Used cpu cores
1157 @type disk_count: int
1158 @param disk_count: Number of disks used
1159 @type nic_count: int
1160 @param nic_count: Number of nics used
1161 @type disk_sizes: list of ints
1162 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1163 @type spindle_use: int
1164 @param spindle_use: The number of spindles this instance uses
1165 @param _compute_fn: The compute function (unittest only)
1166 @return: A list of violations, or an empty list of no violations are found
1169 assert disk_count == len(disk_sizes)
1172 (constants.ISPEC_MEM_SIZE, "", mem_size),
1173 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1174 (constants.ISPEC_DISK_COUNT, "", disk_count),
1175 (constants.ISPEC_NIC_COUNT, "", nic_count),
1176 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1177 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1178 for idx, d in enumerate(disk_sizes)]
1181 (_compute_fn(name, qualifier, ipolicy, value)
1182 for (name, qualifier, value) in test_settings))
1185 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1186 _compute_fn=_ComputeIPolicySpecViolation):
1187 """Compute if instance meets the specs of ipolicy.
1190 @param ipolicy: The ipolicy to verify against
1191 @type instance: L{objects.Instance}
1192 @param instance: The instance to verify
1193 @param _compute_fn: The function to verify ipolicy (unittest only)
1194 @see: L{_ComputeIPolicySpecViolation}
1197 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1198 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1199 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1200 disk_count = len(instance.disks)
1201 disk_sizes = [disk.size for disk in instance.disks]
1202 nic_count = len(instance.nics)
1204 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1205 disk_sizes, spindle_use)
1208 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1209 _compute_fn=_ComputeIPolicySpecViolation):
1210 """Compute if instance specs meets the specs of ipolicy.
1213 @param ipolicy: The ipolicy to verify against
1214 @param instance_spec: dict
1215 @param instance_spec: The instance spec to verify
1216 @param _compute_fn: The function to verify ipolicy (unittest only)
1217 @see: L{_ComputeIPolicySpecViolation}
1220 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1221 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1222 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1223 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1224 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1225 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1227 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1228 disk_sizes, spindle_use)
1231 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1233 _compute_fn=_ComputeIPolicyInstanceViolation):
1234 """Compute if instance meets the specs of the new target group.
1236 @param ipolicy: The ipolicy to verify
1237 @param instance: The instance object to verify
1238 @param current_group: The current group of the instance
1239 @param target_group: The new group of the instance
1240 @param _compute_fn: The function to verify ipolicy (unittest only)
1241 @see: L{_ComputeIPolicySpecViolation}
1244 if current_group == target_group:
1247 return _compute_fn(ipolicy, instance)
1250 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1251 _compute_fn=_ComputeIPolicyNodeViolation):
1252 """Checks that the target node is correct in terms of instance policy.
1254 @param ipolicy: The ipolicy to verify
1255 @param instance: The instance object to verify
1256 @param node: The new node to relocate
1257 @param ignore: Ignore violations of the ipolicy
1258 @param _compute_fn: The function to verify ipolicy (unittest only)
1259 @see: L{_ComputeIPolicySpecViolation}
1262 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1263 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1266 msg = ("Instance does not meet target node group's (%s) instance"
1267 " policy: %s") % (node.group, utils.CommaJoin(res))
1271 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1274 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1275 """Computes a set of any instances that would violate the new ipolicy.
1277 @param old_ipolicy: The current (still in-place) ipolicy
1278 @param new_ipolicy: The new (to become) ipolicy
1279 @param instances: List of instances to verify
1280 @return: A list of instances which violates the new ipolicy but
1284 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1285 _ComputeViolatingInstances(old_ipolicy, instances))
1288 def _ExpandItemName(fn, name, kind):
1289 """Expand an item name.
1291 @param fn: the function to use for expansion
1292 @param name: requested item name
1293 @param kind: text description ('Node' or 'Instance')
1294 @return: the resolved (full) name
1295 @raise errors.OpPrereqError: if the item is not found
1298 full_name = fn(name)
1299 if full_name is None:
1300 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1305 def _ExpandNodeName(cfg, name):
1306 """Wrapper over L{_ExpandItemName} for nodes."""
1307 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1310 def _ExpandInstanceName(cfg, name):
1311 """Wrapper over L{_ExpandItemName} for instance."""
1312 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1315 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1316 minmem, maxmem, vcpus, nics, disk_template, disks,
1317 bep, hvp, hypervisor_name, tags):
1318 """Builds instance related env variables for hooks
1320 This builds the hook environment from individual variables.
1323 @param name: the name of the instance
1324 @type primary_node: string
1325 @param primary_node: the name of the instance's primary node
1326 @type secondary_nodes: list
1327 @param secondary_nodes: list of secondary nodes as strings
1328 @type os_type: string
1329 @param os_type: the name of the instance's OS
1330 @type status: string
1331 @param status: the desired status of the instance
1332 @type minmem: string
1333 @param minmem: the minimum memory size of the instance
1334 @type maxmem: string
1335 @param maxmem: the maximum memory size of the instance
1337 @param vcpus: the count of VCPUs the instance has
1339 @param nics: list of tuples (ip, mac, mode, link) representing
1340 the NICs the instance has
1341 @type disk_template: string
1342 @param disk_template: the disk template of the instance
1344 @param disks: the list of (size, mode) pairs
1346 @param bep: the backend parameters for the instance
1348 @param hvp: the hypervisor parameters for the instance
1349 @type hypervisor_name: string
1350 @param hypervisor_name: the hypervisor for the instance
1352 @param tags: list of instance tags as strings
1354 @return: the hook environment for this instance
1359 "INSTANCE_NAME": name,
1360 "INSTANCE_PRIMARY": primary_node,
1361 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1362 "INSTANCE_OS_TYPE": os_type,
1363 "INSTANCE_STATUS": status,
1364 "INSTANCE_MINMEM": minmem,
1365 "INSTANCE_MAXMEM": maxmem,
1366 # TODO(2.7) remove deprecated "memory" value
1367 "INSTANCE_MEMORY": maxmem,
1368 "INSTANCE_VCPUS": vcpus,
1369 "INSTANCE_DISK_TEMPLATE": disk_template,
1370 "INSTANCE_HYPERVISOR": hypervisor_name,
1373 nic_count = len(nics)
1374 for idx, (ip, mac, mode, link) in enumerate(nics):
1377 env["INSTANCE_NIC%d_IP" % idx] = ip
1378 env["INSTANCE_NIC%d_MAC" % idx] = mac
1379 env["INSTANCE_NIC%d_MODE" % idx] = mode
1380 env["INSTANCE_NIC%d_LINK" % idx] = link
1381 if mode == constants.NIC_MODE_BRIDGED:
1382 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1386 env["INSTANCE_NIC_COUNT"] = nic_count
1389 disk_count = len(disks)
1390 for idx, (size, mode) in enumerate(disks):
1391 env["INSTANCE_DISK%d_SIZE" % idx] = size
1392 env["INSTANCE_DISK%d_MODE" % idx] = mode
1396 env["INSTANCE_DISK_COUNT"] = disk_count
1401 env["INSTANCE_TAGS"] = " ".join(tags)
1403 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1404 for key, value in source.items():
1405 env["INSTANCE_%s_%s" % (kind, key)] = value
1410 def _NICListToTuple(lu, nics):
1411 """Build a list of nic information tuples.
1413 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1414 value in LUInstanceQueryData.
1416 @type lu: L{LogicalUnit}
1417 @param lu: the logical unit on whose behalf we execute
1418 @type nics: list of L{objects.NIC}
1419 @param nics: list of nics to convert to hooks tuples
1423 cluster = lu.cfg.GetClusterInfo()
1427 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1428 mode = filled_params[constants.NIC_MODE]
1429 link = filled_params[constants.NIC_LINK]
1430 hooks_nics.append((ip, mac, mode, link))
1434 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1435 """Builds instance related env variables for hooks from an object.
1437 @type lu: L{LogicalUnit}
1438 @param lu: the logical unit on whose behalf we execute
1439 @type instance: L{objects.Instance}
1440 @param instance: the instance for which we should build the
1442 @type override: dict
1443 @param override: dictionary with key/values that will override
1446 @return: the hook environment dictionary
1449 cluster = lu.cfg.GetClusterInfo()
1450 bep = cluster.FillBE(instance)
1451 hvp = cluster.FillHV(instance)
1453 "name": instance.name,
1454 "primary_node": instance.primary_node,
1455 "secondary_nodes": instance.secondary_nodes,
1456 "os_type": instance.os,
1457 "status": instance.admin_state,
1458 "maxmem": bep[constants.BE_MAXMEM],
1459 "minmem": bep[constants.BE_MINMEM],
1460 "vcpus": bep[constants.BE_VCPUS],
1461 "nics": _NICListToTuple(lu, instance.nics),
1462 "disk_template": instance.disk_template,
1463 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1466 "hypervisor_name": instance.hypervisor,
1467 "tags": instance.tags,
1470 args.update(override)
1471 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1474 def _AdjustCandidatePool(lu, exceptions):
1475 """Adjust the candidate pool after node operations.
1478 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1480 lu.LogInfo("Promoted nodes to master candidate role: %s",
1481 utils.CommaJoin(node.name for node in mod_list))
1482 for name in mod_list:
1483 lu.context.ReaddNode(name)
1484 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1486 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1490 def _DecideSelfPromotion(lu, exceptions=None):
1491 """Decide whether I should promote myself as a master candidate.
1494 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1495 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1496 # the new node will increase mc_max with one, so:
1497 mc_should = min(mc_should + 1, cp_size)
1498 return mc_now < mc_should
1501 def _CalculateGroupIPolicy(cluster, group):
1502 """Calculate instance policy for group.
1505 return cluster.SimpleFillIPolicy(group.ipolicy)
1508 def _ComputeViolatingInstances(ipolicy, instances):
1509 """Computes a set of instances who violates given ipolicy.
1511 @param ipolicy: The ipolicy to verify
1512 @type instances: object.Instance
1513 @param instances: List of instances to verify
1514 @return: A frozenset of instance names violating the ipolicy
1517 return frozenset([inst.name for inst in instances
1518 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1521 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1522 """Check that the brigdes needed by a list of nics exist.
1525 cluster = lu.cfg.GetClusterInfo()
1526 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1527 brlist = [params[constants.NIC_LINK] for params in paramslist
1528 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1530 result = lu.rpc.call_bridges_exist(target_node, brlist)
1531 result.Raise("Error checking bridges on destination node '%s'" %
1532 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1535 def _CheckInstanceBridgesExist(lu, instance, node=None):
1536 """Check that the brigdes needed by an instance exist.
1540 node = instance.primary_node
1541 _CheckNicsBridgesExist(lu, instance.nics, node)
1544 def _CheckOSVariant(os_obj, name):
1545 """Check whether an OS name conforms to the os variants specification.
1547 @type os_obj: L{objects.OS}
1548 @param os_obj: OS object to check
1550 @param name: OS name passed by the user, to check for validity
1553 variant = objects.OS.GetVariant(name)
1554 if not os_obj.supported_variants:
1556 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1557 " passed)" % (os_obj.name, variant),
1561 raise errors.OpPrereqError("OS name must include a variant",
1564 if variant not in os_obj.supported_variants:
1565 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1568 def _GetNodeInstancesInner(cfg, fn):
1569 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1572 def _GetNodeInstances(cfg, node_name):
1573 """Returns a list of all primary and secondary instances on a node.
1577 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1580 def _GetNodePrimaryInstances(cfg, node_name):
1581 """Returns primary instances on a node.
1584 return _GetNodeInstancesInner(cfg,
1585 lambda inst: node_name == inst.primary_node)
1588 def _GetNodeSecondaryInstances(cfg, node_name):
1589 """Returns secondary instances on a node.
1592 return _GetNodeInstancesInner(cfg,
1593 lambda inst: node_name in inst.secondary_nodes)
1596 def _GetStorageTypeArgs(cfg, storage_type):
1597 """Returns the arguments for a storage type.
1600 # Special case for file storage
1601 if storage_type == constants.ST_FILE:
1602 # storage.FileStorage wants a list of storage directories
1603 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1608 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1611 for dev in instance.disks:
1612 cfg.SetDiskID(dev, node_name)
1614 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1616 result.Raise("Failed to get disk status from node %s" % node_name,
1617 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1619 for idx, bdev_status in enumerate(result.payload):
1620 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1626 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1627 """Check the sanity of iallocator and node arguments and use the
1628 cluster-wide iallocator if appropriate.
1630 Check that at most one of (iallocator, node) is specified. If none is
1631 specified, then the LU's opcode's iallocator slot is filled with the
1632 cluster-wide default iallocator.
1634 @type iallocator_slot: string
1635 @param iallocator_slot: the name of the opcode iallocator slot
1636 @type node_slot: string
1637 @param node_slot: the name of the opcode target node slot
1640 node = getattr(lu.op, node_slot, None)
1641 iallocator = getattr(lu.op, iallocator_slot, None)
1643 if node is not None and iallocator is not None:
1644 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1646 elif node is None and iallocator is None:
1647 default_iallocator = lu.cfg.GetDefaultIAllocator()
1648 if default_iallocator:
1649 setattr(lu.op, iallocator_slot, default_iallocator)
1651 raise errors.OpPrereqError("No iallocator or node given and no"
1652 " cluster-wide default iallocator found;"
1653 " please specify either an iallocator or a"
1654 " node, or set a cluster-wide default"
1658 def _GetDefaultIAllocator(cfg, iallocator):
1659 """Decides on which iallocator to use.
1661 @type cfg: L{config.ConfigWriter}
1662 @param cfg: Cluster configuration object
1663 @type iallocator: string or None
1664 @param iallocator: Iallocator specified in opcode
1666 @return: Iallocator name
1670 # Use default iallocator
1671 iallocator = cfg.GetDefaultIAllocator()
1674 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1675 " opcode nor as a cluster-wide default",
1681 class LUClusterPostInit(LogicalUnit):
1682 """Logical unit for running hooks after cluster initialization.
1685 HPATH = "cluster-init"
1686 HTYPE = constants.HTYPE_CLUSTER
1688 def BuildHooksEnv(self):
1693 "OP_TARGET": self.cfg.GetClusterName(),
1696 def BuildHooksNodes(self):
1697 """Build hooks nodes.
1700 return ([], [self.cfg.GetMasterNode()])
1702 def Exec(self, feedback_fn):
1709 class LUClusterDestroy(LogicalUnit):
1710 """Logical unit for destroying the cluster.
1713 HPATH = "cluster-destroy"
1714 HTYPE = constants.HTYPE_CLUSTER
1716 def BuildHooksEnv(self):
1721 "OP_TARGET": self.cfg.GetClusterName(),
1724 def BuildHooksNodes(self):
1725 """Build hooks nodes.
1730 def CheckPrereq(self):
1731 """Check prerequisites.
1733 This checks whether the cluster is empty.
1735 Any errors are signaled by raising errors.OpPrereqError.
1738 master = self.cfg.GetMasterNode()
1740 nodelist = self.cfg.GetNodeList()
1741 if len(nodelist) != 1 or nodelist[0] != master:
1742 raise errors.OpPrereqError("There are still %d node(s) in"
1743 " this cluster." % (len(nodelist) - 1),
1745 instancelist = self.cfg.GetInstanceList()
1747 raise errors.OpPrereqError("There are still %d instance(s) in"
1748 " this cluster." % len(instancelist),
1751 def Exec(self, feedback_fn):
1752 """Destroys the cluster.
1755 master_params = self.cfg.GetMasterNetworkParameters()
1757 # Run post hooks on master node before it's removed
1758 _RunPostHook(self, master_params.name)
1760 ems = self.cfg.GetUseExternalMipScript()
1761 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1764 self.LogWarning("Error disabling the master IP address: %s",
1767 return master_params.name
1770 def _VerifyCertificate(filename):
1771 """Verifies a certificate for L{LUClusterVerifyConfig}.
1773 @type filename: string
1774 @param filename: Path to PEM file
1778 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1779 utils.ReadFile(filename))
1780 except Exception, err: # pylint: disable=W0703
1781 return (LUClusterVerifyConfig.ETYPE_ERROR,
1782 "Failed to load X509 certificate %s: %s" % (filename, err))
1785 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1786 constants.SSL_CERT_EXPIRATION_ERROR)
1789 fnamemsg = "While verifying %s: %s" % (filename, msg)
1794 return (None, fnamemsg)
1795 elif errcode == utils.CERT_WARNING:
1796 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1797 elif errcode == utils.CERT_ERROR:
1798 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1800 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1803 def _GetAllHypervisorParameters(cluster, instances):
1804 """Compute the set of all hypervisor parameters.
1806 @type cluster: L{objects.Cluster}
1807 @param cluster: the cluster object
1808 @param instances: list of L{objects.Instance}
1809 @param instances: additional instances from which to obtain parameters
1810 @rtype: list of (origin, hypervisor, parameters)
1811 @return: a list with all parameters found, indicating the hypervisor they
1812 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1817 for hv_name in cluster.enabled_hypervisors:
1818 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1820 for os_name, os_hvp in cluster.os_hvp.items():
1821 for hv_name, hv_params in os_hvp.items():
1823 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1824 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1826 # TODO: collapse identical parameter values in a single one
1827 for instance in instances:
1828 if instance.hvparams:
1829 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1830 cluster.FillHV(instance)))
1835 class _VerifyErrors(object):
1836 """Mix-in for cluster/group verify LUs.
1838 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1839 self.op and self._feedback_fn to be available.)
1843 ETYPE_FIELD = "code"
1844 ETYPE_ERROR = "ERROR"
1845 ETYPE_WARNING = "WARNING"
1847 def _Error(self, ecode, item, msg, *args, **kwargs):
1848 """Format an error message.
1850 Based on the opcode's error_codes parameter, either format a
1851 parseable error code, or a simpler error string.
1853 This must be called only from Exec and functions called from Exec.
1856 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1857 itype, etxt, _ = ecode
1858 # first complete the msg
1861 # then format the whole message
1862 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1863 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1869 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1870 # and finally report it via the feedback_fn
1871 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1873 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1874 """Log an error message if the passed condition is True.
1878 or self.op.debug_simulate_errors) # pylint: disable=E1101
1880 # If the error code is in the list of ignored errors, demote the error to a
1882 (_, etxt, _) = ecode
1883 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1884 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1887 self._Error(ecode, *args, **kwargs)
1889 # do not mark the operation as failed for WARN cases only
1890 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1891 self.bad = self.bad or cond
1894 class LUClusterVerify(NoHooksLU):
1895 """Submits all jobs necessary to verify the cluster.
1900 def ExpandNames(self):
1901 self.needed_locks = {}
1903 def Exec(self, feedback_fn):
1906 if self.op.group_name:
1907 groups = [self.op.group_name]
1908 depends_fn = lambda: None
1910 groups = self.cfg.GetNodeGroupList()
1912 # Verify global configuration
1914 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1917 # Always depend on global verification
1918 depends_fn = lambda: [(-len(jobs), [])]
1920 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1921 ignore_errors=self.op.ignore_errors,
1922 depends=depends_fn())]
1923 for group in groups)
1925 # Fix up all parameters
1926 for op in itertools.chain(*jobs): # pylint: disable=W0142
1927 op.debug_simulate_errors = self.op.debug_simulate_errors
1928 op.verbose = self.op.verbose
1929 op.error_codes = self.op.error_codes
1931 op.skip_checks = self.op.skip_checks
1932 except AttributeError:
1933 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1935 return ResultWithJobs(jobs)
1938 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1939 """Verifies the cluster config.
1944 def _VerifyHVP(self, hvp_data):
1945 """Verifies locally the syntax of the hypervisor parameters.
1948 for item, hv_name, hv_params in hvp_data:
1949 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1952 hv_class = hypervisor.GetHypervisor(hv_name)
1953 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1954 hv_class.CheckParameterSyntax(hv_params)
1955 except errors.GenericError, err:
1956 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1958 def ExpandNames(self):
1959 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1960 self.share_locks = _ShareAll()
1962 def CheckPrereq(self):
1963 """Check prerequisites.
1966 # Retrieve all information
1967 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1968 self.all_node_info = self.cfg.GetAllNodesInfo()
1969 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1971 def Exec(self, feedback_fn):
1972 """Verify integrity of cluster, performing various test on nodes.
1976 self._feedback_fn = feedback_fn
1978 feedback_fn("* Verifying cluster config")
1980 for msg in self.cfg.VerifyConfig():
1981 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1983 feedback_fn("* Verifying cluster certificate files")
1985 for cert_filename in constants.ALL_CERT_FILES:
1986 (errcode, msg) = _VerifyCertificate(cert_filename)
1987 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1989 feedback_fn("* Verifying hypervisor parameters")
1991 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1992 self.all_inst_info.values()))
1994 feedback_fn("* Verifying all nodes belong to an existing group")
1996 # We do this verification here because, should this bogus circumstance
1997 # occur, it would never be caught by VerifyGroup, which only acts on
1998 # nodes/instances reachable from existing node groups.
2000 dangling_nodes = set(node.name for node in self.all_node_info.values()
2001 if node.group not in self.all_group_info)
2003 dangling_instances = {}
2004 no_node_instances = []
2006 for inst in self.all_inst_info.values():
2007 if inst.primary_node in dangling_nodes:
2008 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2009 elif inst.primary_node not in self.all_node_info:
2010 no_node_instances.append(inst.name)
2015 utils.CommaJoin(dangling_instances.get(node.name,
2017 for node in dangling_nodes]
2019 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2021 "the following nodes (and their instances) belong to a non"
2022 " existing group: %s", utils.CommaJoin(pretty_dangling))
2024 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2026 "the following instances have a non-existing primary-node:"
2027 " %s", utils.CommaJoin(no_node_instances))
2032 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2033 """Verifies the status of a node group.
2036 HPATH = "cluster-verify"
2037 HTYPE = constants.HTYPE_CLUSTER
2040 _HOOKS_INDENT_RE = re.compile("^", re.M)
2042 class NodeImage(object):
2043 """A class representing the logical and physical status of a node.
2046 @ivar name: the node name to which this object refers
2047 @ivar volumes: a structure as returned from
2048 L{ganeti.backend.GetVolumeList} (runtime)
2049 @ivar instances: a list of running instances (runtime)
2050 @ivar pinst: list of configured primary instances (config)
2051 @ivar sinst: list of configured secondary instances (config)
2052 @ivar sbp: dictionary of {primary-node: list of instances} for all
2053 instances for which this node is secondary (config)
2054 @ivar mfree: free memory, as reported by hypervisor (runtime)
2055 @ivar dfree: free disk, as reported by the node (runtime)
2056 @ivar offline: the offline status (config)
2057 @type rpc_fail: boolean
2058 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2059 not whether the individual keys were correct) (runtime)
2060 @type lvm_fail: boolean
2061 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2062 @type hyp_fail: boolean
2063 @ivar hyp_fail: whether the RPC call didn't return the instance list
2064 @type ghost: boolean
2065 @ivar ghost: whether this is a known node or not (config)
2066 @type os_fail: boolean
2067 @ivar os_fail: whether the RPC call didn't return valid OS data
2069 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2070 @type vm_capable: boolean
2071 @ivar vm_capable: whether the node can host instances
2074 def __init__(self, offline=False, name=None, vm_capable=True):
2083 self.offline = offline
2084 self.vm_capable = vm_capable
2085 self.rpc_fail = False
2086 self.lvm_fail = False
2087 self.hyp_fail = False
2089 self.os_fail = False
2092 def ExpandNames(self):
2093 # This raises errors.OpPrereqError on its own:
2094 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2096 # Get instances in node group; this is unsafe and needs verification later
2098 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2100 self.needed_locks = {
2101 locking.LEVEL_INSTANCE: inst_names,
2102 locking.LEVEL_NODEGROUP: [self.group_uuid],
2103 locking.LEVEL_NODE: [],
2106 self.share_locks = _ShareAll()
2108 def DeclareLocks(self, level):
2109 if level == locking.LEVEL_NODE:
2110 # Get members of node group; this is unsafe and needs verification later
2111 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2113 all_inst_info = self.cfg.GetAllInstancesInfo()
2115 # In Exec(), we warn about mirrored instances that have primary and
2116 # secondary living in separate node groups. To fully verify that
2117 # volumes for these instances are healthy, we will need to do an
2118 # extra call to their secondaries. We ensure here those nodes will
2120 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2121 # Important: access only the instances whose lock is owned
2122 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2123 nodes.update(all_inst_info[inst].secondary_nodes)
2125 self.needed_locks[locking.LEVEL_NODE] = nodes
2127 def CheckPrereq(self):
2128 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2129 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2131 group_nodes = set(self.group_info.members)
2133 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2136 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2138 unlocked_instances = \
2139 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2142 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2143 utils.CommaJoin(unlocked_nodes),
2146 if unlocked_instances:
2147 raise errors.OpPrereqError("Missing lock for instances: %s" %
2148 utils.CommaJoin(unlocked_instances),
2151 self.all_node_info = self.cfg.GetAllNodesInfo()
2152 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2154 self.my_node_names = utils.NiceSort(group_nodes)
2155 self.my_inst_names = utils.NiceSort(group_instances)
2157 self.my_node_info = dict((name, self.all_node_info[name])
2158 for name in self.my_node_names)
2160 self.my_inst_info = dict((name, self.all_inst_info[name])
2161 for name in self.my_inst_names)
2163 # We detect here the nodes that will need the extra RPC calls for verifying
2164 # split LV volumes; they should be locked.
2165 extra_lv_nodes = set()
2167 for inst in self.my_inst_info.values():
2168 if inst.disk_template in constants.DTS_INT_MIRROR:
2169 for nname in inst.all_nodes:
2170 if self.all_node_info[nname].group != self.group_uuid:
2171 extra_lv_nodes.add(nname)
2173 unlocked_lv_nodes = \
2174 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2176 if unlocked_lv_nodes:
2177 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2178 utils.CommaJoin(unlocked_lv_nodes),
2180 self.extra_lv_nodes = list(extra_lv_nodes)
2182 def _VerifyNode(self, ninfo, nresult):
2183 """Perform some basic validation on data returned from a node.
2185 - check the result data structure is well formed and has all the
2187 - check ganeti version
2189 @type ninfo: L{objects.Node}
2190 @param ninfo: the node to check
2191 @param nresult: the results from the node
2193 @return: whether overall this call was successful (and we can expect
2194 reasonable values in the respose)
2198 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2200 # main result, nresult should be a non-empty dict
2201 test = not nresult or not isinstance(nresult, dict)
2202 _ErrorIf(test, constants.CV_ENODERPC, node,
2203 "unable to verify node: no data returned")
2207 # compares ganeti version
2208 local_version = constants.PROTOCOL_VERSION
2209 remote_version = nresult.get("version", None)
2210 test = not (remote_version and
2211 isinstance(remote_version, (list, tuple)) and
2212 len(remote_version) == 2)
2213 _ErrorIf(test, constants.CV_ENODERPC, node,
2214 "connection to node returned invalid data")
2218 test = local_version != remote_version[0]
2219 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2220 "incompatible protocol versions: master %s,"
2221 " node %s", local_version, remote_version[0])
2225 # node seems compatible, we can actually try to look into its results
2227 # full package version
2228 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2229 constants.CV_ENODEVERSION, node,
2230 "software version mismatch: master %s, node %s",
2231 constants.RELEASE_VERSION, remote_version[1],
2232 code=self.ETYPE_WARNING)
2234 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2235 if ninfo.vm_capable and isinstance(hyp_result, dict):
2236 for hv_name, hv_result in hyp_result.iteritems():
2237 test = hv_result is not None
2238 _ErrorIf(test, constants.CV_ENODEHV, node,
2239 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2241 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2242 if ninfo.vm_capable and isinstance(hvp_result, list):
2243 for item, hv_name, hv_result in hvp_result:
2244 _ErrorIf(True, constants.CV_ENODEHV, node,
2245 "hypervisor %s parameter verify failure (source %s): %s",
2246 hv_name, item, hv_result)
2248 test = nresult.get(constants.NV_NODESETUP,
2249 ["Missing NODESETUP results"])
2250 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2255 def _VerifyNodeTime(self, ninfo, nresult,
2256 nvinfo_starttime, nvinfo_endtime):
2257 """Check the node time.
2259 @type ninfo: L{objects.Node}
2260 @param ninfo: the node to check
2261 @param nresult: the remote results for the node
2262 @param nvinfo_starttime: the start time of the RPC call
2263 @param nvinfo_endtime: the end time of the RPC call
2267 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2269 ntime = nresult.get(constants.NV_TIME, None)
2271 ntime_merged = utils.MergeTime(ntime)
2272 except (ValueError, TypeError):
2273 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2276 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2277 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2278 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2279 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2283 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2284 "Node time diverges by at least %s from master node time",
2287 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2288 """Check the node LVM results.
2290 @type ninfo: L{objects.Node}
2291 @param ninfo: the node to check
2292 @param nresult: the remote results for the node
2293 @param vg_name: the configured VG name
2300 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2302 # checks vg existence and size > 20G
2303 vglist = nresult.get(constants.NV_VGLIST, None)
2305 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2307 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2308 constants.MIN_VG_SIZE)
2309 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2312 pvlist = nresult.get(constants.NV_PVLIST, None)
2313 test = pvlist is None
2314 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2316 # check that ':' is not present in PV names, since it's a
2317 # special character for lvcreate (denotes the range of PEs to
2319 for _, pvname, owner_vg in pvlist:
2320 test = ":" in pvname
2321 _ErrorIf(test, constants.CV_ENODELVM, node,
2322 "Invalid character ':' in PV '%s' of VG '%s'",
2325 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2326 """Check the node bridges.
2328 @type ninfo: L{objects.Node}
2329 @param ninfo: the node to check
2330 @param nresult: the remote results for the node
2331 @param bridges: the expected list of bridges
2338 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2340 missing = nresult.get(constants.NV_BRIDGES, None)
2341 test = not isinstance(missing, list)
2342 _ErrorIf(test, constants.CV_ENODENET, node,
2343 "did not return valid bridge information")
2345 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2346 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2348 def _VerifyNodeUserScripts(self, ninfo, nresult):
2349 """Check the results of user scripts presence and executability on the node
2351 @type ninfo: L{objects.Node}
2352 @param ninfo: the node to check
2353 @param nresult: the remote results for the node
2358 test = not constants.NV_USERSCRIPTS in nresult
2359 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2360 "did not return user scripts information")
2362 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2364 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2365 "user scripts not present or not executable: %s" %
2366 utils.CommaJoin(sorted(broken_scripts)))
2368 def _VerifyNodeNetwork(self, ninfo, nresult):
2369 """Check the node network connectivity results.
2371 @type ninfo: L{objects.Node}
2372 @param ninfo: the node to check
2373 @param nresult: the remote results for the node
2377 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2379 test = constants.NV_NODELIST not in nresult
2380 _ErrorIf(test, constants.CV_ENODESSH, node,
2381 "node hasn't returned node ssh connectivity data")
2383 if nresult[constants.NV_NODELIST]:
2384 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2385 _ErrorIf(True, constants.CV_ENODESSH, node,
2386 "ssh communication with node '%s': %s", a_node, a_msg)
2388 test = constants.NV_NODENETTEST not in nresult
2389 _ErrorIf(test, constants.CV_ENODENET, node,
2390 "node hasn't returned node tcp connectivity data")
2392 if nresult[constants.NV_NODENETTEST]:
2393 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2395 _ErrorIf(True, constants.CV_ENODENET, node,
2396 "tcp communication with node '%s': %s",
2397 anode, nresult[constants.NV_NODENETTEST][anode])
2399 test = constants.NV_MASTERIP not in nresult
2400 _ErrorIf(test, constants.CV_ENODENET, node,
2401 "node hasn't returned node master IP reachability data")
2403 if not nresult[constants.NV_MASTERIP]:
2404 if node == self.master_node:
2405 msg = "the master node cannot reach the master IP (not configured?)"
2407 msg = "cannot reach the master IP"
2408 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2410 def _VerifyInstance(self, instance, instanceconfig, node_image,
2412 """Verify an instance.
2414 This function checks to see if the required block devices are
2415 available on the instance's node.
2418 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2419 node_current = instanceconfig.primary_node
2421 node_vol_should = {}
2422 instanceconfig.MapLVsByNode(node_vol_should)
2424 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2425 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2426 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2428 for node in node_vol_should:
2429 n_img = node_image[node]
2430 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2431 # ignore missing volumes on offline or broken nodes
2433 for volume in node_vol_should[node]:
2434 test = volume not in n_img.volumes
2435 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2436 "volume %s missing on node %s", volume, node)
2438 if instanceconfig.admin_state == constants.ADMINST_UP:
2439 pri_img = node_image[node_current]
2440 test = instance not in pri_img.instances and not pri_img.offline
2441 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2442 "instance not running on its primary node %s",
2445 diskdata = [(nname, success, status, idx)
2446 for (nname, disks) in diskstatus.items()
2447 for idx, (success, status) in enumerate(disks)]
2449 for nname, success, bdev_status, idx in diskdata:
2450 # the 'ghost node' construction in Exec() ensures that we have a
2452 snode = node_image[nname]
2453 bad_snode = snode.ghost or snode.offline
2454 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2455 not success and not bad_snode,
2456 constants.CV_EINSTANCEFAULTYDISK, instance,
2457 "couldn't retrieve status for disk/%s on %s: %s",
2458 idx, nname, bdev_status)
2459 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2460 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2461 constants.CV_EINSTANCEFAULTYDISK, instance,
2462 "disk/%s on %s is faulty", idx, nname)
2464 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2465 """Verify if there are any unknown volumes in the cluster.
2467 The .os, .swap and backup volumes are ignored. All other volumes are
2468 reported as unknown.
2470 @type reserved: L{ganeti.utils.FieldSet}
2471 @param reserved: a FieldSet of reserved volume names
2474 for node, n_img in node_image.items():
2475 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2476 self.all_node_info[node].group != self.group_uuid):
2477 # skip non-healthy nodes
2479 for volume in n_img.volumes:
2480 test = ((node not in node_vol_should or
2481 volume not in node_vol_should[node]) and
2482 not reserved.Matches(volume))
2483 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2484 "volume %s is unknown", volume)
2486 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2487 """Verify N+1 Memory Resilience.
2489 Check that if one single node dies we can still start all the
2490 instances it was primary for.
2493 cluster_info = self.cfg.GetClusterInfo()
2494 for node, n_img in node_image.items():
2495 # This code checks that every node which is now listed as
2496 # secondary has enough memory to host all instances it is
2497 # supposed to should a single other node in the cluster fail.
2498 # FIXME: not ready for failover to an arbitrary node
2499 # FIXME: does not support file-backed instances
2500 # WARNING: we currently take into account down instances as well
2501 # as up ones, considering that even if they're down someone
2502 # might want to start them even in the event of a node failure.
2503 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2504 # we're skipping nodes marked offline and nodes in other groups from
2505 # the N+1 warning, since most likely we don't have good memory
2506 # infromation from them; we already list instances living on such
2507 # nodes, and that's enough warning
2509 #TODO(dynmem): also consider ballooning out other instances
2510 for prinode, instances in n_img.sbp.items():
2512 for instance in instances:
2513 bep = cluster_info.FillBE(instance_cfg[instance])
2514 if bep[constants.BE_AUTO_BALANCE]:
2515 needed_mem += bep[constants.BE_MINMEM]
2516 test = n_img.mfree < needed_mem
2517 self._ErrorIf(test, constants.CV_ENODEN1, node,
2518 "not enough memory to accomodate instance failovers"
2519 " should node %s fail (%dMiB needed, %dMiB available)",
2520 prinode, needed_mem, n_img.mfree)
2523 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2524 (files_all, files_opt, files_mc, files_vm)):
2525 """Verifies file checksums collected from all nodes.
2527 @param errorif: Callback for reporting errors
2528 @param nodeinfo: List of L{objects.Node} objects
2529 @param master_node: Name of master node
2530 @param all_nvinfo: RPC results
2533 # Define functions determining which nodes to consider for a file
2536 (files_mc, lambda node: (node.master_candidate or
2537 node.name == master_node)),
2538 (files_vm, lambda node: node.vm_capable),
2541 # Build mapping from filename to list of nodes which should have the file
2543 for (files, fn) in files2nodefn:
2545 filenodes = nodeinfo
2547 filenodes = filter(fn, nodeinfo)
2548 nodefiles.update((filename,
2549 frozenset(map(operator.attrgetter("name"), filenodes)))
2550 for filename in files)
2552 assert set(nodefiles) == (files_all | files_mc | files_vm)
2554 fileinfo = dict((filename, {}) for filename in nodefiles)
2555 ignore_nodes = set()
2557 for node in nodeinfo:
2559 ignore_nodes.add(node.name)
2562 nresult = all_nvinfo[node.name]
2564 if nresult.fail_msg or not nresult.payload:
2567 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2569 test = not (node_files and isinstance(node_files, dict))
2570 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2571 "Node did not return file checksum data")
2573 ignore_nodes.add(node.name)
2576 # Build per-checksum mapping from filename to nodes having it
2577 for (filename, checksum) in node_files.items():
2578 assert filename in nodefiles
2579 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2581 for (filename, checksums) in fileinfo.items():
2582 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2584 # Nodes having the file
2585 with_file = frozenset(node_name
2586 for nodes in fileinfo[filename].values()
2587 for node_name in nodes) - ignore_nodes
2589 expected_nodes = nodefiles[filename] - ignore_nodes
2591 # Nodes missing file
2592 missing_file = expected_nodes - with_file
2594 if filename in files_opt:
2596 errorif(missing_file and missing_file != expected_nodes,
2597 constants.CV_ECLUSTERFILECHECK, None,
2598 "File %s is optional, but it must exist on all or no"
2599 " nodes (not found on %s)",
2600 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2602 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2603 "File %s is missing from node(s) %s", filename,
2604 utils.CommaJoin(utils.NiceSort(missing_file)))
2606 # Warn if a node has a file it shouldn't
2607 unexpected = with_file - expected_nodes
2609 constants.CV_ECLUSTERFILECHECK, None,
2610 "File %s should not exist on node(s) %s",
2611 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2613 # See if there are multiple versions of the file
2614 test = len(checksums) > 1
2616 variants = ["variant %s on %s" %
2617 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2618 for (idx, (checksum, nodes)) in
2619 enumerate(sorted(checksums.items()))]
2623 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2624 "File %s found with %s different checksums (%s)",
2625 filename, len(checksums), "; ".join(variants))
2627 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2629 """Verifies and the node DRBD status.
2631 @type ninfo: L{objects.Node}
2632 @param ninfo: the node to check
2633 @param nresult: the remote results for the node
2634 @param instanceinfo: the dict of instances
2635 @param drbd_helper: the configured DRBD usermode helper
2636 @param drbd_map: the DRBD map as returned by
2637 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2641 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2644 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2645 test = (helper_result == None)
2646 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2647 "no drbd usermode helper returned")
2649 status, payload = helper_result
2651 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2652 "drbd usermode helper check unsuccessful: %s", payload)
2653 test = status and (payload != drbd_helper)
2654 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2655 "wrong drbd usermode helper: %s", payload)
2657 # compute the DRBD minors
2659 for minor, instance in drbd_map[node].items():
2660 test = instance not in instanceinfo
2661 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2662 "ghost instance '%s' in temporary DRBD map", instance)
2663 # ghost instance should not be running, but otherwise we
2664 # don't give double warnings (both ghost instance and
2665 # unallocated minor in use)
2667 node_drbd[minor] = (instance, False)
2669 instance = instanceinfo[instance]
2670 node_drbd[minor] = (instance.name,
2671 instance.admin_state == constants.ADMINST_UP)
2673 # and now check them
2674 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2675 test = not isinstance(used_minors, (tuple, list))
2676 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2677 "cannot parse drbd status file: %s", str(used_minors))
2679 # we cannot check drbd status
2682 for minor, (iname, must_exist) in node_drbd.items():
2683 test = minor not in used_minors and must_exist
2684 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2685 "drbd minor %d of instance %s is not active", minor, iname)
2686 for minor in used_minors:
2687 test = minor not in node_drbd
2688 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2689 "unallocated drbd minor %d is in use", minor)
2691 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2692 """Builds the node OS structures.
2694 @type ninfo: L{objects.Node}
2695 @param ninfo: the node to check
2696 @param nresult: the remote results for the node
2697 @param nimg: the node image object
2701 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2703 remote_os = nresult.get(constants.NV_OSLIST, None)
2704 test = (not isinstance(remote_os, list) or
2705 not compat.all(isinstance(v, list) and len(v) == 7
2706 for v in remote_os))
2708 _ErrorIf(test, constants.CV_ENODEOS, node,
2709 "node hasn't returned valid OS data")
2718 for (name, os_path, status, diagnose,
2719 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2721 if name not in os_dict:
2724 # parameters is a list of lists instead of list of tuples due to
2725 # JSON lacking a real tuple type, fix it:
2726 parameters = [tuple(v) for v in parameters]
2727 os_dict[name].append((os_path, status, diagnose,
2728 set(variants), set(parameters), set(api_ver)))
2730 nimg.oslist = os_dict
2732 def _VerifyNodeOS(self, ninfo, nimg, base):
2733 """Verifies the node OS list.
2735 @type ninfo: L{objects.Node}
2736 @param ninfo: the node to check
2737 @param nimg: the node image object
2738 @param base: the 'template' node we match against (e.g. from the master)
2742 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2744 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2746 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2747 for os_name, os_data in nimg.oslist.items():
2748 assert os_data, "Empty OS status for OS %s?!" % os_name
2749 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2750 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2751 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2752 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2753 "OS '%s' has multiple entries (first one shadows the rest): %s",
2754 os_name, utils.CommaJoin([v[0] for v in os_data]))
2755 # comparisons with the 'base' image
2756 test = os_name not in base.oslist
2757 _ErrorIf(test, constants.CV_ENODEOS, node,
2758 "Extra OS %s not present on reference node (%s)",
2762 assert base.oslist[os_name], "Base node has empty OS status?"
2763 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2765 # base OS is invalid, skipping
2767 for kind, a, b in [("API version", f_api, b_api),
2768 ("variants list", f_var, b_var),
2769 ("parameters", beautify_params(f_param),
2770 beautify_params(b_param))]:
2771 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2772 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2773 kind, os_name, base.name,
2774 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2776 # check any missing OSes
2777 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2778 _ErrorIf(missing, constants.CV_ENODEOS, node,
2779 "OSes present on reference node %s but missing on this node: %s",
2780 base.name, utils.CommaJoin(missing))
2782 def _VerifyOob(self, ninfo, nresult):
2783 """Verifies out of band functionality of a node.
2785 @type ninfo: L{objects.Node}
2786 @param ninfo: the node to check
2787 @param nresult: the remote results for the node
2791 # We just have to verify the paths on master and/or master candidates
2792 # as the oob helper is invoked on the master
2793 if ((ninfo.master_candidate or ninfo.master_capable) and
2794 constants.NV_OOB_PATHS in nresult):
2795 for path_result in nresult[constants.NV_OOB_PATHS]:
2796 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2798 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2799 """Verifies and updates the node volume data.
2801 This function will update a L{NodeImage}'s internal structures
2802 with data from the remote call.
2804 @type ninfo: L{objects.Node}
2805 @param ninfo: the node to check
2806 @param nresult: the remote results for the node
2807 @param nimg: the node image object
2808 @param vg_name: the configured VG name
2812 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2814 nimg.lvm_fail = True
2815 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2818 elif isinstance(lvdata, basestring):
2819 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2820 utils.SafeEncode(lvdata))
2821 elif not isinstance(lvdata, dict):
2822 _ErrorIf(True, constants.CV_ENODELVM, node,
2823 "rpc call to node failed (lvlist)")
2825 nimg.volumes = lvdata
2826 nimg.lvm_fail = False
2828 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2829 """Verifies and updates the node instance list.
2831 If the listing was successful, then updates this node's instance
2832 list. Otherwise, it marks the RPC call as failed for the instance
2835 @type ninfo: L{objects.Node}
2836 @param ninfo: the node to check
2837 @param nresult: the remote results for the node
2838 @param nimg: the node image object
2841 idata = nresult.get(constants.NV_INSTANCELIST, None)
2842 test = not isinstance(idata, list)
2843 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2844 "rpc call to node failed (instancelist): %s",
2845 utils.SafeEncode(str(idata)))
2847 nimg.hyp_fail = True
2849 nimg.instances = idata
2851 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2852 """Verifies and computes a node information map
2854 @type ninfo: L{objects.Node}
2855 @param ninfo: the node to check
2856 @param nresult: the remote results for the node
2857 @param nimg: the node image object
2858 @param vg_name: the configured VG name
2862 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2864 # try to read free memory (from the hypervisor)
2865 hv_info = nresult.get(constants.NV_HVINFO, None)
2866 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2867 _ErrorIf(test, constants.CV_ENODEHV, node,
2868 "rpc call to node failed (hvinfo)")
2871 nimg.mfree = int(hv_info["memory_free"])
2872 except (ValueError, TypeError):
2873 _ErrorIf(True, constants.CV_ENODERPC, node,
2874 "node returned invalid nodeinfo, check hypervisor")
2876 # FIXME: devise a free space model for file based instances as well
2877 if vg_name is not None:
2878 test = (constants.NV_VGLIST not in nresult or
2879 vg_name not in nresult[constants.NV_VGLIST])
2880 _ErrorIf(test, constants.CV_ENODELVM, node,
2881 "node didn't return data for the volume group '%s'"
2882 " - it is either missing or broken", vg_name)
2885 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2886 except (ValueError, TypeError):
2887 _ErrorIf(True, constants.CV_ENODERPC, node,
2888 "node returned invalid LVM info, check LVM status")
2890 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2891 """Gets per-disk status information for all instances.
2893 @type nodelist: list of strings
2894 @param nodelist: Node names
2895 @type node_image: dict of (name, L{objects.Node})
2896 @param node_image: Node objects
2897 @type instanceinfo: dict of (name, L{objects.Instance})
2898 @param instanceinfo: Instance objects
2899 @rtype: {instance: {node: [(succes, payload)]}}
2900 @return: a dictionary of per-instance dictionaries with nodes as
2901 keys and disk information as values; the disk information is a
2902 list of tuples (success, payload)
2905 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2908 node_disks_devonly = {}
2909 diskless_instances = set()
2910 diskless = constants.DT_DISKLESS
2912 for nname in nodelist:
2913 node_instances = list(itertools.chain(node_image[nname].pinst,
2914 node_image[nname].sinst))
2915 diskless_instances.update(inst for inst in node_instances
2916 if instanceinfo[inst].disk_template == diskless)
2917 disks = [(inst, disk)
2918 for inst in node_instances
2919 for disk in instanceinfo[inst].disks]
2922 # No need to collect data
2925 node_disks[nname] = disks
2927 # _AnnotateDiskParams makes already copies of the disks
2929 for (inst, dev) in disks:
2930 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2931 self.cfg.SetDiskID(anno_disk, nname)
2932 devonly.append(anno_disk)
2934 node_disks_devonly[nname] = devonly
2936 assert len(node_disks) == len(node_disks_devonly)
2938 # Collect data from all nodes with disks
2939 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2942 assert len(result) == len(node_disks)
2946 for (nname, nres) in result.items():
2947 disks = node_disks[nname]
2950 # No data from this node
2951 data = len(disks) * [(False, "node offline")]
2954 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2955 "while getting disk information: %s", msg)
2957 # No data from this node
2958 data = len(disks) * [(False, msg)]
2961 for idx, i in enumerate(nres.payload):
2962 if isinstance(i, (tuple, list)) and len(i) == 2:
2965 logging.warning("Invalid result from node %s, entry %d: %s",
2967 data.append((False, "Invalid result from the remote node"))
2969 for ((inst, _), status) in zip(disks, data):
2970 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2972 # Add empty entries for diskless instances.
2973 for inst in diskless_instances:
2974 assert inst not in instdisk
2977 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2978 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2979 compat.all(isinstance(s, (tuple, list)) and
2980 len(s) == 2 for s in statuses)
2981 for inst, nnames in instdisk.items()
2982 for nname, statuses in nnames.items())
2983 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2988 def _SshNodeSelector(group_uuid, all_nodes):
2989 """Create endless iterators for all potential SSH check hosts.
2992 nodes = [node for node in all_nodes
2993 if (node.group != group_uuid and
2995 keyfunc = operator.attrgetter("group")
2997 return map(itertools.cycle,
2998 [sorted(map(operator.attrgetter("name"), names))
2999 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3003 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3004 """Choose which nodes should talk to which other nodes.
3006 We will make nodes contact all nodes in their group, and one node from
3009 @warning: This algorithm has a known issue if one node group is much
3010 smaller than others (e.g. just one node). In such a case all other
3011 nodes will talk to the single node.
3014 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3015 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3017 return (online_nodes,
3018 dict((name, sorted([i.next() for i in sel]))
3019 for name in online_nodes))
3021 def BuildHooksEnv(self):
3024 Cluster-Verify hooks just ran in the post phase and their failure makes
3025 the output be logged in the verify output and the verification to fail.
3029 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3032 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3033 for node in self.my_node_info.values())
3037 def BuildHooksNodes(self):
3038 """Build hooks nodes.
3041 return ([], self.my_node_names)
3043 def Exec(self, feedback_fn):
3044 """Verify integrity of the node group, performing various test on nodes.
3047 # This method has too many local variables. pylint: disable=R0914
3048 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3050 if not self.my_node_names:
3052 feedback_fn("* Empty node group, skipping verification")
3056 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3057 verbose = self.op.verbose
3058 self._feedback_fn = feedback_fn
3060 vg_name = self.cfg.GetVGName()
3061 drbd_helper = self.cfg.GetDRBDHelper()
3062 cluster = self.cfg.GetClusterInfo()
3063 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3064 hypervisors = cluster.enabled_hypervisors
3065 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3067 i_non_redundant = [] # Non redundant instances
3068 i_non_a_balanced = [] # Non auto-balanced instances
3069 i_offline = 0 # Count of offline instances
3070 n_offline = 0 # Count of offline nodes
3071 n_drained = 0 # Count of nodes being drained
3072 node_vol_should = {}
3074 # FIXME: verify OS list
3077 filemap = _ComputeAncillaryFiles(cluster, False)
3079 # do local checksums
3080 master_node = self.master_node = self.cfg.GetMasterNode()
3081 master_ip = self.cfg.GetMasterIP()
3083 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3086 if self.cfg.GetUseExternalMipScript():
3087 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3089 node_verify_param = {
3090 constants.NV_FILELIST:
3091 utils.UniqueSequence(filename
3092 for files in filemap
3093 for filename in files),
3094 constants.NV_NODELIST:
3095 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3096 self.all_node_info.values()),
3097 constants.NV_HYPERVISOR: hypervisors,
3098 constants.NV_HVPARAMS:
3099 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3100 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3101 for node in node_data_list
3102 if not node.offline],
3103 constants.NV_INSTANCELIST: hypervisors,
3104 constants.NV_VERSION: None,
3105 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3106 constants.NV_NODESETUP: None,
3107 constants.NV_TIME: None,
3108 constants.NV_MASTERIP: (master_node, master_ip),
3109 constants.NV_OSLIST: None,
3110 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3111 constants.NV_USERSCRIPTS: user_scripts,
3114 if vg_name is not None:
3115 node_verify_param[constants.NV_VGLIST] = None
3116 node_verify_param[constants.NV_LVLIST] = vg_name
3117 node_verify_param[constants.NV_PVLIST] = [vg_name]
3118 node_verify_param[constants.NV_DRBDLIST] = None
3121 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3124 # FIXME: this needs to be changed per node-group, not cluster-wide
3126 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3127 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3128 bridges.add(default_nicpp[constants.NIC_LINK])
3129 for instance in self.my_inst_info.values():
3130 for nic in instance.nics:
3131 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3132 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3133 bridges.add(full_nic[constants.NIC_LINK])
3136 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3138 # Build our expected cluster state
3139 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3141 vm_capable=node.vm_capable))
3142 for node in node_data_list)
3146 for node in self.all_node_info.values():
3147 path = _SupportsOob(self.cfg, node)
3148 if path and path not in oob_paths:
3149 oob_paths.append(path)
3152 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3154 for instance in self.my_inst_names:
3155 inst_config = self.my_inst_info[instance]
3156 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3159 for nname in inst_config.all_nodes:
3160 if nname not in node_image:
3161 gnode = self.NodeImage(name=nname)
3162 gnode.ghost = (nname not in self.all_node_info)
3163 node_image[nname] = gnode
3165 inst_config.MapLVsByNode(node_vol_should)
3167 pnode = inst_config.primary_node
3168 node_image[pnode].pinst.append(instance)
3170 for snode in inst_config.secondary_nodes:
3171 nimg = node_image[snode]
3172 nimg.sinst.append(instance)
3173 if pnode not in nimg.sbp:
3174 nimg.sbp[pnode] = []
3175 nimg.sbp[pnode].append(instance)
3177 # At this point, we have the in-memory data structures complete,
3178 # except for the runtime information, which we'll gather next
3180 # Due to the way our RPC system works, exact response times cannot be
3181 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3182 # time before and after executing the request, we can at least have a time
3184 nvinfo_starttime = time.time()
3185 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3187 self.cfg.GetClusterName())
3188 nvinfo_endtime = time.time()
3190 if self.extra_lv_nodes and vg_name is not None:
3192 self.rpc.call_node_verify(self.extra_lv_nodes,
3193 {constants.NV_LVLIST: vg_name},
3194 self.cfg.GetClusterName())
3196 extra_lv_nvinfo = {}
3198 all_drbd_map = self.cfg.ComputeDRBDMap()
3200 feedback_fn("* Gathering disk information (%s nodes)" %
3201 len(self.my_node_names))
3202 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3205 feedback_fn("* Verifying configuration file consistency")
3207 # If not all nodes are being checked, we need to make sure the master node
3208 # and a non-checked vm_capable node are in the list.
3209 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3211 vf_nvinfo = all_nvinfo.copy()
3212 vf_node_info = list(self.my_node_info.values())
3213 additional_nodes = []
3214 if master_node not in self.my_node_info:
3215 additional_nodes.append(master_node)
3216 vf_node_info.append(self.all_node_info[master_node])
3217 # Add the first vm_capable node we find which is not included,
3218 # excluding the master node (which we already have)
3219 for node in absent_nodes:
3220 nodeinfo = self.all_node_info[node]
3221 if (nodeinfo.vm_capable and not nodeinfo.offline and
3222 node != master_node):
3223 additional_nodes.append(node)
3224 vf_node_info.append(self.all_node_info[node])
3226 key = constants.NV_FILELIST
3227 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3228 {key: node_verify_param[key]},
3229 self.cfg.GetClusterName()))
3231 vf_nvinfo = all_nvinfo
3232 vf_node_info = self.my_node_info.values()
3234 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3236 feedback_fn("* Verifying node status")
3240 for node_i in node_data_list:
3242 nimg = node_image[node]
3246 feedback_fn("* Skipping offline node %s" % (node,))
3250 if node == master_node:
3252 elif node_i.master_candidate:
3253 ntype = "master candidate"
3254 elif node_i.drained:
3260 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3262 msg = all_nvinfo[node].fail_msg
3263 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3266 nimg.rpc_fail = True
3269 nresult = all_nvinfo[node].payload
3271 nimg.call_ok = self._VerifyNode(node_i, nresult)
3272 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3273 self._VerifyNodeNetwork(node_i, nresult)
3274 self._VerifyNodeUserScripts(node_i, nresult)
3275 self._VerifyOob(node_i, nresult)
3278 self._VerifyNodeLVM(node_i, nresult, vg_name)
3279 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3282 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3283 self._UpdateNodeInstances(node_i, nresult, nimg)
3284 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3285 self._UpdateNodeOS(node_i, nresult, nimg)
3287 if not nimg.os_fail:
3288 if refos_img is None:
3290 self._VerifyNodeOS(node_i, nimg, refos_img)
3291 self._VerifyNodeBridges(node_i, nresult, bridges)
3293 # Check whether all running instancies are primary for the node. (This
3294 # can no longer be done from _VerifyInstance below, since some of the
3295 # wrong instances could be from other node groups.)
3296 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3298 for inst in non_primary_inst:
3299 test = inst in self.all_inst_info
3300 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3301 "instance should not run on node %s", node_i.name)
3302 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3303 "node is running unknown instance %s", inst)
3305 for node, result in extra_lv_nvinfo.items():
3306 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3307 node_image[node], vg_name)
3309 feedback_fn("* Verifying instance status")
3310 for instance in self.my_inst_names:
3312 feedback_fn("* Verifying instance %s" % instance)
3313 inst_config = self.my_inst_info[instance]
3314 self._VerifyInstance(instance, inst_config, node_image,
3316 inst_nodes_offline = []
3318 pnode = inst_config.primary_node
3319 pnode_img = node_image[pnode]
3320 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3321 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3322 " primary node failed", instance)
3324 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3326 constants.CV_EINSTANCEBADNODE, instance,
3327 "instance is marked as running and lives on offline node %s",
3328 inst_config.primary_node)
3330 # If the instance is non-redundant we cannot survive losing its primary
3331 # node, so we are not N+1 compliant. On the other hand we have no disk
3332 # templates with more than one secondary so that situation is not well
3334 # FIXME: does not support file-backed instances
3335 if not inst_config.secondary_nodes:
3336 i_non_redundant.append(instance)
3338 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3339 constants.CV_EINSTANCELAYOUT,
3340 instance, "instance has multiple secondary nodes: %s",
3341 utils.CommaJoin(inst_config.secondary_nodes),
3342 code=self.ETYPE_WARNING)
3344 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3345 pnode = inst_config.primary_node
3346 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3347 instance_groups = {}
3349 for node in instance_nodes:
3350 instance_groups.setdefault(self.all_node_info[node].group,
3354 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3355 # Sort so that we always list the primary node first.
3356 for group, nodes in sorted(instance_groups.items(),
3357 key=lambda (_, nodes): pnode in nodes,
3360 self._ErrorIf(len(instance_groups) > 1,
3361 constants.CV_EINSTANCESPLITGROUPS,
3362 instance, "instance has primary and secondary nodes in"
3363 " different groups: %s", utils.CommaJoin(pretty_list),
3364 code=self.ETYPE_WARNING)
3366 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3367 i_non_a_balanced.append(instance)
3369 for snode in inst_config.secondary_nodes:
3370 s_img = node_image[snode]
3371 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3372 snode, "instance %s, connection to secondary node failed",
3376 inst_nodes_offline.append(snode)
3378 # warn that the instance lives on offline nodes
3379 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3380 "instance has offline secondary node(s) %s",
3381 utils.CommaJoin(inst_nodes_offline))
3382 # ... or ghost/non-vm_capable nodes
3383 for node in inst_config.all_nodes:
3384 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3385 instance, "instance lives on ghost node %s", node)
3386 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3387 instance, "instance lives on non-vm_capable node %s", node)
3389 feedback_fn("* Verifying orphan volumes")
3390 reserved = utils.FieldSet(*cluster.reserved_lvs)
3392 # We will get spurious "unknown volume" warnings if any node of this group
3393 # is secondary for an instance whose primary is in another group. To avoid
3394 # them, we find these instances and add their volumes to node_vol_should.
3395 for inst in self.all_inst_info.values():
3396 for secondary in inst.secondary_nodes:
3397 if (secondary in self.my_node_info
3398 and inst.name not in self.my_inst_info):
3399 inst.MapLVsByNode(node_vol_should)
3402 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3404 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3405 feedback_fn("* Verifying N+1 Memory redundancy")
3406 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3408 feedback_fn("* Other Notes")
3410 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3411 % len(i_non_redundant))
3413 if i_non_a_balanced:
3414 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3415 % len(i_non_a_balanced))
3418 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3421 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3424 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3428 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3429 """Analyze the post-hooks' result
3431 This method analyses the hook result, handles it, and sends some
3432 nicely-formatted feedback back to the user.
3434 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3435 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3436 @param hooks_results: the results of the multi-node hooks rpc call
3437 @param feedback_fn: function used send feedback back to the caller
3438 @param lu_result: previous Exec result
3439 @return: the new Exec result, based on the previous result
3443 # We only really run POST phase hooks, only for non-empty groups,
3444 # and are only interested in their results
3445 if not self.my_node_names:
3448 elif phase == constants.HOOKS_PHASE_POST:
3449 # Used to change hooks' output to proper indentation
3450 feedback_fn("* Hooks Results")
3451 assert hooks_results, "invalid result from hooks"
3453 for node_name in hooks_results:
3454 res = hooks_results[node_name]
3456 test = msg and not res.offline
3457 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3458 "Communication failure in hooks execution: %s", msg)
3459 if res.offline or msg:
3460 # No need to investigate payload if node is offline or gave
3463 for script, hkr, output in res.payload:
3464 test = hkr == constants.HKR_FAIL
3465 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3466 "Script %s failed, output:", script)
3468 output = self._HOOKS_INDENT_RE.sub(" ", output)
3469 feedback_fn("%s" % output)
3475 class LUClusterVerifyDisks(NoHooksLU):
3476 """Verifies the cluster disks status.
3481 def ExpandNames(self):
3482 self.share_locks = _ShareAll()
3483 self.needed_locks = {
3484 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3487 def Exec(self, feedback_fn):
3488 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3490 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3491 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3492 for group in group_names])
3495 class LUGroupVerifyDisks(NoHooksLU):
3496 """Verifies the status of all disks in a node group.
3501 def ExpandNames(self):
3502 # Raises errors.OpPrereqError on its own if group can't be found
3503 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3505 self.share_locks = _ShareAll()
3506 self.needed_locks = {
3507 locking.LEVEL_INSTANCE: [],
3508 locking.LEVEL_NODEGROUP: [],
3509 locking.LEVEL_NODE: [],
3512 def DeclareLocks(self, level):
3513 if level == locking.LEVEL_INSTANCE:
3514 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3516 # Lock instances optimistically, needs verification once node and group
3517 # locks have been acquired
3518 self.needed_locks[locking.LEVEL_INSTANCE] = \
3519 self.cfg.GetNodeGroupInstances(self.group_uuid)
3521 elif level == locking.LEVEL_NODEGROUP:
3522 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3524 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3525 set([self.group_uuid] +
3526 # Lock all groups used by instances optimistically; this requires
3527 # going via the node before it's locked, requiring verification
3530 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3531 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3533 elif level == locking.LEVEL_NODE:
3534 # This will only lock the nodes in the group to be verified which contain
3536 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3537 self._LockInstancesNodes()
3539 # Lock all nodes in group to be verified
3540 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3541 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3542 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3544 def CheckPrereq(self):
3545 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3546 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3547 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3549 assert self.group_uuid in owned_groups
3551 # Check if locked instances are still correct
3552 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3554 # Get instance information
3555 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3557 # Check if node groups for locked instances are still correct
3558 _CheckInstancesNodeGroups(self.cfg, self.instances,
3559 owned_groups, owned_nodes, self.group_uuid)
3561 def Exec(self, feedback_fn):
3562 """Verify integrity of cluster disks.
3564 @rtype: tuple of three items
3565 @return: a tuple of (dict of node-to-node_error, list of instances
3566 which need activate-disks, dict of instance: (node, volume) for
3571 res_instances = set()
3574 nv_dict = _MapInstanceDisksToNodes([inst
3575 for inst in self.instances.values()
3576 if inst.admin_state == constants.ADMINST_UP])
3579 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3580 set(self.cfg.GetVmCapableNodeList()))
3582 node_lvs = self.rpc.call_lv_list(nodes, [])
3584 for (node, node_res) in node_lvs.items():
3585 if node_res.offline:
3588 msg = node_res.fail_msg
3590 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3591 res_nodes[node] = msg
3594 for lv_name, (_, _, lv_online) in node_res.payload.items():
3595 inst = nv_dict.pop((node, lv_name), None)
3596 if not (lv_online or inst is None):
3597 res_instances.add(inst)
3599 # any leftover items in nv_dict are missing LVs, let's arrange the data
3601 for key, inst in nv_dict.iteritems():
3602 res_missing.setdefault(inst, []).append(list(key))
3604 return (res_nodes, list(res_instances), res_missing)
3607 class LUClusterRepairDiskSizes(NoHooksLU):
3608 """Verifies the cluster disks sizes.
3613 def ExpandNames(self):
3614 if self.op.instances:
3615 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3616 self.needed_locks = {
3617 locking.LEVEL_NODE_RES: [],
3618 locking.LEVEL_INSTANCE: self.wanted_names,
3620 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3622 self.wanted_names = None
3623 self.needed_locks = {
3624 locking.LEVEL_NODE_RES: locking.ALL_SET,
3625 locking.LEVEL_INSTANCE: locking.ALL_SET,
3627 self.share_locks = {
3628 locking.LEVEL_NODE_RES: 1,
3629 locking.LEVEL_INSTANCE: 0,
3632 def DeclareLocks(self, level):
3633 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3634 self._LockInstancesNodes(primary_only=True, level=level)
3636 def CheckPrereq(self):
3637 """Check prerequisites.
3639 This only checks the optional instance list against the existing names.
3642 if self.wanted_names is None:
3643 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3645 self.wanted_instances = \
3646 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3648 def _EnsureChildSizes(self, disk):
3649 """Ensure children of the disk have the needed disk size.
3651 This is valid mainly for DRBD8 and fixes an issue where the
3652 children have smaller disk size.
3654 @param disk: an L{ganeti.objects.Disk} object
3657 if disk.dev_type == constants.LD_DRBD8:
3658 assert disk.children, "Empty children for DRBD8?"
3659 fchild = disk.children[0]
3660 mismatch = fchild.size < disk.size
3662 self.LogInfo("Child disk has size %d, parent %d, fixing",
3663 fchild.size, disk.size)
3664 fchild.size = disk.size
3666 # and we recurse on this child only, not on the metadev
3667 return self._EnsureChildSizes(fchild) or mismatch
3671 def Exec(self, feedback_fn):
3672 """Verify the size of cluster disks.
3675 # TODO: check child disks too
3676 # TODO: check differences in size between primary/secondary nodes
3678 for instance in self.wanted_instances:
3679 pnode = instance.primary_node
3680 if pnode not in per_node_disks:
3681 per_node_disks[pnode] = []
3682 for idx, disk in enumerate(instance.disks):
3683 per_node_disks[pnode].append((instance, idx, disk))
3685 assert not (frozenset(per_node_disks.keys()) -
3686 self.owned_locks(locking.LEVEL_NODE_RES)), \
3687 "Not owning correct locks"
3688 assert not self.owned_locks(locking.LEVEL_NODE)
3691 for node, dskl in per_node_disks.items():
3692 newl = [v[2].Copy() for v in dskl]
3694 self.cfg.SetDiskID(dsk, node)
3695 result = self.rpc.call_blockdev_getsize(node, newl)
3697 self.LogWarning("Failure in blockdev_getsize call to node"
3698 " %s, ignoring", node)
3700 if len(result.payload) != len(dskl):
3701 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3702 " result.payload=%s", node, len(dskl), result.payload)
3703 self.LogWarning("Invalid result from node %s, ignoring node results",
3706 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3708 self.LogWarning("Disk %d of instance %s did not return size"
3709 " information, ignoring", idx, instance.name)
3711 if not isinstance(size, (int, long)):
3712 self.LogWarning("Disk %d of instance %s did not return valid"
3713 " size information, ignoring", idx, instance.name)
3716 if size != disk.size:
3717 self.LogInfo("Disk %d of instance %s has mismatched size,"
3718 " correcting: recorded %d, actual %d", idx,
3719 instance.name, disk.size, size)
3721 self.cfg.Update(instance, feedback_fn)
3722 changed.append((instance.name, idx, size))
3723 if self._EnsureChildSizes(disk):
3724 self.cfg.Update(instance, feedback_fn)
3725 changed.append((instance.name, idx, disk.size))
3729 class LUClusterRename(LogicalUnit):
3730 """Rename the cluster.
3733 HPATH = "cluster-rename"
3734 HTYPE = constants.HTYPE_CLUSTER
3736 def BuildHooksEnv(self):
3741 "OP_TARGET": self.cfg.GetClusterName(),
3742 "NEW_NAME": self.op.name,
3745 def BuildHooksNodes(self):
3746 """Build hooks nodes.
3749 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3751 def CheckPrereq(self):
3752 """Verify that the passed name is a valid one.
3755 hostname = netutils.GetHostname(name=self.op.name,
3756 family=self.cfg.GetPrimaryIPFamily())
3758 new_name = hostname.name
3759 self.ip = new_ip = hostname.ip
3760 old_name = self.cfg.GetClusterName()
3761 old_ip = self.cfg.GetMasterIP()
3762 if new_name == old_name and new_ip == old_ip:
3763 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3764 " cluster has changed",
3766 if new_ip != old_ip:
3767 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3768 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3769 " reachable on the network" %
3770 new_ip, errors.ECODE_NOTUNIQUE)
3772 self.op.name = new_name
3774 def Exec(self, feedback_fn):
3775 """Rename the cluster.
3778 clustername = self.op.name
3781 # shutdown the master IP
3782 master_params = self.cfg.GetMasterNetworkParameters()
3783 ems = self.cfg.GetUseExternalMipScript()
3784 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3786 result.Raise("Could not disable the master role")
3789 cluster = self.cfg.GetClusterInfo()
3790 cluster.cluster_name = clustername
3791 cluster.master_ip = new_ip
3792 self.cfg.Update(cluster, feedback_fn)
3794 # update the known hosts file
3795 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3796 node_list = self.cfg.GetOnlineNodeList()
3798 node_list.remove(master_params.name)
3801 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3803 master_params.ip = new_ip
3804 result = self.rpc.call_node_activate_master_ip(master_params.name,
3806 msg = result.fail_msg
3808 self.LogWarning("Could not re-enable the master role on"
3809 " the master, please restart manually: %s", msg)
3814 def _ValidateNetmask(cfg, netmask):
3815 """Checks if a netmask is valid.
3817 @type cfg: L{config.ConfigWriter}
3818 @param cfg: The cluster configuration
3820 @param netmask: the netmask to be verified
3821 @raise errors.OpPrereqError: if the validation fails
3824 ip_family = cfg.GetPrimaryIPFamily()
3826 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3827 except errors.ProgrammerError:
3828 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3830 if not ipcls.ValidateNetmask(netmask):
3831 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3835 class LUClusterSetParams(LogicalUnit):
3836 """Change the parameters of the cluster.
3839 HPATH = "cluster-modify"
3840 HTYPE = constants.HTYPE_CLUSTER
3843 def CheckArguments(self):
3847 if self.op.uid_pool:
3848 uidpool.CheckUidPool(self.op.uid_pool)
3850 if self.op.add_uids:
3851 uidpool.CheckUidPool(self.op.add_uids)
3853 if self.op.remove_uids:
3854 uidpool.CheckUidPool(self.op.remove_uids)
3856 if self.op.master_netmask is not None:
3857 _ValidateNetmask(self.cfg, self.op.master_netmask)
3859 if self.op.diskparams:
3860 for dt_params in self.op.diskparams.values():
3861 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3863 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3864 except errors.OpPrereqError, err:
3865 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
3868 def ExpandNames(self):
3869 # FIXME: in the future maybe other cluster params won't require checking on
3870 # all nodes to be modified.
3871 self.needed_locks = {
3872 locking.LEVEL_NODE: locking.ALL_SET,
3873 locking.LEVEL_INSTANCE: locking.ALL_SET,
3874 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3876 self.share_locks = {
3877 locking.LEVEL_NODE: 1,
3878 locking.LEVEL_INSTANCE: 1,
3879 locking.LEVEL_NODEGROUP: 1,
3882 def BuildHooksEnv(self):
3887 "OP_TARGET": self.cfg.GetClusterName(),
3888 "NEW_VG_NAME": self.op.vg_name,
3891 def BuildHooksNodes(self):
3892 """Build hooks nodes.
3895 mn = self.cfg.GetMasterNode()
3898 def CheckPrereq(self):
3899 """Check prerequisites.
3901 This checks whether the given params don't conflict and
3902 if the given volume group is valid.
3905 if self.op.vg_name is not None and not self.op.vg_name:
3906 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3907 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3908 " instances exist", errors.ECODE_INVAL)
3910 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3911 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3912 raise errors.OpPrereqError("Cannot disable drbd helper while"
3913 " drbd-based instances exist",
3916 node_list = self.owned_locks(locking.LEVEL_NODE)
3918 # if vg_name not None, checks given volume group on all nodes
3920 vglist = self.rpc.call_vg_list(node_list)
3921 for node in node_list:
3922 msg = vglist[node].fail_msg
3924 # ignoring down node
3925 self.LogWarning("Error while gathering data on node %s"
3926 " (ignoring node): %s", node, msg)
3928 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3930 constants.MIN_VG_SIZE)
3932 raise errors.OpPrereqError("Error on node '%s': %s" %
3933 (node, vgstatus), errors.ECODE_ENVIRON)
3935 if self.op.drbd_helper:
3936 # checks given drbd helper on all nodes
3937 helpers = self.rpc.call_drbd_helper(node_list)
3938 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3940 self.LogInfo("Not checking drbd helper on offline node %s", node)
3942 msg = helpers[node].fail_msg
3944 raise errors.OpPrereqError("Error checking drbd helper on node"
3945 " '%s': %s" % (node, msg),
3946 errors.ECODE_ENVIRON)
3947 node_helper = helpers[node].payload
3948 if node_helper != self.op.drbd_helper:
3949 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3950 (node, node_helper), errors.ECODE_ENVIRON)
3952 self.cluster = cluster = self.cfg.GetClusterInfo()
3953 # validate params changes
3954 if self.op.beparams:
3955 objects.UpgradeBeParams(self.op.beparams)
3956 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3957 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3959 if self.op.ndparams:
3960 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3961 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3963 # TODO: we need a more general way to handle resetting
3964 # cluster-level parameters to default values
3965 if self.new_ndparams["oob_program"] == "":
3966 self.new_ndparams["oob_program"] = \
3967 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3969 if self.op.hv_state:
3970 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3971 self.cluster.hv_state_static)
3972 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3973 for hv, values in new_hv_state.items())
3975 if self.op.disk_state:
3976 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3977 self.cluster.disk_state_static)
3978 self.new_disk_state = \
3979 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3980 for name, values in svalues.items()))
3981 for storage, svalues in new_disk_state.items())
3984 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3987 all_instances = self.cfg.GetAllInstancesInfo().values()
3989 for group in self.cfg.GetAllNodeGroupsInfo().values():
3990 instances = frozenset([inst for inst in all_instances
3991 if compat.any(node in group.members
3992 for node in inst.all_nodes)])
3993 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3994 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3996 new_ipolicy, instances)
3998 violations.update(new)
4001 self.LogWarning("After the ipolicy change the following instances"
4002 " violate them: %s",
4003 utils.CommaJoin(utils.NiceSort(violations)))
4005 if self.op.nicparams:
4006 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4007 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4008 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4011 # check all instances for consistency
4012 for instance in self.cfg.GetAllInstancesInfo().values():
4013 for nic_idx, nic in enumerate(instance.nics):
4014 params_copy = copy.deepcopy(nic.nicparams)
4015 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4017 # check parameter syntax
4019 objects.NIC.CheckParameterSyntax(params_filled)
4020 except errors.ConfigurationError, err:
4021 nic_errors.append("Instance %s, nic/%d: %s" %
4022 (instance.name, nic_idx, err))
4024 # if we're moving instances to routed, check that they have an ip
4025 target_mode = params_filled[constants.NIC_MODE]
4026 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4027 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4028 " address" % (instance.name, nic_idx))
4030 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4031 "\n".join(nic_errors))
4033 # hypervisor list/parameters
4034 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4035 if self.op.hvparams:
4036 for hv_name, hv_dict in self.op.hvparams.items():
4037 if hv_name not in self.new_hvparams:
4038 self.new_hvparams[hv_name] = hv_dict
4040 self.new_hvparams[hv_name].update(hv_dict)
4042 # disk template parameters
4043 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4044 if self.op.diskparams:
4045 for dt_name, dt_params in self.op.diskparams.items():
4046 if dt_name not in self.op.diskparams:
4047 self.new_diskparams[dt_name] = dt_params
4049 self.new_diskparams[dt_name].update(dt_params)
4051 # os hypervisor parameters
4052 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4054 for os_name, hvs in self.op.os_hvp.items():
4055 if os_name not in self.new_os_hvp:
4056 self.new_os_hvp[os_name] = hvs
4058 for hv_name, hv_dict in hvs.items():
4059 if hv_name not in self.new_os_hvp[os_name]:
4060 self.new_os_hvp[os_name][hv_name] = hv_dict
4062 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4065 self.new_osp = objects.FillDict(cluster.osparams, {})
4066 if self.op.osparams:
4067 for os_name, osp in self.op.osparams.items():
4068 if os_name not in self.new_osp:
4069 self.new_osp[os_name] = {}
4071 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4074 if not self.new_osp[os_name]:
4075 # we removed all parameters
4076 del self.new_osp[os_name]
4078 # check the parameter validity (remote check)
4079 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4080 os_name, self.new_osp[os_name])
4082 # changes to the hypervisor list
4083 if self.op.enabled_hypervisors is not None:
4084 self.hv_list = self.op.enabled_hypervisors
4085 for hv in self.hv_list:
4086 # if the hypervisor doesn't already exist in the cluster
4087 # hvparams, we initialize it to empty, and then (in both
4088 # cases) we make sure to fill the defaults, as we might not
4089 # have a complete defaults list if the hypervisor wasn't
4091 if hv not in new_hvp:
4093 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4094 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4096 self.hv_list = cluster.enabled_hypervisors
4098 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4099 # either the enabled list has changed, or the parameters have, validate
4100 for hv_name, hv_params in self.new_hvparams.items():
4101 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4102 (self.op.enabled_hypervisors and
4103 hv_name in self.op.enabled_hypervisors)):
4104 # either this is a new hypervisor, or its parameters have changed
4105 hv_class = hypervisor.GetHypervisor(hv_name)
4106 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4107 hv_class.CheckParameterSyntax(hv_params)
4108 _CheckHVParams(self, node_list, hv_name, hv_params)
4111 # no need to check any newly-enabled hypervisors, since the
4112 # defaults have already been checked in the above code-block
4113 for os_name, os_hvp in self.new_os_hvp.items():
4114 for hv_name, hv_params in os_hvp.items():
4115 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4116 # we need to fill in the new os_hvp on top of the actual hv_p
4117 cluster_defaults = self.new_hvparams.get(hv_name, {})
4118 new_osp = objects.FillDict(cluster_defaults, hv_params)
4119 hv_class = hypervisor.GetHypervisor(hv_name)
4120 hv_class.CheckParameterSyntax(new_osp)
4121 _CheckHVParams(self, node_list, hv_name, new_osp)
4123 if self.op.default_iallocator:
4124 alloc_script = utils.FindFile(self.op.default_iallocator,
4125 constants.IALLOCATOR_SEARCH_PATH,
4127 if alloc_script is None:
4128 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4129 " specified" % self.op.default_iallocator,
4132 def Exec(self, feedback_fn):
4133 """Change the parameters of the cluster.
4136 if self.op.vg_name is not None:
4137 new_volume = self.op.vg_name
4140 if new_volume != self.cfg.GetVGName():
4141 self.cfg.SetVGName(new_volume)
4143 feedback_fn("Cluster LVM configuration already in desired"
4144 " state, not changing")
4145 if self.op.drbd_helper is not None:
4146 new_helper = self.op.drbd_helper
4149 if new_helper != self.cfg.GetDRBDHelper():
4150 self.cfg.SetDRBDHelper(new_helper)
4152 feedback_fn("Cluster DRBD helper already in desired state,"
4154 if self.op.hvparams:
4155 self.cluster.hvparams = self.new_hvparams
4157 self.cluster.os_hvp = self.new_os_hvp
4158 if self.op.enabled_hypervisors is not None:
4159 self.cluster.hvparams = self.new_hvparams
4160 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4161 if self.op.beparams:
4162 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4163 if self.op.nicparams:
4164 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4166 self.cluster.ipolicy = self.new_ipolicy
4167 if self.op.osparams:
4168 self.cluster.osparams = self.new_osp
4169 if self.op.ndparams:
4170 self.cluster.ndparams = self.new_ndparams
4171 if self.op.diskparams:
4172 self.cluster.diskparams = self.new_diskparams
4173 if self.op.hv_state:
4174 self.cluster.hv_state_static = self.new_hv_state
4175 if self.op.disk_state:
4176 self.cluster.disk_state_static = self.new_disk_state
4178 if self.op.candidate_pool_size is not None:
4179 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4180 # we need to update the pool size here, otherwise the save will fail
4181 _AdjustCandidatePool(self, [])
4183 if self.op.maintain_node_health is not None:
4184 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4185 feedback_fn("Note: CONFD was disabled at build time, node health"
4186 " maintenance is not useful (still enabling it)")
4187 self.cluster.maintain_node_health = self.op.maintain_node_health
4189 if self.op.prealloc_wipe_disks is not None:
4190 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4192 if self.op.add_uids is not None:
4193 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4195 if self.op.remove_uids is not None:
4196 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4198 if self.op.uid_pool is not None:
4199 self.cluster.uid_pool = self.op.uid_pool
4201 if self.op.default_iallocator is not None:
4202 self.cluster.default_iallocator = self.op.default_iallocator
4204 if self.op.reserved_lvs is not None:
4205 self.cluster.reserved_lvs = self.op.reserved_lvs
4207 if self.op.use_external_mip_script is not None:
4208 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4210 def helper_os(aname, mods, desc):
4212 lst = getattr(self.cluster, aname)
4213 for key, val in mods:
4214 if key == constants.DDM_ADD:
4216 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4219 elif key == constants.DDM_REMOVE:
4223 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4225 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4227 if self.op.hidden_os:
4228 helper_os("hidden_os", self.op.hidden_os, "hidden")
4230 if self.op.blacklisted_os:
4231 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4233 if self.op.master_netdev:
4234 master_params = self.cfg.GetMasterNetworkParameters()
4235 ems = self.cfg.GetUseExternalMipScript()
4236 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4237 self.cluster.master_netdev)
4238 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4240 result.Raise("Could not disable the master ip")
4241 feedback_fn("Changing master_netdev from %s to %s" %
4242 (master_params.netdev, self.op.master_netdev))
4243 self.cluster.master_netdev = self.op.master_netdev
4245 if self.op.master_netmask:
4246 master_params = self.cfg.GetMasterNetworkParameters()
4247 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4248 result = self.rpc.call_node_change_master_netmask(master_params.name,
4249 master_params.netmask,
4250 self.op.master_netmask,
4252 master_params.netdev)
4254 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4257 self.cluster.master_netmask = self.op.master_netmask
4259 self.cfg.Update(self.cluster, feedback_fn)
4261 if self.op.master_netdev:
4262 master_params = self.cfg.GetMasterNetworkParameters()
4263 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4264 self.op.master_netdev)
4265 ems = self.cfg.GetUseExternalMipScript()
4266 result = self.rpc.call_node_activate_master_ip(master_params.name,
4269 self.LogWarning("Could not re-enable the master ip on"
4270 " the master, please restart manually: %s",
4274 def _UploadHelper(lu, nodes, fname):
4275 """Helper for uploading a file and showing warnings.
4278 if os.path.exists(fname):
4279 result = lu.rpc.call_upload_file(nodes, fname)
4280 for to_node, to_result in result.items():
4281 msg = to_result.fail_msg
4283 msg = ("Copy of file %s to node %s failed: %s" %
4284 (fname, to_node, msg))
4285 lu.proc.LogWarning(msg)
4288 def _ComputeAncillaryFiles(cluster, redist):
4289 """Compute files external to Ganeti which need to be consistent.
4291 @type redist: boolean
4292 @param redist: Whether to include files which need to be redistributed
4295 # Compute files for all nodes
4297 constants.SSH_KNOWN_HOSTS_FILE,
4298 constants.CONFD_HMAC_KEY,
4299 constants.CLUSTER_DOMAIN_SECRET_FILE,
4300 constants.SPICE_CERT_FILE,
4301 constants.SPICE_CACERT_FILE,
4302 constants.RAPI_USERS_FILE,
4306 files_all.update(constants.ALL_CERT_FILES)
4307 files_all.update(ssconf.SimpleStore().GetFileList())
4309 # we need to ship at least the RAPI certificate
4310 files_all.add(constants.RAPI_CERT_FILE)
4312 if cluster.modify_etc_hosts:
4313 files_all.add(constants.ETC_HOSTS)
4315 if cluster.use_external_mip_script:
4316 files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
4318 # Files which are optional, these must:
4319 # - be present in one other category as well
4320 # - either exist or not exist on all nodes of that category (mc, vm all)
4322 constants.RAPI_USERS_FILE,
4325 # Files which should only be on master candidates
4329 files_mc.add(constants.CLUSTER_CONF_FILE)
4331 # Files which should only be on VM-capable nodes
4332 files_vm = set(filename
4333 for hv_name in cluster.enabled_hypervisors
4334 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4336 files_opt |= set(filename
4337 for hv_name in cluster.enabled_hypervisors
4338 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4340 # Filenames in each category must be unique
4341 all_files_set = files_all | files_mc | files_vm
4342 assert (len(all_files_set) ==
4343 sum(map(len, [files_all, files_mc, files_vm]))), \
4344 "Found file listed in more than one file list"
4346 # Optional files must be present in one other category
4347 assert all_files_set.issuperset(files_opt), \
4348 "Optional file not in a different required list"
4350 return (files_all, files_opt, files_mc, files_vm)
4353 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4354 """Distribute additional files which are part of the cluster configuration.
4356 ConfigWriter takes care of distributing the config and ssconf files, but
4357 there are more files which should be distributed to all nodes. This function
4358 makes sure those are copied.
4360 @param lu: calling logical unit
4361 @param additional_nodes: list of nodes not in the config to distribute to
4362 @type additional_vm: boolean
4363 @param additional_vm: whether the additional nodes are vm-capable or not
4366 # Gather target nodes
4367 cluster = lu.cfg.GetClusterInfo()
4368 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4370 online_nodes = lu.cfg.GetOnlineNodeList()
4371 online_set = frozenset(online_nodes)
4372 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4374 if additional_nodes is not None:
4375 online_nodes.extend(additional_nodes)
4377 vm_nodes.extend(additional_nodes)
4379 # Never distribute to master node
4380 for nodelist in [online_nodes, vm_nodes]:
4381 if master_info.name in nodelist:
4382 nodelist.remove(master_info.name)
4385 (files_all, _, files_mc, files_vm) = \
4386 _ComputeAncillaryFiles(cluster, True)
4388 # Never re-distribute configuration file from here
4389 assert not (constants.CLUSTER_CONF_FILE in files_all or
4390 constants.CLUSTER_CONF_FILE in files_vm)
4391 assert not files_mc, "Master candidates not handled in this function"
4394 (online_nodes, files_all),
4395 (vm_nodes, files_vm),
4399 for (node_list, files) in filemap:
4401 _UploadHelper(lu, node_list, fname)
4404 class LUClusterRedistConf(NoHooksLU):
4405 """Force the redistribution of cluster configuration.
4407 This is a very simple LU.
4412 def ExpandNames(self):
4413 self.needed_locks = {
4414 locking.LEVEL_NODE: locking.ALL_SET,
4416 self.share_locks[locking.LEVEL_NODE] = 1
4418 def Exec(self, feedback_fn):
4419 """Redistribute the configuration.
4422 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4423 _RedistributeAncillaryFiles(self)
4426 class LUClusterActivateMasterIp(NoHooksLU):
4427 """Activate the master IP on the master node.
4430 def Exec(self, feedback_fn):
4431 """Activate the master IP.
4434 master_params = self.cfg.GetMasterNetworkParameters()
4435 ems = self.cfg.GetUseExternalMipScript()
4436 result = self.rpc.call_node_activate_master_ip(master_params.name,
4438 result.Raise("Could not activate the master IP")
4441 class LUClusterDeactivateMasterIp(NoHooksLU):
4442 """Deactivate the master IP on the master node.
4445 def Exec(self, feedback_fn):
4446 """Deactivate the master IP.
4449 master_params = self.cfg.GetMasterNetworkParameters()
4450 ems = self.cfg.GetUseExternalMipScript()
4451 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4453 result.Raise("Could not deactivate the master IP")
4456 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4457 """Sleep and poll for an instance's disk to sync.
4460 if not instance.disks or disks is not None and not disks:
4463 disks = _ExpandCheckDisks(instance, disks)
4466 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4468 node = instance.primary_node
4471 lu.cfg.SetDiskID(dev, node)
4473 # TODO: Convert to utils.Retry
4476 degr_retries = 10 # in seconds, as we sleep 1 second each time
4480 cumul_degraded = False
4481 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4482 msg = rstats.fail_msg
4484 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4487 raise errors.RemoteError("Can't contact node %s for mirror data,"
4488 " aborting." % node)
4491 rstats = rstats.payload
4493 for i, mstat in enumerate(rstats):
4495 lu.LogWarning("Can't compute data for node %s/%s",
4496 node, disks[i].iv_name)
4499 cumul_degraded = (cumul_degraded or
4500 (mstat.is_degraded and mstat.sync_percent is None))
4501 if mstat.sync_percent is not None:
4503 if mstat.estimated_time is not None:
4504 rem_time = ("%s remaining (estimated)" %
4505 utils.FormatSeconds(mstat.estimated_time))
4506 max_time = mstat.estimated_time
4508 rem_time = "no time estimate"
4509 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4510 (disks[i].iv_name, mstat.sync_percent, rem_time))
4512 # if we're done but degraded, let's do a few small retries, to
4513 # make sure we see a stable and not transient situation; therefore
4514 # we force restart of the loop
4515 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4516 logging.info("Degraded disks found, %d retries left", degr_retries)
4524 time.sleep(min(60, max_time))
4527 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4528 return not cumul_degraded
4531 def _BlockdevFind(lu, node, dev, instance):
4532 """Wrapper around call_blockdev_find to annotate diskparams.
4534 @param lu: A reference to the lu object
4535 @param node: The node to call out
4536 @param dev: The device to find
4537 @param instance: The instance object the device belongs to
4538 @returns The result of the rpc call
4541 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4542 return lu.rpc.call_blockdev_find(node, disk)
4545 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4546 """Wrapper around L{_CheckDiskConsistencyInner}.
4549 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4550 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4554 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4556 """Check that mirrors are not degraded.
4558 @attention: The device has to be annotated already.
4560 The ldisk parameter, if True, will change the test from the
4561 is_degraded attribute (which represents overall non-ok status for
4562 the device(s)) to the ldisk (representing the local storage status).
4565 lu.cfg.SetDiskID(dev, node)
4569 if on_primary or dev.AssembleOnSecondary():
4570 rstats = lu.rpc.call_blockdev_find(node, dev)
4571 msg = rstats.fail_msg
4573 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4575 elif not rstats.payload:
4576 lu.LogWarning("Can't find disk on node %s", node)
4580 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4582 result = result and not rstats.payload.is_degraded
4585 for child in dev.children:
4586 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4592 class LUOobCommand(NoHooksLU):
4593 """Logical unit for OOB handling.
4597 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4599 def ExpandNames(self):
4600 """Gather locks we need.
4603 if self.op.node_names:
4604 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4605 lock_names = self.op.node_names
4607 lock_names = locking.ALL_SET
4609 self.needed_locks = {
4610 locking.LEVEL_NODE: lock_names,
4613 def CheckPrereq(self):
4614 """Check prerequisites.
4617 - the node exists in the configuration
4620 Any errors are signaled by raising errors.OpPrereqError.
4624 self.master_node = self.cfg.GetMasterNode()
4626 assert self.op.power_delay >= 0.0
4628 if self.op.node_names:
4629 if (self.op.command in self._SKIP_MASTER and
4630 self.master_node in self.op.node_names):
4631 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4632 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4634 if master_oob_handler:
4635 additional_text = ("run '%s %s %s' if you want to operate on the"
4636 " master regardless") % (master_oob_handler,
4640 additional_text = "it does not support out-of-band operations"
4642 raise errors.OpPrereqError(("Operating on the master node %s is not"
4643 " allowed for %s; %s") %
4644 (self.master_node, self.op.command,
4645 additional_text), errors.ECODE_INVAL)
4647 self.op.node_names = self.cfg.GetNodeList()
4648 if self.op.command in self._SKIP_MASTER:
4649 self.op.node_names.remove(self.master_node)
4651 if self.op.command in self._SKIP_MASTER:
4652 assert self.master_node not in self.op.node_names
4654 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4656 raise errors.OpPrereqError("Node %s not found" % node_name,
4659 self.nodes.append(node)
4661 if (not self.op.ignore_status and
4662 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4663 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4664 " not marked offline") % node_name,
4667 def Exec(self, feedback_fn):
4668 """Execute OOB and return result if we expect any.
4671 master_node = self.master_node
4674 for idx, node in enumerate(utils.NiceSort(self.nodes,
4675 key=lambda node: node.name)):
4676 node_entry = [(constants.RS_NORMAL, node.name)]
4677 ret.append(node_entry)
4679 oob_program = _SupportsOob(self.cfg, node)
4682 node_entry.append((constants.RS_UNAVAIL, None))
4685 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4686 self.op.command, oob_program, node.name)
4687 result = self.rpc.call_run_oob(master_node, oob_program,
4688 self.op.command, node.name,
4692 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4693 node.name, result.fail_msg)
4694 node_entry.append((constants.RS_NODATA, None))
4697 self._CheckPayload(result)
4698 except errors.OpExecError, err:
4699 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4701 node_entry.append((constants.RS_NODATA, None))
4703 if self.op.command == constants.OOB_HEALTH:
4704 # For health we should log important events
4705 for item, status in result.payload:
4706 if status in [constants.OOB_STATUS_WARNING,
4707 constants.OOB_STATUS_CRITICAL]:
4708 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4709 item, node.name, status)
4711 if self.op.command == constants.OOB_POWER_ON:
4713 elif self.op.command == constants.OOB_POWER_OFF:
4714 node.powered = False
4715 elif self.op.command == constants.OOB_POWER_STATUS:
4716 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4717 if powered != node.powered:
4718 logging.warning(("Recorded power state (%s) of node '%s' does not"
4719 " match actual power state (%s)"), node.powered,
4722 # For configuration changing commands we should update the node
4723 if self.op.command in (constants.OOB_POWER_ON,
4724 constants.OOB_POWER_OFF):
4725 self.cfg.Update(node, feedback_fn)
4727 node_entry.append((constants.RS_NORMAL, result.payload))
4729 if (self.op.command == constants.OOB_POWER_ON and
4730 idx < len(self.nodes) - 1):
4731 time.sleep(self.op.power_delay)
4735 def _CheckPayload(self, result):
4736 """Checks if the payload is valid.
4738 @param result: RPC result
4739 @raises errors.OpExecError: If payload is not valid
4743 if self.op.command == constants.OOB_HEALTH:
4744 if not isinstance(result.payload, list):
4745 errs.append("command 'health' is expected to return a list but got %s" %
4746 type(result.payload))
4748 for item, status in result.payload:
4749 if status not in constants.OOB_STATUSES:
4750 errs.append("health item '%s' has invalid status '%s'" %
4753 if self.op.command == constants.OOB_POWER_STATUS:
4754 if not isinstance(result.payload, dict):
4755 errs.append("power-status is expected to return a dict but got %s" %
4756 type(result.payload))
4758 if self.op.command in [
4759 constants.OOB_POWER_ON,
4760 constants.OOB_POWER_OFF,
4761 constants.OOB_POWER_CYCLE,
4763 if result.payload is not None:
4764 errs.append("%s is expected to not return payload but got '%s'" %
4765 (self.op.command, result.payload))
4768 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4769 utils.CommaJoin(errs))
4772 class _OsQuery(_QueryBase):
4773 FIELDS = query.OS_FIELDS
4775 def ExpandNames(self, lu):
4776 # Lock all nodes in shared mode
4777 # Temporary removal of locks, should be reverted later
4778 # TODO: reintroduce locks when they are lighter-weight
4779 lu.needed_locks = {}
4780 #self.share_locks[locking.LEVEL_NODE] = 1
4781 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4783 # The following variables interact with _QueryBase._GetNames
4785 self.wanted = self.names
4787 self.wanted = locking.ALL_SET
4789 self.do_locking = self.use_locking
4791 def DeclareLocks(self, lu, level):
4795 def _DiagnoseByOS(rlist):
4796 """Remaps a per-node return list into an a per-os per-node dictionary
4798 @param rlist: a map with node names as keys and OS objects as values
4801 @return: a dictionary with osnames as keys and as value another
4802 map, with nodes as keys and tuples of (path, status, diagnose,
4803 variants, parameters, api_versions) as values, eg::
4805 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4806 (/srv/..., False, "invalid api")],
4807 "node2": [(/srv/..., True, "", [], [])]}
4812 # we build here the list of nodes that didn't fail the RPC (at RPC
4813 # level), so that nodes with a non-responding node daemon don't
4814 # make all OSes invalid
4815 good_nodes = [node_name for node_name in rlist
4816 if not rlist[node_name].fail_msg]
4817 for node_name, nr in rlist.items():
4818 if nr.fail_msg or not nr.payload:
4820 for (name, path, status, diagnose, variants,
4821 params, api_versions) in nr.payload:
4822 if name not in all_os:
4823 # build a list of nodes for this os containing empty lists
4824 # for each node in node_list
4826 for nname in good_nodes:
4827 all_os[name][nname] = []
4828 # convert params from [name, help] to (name, help)
4829 params = [tuple(v) for v in params]
4830 all_os[name][node_name].append((path, status, diagnose,
4831 variants, params, api_versions))
4834 def _GetQueryData(self, lu):
4835 """Computes the list of nodes and their attributes.
4838 # Locking is not used
4839 assert not (compat.any(lu.glm.is_owned(level)
4840 for level in locking.LEVELS
4841 if level != locking.LEVEL_CLUSTER) or
4842 self.do_locking or self.use_locking)
4844 valid_nodes = [node.name
4845 for node in lu.cfg.GetAllNodesInfo().values()
4846 if not node.offline and node.vm_capable]
4847 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4848 cluster = lu.cfg.GetClusterInfo()
4852 for (os_name, os_data) in pol.items():
4853 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4854 hidden=(os_name in cluster.hidden_os),
4855 blacklisted=(os_name in cluster.blacklisted_os))
4859 api_versions = set()
4861 for idx, osl in enumerate(os_data.values()):
4862 info.valid = bool(info.valid and osl and osl[0][1])
4866 (node_variants, node_params, node_api) = osl[0][3:6]
4869 variants.update(node_variants)
4870 parameters.update(node_params)
4871 api_versions.update(node_api)
4873 # Filter out inconsistent values
4874 variants.intersection_update(node_variants)
4875 parameters.intersection_update(node_params)
4876 api_versions.intersection_update(node_api)
4878 info.variants = list(variants)
4879 info.parameters = list(parameters)
4880 info.api_versions = list(api_versions)
4882 data[os_name] = info
4884 # Prepare data in requested order
4885 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4889 class LUOsDiagnose(NoHooksLU):
4890 """Logical unit for OS diagnose/query.
4896 def _BuildFilter(fields, names):
4897 """Builds a filter for querying OSes.
4900 name_filter = qlang.MakeSimpleFilter("name", names)
4902 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4903 # respective field is not requested
4904 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4905 for fname in ["hidden", "blacklisted"]
4906 if fname not in fields]
4907 if "valid" not in fields:
4908 status_filter.append([qlang.OP_TRUE, "valid"])
4911 status_filter.insert(0, qlang.OP_AND)
4913 status_filter = None
4915 if name_filter and status_filter:
4916 return [qlang.OP_AND, name_filter, status_filter]
4920 return status_filter
4922 def CheckArguments(self):
4923 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4924 self.op.output_fields, False)
4926 def ExpandNames(self):
4927 self.oq.ExpandNames(self)
4929 def Exec(self, feedback_fn):
4930 return self.oq.OldStyleQuery(self)
4933 class LUNodeRemove(LogicalUnit):
4934 """Logical unit for removing a node.
4937 HPATH = "node-remove"
4938 HTYPE = constants.HTYPE_NODE
4940 def BuildHooksEnv(self):
4945 "OP_TARGET": self.op.node_name,
4946 "NODE_NAME": self.op.node_name,
4949 def BuildHooksNodes(self):
4950 """Build hooks nodes.
4952 This doesn't run on the target node in the pre phase as a failed
4953 node would then be impossible to remove.
4956 all_nodes = self.cfg.GetNodeList()
4958 all_nodes.remove(self.op.node_name)
4961 return (all_nodes, all_nodes)
4963 def CheckPrereq(self):
4964 """Check prerequisites.
4967 - the node exists in the configuration
4968 - it does not have primary or secondary instances
4969 - it's not the master
4971 Any errors are signaled by raising errors.OpPrereqError.
4974 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4975 node = self.cfg.GetNodeInfo(self.op.node_name)
4976 assert node is not None
4978 masternode = self.cfg.GetMasterNode()
4979 if node.name == masternode:
4980 raise errors.OpPrereqError("Node is the master node, failover to another"
4981 " node is required", errors.ECODE_INVAL)
4983 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4984 if node.name in instance.all_nodes:
4985 raise errors.OpPrereqError("Instance %s is still running on the node,"
4986 " please remove first" % instance_name,
4988 self.op.node_name = node.name
4991 def Exec(self, feedback_fn):
4992 """Removes the node from the cluster.
4996 logging.info("Stopping the node daemon and removing configs from node %s",
4999 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5001 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5004 # Promote nodes to master candidate as needed
5005 _AdjustCandidatePool(self, exceptions=[node.name])
5006 self.context.RemoveNode(node.name)
5008 # Run post hooks on the node before it's removed
5009 _RunPostHook(self, node.name)
5011 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5012 msg = result.fail_msg
5014 self.LogWarning("Errors encountered on the remote node while leaving"
5015 " the cluster: %s", msg)
5017 # Remove node from our /etc/hosts
5018 if self.cfg.GetClusterInfo().modify_etc_hosts:
5019 master_node = self.cfg.GetMasterNode()
5020 result = self.rpc.call_etc_hosts_modify(master_node,
5021 constants.ETC_HOSTS_REMOVE,
5023 result.Raise("Can't update hosts file with new host data")
5024 _RedistributeAncillaryFiles(self)
5027 class _NodeQuery(_QueryBase):
5028 FIELDS = query.NODE_FIELDS
5030 def ExpandNames(self, lu):
5031 lu.needed_locks = {}
5032 lu.share_locks = _ShareAll()
5035 self.wanted = _GetWantedNodes(lu, self.names)
5037 self.wanted = locking.ALL_SET
5039 self.do_locking = (self.use_locking and
5040 query.NQ_LIVE in self.requested_data)
5043 # If any non-static field is requested we need to lock the nodes
5044 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5046 def DeclareLocks(self, lu, level):
5049 def _GetQueryData(self, lu):
5050 """Computes the list of nodes and their attributes.
5053 all_info = lu.cfg.GetAllNodesInfo()
5055 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5057 # Gather data as requested
5058 if query.NQ_LIVE in self.requested_data:
5059 # filter out non-vm_capable nodes
5060 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5062 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5063 [lu.cfg.GetHypervisorType()])
5064 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5065 for (name, nresult) in node_data.items()
5066 if not nresult.fail_msg and nresult.payload)
5070 if query.NQ_INST in self.requested_data:
5071 node_to_primary = dict([(name, set()) for name in nodenames])
5072 node_to_secondary = dict([(name, set()) for name in nodenames])
5074 inst_data = lu.cfg.GetAllInstancesInfo()
5076 for inst in inst_data.values():
5077 if inst.primary_node in node_to_primary:
5078 node_to_primary[inst.primary_node].add(inst.name)
5079 for secnode in inst.secondary_nodes:
5080 if secnode in node_to_secondary:
5081 node_to_secondary[secnode].add(inst.name)
5083 node_to_primary = None
5084 node_to_secondary = None
5086 if query.NQ_OOB in self.requested_data:
5087 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5088 for name, node in all_info.iteritems())
5092 if query.NQ_GROUP in self.requested_data:
5093 groups = lu.cfg.GetAllNodeGroupsInfo()
5097 return query.NodeQueryData([all_info[name] for name in nodenames],
5098 live_data, lu.cfg.GetMasterNode(),
5099 node_to_primary, node_to_secondary, groups,
5100 oob_support, lu.cfg.GetClusterInfo())
5103 class LUNodeQuery(NoHooksLU):
5104 """Logical unit for querying nodes.
5107 # pylint: disable=W0142
5110 def CheckArguments(self):
5111 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5112 self.op.output_fields, self.op.use_locking)
5114 def ExpandNames(self):
5115 self.nq.ExpandNames(self)
5117 def DeclareLocks(self, level):
5118 self.nq.DeclareLocks(self, level)
5120 def Exec(self, feedback_fn):
5121 return self.nq.OldStyleQuery(self)
5124 class LUNodeQueryvols(NoHooksLU):
5125 """Logical unit for getting volumes on node(s).
5129 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5130 _FIELDS_STATIC = utils.FieldSet("node")
5132 def CheckArguments(self):
5133 _CheckOutputFields(static=self._FIELDS_STATIC,
5134 dynamic=self._FIELDS_DYNAMIC,
5135 selected=self.op.output_fields)
5137 def ExpandNames(self):
5138 self.share_locks = _ShareAll()
5139 self.needed_locks = {}
5141 if not self.op.nodes:
5142 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5144 self.needed_locks[locking.LEVEL_NODE] = \
5145 _GetWantedNodes(self, self.op.nodes)
5147 def Exec(self, feedback_fn):
5148 """Computes the list of nodes and their attributes.
5151 nodenames = self.owned_locks(locking.LEVEL_NODE)
5152 volumes = self.rpc.call_node_volumes(nodenames)
5154 ilist = self.cfg.GetAllInstancesInfo()
5155 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5158 for node in nodenames:
5159 nresult = volumes[node]
5162 msg = nresult.fail_msg
5164 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5167 node_vols = sorted(nresult.payload,
5168 key=operator.itemgetter("dev"))
5170 for vol in node_vols:
5172 for field in self.op.output_fields:
5175 elif field == "phys":
5179 elif field == "name":
5181 elif field == "size":
5182 val = int(float(vol["size"]))
5183 elif field == "instance":
5184 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5186 raise errors.ParameterError(field)
5187 node_output.append(str(val))
5189 output.append(node_output)
5194 class LUNodeQueryStorage(NoHooksLU):
5195 """Logical unit for getting information on storage units on node(s).
5198 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5201 def CheckArguments(self):
5202 _CheckOutputFields(static=self._FIELDS_STATIC,
5203 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5204 selected=self.op.output_fields)
5206 def ExpandNames(self):
5207 self.share_locks = _ShareAll()
5208 self.needed_locks = {}
5211 self.needed_locks[locking.LEVEL_NODE] = \
5212 _GetWantedNodes(self, self.op.nodes)
5214 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5216 def Exec(self, feedback_fn):
5217 """Computes the list of nodes and their attributes.
5220 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5222 # Always get name to sort by
5223 if constants.SF_NAME in self.op.output_fields:
5224 fields = self.op.output_fields[:]
5226 fields = [constants.SF_NAME] + self.op.output_fields
5228 # Never ask for node or type as it's only known to the LU
5229 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5230 while extra in fields:
5231 fields.remove(extra)
5233 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5234 name_idx = field_idx[constants.SF_NAME]
5236 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5237 data = self.rpc.call_storage_list(self.nodes,
5238 self.op.storage_type, st_args,
5239 self.op.name, fields)
5243 for node in utils.NiceSort(self.nodes):
5244 nresult = data[node]
5248 msg = nresult.fail_msg
5250 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5253 rows = dict([(row[name_idx], row) for row in nresult.payload])
5255 for name in utils.NiceSort(rows.keys()):
5260 for field in self.op.output_fields:
5261 if field == constants.SF_NODE:
5263 elif field == constants.SF_TYPE:
5264 val = self.op.storage_type
5265 elif field in field_idx:
5266 val = row[field_idx[field]]
5268 raise errors.ParameterError(field)
5277 class _InstanceQuery(_QueryBase):
5278 FIELDS = query.INSTANCE_FIELDS
5280 def ExpandNames(self, lu):
5281 lu.needed_locks = {}
5282 lu.share_locks = _ShareAll()
5285 self.wanted = _GetWantedInstances(lu, self.names)
5287 self.wanted = locking.ALL_SET
5289 self.do_locking = (self.use_locking and
5290 query.IQ_LIVE in self.requested_data)
5292 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5293 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5294 lu.needed_locks[locking.LEVEL_NODE] = []
5295 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5297 self.do_grouplocks = (self.do_locking and
5298 query.IQ_NODES in self.requested_data)
5300 def DeclareLocks(self, lu, level):
5302 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5303 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5305 # Lock all groups used by instances optimistically; this requires going
5306 # via the node before it's locked, requiring verification later on
5307 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5309 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5310 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5311 elif level == locking.LEVEL_NODE:
5312 lu._LockInstancesNodes() # pylint: disable=W0212
5315 def _CheckGroupLocks(lu):
5316 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5317 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5319 # Check if node groups for locked instances are still correct
5320 for instance_name in owned_instances:
5321 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5323 def _GetQueryData(self, lu):
5324 """Computes the list of instances and their attributes.
5327 if self.do_grouplocks:
5328 self._CheckGroupLocks(lu)
5330 cluster = lu.cfg.GetClusterInfo()
5331 all_info = lu.cfg.GetAllInstancesInfo()
5333 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5335 instance_list = [all_info[name] for name in instance_names]
5336 nodes = frozenset(itertools.chain(*(inst.all_nodes
5337 for inst in instance_list)))
5338 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5341 wrongnode_inst = set()
5343 # Gather data as requested
5344 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5346 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5348 result = node_data[name]
5350 # offline nodes will be in both lists
5351 assert result.fail_msg
5352 offline_nodes.append(name)
5354 bad_nodes.append(name)
5355 elif result.payload:
5356 for inst in result.payload:
5357 if inst in all_info:
5358 if all_info[inst].primary_node == name:
5359 live_data.update(result.payload)
5361 wrongnode_inst.add(inst)
5363 # orphan instance; we don't list it here as we don't
5364 # handle this case yet in the output of instance listing
5365 logging.warning("Orphan instance '%s' found on node %s",
5367 # else no instance is alive
5371 if query.IQ_DISKUSAGE in self.requested_data:
5372 disk_usage = dict((inst.name,
5373 _ComputeDiskSize(inst.disk_template,
5374 [{constants.IDISK_SIZE: disk.size}
5375 for disk in inst.disks]))
5376 for inst in instance_list)
5380 if query.IQ_CONSOLE in self.requested_data:
5382 for inst in instance_list:
5383 if inst.name in live_data:
5384 # Instance is running
5385 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5387 consinfo[inst.name] = None
5388 assert set(consinfo.keys()) == set(instance_names)
5392 if query.IQ_NODES in self.requested_data:
5393 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5395 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5396 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5397 for uuid in set(map(operator.attrgetter("group"),
5403 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5404 disk_usage, offline_nodes, bad_nodes,
5405 live_data, wrongnode_inst, consinfo,
5409 class LUQuery(NoHooksLU):
5410 """Query for resources/items of a certain kind.
5413 # pylint: disable=W0142
5416 def CheckArguments(self):
5417 qcls = _GetQueryImplementation(self.op.what)
5419 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5421 def ExpandNames(self):
5422 self.impl.ExpandNames(self)
5424 def DeclareLocks(self, level):
5425 self.impl.DeclareLocks(self, level)
5427 def Exec(self, feedback_fn):
5428 return self.impl.NewStyleQuery(self)
5431 class LUQueryFields(NoHooksLU):
5432 """Query for resources/items of a certain kind.
5435 # pylint: disable=W0142
5438 def CheckArguments(self):
5439 self.qcls = _GetQueryImplementation(self.op.what)
5441 def ExpandNames(self):
5442 self.needed_locks = {}
5444 def Exec(self, feedback_fn):
5445 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5448 class LUNodeModifyStorage(NoHooksLU):
5449 """Logical unit for modifying a storage volume on a node.
5454 def CheckArguments(self):
5455 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5457 storage_type = self.op.storage_type
5460 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5462 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5463 " modified" % storage_type,
5466 diff = set(self.op.changes.keys()) - modifiable
5468 raise errors.OpPrereqError("The following fields can not be modified for"
5469 " storage units of type '%s': %r" %
5470 (storage_type, list(diff)),
5473 def ExpandNames(self):
5474 self.needed_locks = {
5475 locking.LEVEL_NODE: self.op.node_name,
5478 def Exec(self, feedback_fn):
5479 """Computes the list of nodes and their attributes.
5482 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5483 result = self.rpc.call_storage_modify(self.op.node_name,
5484 self.op.storage_type, st_args,
5485 self.op.name, self.op.changes)
5486 result.Raise("Failed to modify storage unit '%s' on %s" %
5487 (self.op.name, self.op.node_name))
5490 class LUNodeAdd(LogicalUnit):
5491 """Logical unit for adding node to the cluster.
5495 HTYPE = constants.HTYPE_NODE
5496 _NFLAGS = ["master_capable", "vm_capable"]
5498 def CheckArguments(self):
5499 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5500 # validate/normalize the node name
5501 self.hostname = netutils.GetHostname(name=self.op.node_name,
5502 family=self.primary_ip_family)
5503 self.op.node_name = self.hostname.name
5505 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5506 raise errors.OpPrereqError("Cannot readd the master node",
5509 if self.op.readd and self.op.group:
5510 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5511 " being readded", errors.ECODE_INVAL)
5513 def BuildHooksEnv(self):
5516 This will run on all nodes before, and on all nodes + the new node after.
5520 "OP_TARGET": self.op.node_name,
5521 "NODE_NAME": self.op.node_name,
5522 "NODE_PIP": self.op.primary_ip,
5523 "NODE_SIP": self.op.secondary_ip,
5524 "MASTER_CAPABLE": str(self.op.master_capable),
5525 "VM_CAPABLE": str(self.op.vm_capable),
5528 def BuildHooksNodes(self):
5529 """Build hooks nodes.
5532 # Exclude added node
5533 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5534 post_nodes = pre_nodes + [self.op.node_name, ]
5536 return (pre_nodes, post_nodes)
5538 def CheckPrereq(self):
5539 """Check prerequisites.
5542 - the new node is not already in the config
5544 - its parameters (single/dual homed) matches the cluster
5546 Any errors are signaled by raising errors.OpPrereqError.
5550 hostname = self.hostname
5551 node = hostname.name
5552 primary_ip = self.op.primary_ip = hostname.ip
5553 if self.op.secondary_ip is None:
5554 if self.primary_ip_family == netutils.IP6Address.family:
5555 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5556 " IPv4 address must be given as secondary",
5558 self.op.secondary_ip = primary_ip
5560 secondary_ip = self.op.secondary_ip
5561 if not netutils.IP4Address.IsValid(secondary_ip):
5562 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5563 " address" % secondary_ip, errors.ECODE_INVAL)
5565 node_list = cfg.GetNodeList()
5566 if not self.op.readd and node in node_list:
5567 raise errors.OpPrereqError("Node %s is already in the configuration" %
5568 node, errors.ECODE_EXISTS)
5569 elif self.op.readd and node not in node_list:
5570 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5573 self.changed_primary_ip = False
5575 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5576 if self.op.readd and node == existing_node_name:
5577 if existing_node.secondary_ip != secondary_ip:
5578 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5579 " address configuration as before",
5581 if existing_node.primary_ip != primary_ip:
5582 self.changed_primary_ip = True
5586 if (existing_node.primary_ip == primary_ip or
5587 existing_node.secondary_ip == primary_ip or
5588 existing_node.primary_ip == secondary_ip or
5589 existing_node.secondary_ip == secondary_ip):
5590 raise errors.OpPrereqError("New node ip address(es) conflict with"
5591 " existing node %s" % existing_node.name,
5592 errors.ECODE_NOTUNIQUE)
5594 # After this 'if' block, None is no longer a valid value for the
5595 # _capable op attributes
5597 old_node = self.cfg.GetNodeInfo(node)
5598 assert old_node is not None, "Can't retrieve locked node %s" % node
5599 for attr in self._NFLAGS:
5600 if getattr(self.op, attr) is None:
5601 setattr(self.op, attr, getattr(old_node, attr))
5603 for attr in self._NFLAGS:
5604 if getattr(self.op, attr) is None:
5605 setattr(self.op, attr, True)
5607 if self.op.readd and not self.op.vm_capable:
5608 pri, sec = cfg.GetNodeInstances(node)
5610 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5611 " flag set to false, but it already holds"
5612 " instances" % node,
5615 # check that the type of the node (single versus dual homed) is the
5616 # same as for the master
5617 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5618 master_singlehomed = myself.secondary_ip == myself.primary_ip
5619 newbie_singlehomed = secondary_ip == primary_ip
5620 if master_singlehomed != newbie_singlehomed:
5621 if master_singlehomed:
5622 raise errors.OpPrereqError("The master has no secondary ip but the"
5623 " new node has one",
5626 raise errors.OpPrereqError("The master has a secondary ip but the"
5627 " new node doesn't have one",
5630 # checks reachability
5631 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5632 raise errors.OpPrereqError("Node not reachable by ping",
5633 errors.ECODE_ENVIRON)
5635 if not newbie_singlehomed:
5636 # check reachability from my secondary ip to newbie's secondary ip
5637 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5638 source=myself.secondary_ip):
5639 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5640 " based ping to node daemon port",
5641 errors.ECODE_ENVIRON)
5648 if self.op.master_capable:
5649 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5651 self.master_candidate = False
5654 self.new_node = old_node
5656 node_group = cfg.LookupNodeGroup(self.op.group)
5657 self.new_node = objects.Node(name=node,
5658 primary_ip=primary_ip,
5659 secondary_ip=secondary_ip,
5660 master_candidate=self.master_candidate,
5661 offline=False, drained=False,
5664 if self.op.ndparams:
5665 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5667 if self.op.hv_state:
5668 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5670 if self.op.disk_state:
5671 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5673 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5674 # it a property on the base class.
5675 result = rpc.DnsOnlyRunner().call_version([node])[node]
5676 result.Raise("Can't get version information from node %s" % node)
5677 if constants.PROTOCOL_VERSION == result.payload:
5678 logging.info("Communication to node %s fine, sw version %s match",
5679 node, result.payload)
5681 raise errors.OpPrereqError("Version mismatch master version %s,"
5682 " node version %s" %
5683 (constants.PROTOCOL_VERSION, result.payload),
5684 errors.ECODE_ENVIRON)
5686 def Exec(self, feedback_fn):
5687 """Adds the new node to the cluster.
5690 new_node = self.new_node
5691 node = new_node.name
5693 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5696 # We adding a new node so we assume it's powered
5697 new_node.powered = True
5699 # for re-adds, reset the offline/drained/master-candidate flags;
5700 # we need to reset here, otherwise offline would prevent RPC calls
5701 # later in the procedure; this also means that if the re-add
5702 # fails, we are left with a non-offlined, broken node
5704 new_node.drained = new_node.offline = False # pylint: disable=W0201
5705 self.LogInfo("Readding a node, the offline/drained flags were reset")
5706 # if we demote the node, we do cleanup later in the procedure
5707 new_node.master_candidate = self.master_candidate
5708 if self.changed_primary_ip:
5709 new_node.primary_ip = self.op.primary_ip
5711 # copy the master/vm_capable flags
5712 for attr in self._NFLAGS:
5713 setattr(new_node, attr, getattr(self.op, attr))
5715 # notify the user about any possible mc promotion
5716 if new_node.master_candidate:
5717 self.LogInfo("Node will be a master candidate")
5719 if self.op.ndparams:
5720 new_node.ndparams = self.op.ndparams
5722 new_node.ndparams = {}
5724 if self.op.hv_state:
5725 new_node.hv_state_static = self.new_hv_state
5727 if self.op.disk_state:
5728 new_node.disk_state_static = self.new_disk_state
5730 # Add node to our /etc/hosts, and add key to known_hosts
5731 if self.cfg.GetClusterInfo().modify_etc_hosts:
5732 master_node = self.cfg.GetMasterNode()
5733 result = self.rpc.call_etc_hosts_modify(master_node,
5734 constants.ETC_HOSTS_ADD,
5737 result.Raise("Can't update hosts file with new host data")
5739 if new_node.secondary_ip != new_node.primary_ip:
5740 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5743 node_verify_list = [self.cfg.GetMasterNode()]
5744 node_verify_param = {
5745 constants.NV_NODELIST: ([node], {}),
5746 # TODO: do a node-net-test as well?
5749 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5750 self.cfg.GetClusterName())
5751 for verifier in node_verify_list:
5752 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5753 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5755 for failed in nl_payload:
5756 feedback_fn("ssh/hostname verification failed"
5757 " (checking from %s): %s" %
5758 (verifier, nl_payload[failed]))
5759 raise errors.OpExecError("ssh/hostname verification failed")
5762 _RedistributeAncillaryFiles(self)
5763 self.context.ReaddNode(new_node)
5764 # make sure we redistribute the config
5765 self.cfg.Update(new_node, feedback_fn)
5766 # and make sure the new node will not have old files around
5767 if not new_node.master_candidate:
5768 result = self.rpc.call_node_demote_from_mc(new_node.name)
5769 msg = result.fail_msg
5771 self.LogWarning("Node failed to demote itself from master"
5772 " candidate status: %s" % msg)
5774 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5775 additional_vm=self.op.vm_capable)
5776 self.context.AddNode(new_node, self.proc.GetECId())
5779 class LUNodeSetParams(LogicalUnit):
5780 """Modifies the parameters of a node.
5782 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5783 to the node role (as _ROLE_*)
5784 @cvar _R2F: a dictionary from node role to tuples of flags
5785 @cvar _FLAGS: a list of attribute names corresponding to the flags
5788 HPATH = "node-modify"
5789 HTYPE = constants.HTYPE_NODE
5791 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5793 (True, False, False): _ROLE_CANDIDATE,
5794 (False, True, False): _ROLE_DRAINED,
5795 (False, False, True): _ROLE_OFFLINE,
5796 (False, False, False): _ROLE_REGULAR,
5798 _R2F = dict((v, k) for k, v in _F2R.items())
5799 _FLAGS = ["master_candidate", "drained", "offline"]
5801 def CheckArguments(self):
5802 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5803 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5804 self.op.master_capable, self.op.vm_capable,
5805 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5807 if all_mods.count(None) == len(all_mods):
5808 raise errors.OpPrereqError("Please pass at least one modification",
5810 if all_mods.count(True) > 1:
5811 raise errors.OpPrereqError("Can't set the node into more than one"
5812 " state at the same time",
5815 # Boolean value that tells us whether we might be demoting from MC
5816 self.might_demote = (self.op.master_candidate == False or
5817 self.op.offline == True or
5818 self.op.drained == True or
5819 self.op.master_capable == False)
5821 if self.op.secondary_ip:
5822 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5823 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5824 " address" % self.op.secondary_ip,
5827 self.lock_all = self.op.auto_promote and self.might_demote
5828 self.lock_instances = self.op.secondary_ip is not None
5830 def _InstanceFilter(self, instance):
5831 """Filter for getting affected instances.
5834 return (instance.disk_template in constants.DTS_INT_MIRROR and
5835 self.op.node_name in instance.all_nodes)
5837 def ExpandNames(self):
5839 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5841 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5843 # Since modifying a node can have severe effects on currently running
5844 # operations the resource lock is at least acquired in shared mode
5845 self.needed_locks[locking.LEVEL_NODE_RES] = \
5846 self.needed_locks[locking.LEVEL_NODE]
5848 # Get node resource and instance locks in shared mode; they are not used
5849 # for anything but read-only access
5850 self.share_locks[locking.LEVEL_NODE_RES] = 1
5851 self.share_locks[locking.LEVEL_INSTANCE] = 1
5853 if self.lock_instances:
5854 self.needed_locks[locking.LEVEL_INSTANCE] = \
5855 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5857 def BuildHooksEnv(self):
5860 This runs on the master node.
5864 "OP_TARGET": self.op.node_name,
5865 "MASTER_CANDIDATE": str(self.op.master_candidate),
5866 "OFFLINE": str(self.op.offline),
5867 "DRAINED": str(self.op.drained),
5868 "MASTER_CAPABLE": str(self.op.master_capable),
5869 "VM_CAPABLE": str(self.op.vm_capable),
5872 def BuildHooksNodes(self):
5873 """Build hooks nodes.
5876 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5879 def CheckPrereq(self):
5880 """Check prerequisites.
5882 This only checks the instance list against the existing names.
5885 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5887 if self.lock_instances:
5888 affected_instances = \
5889 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5891 # Verify instance locks
5892 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5893 wanted_instances = frozenset(affected_instances.keys())
5894 if wanted_instances - owned_instances:
5895 raise errors.OpPrereqError("Instances affected by changing node %s's"
5896 " secondary IP address have changed since"
5897 " locks were acquired, wanted '%s', have"
5898 " '%s'; retry the operation" %
5900 utils.CommaJoin(wanted_instances),
5901 utils.CommaJoin(owned_instances)),
5904 affected_instances = None
5906 if (self.op.master_candidate is not None or
5907 self.op.drained is not None or
5908 self.op.offline is not None):
5909 # we can't change the master's node flags
5910 if self.op.node_name == self.cfg.GetMasterNode():
5911 raise errors.OpPrereqError("The master role can be changed"
5912 " only via master-failover",
5915 if self.op.master_candidate and not node.master_capable:
5916 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5917 " it a master candidate" % node.name,
5920 if self.op.vm_capable == False:
5921 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5923 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5924 " the vm_capable flag" % node.name,
5927 if node.master_candidate and self.might_demote and not self.lock_all:
5928 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5929 # check if after removing the current node, we're missing master
5931 (mc_remaining, mc_should, _) = \
5932 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5933 if mc_remaining < mc_should:
5934 raise errors.OpPrereqError("Not enough master candidates, please"
5935 " pass auto promote option to allow"
5936 " promotion (--auto-promote or RAPI"
5937 " auto_promote=True)", errors.ECODE_STATE)
5939 self.old_flags = old_flags = (node.master_candidate,
5940 node.drained, node.offline)
5941 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5942 self.old_role = old_role = self._F2R[old_flags]
5944 # Check for ineffective changes
5945 for attr in self._FLAGS:
5946 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5947 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5948 setattr(self.op, attr, None)
5950 # Past this point, any flag change to False means a transition
5951 # away from the respective state, as only real changes are kept
5953 # TODO: We might query the real power state if it supports OOB
5954 if _SupportsOob(self.cfg, node):
5955 if self.op.offline is False and not (node.powered or
5956 self.op.powered == True):
5957 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5958 " offline status can be reset") %
5960 elif self.op.powered is not None:
5961 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5962 " as it does not support out-of-band"
5963 " handling") % self.op.node_name)
5965 # If we're being deofflined/drained, we'll MC ourself if needed
5966 if (self.op.drained == False or self.op.offline == False or
5967 (self.op.master_capable and not node.master_capable)):
5968 if _DecideSelfPromotion(self):
5969 self.op.master_candidate = True
5970 self.LogInfo("Auto-promoting node to master candidate")
5972 # If we're no longer master capable, we'll demote ourselves from MC
5973 if self.op.master_capable == False and node.master_candidate:
5974 self.LogInfo("Demoting from master candidate")
5975 self.op.master_candidate = False
5978 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5979 if self.op.master_candidate:
5980 new_role = self._ROLE_CANDIDATE
5981 elif self.op.drained:
5982 new_role = self._ROLE_DRAINED
5983 elif self.op.offline:
5984 new_role = self._ROLE_OFFLINE
5985 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5986 # False is still in new flags, which means we're un-setting (the
5988 new_role = self._ROLE_REGULAR
5989 else: # no new flags, nothing, keep old role
5992 self.new_role = new_role
5994 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5995 # Trying to transition out of offline status
5996 result = self.rpc.call_version([node.name])[node.name]
5998 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5999 " to report its version: %s" %
6000 (node.name, result.fail_msg),
6003 self.LogWarning("Transitioning node from offline to online state"
6004 " without using re-add. Please make sure the node"
6007 if self.op.secondary_ip:
6008 # Ok even without locking, because this can't be changed by any LU
6009 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6010 master_singlehomed = master.secondary_ip == master.primary_ip
6011 if master_singlehomed and self.op.secondary_ip:
6012 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
6013 " homed cluster", errors.ECODE_INVAL)
6015 assert not (frozenset(affected_instances) -
6016 self.owned_locks(locking.LEVEL_INSTANCE))
6019 if affected_instances:
6020 raise errors.OpPrereqError("Cannot change secondary IP address:"
6021 " offline node has instances (%s)"
6022 " configured to use it" %
6023 utils.CommaJoin(affected_instances.keys()))
6025 # On online nodes, check that no instances are running, and that
6026 # the node has the new ip and we can reach it.
6027 for instance in affected_instances.values():
6028 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6029 msg="cannot change secondary ip")
6031 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6032 if master.name != node.name:
6033 # check reachability from master secondary ip to new secondary ip
6034 if not netutils.TcpPing(self.op.secondary_ip,
6035 constants.DEFAULT_NODED_PORT,
6036 source=master.secondary_ip):
6037 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6038 " based ping to node daemon port",
6039 errors.ECODE_ENVIRON)
6041 if self.op.ndparams:
6042 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6043 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6044 self.new_ndparams = new_ndparams
6046 if self.op.hv_state:
6047 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6048 self.node.hv_state_static)
6050 if self.op.disk_state:
6051 self.new_disk_state = \
6052 _MergeAndVerifyDiskState(self.op.disk_state,
6053 self.node.disk_state_static)
6055 def Exec(self, feedback_fn):
6060 old_role = self.old_role
6061 new_role = self.new_role
6065 if self.op.ndparams:
6066 node.ndparams = self.new_ndparams
6068 if self.op.powered is not None:
6069 node.powered = self.op.powered
6071 if self.op.hv_state:
6072 node.hv_state_static = self.new_hv_state
6074 if self.op.disk_state:
6075 node.disk_state_static = self.new_disk_state
6077 for attr in ["master_capable", "vm_capable"]:
6078 val = getattr(self.op, attr)
6080 setattr(node, attr, val)
6081 result.append((attr, str(val)))
6083 if new_role != old_role:
6084 # Tell the node to demote itself, if no longer MC and not offline
6085 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6086 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6088 self.LogWarning("Node failed to demote itself: %s", msg)
6090 new_flags = self._R2F[new_role]
6091 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6093 result.append((desc, str(nf)))
6094 (node.master_candidate, node.drained, node.offline) = new_flags
6096 # we locked all nodes, we adjust the CP before updating this node
6098 _AdjustCandidatePool(self, [node.name])
6100 if self.op.secondary_ip:
6101 node.secondary_ip = self.op.secondary_ip
6102 result.append(("secondary_ip", self.op.secondary_ip))
6104 # this will trigger configuration file update, if needed
6105 self.cfg.Update(node, feedback_fn)
6107 # this will trigger job queue propagation or cleanup if the mc
6109 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6110 self.context.ReaddNode(node)
6115 class LUNodePowercycle(NoHooksLU):
6116 """Powercycles a node.
6121 def CheckArguments(self):
6122 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6123 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6124 raise errors.OpPrereqError("The node is the master and the force"
6125 " parameter was not set",
6128 def ExpandNames(self):
6129 """Locking for PowercycleNode.
6131 This is a last-resort option and shouldn't block on other
6132 jobs. Therefore, we grab no locks.
6135 self.needed_locks = {}
6137 def Exec(self, feedback_fn):
6141 result = self.rpc.call_node_powercycle(self.op.node_name,
6142 self.cfg.GetHypervisorType())
6143 result.Raise("Failed to schedule the reboot")
6144 return result.payload
6147 class LUClusterQuery(NoHooksLU):
6148 """Query cluster configuration.
6153 def ExpandNames(self):
6154 self.needed_locks = {}
6156 def Exec(self, feedback_fn):
6157 """Return cluster config.
6160 cluster = self.cfg.GetClusterInfo()
6163 # Filter just for enabled hypervisors
6164 for os_name, hv_dict in cluster.os_hvp.items():
6165 os_hvp[os_name] = {}
6166 for hv_name, hv_params in hv_dict.items():
6167 if hv_name in cluster.enabled_hypervisors:
6168 os_hvp[os_name][hv_name] = hv_params
6170 # Convert ip_family to ip_version
6171 primary_ip_version = constants.IP4_VERSION
6172 if cluster.primary_ip_family == netutils.IP6Address.family:
6173 primary_ip_version = constants.IP6_VERSION
6176 "software_version": constants.RELEASE_VERSION,
6177 "protocol_version": constants.PROTOCOL_VERSION,
6178 "config_version": constants.CONFIG_VERSION,
6179 "os_api_version": max(constants.OS_API_VERSIONS),
6180 "export_version": constants.EXPORT_VERSION,
6181 "architecture": runtime.GetArchInfo(),
6182 "name": cluster.cluster_name,
6183 "master": cluster.master_node,
6184 "default_hypervisor": cluster.primary_hypervisor,
6185 "enabled_hypervisors": cluster.enabled_hypervisors,
6186 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6187 for hypervisor_name in cluster.enabled_hypervisors]),
6189 "beparams": cluster.beparams,
6190 "osparams": cluster.osparams,
6191 "ipolicy": cluster.ipolicy,
6192 "nicparams": cluster.nicparams,
6193 "ndparams": cluster.ndparams,
6194 "diskparams": cluster.diskparams,
6195 "candidate_pool_size": cluster.candidate_pool_size,
6196 "master_netdev": cluster.master_netdev,
6197 "master_netmask": cluster.master_netmask,
6198 "use_external_mip_script": cluster.use_external_mip_script,
6199 "volume_group_name": cluster.volume_group_name,
6200 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6201 "file_storage_dir": cluster.file_storage_dir,
6202 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6203 "maintain_node_health": cluster.maintain_node_health,
6204 "ctime": cluster.ctime,
6205 "mtime": cluster.mtime,
6206 "uuid": cluster.uuid,
6207 "tags": list(cluster.GetTags()),
6208 "uid_pool": cluster.uid_pool,
6209 "default_iallocator": cluster.default_iallocator,
6210 "reserved_lvs": cluster.reserved_lvs,
6211 "primary_ip_version": primary_ip_version,
6212 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6213 "hidden_os": cluster.hidden_os,
6214 "blacklisted_os": cluster.blacklisted_os,
6220 class LUClusterConfigQuery(NoHooksLU):
6221 """Return configuration values.
6226 def CheckArguments(self):
6227 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6229 def ExpandNames(self):
6230 self.cq.ExpandNames(self)
6232 def DeclareLocks(self, level):
6233 self.cq.DeclareLocks(self, level)
6235 def Exec(self, feedback_fn):
6236 result = self.cq.OldStyleQuery(self)
6238 assert len(result) == 1
6243 class _ClusterQuery(_QueryBase):
6244 FIELDS = query.CLUSTER_FIELDS
6246 #: Do not sort (there is only one item)
6249 def ExpandNames(self, lu):
6250 lu.needed_locks = {}
6252 # The following variables interact with _QueryBase._GetNames
6253 self.wanted = locking.ALL_SET
6254 self.do_locking = self.use_locking
6257 raise errors.OpPrereqError("Can not use locking for cluster queries",
6260 def DeclareLocks(self, lu, level):
6263 def _GetQueryData(self, lu):
6264 """Computes the list of nodes and their attributes.
6267 # Locking is not used
6268 assert not (compat.any(lu.glm.is_owned(level)
6269 for level in locking.LEVELS
6270 if level != locking.LEVEL_CLUSTER) or
6271 self.do_locking or self.use_locking)
6273 if query.CQ_CONFIG in self.requested_data:
6274 cluster = lu.cfg.GetClusterInfo()
6276 cluster = NotImplemented
6278 if query.CQ_QUEUE_DRAINED in self.requested_data:
6279 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6281 drain_flag = NotImplemented
6283 if query.CQ_WATCHER_PAUSE in self.requested_data:
6284 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6286 watcher_pause = NotImplemented
6288 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6291 class LUInstanceActivateDisks(NoHooksLU):
6292 """Bring up an instance's disks.
6297 def ExpandNames(self):
6298 self._ExpandAndLockInstance()
6299 self.needed_locks[locking.LEVEL_NODE] = []
6300 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6302 def DeclareLocks(self, level):
6303 if level == locking.LEVEL_NODE:
6304 self._LockInstancesNodes()
6306 def CheckPrereq(self):
6307 """Check prerequisites.
6309 This checks that the instance is in the cluster.
6312 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6313 assert self.instance is not None, \
6314 "Cannot retrieve locked instance %s" % self.op.instance_name
6315 _CheckNodeOnline(self, self.instance.primary_node)
6317 def Exec(self, feedback_fn):
6318 """Activate the disks.
6321 disks_ok, disks_info = \
6322 _AssembleInstanceDisks(self, self.instance,
6323 ignore_size=self.op.ignore_size)
6325 raise errors.OpExecError("Cannot activate block devices")
6330 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6332 """Prepare the block devices for an instance.
6334 This sets up the block devices on all nodes.
6336 @type lu: L{LogicalUnit}
6337 @param lu: the logical unit on whose behalf we execute
6338 @type instance: L{objects.Instance}
6339 @param instance: the instance for whose disks we assemble
6340 @type disks: list of L{objects.Disk} or None
6341 @param disks: which disks to assemble (or all, if None)
6342 @type ignore_secondaries: boolean
6343 @param ignore_secondaries: if true, errors on secondary nodes
6344 won't result in an error return from the function
6345 @type ignore_size: boolean
6346 @param ignore_size: if true, the current known size of the disk
6347 will not be used during the disk activation, useful for cases
6348 when the size is wrong
6349 @return: False if the operation failed, otherwise a list of
6350 (host, instance_visible_name, node_visible_name)
6351 with the mapping from node devices to instance devices
6356 iname = instance.name
6357 disks = _ExpandCheckDisks(instance, disks)
6359 # With the two passes mechanism we try to reduce the window of
6360 # opportunity for the race condition of switching DRBD to primary
6361 # before handshaking occured, but we do not eliminate it
6363 # The proper fix would be to wait (with some limits) until the
6364 # connection has been made and drbd transitions from WFConnection
6365 # into any other network-connected state (Connected, SyncTarget,
6368 # 1st pass, assemble on all nodes in secondary mode
6369 for idx, inst_disk in enumerate(disks):
6370 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6372 node_disk = node_disk.Copy()
6373 node_disk.UnsetSize()
6374 lu.cfg.SetDiskID(node_disk, node)
6375 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6377 msg = result.fail_msg
6379 is_offline_secondary = (node in instance.secondary_nodes and
6381 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6382 " (is_primary=False, pass=1): %s",
6383 inst_disk.iv_name, node, msg)
6384 if not (ignore_secondaries or is_offline_secondary):
6387 # FIXME: race condition on drbd migration to primary
6389 # 2nd pass, do only the primary node
6390 for idx, inst_disk in enumerate(disks):
6393 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6394 if node != instance.primary_node:
6397 node_disk = node_disk.Copy()
6398 node_disk.UnsetSize()
6399 lu.cfg.SetDiskID(node_disk, node)
6400 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6402 msg = result.fail_msg
6404 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6405 " (is_primary=True, pass=2): %s",
6406 inst_disk.iv_name, node, msg)
6409 dev_path = result.payload
6411 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6413 # leave the disks configured for the primary node
6414 # this is a workaround that would be fixed better by
6415 # improving the logical/physical id handling
6417 lu.cfg.SetDiskID(disk, instance.primary_node)
6419 return disks_ok, device_info
6422 def _StartInstanceDisks(lu, instance, force):
6423 """Start the disks of an instance.
6426 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6427 ignore_secondaries=force)
6429 _ShutdownInstanceDisks(lu, instance)
6430 if force is not None and not force:
6431 lu.proc.LogWarning("", hint="If the message above refers to a"
6433 " you can retry the operation using '--force'.")
6434 raise errors.OpExecError("Disk consistency error")
6437 class LUInstanceDeactivateDisks(NoHooksLU):
6438 """Shutdown an instance's disks.
6443 def ExpandNames(self):
6444 self._ExpandAndLockInstance()
6445 self.needed_locks[locking.LEVEL_NODE] = []
6446 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6448 def DeclareLocks(self, level):
6449 if level == locking.LEVEL_NODE:
6450 self._LockInstancesNodes()
6452 def CheckPrereq(self):
6453 """Check prerequisites.
6455 This checks that the instance is in the cluster.
6458 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6459 assert self.instance is not None, \
6460 "Cannot retrieve locked instance %s" % self.op.instance_name
6462 def Exec(self, feedback_fn):
6463 """Deactivate the disks
6466 instance = self.instance
6468 _ShutdownInstanceDisks(self, instance)
6470 _SafeShutdownInstanceDisks(self, instance)
6473 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6474 """Shutdown block devices of an instance.
6476 This function checks if an instance is running, before calling
6477 _ShutdownInstanceDisks.
6480 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6481 _ShutdownInstanceDisks(lu, instance, disks=disks)
6484 def _ExpandCheckDisks(instance, disks):
6485 """Return the instance disks selected by the disks list
6487 @type disks: list of L{objects.Disk} or None
6488 @param disks: selected disks
6489 @rtype: list of L{objects.Disk}
6490 @return: selected instance disks to act on
6494 return instance.disks
6496 if not set(disks).issubset(instance.disks):
6497 raise errors.ProgrammerError("Can only act on disks belonging to the"
6502 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6503 """Shutdown block devices of an instance.
6505 This does the shutdown on all nodes of the instance.
6507 If the ignore_primary is false, errors on the primary node are
6512 disks = _ExpandCheckDisks(instance, disks)
6515 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6516 lu.cfg.SetDiskID(top_disk, node)
6517 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6518 msg = result.fail_msg
6520 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6521 disk.iv_name, node, msg)
6522 if ((node == instance.primary_node and not ignore_primary) or
6523 (node != instance.primary_node and not result.offline)):
6528 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6529 """Checks if a node has enough free memory.
6531 This function check if a given node has the needed amount of free
6532 memory. In case the node has less memory or we cannot get the
6533 information from the node, this function raise an OpPrereqError
6536 @type lu: C{LogicalUnit}
6537 @param lu: a logical unit from which we get configuration data
6539 @param node: the node to check
6540 @type reason: C{str}
6541 @param reason: string to use in the error message
6542 @type requested: C{int}
6543 @param requested: the amount of memory in MiB to check for
6544 @type hypervisor_name: C{str}
6545 @param hypervisor_name: the hypervisor to ask for memory stats
6547 @return: node current free memory
6548 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6549 we cannot check the node
6552 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6553 nodeinfo[node].Raise("Can't get data from node %s" % node,
6554 prereq=True, ecode=errors.ECODE_ENVIRON)
6555 (_, _, (hv_info, )) = nodeinfo[node].payload
6557 free_mem = hv_info.get("memory_free", None)
6558 if not isinstance(free_mem, int):
6559 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6560 " was '%s'" % (node, free_mem),
6561 errors.ECODE_ENVIRON)
6562 if requested > free_mem:
6563 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6564 " needed %s MiB, available %s MiB" %
6565 (node, reason, requested, free_mem),
6570 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6571 """Checks if nodes have enough free disk space in the all VGs.
6573 This function check if all given nodes have the needed amount of
6574 free disk. In case any node has less disk or we cannot get the
6575 information from the node, this function raise an OpPrereqError
6578 @type lu: C{LogicalUnit}
6579 @param lu: a logical unit from which we get configuration data
6580 @type nodenames: C{list}
6581 @param nodenames: the list of node names to check
6582 @type req_sizes: C{dict}
6583 @param req_sizes: the hash of vg and corresponding amount of disk in
6585 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6586 or we cannot check the node
6589 for vg, req_size in req_sizes.items():
6590 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6593 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6594 """Checks if nodes have enough free disk space in the specified VG.
6596 This function check if all given nodes have the needed amount of
6597 free disk. In case any node has less disk or we cannot get the
6598 information from the node, this function raise an OpPrereqError
6601 @type lu: C{LogicalUnit}
6602 @param lu: a logical unit from which we get configuration data
6603 @type nodenames: C{list}
6604 @param nodenames: the list of node names to check
6606 @param vg: the volume group to check
6607 @type requested: C{int}
6608 @param requested: the amount of disk in MiB to check for
6609 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6610 or we cannot check the node
6613 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6614 for node in nodenames:
6615 info = nodeinfo[node]
6616 info.Raise("Cannot get current information from node %s" % node,
6617 prereq=True, ecode=errors.ECODE_ENVIRON)
6618 (_, (vg_info, ), _) = info.payload
6619 vg_free = vg_info.get("vg_free", None)
6620 if not isinstance(vg_free, int):
6621 raise errors.OpPrereqError("Can't compute free disk space on node"
6622 " %s for vg %s, result was '%s'" %
6623 (node, vg, vg_free), errors.ECODE_ENVIRON)
6624 if requested > vg_free:
6625 raise errors.OpPrereqError("Not enough disk space on target node %s"
6626 " vg %s: required %d MiB, available %d MiB" %
6627 (node, vg, requested, vg_free),
6631 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6632 """Checks if nodes have enough physical CPUs
6634 This function checks if all given nodes have the needed number of
6635 physical CPUs. In case any node has less CPUs or we cannot get the
6636 information from the node, this function raises an OpPrereqError
6639 @type lu: C{LogicalUnit}
6640 @param lu: a logical unit from which we get configuration data
6641 @type nodenames: C{list}
6642 @param nodenames: the list of node names to check
6643 @type requested: C{int}
6644 @param requested: the minimum acceptable number of physical CPUs
6645 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6646 or we cannot check the node
6649 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6650 for node in nodenames:
6651 info = nodeinfo[node]
6652 info.Raise("Cannot get current information from node %s" % node,
6653 prereq=True, ecode=errors.ECODE_ENVIRON)
6654 (_, _, (hv_info, )) = info.payload
6655 num_cpus = hv_info.get("cpu_total", None)
6656 if not isinstance(num_cpus, int):
6657 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6658 " on node %s, result was '%s'" %
6659 (node, num_cpus), errors.ECODE_ENVIRON)
6660 if requested > num_cpus:
6661 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6662 "required" % (node, num_cpus, requested),
6666 class LUInstanceStartup(LogicalUnit):
6667 """Starts an instance.
6670 HPATH = "instance-start"
6671 HTYPE = constants.HTYPE_INSTANCE
6674 def CheckArguments(self):
6676 if self.op.beparams:
6677 # fill the beparams dict
6678 objects.UpgradeBeParams(self.op.beparams)
6679 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6681 def ExpandNames(self):
6682 self._ExpandAndLockInstance()
6683 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6685 def DeclareLocks(self, level):
6686 if level == locking.LEVEL_NODE_RES:
6687 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6689 def BuildHooksEnv(self):
6692 This runs on master, primary and secondary nodes of the instance.
6696 "FORCE": self.op.force,
6699 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6703 def BuildHooksNodes(self):
6704 """Build hooks nodes.
6707 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6710 def CheckPrereq(self):
6711 """Check prerequisites.
6713 This checks that the instance is in the cluster.
6716 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6717 assert self.instance is not None, \
6718 "Cannot retrieve locked instance %s" % self.op.instance_name
6721 if self.op.hvparams:
6722 # check hypervisor parameter syntax (locally)
6723 cluster = self.cfg.GetClusterInfo()
6724 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6725 filled_hvp = cluster.FillHV(instance)
6726 filled_hvp.update(self.op.hvparams)
6727 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6728 hv_type.CheckParameterSyntax(filled_hvp)
6729 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6731 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6733 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6735 if self.primary_offline and self.op.ignore_offline_nodes:
6736 self.proc.LogWarning("Ignoring offline primary node")
6738 if self.op.hvparams or self.op.beparams:
6739 self.proc.LogWarning("Overridden parameters are ignored")
6741 _CheckNodeOnline(self, instance.primary_node)
6743 bep = self.cfg.GetClusterInfo().FillBE(instance)
6744 bep.update(self.op.beparams)
6746 # check bridges existence
6747 _CheckInstanceBridgesExist(self, instance)
6749 remote_info = self.rpc.call_instance_info(instance.primary_node,
6751 instance.hypervisor)
6752 remote_info.Raise("Error checking node %s" % instance.primary_node,
6753 prereq=True, ecode=errors.ECODE_ENVIRON)
6754 if not remote_info.payload: # not running already
6755 _CheckNodeFreeMemory(self, instance.primary_node,
6756 "starting instance %s" % instance.name,
6757 bep[constants.BE_MINMEM], instance.hypervisor)
6759 def Exec(self, feedback_fn):
6760 """Start the instance.
6763 instance = self.instance
6764 force = self.op.force
6766 if not self.op.no_remember:
6767 self.cfg.MarkInstanceUp(instance.name)
6769 if self.primary_offline:
6770 assert self.op.ignore_offline_nodes
6771 self.proc.LogInfo("Primary node offline, marked instance as started")
6773 node_current = instance.primary_node
6775 _StartInstanceDisks(self, instance, force)
6778 self.rpc.call_instance_start(node_current,
6779 (instance, self.op.hvparams,
6781 self.op.startup_paused)
6782 msg = result.fail_msg
6784 _ShutdownInstanceDisks(self, instance)
6785 raise errors.OpExecError("Could not start instance: %s" % msg)
6788 class LUInstanceReboot(LogicalUnit):
6789 """Reboot an instance.
6792 HPATH = "instance-reboot"
6793 HTYPE = constants.HTYPE_INSTANCE
6796 def ExpandNames(self):
6797 self._ExpandAndLockInstance()
6799 def BuildHooksEnv(self):
6802 This runs on master, primary and secondary nodes of the instance.
6806 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6807 "REBOOT_TYPE": self.op.reboot_type,
6808 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6811 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6815 def BuildHooksNodes(self):
6816 """Build hooks nodes.
6819 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6822 def CheckPrereq(self):
6823 """Check prerequisites.
6825 This checks that the instance is in the cluster.
6828 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6829 assert self.instance is not None, \
6830 "Cannot retrieve locked instance %s" % self.op.instance_name
6831 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6832 _CheckNodeOnline(self, instance.primary_node)
6834 # check bridges existence
6835 _CheckInstanceBridgesExist(self, instance)
6837 def Exec(self, feedback_fn):
6838 """Reboot the instance.
6841 instance = self.instance
6842 ignore_secondaries = self.op.ignore_secondaries
6843 reboot_type = self.op.reboot_type
6845 remote_info = self.rpc.call_instance_info(instance.primary_node,
6847 instance.hypervisor)
6848 remote_info.Raise("Error checking node %s" % instance.primary_node)
6849 instance_running = bool(remote_info.payload)
6851 node_current = instance.primary_node
6853 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6854 constants.INSTANCE_REBOOT_HARD]:
6855 for disk in instance.disks:
6856 self.cfg.SetDiskID(disk, node_current)
6857 result = self.rpc.call_instance_reboot(node_current, instance,
6859 self.op.shutdown_timeout)
6860 result.Raise("Could not reboot instance")
6862 if instance_running:
6863 result = self.rpc.call_instance_shutdown(node_current, instance,
6864 self.op.shutdown_timeout)
6865 result.Raise("Could not shutdown instance for full reboot")
6866 _ShutdownInstanceDisks(self, instance)
6868 self.LogInfo("Instance %s was already stopped, starting now",
6870 _StartInstanceDisks(self, instance, ignore_secondaries)
6871 result = self.rpc.call_instance_start(node_current,
6872 (instance, None, None), False)
6873 msg = result.fail_msg
6875 _ShutdownInstanceDisks(self, instance)
6876 raise errors.OpExecError("Could not start instance for"
6877 " full reboot: %s" % msg)
6879 self.cfg.MarkInstanceUp(instance.name)
6882 class LUInstanceShutdown(LogicalUnit):
6883 """Shutdown an instance.
6886 HPATH = "instance-stop"
6887 HTYPE = constants.HTYPE_INSTANCE
6890 def ExpandNames(self):
6891 self._ExpandAndLockInstance()
6893 def BuildHooksEnv(self):
6896 This runs on master, primary and secondary nodes of the instance.
6899 env = _BuildInstanceHookEnvByObject(self, self.instance)
6900 env["TIMEOUT"] = self.op.timeout
6903 def BuildHooksNodes(self):
6904 """Build hooks nodes.
6907 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6910 def CheckPrereq(self):
6911 """Check prerequisites.
6913 This checks that the instance is in the cluster.
6916 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6917 assert self.instance is not None, \
6918 "Cannot retrieve locked instance %s" % self.op.instance_name
6920 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6922 self.primary_offline = \
6923 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6925 if self.primary_offline and self.op.ignore_offline_nodes:
6926 self.proc.LogWarning("Ignoring offline primary node")
6928 _CheckNodeOnline(self, self.instance.primary_node)
6930 def Exec(self, feedback_fn):
6931 """Shutdown the instance.
6934 instance = self.instance
6935 node_current = instance.primary_node
6936 timeout = self.op.timeout
6938 if not self.op.no_remember:
6939 self.cfg.MarkInstanceDown(instance.name)
6941 if self.primary_offline:
6942 assert self.op.ignore_offline_nodes
6943 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6945 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6946 msg = result.fail_msg
6948 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6950 _ShutdownInstanceDisks(self, instance)
6953 class LUInstanceReinstall(LogicalUnit):
6954 """Reinstall an instance.
6957 HPATH = "instance-reinstall"
6958 HTYPE = constants.HTYPE_INSTANCE
6961 def ExpandNames(self):
6962 self._ExpandAndLockInstance()
6964 def BuildHooksEnv(self):
6967 This runs on master, primary and secondary nodes of the instance.
6970 return _BuildInstanceHookEnvByObject(self, self.instance)
6972 def BuildHooksNodes(self):
6973 """Build hooks nodes.
6976 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6979 def CheckPrereq(self):
6980 """Check prerequisites.
6982 This checks that the instance is in the cluster and is not running.
6985 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6986 assert instance is not None, \
6987 "Cannot retrieve locked instance %s" % self.op.instance_name
6988 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6989 " offline, cannot reinstall")
6991 if instance.disk_template == constants.DT_DISKLESS:
6992 raise errors.OpPrereqError("Instance '%s' has no disks" %
6993 self.op.instance_name,
6995 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6997 if self.op.os_type is not None:
6999 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7000 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7001 instance_os = self.op.os_type
7003 instance_os = instance.os
7005 nodelist = list(instance.all_nodes)
7007 if self.op.osparams:
7008 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7009 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7010 self.os_inst = i_osdict # the new dict (without defaults)
7014 self.instance = instance
7016 def Exec(self, feedback_fn):
7017 """Reinstall the instance.
7020 inst = self.instance
7022 if self.op.os_type is not None:
7023 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7024 inst.os = self.op.os_type
7025 # Write to configuration
7026 self.cfg.Update(inst, feedback_fn)
7028 _StartInstanceDisks(self, inst, None)
7030 feedback_fn("Running the instance OS create scripts...")
7031 # FIXME: pass debug option from opcode to backend
7032 result = self.rpc.call_instance_os_add(inst.primary_node,
7033 (inst, self.os_inst), True,
7034 self.op.debug_level)
7035 result.Raise("Could not install OS for instance %s on node %s" %
7036 (inst.name, inst.primary_node))
7038 _ShutdownInstanceDisks(self, inst)
7041 class LUInstanceRecreateDisks(LogicalUnit):
7042 """Recreate an instance's missing disks.
7045 HPATH = "instance-recreate-disks"
7046 HTYPE = constants.HTYPE_INSTANCE
7049 _MODIFYABLE = frozenset([
7050 constants.IDISK_SIZE,
7051 constants.IDISK_MODE,
7054 # New or changed disk parameters may have different semantics
7055 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7056 constants.IDISK_ADOPT,
7058 # TODO: Implement support changing VG while recreating
7060 constants.IDISK_METAVG,
7063 def CheckArguments(self):
7064 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7065 # Normalize and convert deprecated list of disk indices
7066 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7068 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7070 raise errors.OpPrereqError("Some disks have been specified more than"
7071 " once: %s" % utils.CommaJoin(duplicates),
7074 for (idx, params) in self.op.disks:
7075 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7076 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7078 raise errors.OpPrereqError("Parameters for disk %s try to change"
7079 " unmodifyable parameter(s): %s" %
7080 (idx, utils.CommaJoin(unsupported)),
7083 def ExpandNames(self):
7084 self._ExpandAndLockInstance()
7085 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7087 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7088 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7090 self.needed_locks[locking.LEVEL_NODE] = []
7091 self.needed_locks[locking.LEVEL_NODE_RES] = []
7093 def DeclareLocks(self, level):
7094 if level == locking.LEVEL_NODE:
7095 # if we replace the nodes, we only need to lock the old primary,
7096 # otherwise we need to lock all nodes for disk re-creation
7097 primary_only = bool(self.op.nodes)
7098 self._LockInstancesNodes(primary_only=primary_only)
7099 elif level == locking.LEVEL_NODE_RES:
7101 self.needed_locks[locking.LEVEL_NODE_RES] = \
7102 self.needed_locks[locking.LEVEL_NODE][:]
7104 def BuildHooksEnv(self):
7107 This runs on master, primary and secondary nodes of the instance.
7110 return _BuildInstanceHookEnvByObject(self, self.instance)
7112 def BuildHooksNodes(self):
7113 """Build hooks nodes.
7116 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7119 def CheckPrereq(self):
7120 """Check prerequisites.
7122 This checks that the instance is in the cluster and is not running.
7125 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7126 assert instance is not None, \
7127 "Cannot retrieve locked instance %s" % self.op.instance_name
7129 if len(self.op.nodes) != len(instance.all_nodes):
7130 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7131 " %d replacement nodes were specified" %
7132 (instance.name, len(instance.all_nodes),
7133 len(self.op.nodes)),
7135 assert instance.disk_template != constants.DT_DRBD8 or \
7136 len(self.op.nodes) == 2
7137 assert instance.disk_template != constants.DT_PLAIN or \
7138 len(self.op.nodes) == 1
7139 primary_node = self.op.nodes[0]
7141 primary_node = instance.primary_node
7142 _CheckNodeOnline(self, primary_node)
7144 if instance.disk_template == constants.DT_DISKLESS:
7145 raise errors.OpPrereqError("Instance '%s' has no disks" %
7146 self.op.instance_name, errors.ECODE_INVAL)
7148 # if we replace nodes *and* the old primary is offline, we don't
7150 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7151 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7152 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7153 if not (self.op.nodes and old_pnode.offline):
7154 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7155 msg="cannot recreate disks")
7158 self.disks = dict(self.op.disks)
7160 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7162 maxidx = max(self.disks.keys())
7163 if maxidx >= len(instance.disks):
7164 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7167 if (self.op.nodes and
7168 sorted(self.disks.keys()) != range(len(instance.disks))):
7169 raise errors.OpPrereqError("Can't recreate disks partially and"
7170 " change the nodes at the same time",
7173 self.instance = instance
7175 def Exec(self, feedback_fn):
7176 """Recreate the disks.
7179 instance = self.instance
7181 assert (self.owned_locks(locking.LEVEL_NODE) ==
7182 self.owned_locks(locking.LEVEL_NODE_RES))
7185 mods = [] # keeps track of needed changes
7187 for idx, disk in enumerate(instance.disks):
7189 changes = self.disks[idx]
7191 # Disk should not be recreated
7195 # update secondaries for disks, if needed
7196 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7197 # need to update the nodes and minors
7198 assert len(self.op.nodes) == 2
7199 assert len(disk.logical_id) == 6 # otherwise disk internals
7201 (_, _, old_port, _, _, old_secret) = disk.logical_id
7202 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7203 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7204 new_minors[0], new_minors[1], old_secret)
7205 assert len(disk.logical_id) == len(new_id)
7209 mods.append((idx, new_id, changes))
7211 # now that we have passed all asserts above, we can apply the mods
7212 # in a single run (to avoid partial changes)
7213 for idx, new_id, changes in mods:
7214 disk = instance.disks[idx]
7215 if new_id is not None:
7216 assert disk.dev_type == constants.LD_DRBD8
7217 disk.logical_id = new_id
7219 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7220 mode=changes.get(constants.IDISK_MODE, None))
7222 # change primary node, if needed
7224 instance.primary_node = self.op.nodes[0]
7225 self.LogWarning("Changing the instance's nodes, you will have to"
7226 " remove any disks left on the older nodes manually")
7229 self.cfg.Update(instance, feedback_fn)
7231 _CreateDisks(self, instance, to_skip=to_skip)
7234 class LUInstanceRename(LogicalUnit):
7235 """Rename an instance.
7238 HPATH = "instance-rename"
7239 HTYPE = constants.HTYPE_INSTANCE
7241 def CheckArguments(self):
7245 if self.op.ip_check and not self.op.name_check:
7246 # TODO: make the ip check more flexible and not depend on the name check
7247 raise errors.OpPrereqError("IP address check requires a name check",
7250 def BuildHooksEnv(self):
7253 This runs on master, primary and secondary nodes of the instance.
7256 env = _BuildInstanceHookEnvByObject(self, self.instance)
7257 env["INSTANCE_NEW_NAME"] = self.op.new_name
7260 def BuildHooksNodes(self):
7261 """Build hooks nodes.
7264 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7267 def CheckPrereq(self):
7268 """Check prerequisites.
7270 This checks that the instance is in the cluster and is not running.
7273 self.op.instance_name = _ExpandInstanceName(self.cfg,
7274 self.op.instance_name)
7275 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7276 assert instance is not None
7277 _CheckNodeOnline(self, instance.primary_node)
7278 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7279 msg="cannot rename")
7280 self.instance = instance
7282 new_name = self.op.new_name
7283 if self.op.name_check:
7284 hostname = netutils.GetHostname(name=new_name)
7285 if hostname.name != new_name:
7286 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7288 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7289 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7290 " same as given hostname '%s'") %
7291 (hostname.name, self.op.new_name),
7293 new_name = self.op.new_name = hostname.name
7294 if (self.op.ip_check and
7295 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7296 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7297 (hostname.ip, new_name),
7298 errors.ECODE_NOTUNIQUE)
7300 instance_list = self.cfg.GetInstanceList()
7301 if new_name in instance_list and new_name != instance.name:
7302 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7303 new_name, errors.ECODE_EXISTS)
7305 def Exec(self, feedback_fn):
7306 """Rename the instance.
7309 inst = self.instance
7310 old_name = inst.name
7312 rename_file_storage = False
7313 if (inst.disk_template in constants.DTS_FILEBASED and
7314 self.op.new_name != inst.name):
7315 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7316 rename_file_storage = True
7318 self.cfg.RenameInstance(inst.name, self.op.new_name)
7319 # Change the instance lock. This is definitely safe while we hold the BGL.
7320 # Otherwise the new lock would have to be added in acquired mode.
7322 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7323 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7325 # re-read the instance from the configuration after rename
7326 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7328 if rename_file_storage:
7329 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7330 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7331 old_file_storage_dir,
7332 new_file_storage_dir)
7333 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7334 " (but the instance has been renamed in Ganeti)" %
7335 (inst.primary_node, old_file_storage_dir,
7336 new_file_storage_dir))
7338 _StartInstanceDisks(self, inst, None)
7340 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7341 old_name, self.op.debug_level)
7342 msg = result.fail_msg
7344 msg = ("Could not run OS rename script for instance %s on node %s"
7345 " (but the instance has been renamed in Ganeti): %s" %
7346 (inst.name, inst.primary_node, msg))
7347 self.proc.LogWarning(msg)
7349 _ShutdownInstanceDisks(self, inst)
7354 class LUInstanceRemove(LogicalUnit):
7355 """Remove an instance.
7358 HPATH = "instance-remove"
7359 HTYPE = constants.HTYPE_INSTANCE
7362 def ExpandNames(self):
7363 self._ExpandAndLockInstance()
7364 self.needed_locks[locking.LEVEL_NODE] = []
7365 self.needed_locks[locking.LEVEL_NODE_RES] = []
7366 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7368 def DeclareLocks(self, level):
7369 if level == locking.LEVEL_NODE:
7370 self._LockInstancesNodes()
7371 elif level == locking.LEVEL_NODE_RES:
7373 self.needed_locks[locking.LEVEL_NODE_RES] = \
7374 self.needed_locks[locking.LEVEL_NODE][:]
7376 def BuildHooksEnv(self):
7379 This runs on master, primary and secondary nodes of the instance.
7382 env = _BuildInstanceHookEnvByObject(self, self.instance)
7383 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7386 def BuildHooksNodes(self):
7387 """Build hooks nodes.
7390 nl = [self.cfg.GetMasterNode()]
7391 nl_post = list(self.instance.all_nodes) + nl
7392 return (nl, nl_post)
7394 def CheckPrereq(self):
7395 """Check prerequisites.
7397 This checks that the instance is in the cluster.
7400 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7401 assert self.instance is not None, \
7402 "Cannot retrieve locked instance %s" % self.op.instance_name
7404 def Exec(self, feedback_fn):
7405 """Remove the instance.
7408 instance = self.instance
7409 logging.info("Shutting down instance %s on node %s",
7410 instance.name, instance.primary_node)
7412 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7413 self.op.shutdown_timeout)
7414 msg = result.fail_msg
7416 if self.op.ignore_failures:
7417 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7419 raise errors.OpExecError("Could not shutdown instance %s on"
7421 (instance.name, instance.primary_node, msg))
7423 assert (self.owned_locks(locking.LEVEL_NODE) ==
7424 self.owned_locks(locking.LEVEL_NODE_RES))
7425 assert not (set(instance.all_nodes) -
7426 self.owned_locks(locking.LEVEL_NODE)), \
7427 "Not owning correct locks"
7429 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7432 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7433 """Utility function to remove an instance.
7436 logging.info("Removing block devices for instance %s", instance.name)
7438 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7439 if not ignore_failures:
7440 raise errors.OpExecError("Can't remove instance's disks")
7441 feedback_fn("Warning: can't remove instance's disks")
7443 logging.info("Removing instance %s out of cluster config", instance.name)
7445 lu.cfg.RemoveInstance(instance.name)
7447 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7448 "Instance lock removal conflict"
7450 # Remove lock for the instance
7451 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7454 class LUInstanceQuery(NoHooksLU):
7455 """Logical unit for querying instances.
7458 # pylint: disable=W0142
7461 def CheckArguments(self):
7462 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7463 self.op.output_fields, self.op.use_locking)
7465 def ExpandNames(self):
7466 self.iq.ExpandNames(self)
7468 def DeclareLocks(self, level):
7469 self.iq.DeclareLocks(self, level)
7471 def Exec(self, feedback_fn):
7472 return self.iq.OldStyleQuery(self)
7475 class LUInstanceFailover(LogicalUnit):
7476 """Failover an instance.
7479 HPATH = "instance-failover"
7480 HTYPE = constants.HTYPE_INSTANCE
7483 def CheckArguments(self):
7484 """Check the arguments.
7487 self.iallocator = getattr(self.op, "iallocator", None)
7488 self.target_node = getattr(self.op, "target_node", None)
7490 def ExpandNames(self):
7491 self._ExpandAndLockInstance()
7493 if self.op.target_node is not None:
7494 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7496 self.needed_locks[locking.LEVEL_NODE] = []
7497 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7499 self.needed_locks[locking.LEVEL_NODE_RES] = []
7500 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7502 ignore_consistency = self.op.ignore_consistency
7503 shutdown_timeout = self.op.shutdown_timeout
7504 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7507 ignore_consistency=ignore_consistency,
7508 shutdown_timeout=shutdown_timeout,
7509 ignore_ipolicy=self.op.ignore_ipolicy)
7510 self.tasklets = [self._migrater]
7512 def DeclareLocks(self, level):
7513 if level == locking.LEVEL_NODE:
7514 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7515 if instance.disk_template in constants.DTS_EXT_MIRROR:
7516 if self.op.target_node is None:
7517 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7519 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7520 self.op.target_node]
7521 del self.recalculate_locks[locking.LEVEL_NODE]
7523 self._LockInstancesNodes()
7524 elif level == locking.LEVEL_NODE_RES:
7526 self.needed_locks[locking.LEVEL_NODE_RES] = \
7527 self.needed_locks[locking.LEVEL_NODE][:]
7529 def BuildHooksEnv(self):
7532 This runs on master, primary and secondary nodes of the instance.
7535 instance = self._migrater.instance
7536 source_node = instance.primary_node
7537 target_node = self.op.target_node
7539 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7540 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7541 "OLD_PRIMARY": source_node,
7542 "NEW_PRIMARY": target_node,
7545 if instance.disk_template in constants.DTS_INT_MIRROR:
7546 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7547 env["NEW_SECONDARY"] = source_node
7549 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7551 env.update(_BuildInstanceHookEnvByObject(self, instance))
7555 def BuildHooksNodes(self):
7556 """Build hooks nodes.
7559 instance = self._migrater.instance
7560 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7561 return (nl, nl + [instance.primary_node])
7564 class LUInstanceMigrate(LogicalUnit):
7565 """Migrate an instance.
7567 This is migration without shutting down, compared to the failover,
7568 which is done with shutdown.
7571 HPATH = "instance-migrate"
7572 HTYPE = constants.HTYPE_INSTANCE
7575 def ExpandNames(self):
7576 self._ExpandAndLockInstance()
7578 if self.op.target_node is not None:
7579 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7581 self.needed_locks[locking.LEVEL_NODE] = []
7582 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7584 self.needed_locks[locking.LEVEL_NODE] = []
7585 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7588 TLMigrateInstance(self, self.op.instance_name,
7589 cleanup=self.op.cleanup,
7591 fallback=self.op.allow_failover,
7592 allow_runtime_changes=self.op.allow_runtime_changes,
7593 ignore_ipolicy=self.op.ignore_ipolicy)
7594 self.tasklets = [self._migrater]
7596 def DeclareLocks(self, level):
7597 if level == locking.LEVEL_NODE:
7598 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7599 if instance.disk_template in constants.DTS_EXT_MIRROR:
7600 if self.op.target_node is None:
7601 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7603 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7604 self.op.target_node]
7605 del self.recalculate_locks[locking.LEVEL_NODE]
7607 self._LockInstancesNodes()
7608 elif level == locking.LEVEL_NODE_RES:
7610 self.needed_locks[locking.LEVEL_NODE_RES] = \
7611 self.needed_locks[locking.LEVEL_NODE][:]
7613 def BuildHooksEnv(self):
7616 This runs on master, primary and secondary nodes of the instance.
7619 instance = self._migrater.instance
7620 source_node = instance.primary_node
7621 target_node = self.op.target_node
7622 env = _BuildInstanceHookEnvByObject(self, instance)
7624 "MIGRATE_LIVE": self._migrater.live,
7625 "MIGRATE_CLEANUP": self.op.cleanup,
7626 "OLD_PRIMARY": source_node,
7627 "NEW_PRIMARY": target_node,
7628 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7631 if instance.disk_template in constants.DTS_INT_MIRROR:
7632 env["OLD_SECONDARY"] = target_node
7633 env["NEW_SECONDARY"] = source_node
7635 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7639 def BuildHooksNodes(self):
7640 """Build hooks nodes.
7643 instance = self._migrater.instance
7644 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7645 return (nl, nl + [instance.primary_node])
7648 class LUInstanceMove(LogicalUnit):
7649 """Move an instance by data-copying.
7652 HPATH = "instance-move"
7653 HTYPE = constants.HTYPE_INSTANCE
7656 def ExpandNames(self):
7657 self._ExpandAndLockInstance()
7658 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7659 self.op.target_node = target_node
7660 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7661 self.needed_locks[locking.LEVEL_NODE_RES] = []
7662 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7664 def DeclareLocks(self, level):
7665 if level == locking.LEVEL_NODE:
7666 self._LockInstancesNodes(primary_only=True)
7667 elif level == locking.LEVEL_NODE_RES:
7669 self.needed_locks[locking.LEVEL_NODE_RES] = \
7670 self.needed_locks[locking.LEVEL_NODE][:]
7672 def BuildHooksEnv(self):
7675 This runs on master, primary and secondary nodes of the instance.
7679 "TARGET_NODE": self.op.target_node,
7680 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7682 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7685 def BuildHooksNodes(self):
7686 """Build hooks nodes.
7690 self.cfg.GetMasterNode(),
7691 self.instance.primary_node,
7692 self.op.target_node,
7696 def CheckPrereq(self):
7697 """Check prerequisites.
7699 This checks that the instance is in the cluster.
7702 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7703 assert self.instance is not None, \
7704 "Cannot retrieve locked instance %s" % self.op.instance_name
7706 node = self.cfg.GetNodeInfo(self.op.target_node)
7707 assert node is not None, \
7708 "Cannot retrieve locked node %s" % self.op.target_node
7710 self.target_node = target_node = node.name
7712 if target_node == instance.primary_node:
7713 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7714 (instance.name, target_node),
7717 bep = self.cfg.GetClusterInfo().FillBE(instance)
7719 for idx, dsk in enumerate(instance.disks):
7720 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7721 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7722 " cannot copy" % idx, errors.ECODE_STATE)
7724 _CheckNodeOnline(self, target_node)
7725 _CheckNodeNotDrained(self, target_node)
7726 _CheckNodeVmCapable(self, target_node)
7727 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7728 self.cfg.GetNodeGroup(node.group))
7729 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7730 ignore=self.op.ignore_ipolicy)
7732 if instance.admin_state == constants.ADMINST_UP:
7733 # check memory requirements on the secondary node
7734 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7735 instance.name, bep[constants.BE_MAXMEM],
7736 instance.hypervisor)
7738 self.LogInfo("Not checking memory on the secondary node as"
7739 " instance will not be started")
7741 # check bridge existance
7742 _CheckInstanceBridgesExist(self, instance, node=target_node)
7744 def Exec(self, feedback_fn):
7745 """Move an instance.
7747 The move is done by shutting it down on its present node, copying
7748 the data over (slow) and starting it on the new node.
7751 instance = self.instance
7753 source_node = instance.primary_node
7754 target_node = self.target_node
7756 self.LogInfo("Shutting down instance %s on source node %s",
7757 instance.name, source_node)
7759 assert (self.owned_locks(locking.LEVEL_NODE) ==
7760 self.owned_locks(locking.LEVEL_NODE_RES))
7762 result = self.rpc.call_instance_shutdown(source_node, instance,
7763 self.op.shutdown_timeout)
7764 msg = result.fail_msg
7766 if self.op.ignore_consistency:
7767 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7768 " Proceeding anyway. Please make sure node"
7769 " %s is down. Error details: %s",
7770 instance.name, source_node, source_node, msg)
7772 raise errors.OpExecError("Could not shutdown instance %s on"
7774 (instance.name, source_node, msg))
7776 # create the target disks
7778 _CreateDisks(self, instance, target_node=target_node)
7779 except errors.OpExecError:
7780 self.LogWarning("Device creation failed, reverting...")
7782 _RemoveDisks(self, instance, target_node=target_node)
7784 self.cfg.ReleaseDRBDMinors(instance.name)
7787 cluster_name = self.cfg.GetClusterInfo().cluster_name
7790 # activate, get path, copy the data over
7791 for idx, disk in enumerate(instance.disks):
7792 self.LogInfo("Copying data for disk %d", idx)
7793 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7794 instance.name, True, idx)
7796 self.LogWarning("Can't assemble newly created disk %d: %s",
7797 idx, result.fail_msg)
7798 errs.append(result.fail_msg)
7800 dev_path = result.payload
7801 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7802 target_node, dev_path,
7805 self.LogWarning("Can't copy data over for disk %d: %s",
7806 idx, result.fail_msg)
7807 errs.append(result.fail_msg)
7811 self.LogWarning("Some disks failed to copy, aborting")
7813 _RemoveDisks(self, instance, target_node=target_node)
7815 self.cfg.ReleaseDRBDMinors(instance.name)
7816 raise errors.OpExecError("Errors during disk copy: %s" %
7819 instance.primary_node = target_node
7820 self.cfg.Update(instance, feedback_fn)
7822 self.LogInfo("Removing the disks on the original node")
7823 _RemoveDisks(self, instance, target_node=source_node)
7825 # Only start the instance if it's marked as up
7826 if instance.admin_state == constants.ADMINST_UP:
7827 self.LogInfo("Starting instance %s on node %s",
7828 instance.name, target_node)
7830 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7831 ignore_secondaries=True)
7833 _ShutdownInstanceDisks(self, instance)
7834 raise errors.OpExecError("Can't activate the instance's disks")
7836 result = self.rpc.call_instance_start(target_node,
7837 (instance, None, None), False)
7838 msg = result.fail_msg
7840 _ShutdownInstanceDisks(self, instance)
7841 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7842 (instance.name, target_node, msg))
7845 class LUNodeMigrate(LogicalUnit):
7846 """Migrate all instances from a node.
7849 HPATH = "node-migrate"
7850 HTYPE = constants.HTYPE_NODE
7853 def CheckArguments(self):
7856 def ExpandNames(self):
7857 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7859 self.share_locks = _ShareAll()
7860 self.needed_locks = {
7861 locking.LEVEL_NODE: [self.op.node_name],
7864 def BuildHooksEnv(self):
7867 This runs on the master, the primary and all the secondaries.
7871 "NODE_NAME": self.op.node_name,
7872 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7875 def BuildHooksNodes(self):
7876 """Build hooks nodes.
7879 nl = [self.cfg.GetMasterNode()]
7882 def CheckPrereq(self):
7885 def Exec(self, feedback_fn):
7886 # Prepare jobs for migration instances
7887 allow_runtime_changes = self.op.allow_runtime_changes
7889 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7892 iallocator=self.op.iallocator,
7893 target_node=self.op.target_node,
7894 allow_runtime_changes=allow_runtime_changes,
7895 ignore_ipolicy=self.op.ignore_ipolicy)]
7896 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7899 # TODO: Run iallocator in this opcode and pass correct placement options to
7900 # OpInstanceMigrate. Since other jobs can modify the cluster between
7901 # running the iallocator and the actual migration, a good consistency model
7902 # will have to be found.
7904 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7905 frozenset([self.op.node_name]))
7907 return ResultWithJobs(jobs)
7910 class TLMigrateInstance(Tasklet):
7911 """Tasklet class for instance migration.
7914 @ivar live: whether the migration will be done live or non-live;
7915 this variable is initalized only after CheckPrereq has run
7916 @type cleanup: boolean
7917 @ivar cleanup: Wheater we cleanup from a failed migration
7918 @type iallocator: string
7919 @ivar iallocator: The iallocator used to determine target_node
7920 @type target_node: string
7921 @ivar target_node: If given, the target_node to reallocate the instance to
7922 @type failover: boolean
7923 @ivar failover: Whether operation results in failover or migration
7924 @type fallback: boolean
7925 @ivar fallback: Whether fallback to failover is allowed if migration not
7927 @type ignore_consistency: boolean
7928 @ivar ignore_consistency: Wheter we should ignore consistency between source
7930 @type shutdown_timeout: int
7931 @ivar shutdown_timeout: In case of failover timeout of the shutdown
7932 @type ignore_ipolicy: bool
7933 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7938 _MIGRATION_POLL_INTERVAL = 1 # seconds
7939 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7941 def __init__(self, lu, instance_name, cleanup=False,
7942 failover=False, fallback=False,
7943 ignore_consistency=False,
7944 allow_runtime_changes=True,
7945 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7946 ignore_ipolicy=False):
7947 """Initializes this class.
7950 Tasklet.__init__(self, lu)
7953 self.instance_name = instance_name
7954 self.cleanup = cleanup
7955 self.live = False # will be overridden later
7956 self.failover = failover
7957 self.fallback = fallback
7958 self.ignore_consistency = ignore_consistency
7959 self.shutdown_timeout = shutdown_timeout
7960 self.ignore_ipolicy = ignore_ipolicy
7961 self.allow_runtime_changes = allow_runtime_changes
7963 def CheckPrereq(self):
7964 """Check prerequisites.
7966 This checks that the instance is in the cluster.
7969 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7970 instance = self.cfg.GetInstanceInfo(instance_name)
7971 assert instance is not None
7972 self.instance = instance
7973 cluster = self.cfg.GetClusterInfo()
7975 if (not self.cleanup and
7976 not instance.admin_state == constants.ADMINST_UP and
7977 not self.failover and self.fallback):
7978 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7979 " switching to failover")
7980 self.failover = True
7982 if instance.disk_template not in constants.DTS_MIRRORED:
7987 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7988 " %s" % (instance.disk_template, text),
7991 if instance.disk_template in constants.DTS_EXT_MIRROR:
7992 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7994 if self.lu.op.iallocator:
7995 self._RunAllocator()
7997 # We set set self.target_node as it is required by
7999 self.target_node = self.lu.op.target_node
8001 # Check that the target node is correct in terms of instance policy
8002 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8003 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8004 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8005 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8006 ignore=self.ignore_ipolicy)
8008 # self.target_node is already populated, either directly or by the
8010 target_node = self.target_node
8011 if self.target_node == instance.primary_node:
8012 raise errors.OpPrereqError("Cannot migrate instance %s"
8013 " to its primary (%s)" %
8014 (instance.name, instance.primary_node))
8016 if len(self.lu.tasklets) == 1:
8017 # It is safe to release locks only when we're the only tasklet
8019 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8020 keep=[instance.primary_node, self.target_node])
8023 secondary_nodes = instance.secondary_nodes
8024 if not secondary_nodes:
8025 raise errors.ConfigurationError("No secondary node but using"
8026 " %s disk template" %
8027 instance.disk_template)
8028 target_node = secondary_nodes[0]
8029 if self.lu.op.iallocator or (self.lu.op.target_node and
8030 self.lu.op.target_node != target_node):
8032 text = "failed over"
8035 raise errors.OpPrereqError("Instances with disk template %s cannot"
8036 " be %s to arbitrary nodes"
8037 " (neither an iallocator nor a target"
8038 " node can be passed)" %
8039 (instance.disk_template, text),
8041 nodeinfo = self.cfg.GetNodeInfo(target_node)
8042 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8043 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8044 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8045 ignore=self.ignore_ipolicy)
8047 i_be = cluster.FillBE(instance)
8049 # check memory requirements on the secondary node
8050 if (not self.cleanup and
8051 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8052 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8053 "migrating instance %s" %
8055 i_be[constants.BE_MINMEM],
8056 instance.hypervisor)
8058 self.lu.LogInfo("Not checking memory on the secondary node as"
8059 " instance will not be started")
8061 # check if failover must be forced instead of migration
8062 if (not self.cleanup and not self.failover and
8063 i_be[constants.BE_ALWAYS_FAILOVER]):
8065 self.lu.LogInfo("Instance configured to always failover; fallback"
8067 self.failover = True
8069 raise errors.OpPrereqError("This instance has been configured to"
8070 " always failover, please allow failover",
8073 # check bridge existance
8074 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8076 if not self.cleanup:
8077 _CheckNodeNotDrained(self.lu, target_node)
8078 if not self.failover:
8079 result = self.rpc.call_instance_migratable(instance.primary_node,
8081 if result.fail_msg and self.fallback:
8082 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8084 self.failover = True
8086 result.Raise("Can't migrate, please use failover",
8087 prereq=True, ecode=errors.ECODE_STATE)
8089 assert not (self.failover and self.cleanup)
8091 if not self.failover:
8092 if self.lu.op.live is not None and self.lu.op.mode is not None:
8093 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8094 " parameters are accepted",
8096 if self.lu.op.live is not None:
8098 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8100 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8101 # reset the 'live' parameter to None so that repeated
8102 # invocations of CheckPrereq do not raise an exception
8103 self.lu.op.live = None
8104 elif self.lu.op.mode is None:
8105 # read the default value from the hypervisor
8106 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8107 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8109 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8111 # Failover is never live
8114 if not (self.failover or self.cleanup):
8115 remote_info = self.rpc.call_instance_info(instance.primary_node,
8117 instance.hypervisor)
8118 remote_info.Raise("Error checking instance on node %s" %
8119 instance.primary_node)
8120 instance_running = bool(remote_info.payload)
8121 if instance_running:
8122 self.current_mem = int(remote_info.payload["memory"])
8124 def _RunAllocator(self):
8125 """Run the allocator based on input opcode.
8128 # FIXME: add a self.ignore_ipolicy option
8129 ial = IAllocator(self.cfg, self.rpc,
8130 mode=constants.IALLOCATOR_MODE_RELOC,
8131 name=self.instance_name,
8132 relocate_from=[self.instance.primary_node],
8135 ial.Run(self.lu.op.iallocator)
8138 raise errors.OpPrereqError("Can't compute nodes using"
8139 " iallocator '%s': %s" %
8140 (self.lu.op.iallocator, ial.info),
8142 if len(ial.result) != ial.required_nodes:
8143 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8144 " of nodes (%s), required %s" %
8145 (self.lu.op.iallocator, len(ial.result),
8146 ial.required_nodes), errors.ECODE_FAULT)
8147 self.target_node = ial.result[0]
8148 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8149 self.instance_name, self.lu.op.iallocator,
8150 utils.CommaJoin(ial.result))
8152 def _WaitUntilSync(self):
8153 """Poll with custom rpc for disk sync.
8155 This uses our own step-based rpc call.
8158 self.feedback_fn("* wait until resync is done")
8162 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8164 (self.instance.disks,
8167 for node, nres in result.items():
8168 nres.Raise("Cannot resync disks on node %s" % node)
8169 node_done, node_percent = nres.payload
8170 all_done = all_done and node_done
8171 if node_percent is not None:
8172 min_percent = min(min_percent, node_percent)
8174 if min_percent < 100:
8175 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8178 def _EnsureSecondary(self, node):
8179 """Demote a node to secondary.
8182 self.feedback_fn("* switching node %s to secondary mode" % node)
8184 for dev in self.instance.disks:
8185 self.cfg.SetDiskID(dev, node)
8187 result = self.rpc.call_blockdev_close(node, self.instance.name,
8188 self.instance.disks)
8189 result.Raise("Cannot change disk to secondary on node %s" % node)
8191 def _GoStandalone(self):
8192 """Disconnect from the network.
8195 self.feedback_fn("* changing into standalone mode")
8196 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8197 self.instance.disks)
8198 for node, nres in result.items():
8199 nres.Raise("Cannot disconnect disks node %s" % node)
8201 def _GoReconnect(self, multimaster):
8202 """Reconnect to the network.
8208 msg = "single-master"
8209 self.feedback_fn("* changing disks into %s mode" % msg)
8210 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8211 (self.instance.disks, self.instance),
8212 self.instance.name, multimaster)
8213 for node, nres in result.items():
8214 nres.Raise("Cannot change disks config on node %s" % node)
8216 def _ExecCleanup(self):
8217 """Try to cleanup after a failed migration.
8219 The cleanup is done by:
8220 - check that the instance is running only on one node
8221 (and update the config if needed)
8222 - change disks on its secondary node to secondary
8223 - wait until disks are fully synchronized
8224 - disconnect from the network
8225 - change disks into single-master mode
8226 - wait again until disks are fully synchronized
8229 instance = self.instance
8230 target_node = self.target_node
8231 source_node = self.source_node
8233 # check running on only one node
8234 self.feedback_fn("* checking where the instance actually runs"
8235 " (if this hangs, the hypervisor might be in"
8237 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8238 for node, result in ins_l.items():
8239 result.Raise("Can't contact node %s" % node)
8241 runningon_source = instance.name in ins_l[source_node].payload
8242 runningon_target = instance.name in ins_l[target_node].payload
8244 if runningon_source and runningon_target:
8245 raise errors.OpExecError("Instance seems to be running on two nodes,"
8246 " or the hypervisor is confused; you will have"
8247 " to ensure manually that it runs only on one"
8248 " and restart this operation")
8250 if not (runningon_source or runningon_target):
8251 raise errors.OpExecError("Instance does not seem to be running at all;"
8252 " in this case it's safer to repair by"
8253 " running 'gnt-instance stop' to ensure disk"
8254 " shutdown, and then restarting it")
8256 if runningon_target:
8257 # the migration has actually succeeded, we need to update the config
8258 self.feedback_fn("* instance running on secondary node (%s),"
8259 " updating config" % target_node)
8260 instance.primary_node = target_node
8261 self.cfg.Update(instance, self.feedback_fn)
8262 demoted_node = source_node
8264 self.feedback_fn("* instance confirmed to be running on its"
8265 " primary node (%s)" % source_node)
8266 demoted_node = target_node
8268 if instance.disk_template in constants.DTS_INT_MIRROR:
8269 self._EnsureSecondary(demoted_node)
8271 self._WaitUntilSync()
8272 except errors.OpExecError:
8273 # we ignore here errors, since if the device is standalone, it
8274 # won't be able to sync
8276 self._GoStandalone()
8277 self._GoReconnect(False)
8278 self._WaitUntilSync()
8280 self.feedback_fn("* done")
8282 def _RevertDiskStatus(self):
8283 """Try to revert the disk status after a failed migration.
8286 target_node = self.target_node
8287 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8291 self._EnsureSecondary(target_node)
8292 self._GoStandalone()
8293 self._GoReconnect(False)
8294 self._WaitUntilSync()
8295 except errors.OpExecError, err:
8296 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8297 " please try to recover the instance manually;"
8298 " error '%s'" % str(err))
8300 def _AbortMigration(self):
8301 """Call the hypervisor code to abort a started migration.
8304 instance = self.instance
8305 target_node = self.target_node
8306 source_node = self.source_node
8307 migration_info = self.migration_info
8309 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8313 abort_msg = abort_result.fail_msg
8315 logging.error("Aborting migration failed on target node %s: %s",
8316 target_node, abort_msg)
8317 # Don't raise an exception here, as we stil have to try to revert the
8318 # disk status, even if this step failed.
8320 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8321 instance, False, self.live)
8322 abort_msg = abort_result.fail_msg
8324 logging.error("Aborting migration failed on source node %s: %s",
8325 source_node, abort_msg)
8327 def _ExecMigration(self):
8328 """Migrate an instance.
8330 The migrate is done by:
8331 - change the disks into dual-master mode
8332 - wait until disks are fully synchronized again
8333 - migrate the instance
8334 - change disks on the new secondary node (the old primary) to secondary
8335 - wait until disks are fully synchronized
8336 - change disks into single-master mode
8339 instance = self.instance
8340 target_node = self.target_node
8341 source_node = self.source_node
8343 # Check for hypervisor version mismatch and warn the user.
8344 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8345 None, [self.instance.hypervisor])
8346 for ninfo in nodeinfo.values():
8347 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8349 (_, _, (src_info, )) = nodeinfo[source_node].payload
8350 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8352 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8353 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8354 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8355 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8356 if src_version != dst_version:
8357 self.feedback_fn("* warning: hypervisor version mismatch between"
8358 " source (%s) and target (%s) node" %
8359 (src_version, dst_version))
8361 self.feedback_fn("* checking disk consistency between source and target")
8362 for (idx, dev) in enumerate(instance.disks):
8363 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8364 raise errors.OpExecError("Disk %s is degraded or not fully"
8365 " synchronized on target node,"
8366 " aborting migration" % idx)
8368 if self.current_mem > self.tgt_free_mem:
8369 if not self.allow_runtime_changes:
8370 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8371 " free memory to fit instance %s on target"
8372 " node %s (have %dMB, need %dMB)" %
8373 (instance.name, target_node,
8374 self.tgt_free_mem, self.current_mem))
8375 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8376 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8379 rpcres.Raise("Cannot modify instance runtime memory")
8381 # First get the migration information from the remote node
8382 result = self.rpc.call_migration_info(source_node, instance)
8383 msg = result.fail_msg
8385 log_err = ("Failed fetching source migration information from %s: %s" %
8387 logging.error(log_err)
8388 raise errors.OpExecError(log_err)
8390 self.migration_info = migration_info = result.payload
8392 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8393 # Then switch the disks to master/master mode
8394 self._EnsureSecondary(target_node)
8395 self._GoStandalone()
8396 self._GoReconnect(True)
8397 self._WaitUntilSync()
8399 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8400 result = self.rpc.call_accept_instance(target_node,
8403 self.nodes_ip[target_node])
8405 msg = result.fail_msg
8407 logging.error("Instance pre-migration failed, trying to revert"
8408 " disk status: %s", msg)
8409 self.feedback_fn("Pre-migration failed, aborting")
8410 self._AbortMigration()
8411 self._RevertDiskStatus()
8412 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8413 (instance.name, msg))
8415 self.feedback_fn("* migrating instance to %s" % target_node)
8416 result = self.rpc.call_instance_migrate(source_node, instance,
8417 self.nodes_ip[target_node],
8419 msg = result.fail_msg
8421 logging.error("Instance migration failed, trying to revert"
8422 " disk status: %s", msg)
8423 self.feedback_fn("Migration failed, aborting")
8424 self._AbortMigration()
8425 self._RevertDiskStatus()
8426 raise errors.OpExecError("Could not migrate instance %s: %s" %
8427 (instance.name, msg))
8429 self.feedback_fn("* starting memory transfer")
8430 last_feedback = time.time()
8432 result = self.rpc.call_instance_get_migration_status(source_node,
8434 msg = result.fail_msg
8435 ms = result.payload # MigrationStatus instance
8436 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8437 logging.error("Instance migration failed, trying to revert"
8438 " disk status: %s", msg)
8439 self.feedback_fn("Migration failed, aborting")
8440 self._AbortMigration()
8441 self._RevertDiskStatus()
8442 raise errors.OpExecError("Could not migrate instance %s: %s" %
8443 (instance.name, msg))
8445 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8446 self.feedback_fn("* memory transfer complete")
8449 if (utils.TimeoutExpired(last_feedback,
8450 self._MIGRATION_FEEDBACK_INTERVAL) and
8451 ms.transferred_ram is not None):
8452 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8453 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8454 last_feedback = time.time()
8456 time.sleep(self._MIGRATION_POLL_INTERVAL)
8458 result = self.rpc.call_instance_finalize_migration_src(source_node,
8462 msg = result.fail_msg
8464 logging.error("Instance migration succeeded, but finalization failed"
8465 " on the source node: %s", msg)
8466 raise errors.OpExecError("Could not finalize instance migration: %s" %
8469 instance.primary_node = target_node
8471 # distribute new instance config to the other nodes
8472 self.cfg.Update(instance, self.feedback_fn)
8474 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8478 msg = result.fail_msg
8480 logging.error("Instance migration succeeded, but finalization failed"
8481 " on the target node: %s", msg)
8482 raise errors.OpExecError("Could not finalize instance migration: %s" %
8485 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8486 self._EnsureSecondary(source_node)
8487 self._WaitUntilSync()
8488 self._GoStandalone()
8489 self._GoReconnect(False)
8490 self._WaitUntilSync()
8492 # If the instance's disk template is `rbd' and there was a successful
8493 # migration, unmap the device from the source node.
8494 if self.instance.disk_template == constants.DT_RBD:
8495 disks = _ExpandCheckDisks(instance, instance.disks)
8496 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8498 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8499 msg = result.fail_msg
8501 logging.error("Migration was successful, but couldn't unmap the"
8502 " block device %s on source node %s: %s",
8503 disk.iv_name, source_node, msg)
8504 logging.error("You need to unmap the device %s manually on %s",
8505 disk.iv_name, source_node)
8507 self.feedback_fn("* done")
8509 def _ExecFailover(self):
8510 """Failover an instance.
8512 The failover is done by shutting it down on its present node and
8513 starting it on the secondary.
8516 instance = self.instance
8517 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8519 source_node = instance.primary_node
8520 target_node = self.target_node
8522 if instance.admin_state == constants.ADMINST_UP:
8523 self.feedback_fn("* checking disk consistency between source and target")
8524 for (idx, dev) in enumerate(instance.disks):
8525 # for drbd, these are drbd over lvm
8526 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8528 if primary_node.offline:
8529 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8531 (primary_node.name, idx, target_node))
8532 elif not self.ignore_consistency:
8533 raise errors.OpExecError("Disk %s is degraded on target node,"
8534 " aborting failover" % idx)
8536 self.feedback_fn("* not checking disk consistency as instance is not"
8539 self.feedback_fn("* shutting down instance on source node")
8540 logging.info("Shutting down instance %s on node %s",
8541 instance.name, source_node)
8543 result = self.rpc.call_instance_shutdown(source_node, instance,
8544 self.shutdown_timeout)
8545 msg = result.fail_msg
8547 if self.ignore_consistency or primary_node.offline:
8548 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8549 " proceeding anyway; please make sure node"
8550 " %s is down; error details: %s",
8551 instance.name, source_node, source_node, msg)
8553 raise errors.OpExecError("Could not shutdown instance %s on"
8555 (instance.name, source_node, msg))
8557 self.feedback_fn("* deactivating the instance's disks on source node")
8558 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8559 raise errors.OpExecError("Can't shut down the instance's disks")
8561 instance.primary_node = target_node
8562 # distribute new instance config to the other nodes
8563 self.cfg.Update(instance, self.feedback_fn)
8565 # Only start the instance if it's marked as up
8566 if instance.admin_state == constants.ADMINST_UP:
8567 self.feedback_fn("* activating the instance's disks on target node %s" %
8569 logging.info("Starting instance %s on node %s",
8570 instance.name, target_node)
8572 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8573 ignore_secondaries=True)
8575 _ShutdownInstanceDisks(self.lu, instance)
8576 raise errors.OpExecError("Can't activate the instance's disks")
8578 self.feedback_fn("* starting the instance on the target node %s" %
8580 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8582 msg = result.fail_msg
8584 _ShutdownInstanceDisks(self.lu, instance)
8585 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8586 (instance.name, target_node, msg))
8588 def Exec(self, feedback_fn):
8589 """Perform the migration.
8592 self.feedback_fn = feedback_fn
8593 self.source_node = self.instance.primary_node
8595 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8596 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8597 self.target_node = self.instance.secondary_nodes[0]
8598 # Otherwise self.target_node has been populated either
8599 # directly, or through an iallocator.
8601 self.all_nodes = [self.source_node, self.target_node]
8602 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8603 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8606 feedback_fn("Failover instance %s" % self.instance.name)
8607 self._ExecFailover()
8609 feedback_fn("Migrating instance %s" % self.instance.name)
8612 return self._ExecCleanup()
8614 return self._ExecMigration()
8617 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8619 """Wrapper around L{_CreateBlockDevInner}.
8621 This method annotates the root device first.
8624 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8625 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8629 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8631 """Create a tree of block devices on a given node.
8633 If this device type has to be created on secondaries, create it and
8636 If not, just recurse to children keeping the same 'force' value.
8638 @attention: The device has to be annotated already.
8640 @param lu: the lu on whose behalf we execute
8641 @param node: the node on which to create the device
8642 @type instance: L{objects.Instance}
8643 @param instance: the instance which owns the device
8644 @type device: L{objects.Disk}
8645 @param device: the device to create
8646 @type force_create: boolean
8647 @param force_create: whether to force creation of this device; this
8648 will be change to True whenever we find a device which has
8649 CreateOnSecondary() attribute
8650 @param info: the extra 'metadata' we should attach to the device
8651 (this will be represented as a LVM tag)
8652 @type force_open: boolean
8653 @param force_open: this parameter will be passes to the
8654 L{backend.BlockdevCreate} function where it specifies
8655 whether we run on primary or not, and it affects both
8656 the child assembly and the device own Open() execution
8659 if device.CreateOnSecondary():
8663 for child in device.children:
8664 _CreateBlockDevInner(lu, node, instance, child, force_create,
8667 if not force_create:
8670 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8673 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8674 """Create a single block device on a given node.
8676 This will not recurse over children of the device, so they must be
8679 @param lu: the lu on whose behalf we execute
8680 @param node: the node on which to create the device
8681 @type instance: L{objects.Instance}
8682 @param instance: the instance which owns the device
8683 @type device: L{objects.Disk}
8684 @param device: the device to create
8685 @param info: the extra 'metadata' we should attach to the device
8686 (this will be represented as a LVM tag)
8687 @type force_open: boolean
8688 @param force_open: this parameter will be passes to the
8689 L{backend.BlockdevCreate} function where it specifies
8690 whether we run on primary or not, and it affects both
8691 the child assembly and the device own Open() execution
8694 lu.cfg.SetDiskID(device, node)
8695 result = lu.rpc.call_blockdev_create(node, device, device.size,
8696 instance.name, force_open, info)
8697 result.Raise("Can't create block device %s on"
8698 " node %s for instance %s" % (device, node, instance.name))
8699 if device.physical_id is None:
8700 device.physical_id = result.payload
8703 def _GenerateUniqueNames(lu, exts):
8704 """Generate a suitable LV name.
8706 This will generate a logical volume name for the given instance.
8711 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8712 results.append("%s%s" % (new_id, val))
8716 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8717 iv_name, p_minor, s_minor):
8718 """Generate a drbd8 device complete with its children.
8721 assert len(vgnames) == len(names) == 2
8722 port = lu.cfg.AllocatePort()
8723 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8725 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8726 logical_id=(vgnames[0], names[0]),
8728 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8729 logical_id=(vgnames[1], names[1]),
8731 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8732 logical_id=(primary, secondary, port,
8735 children=[dev_data, dev_meta],
8736 iv_name=iv_name, params={})
8740 _DISK_TEMPLATE_NAME_PREFIX = {
8741 constants.DT_PLAIN: "",
8742 constants.DT_RBD: ".rbd",
8746 _DISK_TEMPLATE_DEVICE_TYPE = {
8747 constants.DT_PLAIN: constants.LD_LV,
8748 constants.DT_FILE: constants.LD_FILE,
8749 constants.DT_SHARED_FILE: constants.LD_FILE,
8750 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8751 constants.DT_RBD: constants.LD_RBD,
8755 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8756 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8757 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8758 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8759 """Generate the entire disk layout for a given template type.
8762 #TODO: compute space requirements
8764 vgname = lu.cfg.GetVGName()
8765 disk_count = len(disk_info)
8768 if template_name == constants.DT_DISKLESS:
8770 elif template_name == constants.DT_DRBD8:
8771 if len(secondary_nodes) != 1:
8772 raise errors.ProgrammerError("Wrong template configuration")
8773 remote_node = secondary_nodes[0]
8774 minors = lu.cfg.AllocateDRBDMinor(
8775 [primary_node, remote_node] * len(disk_info), instance_name)
8777 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8779 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8782 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8783 for i in range(disk_count)]):
8784 names.append(lv_prefix + "_data")
8785 names.append(lv_prefix + "_meta")
8786 for idx, disk in enumerate(disk_info):
8787 disk_index = idx + base_index
8788 data_vg = disk.get(constants.IDISK_VG, vgname)
8789 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8790 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8791 disk[constants.IDISK_SIZE],
8793 names[idx * 2:idx * 2 + 2],
8794 "disk/%d" % disk_index,
8795 minors[idx * 2], minors[idx * 2 + 1])
8796 disk_dev.mode = disk[constants.IDISK_MODE]
8797 disks.append(disk_dev)
8800 raise errors.ProgrammerError("Wrong template configuration")
8802 if template_name == constants.DT_FILE:
8804 elif template_name == constants.DT_SHARED_FILE:
8805 _req_shr_file_storage()
8807 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8808 if name_prefix is None:
8811 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8812 (name_prefix, base_index + i)
8813 for i in range(disk_count)])
8815 if template_name == constants.DT_PLAIN:
8816 def logical_id_fn(idx, _, disk):
8817 vg = disk.get(constants.IDISK_VG, vgname)
8818 return (vg, names[idx])
8819 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8821 lambda _, disk_index, disk: (file_driver,
8822 "%s/disk%d" % (file_storage_dir,
8824 elif template_name == constants.DT_BLOCK:
8826 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8827 disk[constants.IDISK_ADOPT])
8828 elif template_name == constants.DT_RBD:
8829 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8831 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8833 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8835 for idx, disk in enumerate(disk_info):
8836 disk_index = idx + base_index
8837 size = disk[constants.IDISK_SIZE]
8838 feedback_fn("* disk %s, size %s" %
8839 (disk_index, utils.FormatUnit(size, "h")))
8840 disks.append(objects.Disk(dev_type=dev_type, size=size,
8841 logical_id=logical_id_fn(idx, disk_index, disk),
8842 iv_name="disk/%d" % disk_index,
8843 mode=disk[constants.IDISK_MODE],
8849 def _GetInstanceInfoText(instance):
8850 """Compute that text that should be added to the disk's metadata.
8853 return "originstname+%s" % instance.name
8856 def _CalcEta(time_taken, written, total_size):
8857 """Calculates the ETA based on size written and total size.
8859 @param time_taken: The time taken so far
8860 @param written: amount written so far
8861 @param total_size: The total size of data to be written
8862 @return: The remaining time in seconds
8865 avg_time = time_taken / float(written)
8866 return (total_size - written) * avg_time
8869 def _WipeDisks(lu, instance):
8870 """Wipes instance disks.
8872 @type lu: L{LogicalUnit}
8873 @param lu: the logical unit on whose behalf we execute
8874 @type instance: L{objects.Instance}
8875 @param instance: the instance whose disks we should create
8876 @return: the success of the wipe
8879 node = instance.primary_node
8881 for device in instance.disks:
8882 lu.cfg.SetDiskID(device, node)
8884 logging.info("Pause sync of instance %s disks", instance.name)
8885 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8886 (instance.disks, instance),
8888 result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)
8890 for idx, success in enumerate(result.payload):
8892 logging.warn("pause-sync of instance %s for disks %d failed",
8896 for idx, device in enumerate(instance.disks):
8897 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8898 # MAX_WIPE_CHUNK at max
8899 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8900 constants.MIN_WIPE_CHUNK_PERCENT)
8901 # we _must_ make this an int, otherwise rounding errors will
8903 wipe_chunk_size = int(wipe_chunk_size)
8905 lu.LogInfo("* Wiping disk %d", idx)
8906 logging.info("Wiping disk %d for instance %s, node %s using"
8907 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8912 start_time = time.time()
8914 while offset < size:
8915 wipe_size = min(wipe_chunk_size, size - offset)
8916 logging.debug("Wiping disk %d, offset %s, chunk %s",
8917 idx, offset, wipe_size)
8918 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
8920 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8921 (idx, offset, wipe_size))
8924 if now - last_output >= 60:
8925 eta = _CalcEta(now - start_time, offset, size)
8926 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8927 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8930 logging.info("Resume sync of instance %s disks", instance.name)
8932 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8933 (instance.disks, instance),
8937 lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
8938 " please have a look at the status and troubleshoot"
8939 " the issue: %s", node, result.fail_msg)
8941 for idx, success in enumerate(result.payload):
8943 lu.LogWarning("Resume sync of disk %d failed, please have a"
8944 " look at the status and troubleshoot the issue", idx)
8945 logging.warn("resume-sync of instance %s for disks %d failed",
8949 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8950 """Create all disks for an instance.
8952 This abstracts away some work from AddInstance.
8954 @type lu: L{LogicalUnit}
8955 @param lu: the logical unit on whose behalf we execute
8956 @type instance: L{objects.Instance}
8957 @param instance: the instance whose disks we should create
8959 @param to_skip: list of indices to skip
8960 @type target_node: string
8961 @param target_node: if passed, overrides the target node for creation
8963 @return: the success of the creation
8966 info = _GetInstanceInfoText(instance)
8967 if target_node is None:
8968 pnode = instance.primary_node
8969 all_nodes = instance.all_nodes
8974 if instance.disk_template in constants.DTS_FILEBASED:
8975 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8976 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8978 result.Raise("Failed to create directory '%s' on"
8979 " node %s" % (file_storage_dir, pnode))
8981 # Note: this needs to be kept in sync with adding of disks in
8982 # LUInstanceSetParams
8983 for idx, device in enumerate(instance.disks):
8984 if to_skip and idx in to_skip:
8986 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
8988 for node in all_nodes:
8989 f_create = node == pnode
8990 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8993 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
8994 """Remove all disks for an instance.
8996 This abstracts away some work from `AddInstance()` and
8997 `RemoveInstance()`. Note that in case some of the devices couldn't
8998 be removed, the removal will continue with the other ones (compare
8999 with `_CreateDisks()`).
9001 @type lu: L{LogicalUnit}
9002 @param lu: the logical unit on whose behalf we execute
9003 @type instance: L{objects.Instance}
9004 @param instance: the instance whose disks we should remove
9005 @type target_node: string
9006 @param target_node: used to override the node on which to remove the disks
9008 @return: the success of the removal
9011 logging.info("Removing block devices for instance %s", instance.name)
9014 ports_to_release = set()
9015 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9016 for (idx, device) in enumerate(anno_disks):
9018 edata = [(target_node, device)]
9020 edata = device.ComputeNodeTree(instance.primary_node)
9021 for node, disk in edata:
9022 lu.cfg.SetDiskID(disk, node)
9023 result = lu.rpc.call_blockdev_remove(node, disk)
9025 lu.LogWarning("Could not remove disk %s on node %s,"
9026 " continuing anyway: %s", idx, node, result.fail_msg)
9027 if not (result.offline and node != instance.primary_node):
9030 # if this is a DRBD disk, return its port to the pool
9031 if device.dev_type in constants.LDS_DRBD:
9032 ports_to_release.add(device.logical_id[2])
9034 if all_result or ignore_failures:
9035 for port in ports_to_release:
9036 lu.cfg.AddTcpUdpPort(port)
9038 if instance.disk_template == constants.DT_FILE:
9039 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9043 tgt = instance.primary_node
9044 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9046 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9047 file_storage_dir, instance.primary_node, result.fail_msg)
9053 def _ComputeDiskSizePerVG(disk_template, disks):
9054 """Compute disk size requirements in the volume group
9057 def _compute(disks, payload):
9058 """Universal algorithm.
9063 vgs[disk[constants.IDISK_VG]] = \
9064 vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
9068 # Required free disk space as a function of disk and swap space
9070 constants.DT_DISKLESS: {},
9071 constants.DT_PLAIN: _compute(disks, 0),
9072 # 128 MB are added for drbd metadata for each disk
9073 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9074 constants.DT_FILE: {},
9075 constants.DT_SHARED_FILE: {},
9078 if disk_template not in req_size_dict:
9079 raise errors.ProgrammerError("Disk template '%s' size requirement"
9080 " is unknown" % disk_template)
9082 return req_size_dict[disk_template]
9085 def _ComputeDiskSize(disk_template, disks):
9086 """Compute disk size requirements in the volume group
9089 # Required free disk space as a function of disk and swap space
9091 constants.DT_DISKLESS: None,
9092 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9093 # 128 MB are added for drbd metadata for each disk
9095 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9096 constants.DT_FILE: None,
9097 constants.DT_SHARED_FILE: 0,
9098 constants.DT_BLOCK: 0,
9099 constants.DT_RBD: 0,
9102 if disk_template not in req_size_dict:
9103 raise errors.ProgrammerError("Disk template '%s' size requirement"
9104 " is unknown" % disk_template)
9106 return req_size_dict[disk_template]
9109 def _FilterVmNodes(lu, nodenames):
9110 """Filters out non-vm_capable nodes from a list.
9112 @type lu: L{LogicalUnit}
9113 @param lu: the logical unit for which we check
9114 @type nodenames: list
9115 @param nodenames: the list of nodes on which we should check
9117 @return: the list of vm-capable nodes
9120 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9121 return [name for name in nodenames if name not in vm_nodes]
9124 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9125 """Hypervisor parameter validation.
9127 This function abstract the hypervisor parameter validation to be
9128 used in both instance create and instance modify.
9130 @type lu: L{LogicalUnit}
9131 @param lu: the logical unit for which we check
9132 @type nodenames: list
9133 @param nodenames: the list of nodes on which we should check
9134 @type hvname: string
9135 @param hvname: the name of the hypervisor we should use
9136 @type hvparams: dict
9137 @param hvparams: the parameters which we need to check
9138 @raise errors.OpPrereqError: if the parameters are not valid
9141 nodenames = _FilterVmNodes(lu, nodenames)
9143 cluster = lu.cfg.GetClusterInfo()
9144 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9146 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9147 for node in nodenames:
9151 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9154 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9155 """OS parameters validation.
9157 @type lu: L{LogicalUnit}
9158 @param lu: the logical unit for which we check
9159 @type required: boolean
9160 @param required: whether the validation should fail if the OS is not
9162 @type nodenames: list
9163 @param nodenames: the list of nodes on which we should check
9164 @type osname: string
9165 @param osname: the name of the hypervisor we should use
9166 @type osparams: dict
9167 @param osparams: the parameters which we need to check
9168 @raise errors.OpPrereqError: if the parameters are not valid
9171 nodenames = _FilterVmNodes(lu, nodenames)
9172 result = lu.rpc.call_os_validate(nodenames, required, osname,
9173 [constants.OS_VALIDATE_PARAMETERS],
9175 for node, nres in result.items():
9176 # we don't check for offline cases since this should be run only
9177 # against the master node and/or an instance's nodes
9178 nres.Raise("OS Parameters validation failed on node %s" % node)
9179 if not nres.payload:
9180 lu.LogInfo("OS %s not found on node %s, validation skipped",
9184 class LUInstanceCreate(LogicalUnit):
9185 """Create an instance.
9188 HPATH = "instance-add"
9189 HTYPE = constants.HTYPE_INSTANCE
9192 def CheckArguments(self):
9196 # do not require name_check to ease forward/backward compatibility
9198 if self.op.no_install and self.op.start:
9199 self.LogInfo("No-installation mode selected, disabling startup")
9200 self.op.start = False
9201 # validate/normalize the instance name
9202 self.op.instance_name = \
9203 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9205 if self.op.ip_check and not self.op.name_check:
9206 # TODO: make the ip check more flexible and not depend on the name check
9207 raise errors.OpPrereqError("Cannot do IP address check without a name"
9208 " check", errors.ECODE_INVAL)
9210 # check nics' parameter names
9211 for nic in self.op.nics:
9212 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9214 # check disks. parameter names and consistent adopt/no-adopt strategy
9215 has_adopt = has_no_adopt = False
9216 for disk in self.op.disks:
9217 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9218 if constants.IDISK_ADOPT in disk:
9222 if has_adopt and has_no_adopt:
9223 raise errors.OpPrereqError("Either all disks are adopted or none is",
9226 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9227 raise errors.OpPrereqError("Disk adoption is not supported for the"
9228 " '%s' disk template" %
9229 self.op.disk_template,
9231 if self.op.iallocator is not None:
9232 raise errors.OpPrereqError("Disk adoption not allowed with an"
9233 " iallocator script", errors.ECODE_INVAL)
9234 if self.op.mode == constants.INSTANCE_IMPORT:
9235 raise errors.OpPrereqError("Disk adoption not allowed for"
9236 " instance import", errors.ECODE_INVAL)
9238 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9239 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9240 " but no 'adopt' parameter given" %
9241 self.op.disk_template,
9244 self.adopt_disks = has_adopt
9246 # instance name verification
9247 if self.op.name_check:
9248 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9249 self.op.instance_name = self.hostname1.name
9250 # used in CheckPrereq for ip ping check
9251 self.check_ip = self.hostname1.ip
9253 self.check_ip = None
9255 # file storage checks
9256 if (self.op.file_driver and
9257 not self.op.file_driver in constants.FILE_DRIVER):
9258 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9259 self.op.file_driver, errors.ECODE_INVAL)
9261 if self.op.disk_template == constants.DT_FILE:
9262 opcodes.RequireFileStorage()
9263 elif self.op.disk_template == constants.DT_SHARED_FILE:
9264 opcodes.RequireSharedFileStorage()
9266 ### Node/iallocator related checks
9267 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9269 if self.op.pnode is not None:
9270 if self.op.disk_template in constants.DTS_INT_MIRROR:
9271 if self.op.snode is None:
9272 raise errors.OpPrereqError("The networked disk templates need"
9273 " a mirror node", errors.ECODE_INVAL)
9275 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9277 self.op.snode = None
9279 self._cds = _GetClusterDomainSecret()
9281 if self.op.mode == constants.INSTANCE_IMPORT:
9282 # On import force_variant must be True, because if we forced it at
9283 # initial install, our only chance when importing it back is that it
9285 self.op.force_variant = True
9287 if self.op.no_install:
9288 self.LogInfo("No-installation mode has no effect during import")
9290 elif self.op.mode == constants.INSTANCE_CREATE:
9291 if self.op.os_type is None:
9292 raise errors.OpPrereqError("No guest OS specified",
9294 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9295 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9296 " installation" % self.op.os_type,
9298 if self.op.disk_template is None:
9299 raise errors.OpPrereqError("No disk template specified",
9302 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9303 # Check handshake to ensure both clusters have the same domain secret
9304 src_handshake = self.op.source_handshake
9305 if not src_handshake:
9306 raise errors.OpPrereqError("Missing source handshake",
9309 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9312 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9315 # Load and check source CA
9316 self.source_x509_ca_pem = self.op.source_x509_ca
9317 if not self.source_x509_ca_pem:
9318 raise errors.OpPrereqError("Missing source X509 CA",
9322 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9324 except OpenSSL.crypto.Error, err:
9325 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9326 (err, ), errors.ECODE_INVAL)
9328 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9329 if errcode is not None:
9330 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9333 self.source_x509_ca = cert
9335 src_instance_name = self.op.source_instance_name
9336 if not src_instance_name:
9337 raise errors.OpPrereqError("Missing source instance name",
9340 self.source_instance_name = \
9341 netutils.GetHostname(name=src_instance_name).name
9344 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9345 self.op.mode, errors.ECODE_INVAL)
9347 def ExpandNames(self):
9348 """ExpandNames for CreateInstance.
9350 Figure out the right locks for instance creation.
9353 self.needed_locks = {}
9355 instance_name = self.op.instance_name
9356 # this is just a preventive check, but someone might still add this
9357 # instance in the meantime, and creation will fail at lock-add time
9358 if instance_name in self.cfg.GetInstanceList():
9359 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9360 instance_name, errors.ECODE_EXISTS)
9362 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9364 if self.op.iallocator:
9365 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9366 # specifying a group on instance creation and then selecting nodes from
9368 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9369 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9371 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9372 nodelist = [self.op.pnode]
9373 if self.op.snode is not None:
9374 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9375 nodelist.append(self.op.snode)
9376 self.needed_locks[locking.LEVEL_NODE] = nodelist
9377 # Lock resources of instance's primary and secondary nodes (copy to
9378 # prevent accidential modification)
9379 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9381 # in case of import lock the source node too
9382 if self.op.mode == constants.INSTANCE_IMPORT:
9383 src_node = self.op.src_node
9384 src_path = self.op.src_path
9386 if src_path is None:
9387 self.op.src_path = src_path = self.op.instance_name
9389 if src_node is None:
9390 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9391 self.op.src_node = None
9392 if os.path.isabs(src_path):
9393 raise errors.OpPrereqError("Importing an instance from a path"
9394 " requires a source node option",
9397 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9398 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9399 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9400 if not os.path.isabs(src_path):
9401 self.op.src_path = src_path = \
9402 utils.PathJoin(constants.EXPORT_DIR, src_path)
9404 def _RunAllocator(self):
9405 """Run the allocator based on input opcode.
9408 nics = [n.ToDict() for n in self.nics]
9409 ial = IAllocator(self.cfg, self.rpc,
9410 mode=constants.IALLOCATOR_MODE_ALLOC,
9411 name=self.op.instance_name,
9412 disk_template=self.op.disk_template,
9415 vcpus=self.be_full[constants.BE_VCPUS],
9416 memory=self.be_full[constants.BE_MAXMEM],
9417 spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9420 hypervisor=self.op.hypervisor,
9423 ial.Run(self.op.iallocator)
9426 raise errors.OpPrereqError("Can't compute nodes using"
9427 " iallocator '%s': %s" %
9428 (self.op.iallocator, ial.info),
9430 if len(ial.result) != ial.required_nodes:
9431 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9432 " of nodes (%s), required %s" %
9433 (self.op.iallocator, len(ial.result),
9434 ial.required_nodes), errors.ECODE_FAULT)
9435 self.op.pnode = ial.result[0]
9436 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9437 self.op.instance_name, self.op.iallocator,
9438 utils.CommaJoin(ial.result))
9439 if ial.required_nodes == 2:
9440 self.op.snode = ial.result[1]
9442 def BuildHooksEnv(self):
9445 This runs on master, primary and secondary nodes of the instance.
9449 "ADD_MODE": self.op.mode,
9451 if self.op.mode == constants.INSTANCE_IMPORT:
9452 env["SRC_NODE"] = self.op.src_node
9453 env["SRC_PATH"] = self.op.src_path
9454 env["SRC_IMAGES"] = self.src_images
9456 env.update(_BuildInstanceHookEnv(
9457 name=self.op.instance_name,
9458 primary_node=self.op.pnode,
9459 secondary_nodes=self.secondaries,
9460 status=self.op.start,
9461 os_type=self.op.os_type,
9462 minmem=self.be_full[constants.BE_MINMEM],
9463 maxmem=self.be_full[constants.BE_MAXMEM],
9464 vcpus=self.be_full[constants.BE_VCPUS],
9465 nics=_NICListToTuple(self, self.nics),
9466 disk_template=self.op.disk_template,
9467 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9468 for d in self.disks],
9471 hypervisor_name=self.op.hypervisor,
9477 def BuildHooksNodes(self):
9478 """Build hooks nodes.
9481 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9484 def _ReadExportInfo(self):
9485 """Reads the export information from disk.
9487 It will override the opcode source node and path with the actual
9488 information, if these two were not specified before.
9490 @return: the export information
9493 assert self.op.mode == constants.INSTANCE_IMPORT
9495 src_node = self.op.src_node
9496 src_path = self.op.src_path
9498 if src_node is None:
9499 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9500 exp_list = self.rpc.call_export_list(locked_nodes)
9502 for node in exp_list:
9503 if exp_list[node].fail_msg:
9505 if src_path in exp_list[node].payload:
9507 self.op.src_node = src_node = node
9508 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9512 raise errors.OpPrereqError("No export found for relative path %s" %
9513 src_path, errors.ECODE_INVAL)
9515 _CheckNodeOnline(self, src_node)
9516 result = self.rpc.call_export_info(src_node, src_path)
9517 result.Raise("No export or invalid export found in dir %s" % src_path)
9519 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9520 if not export_info.has_section(constants.INISECT_EXP):
9521 raise errors.ProgrammerError("Corrupted export config",
9522 errors.ECODE_ENVIRON)
9524 ei_version = export_info.get(constants.INISECT_EXP, "version")
9525 if (int(ei_version) != constants.EXPORT_VERSION):
9526 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9527 (ei_version, constants.EXPORT_VERSION),
9528 errors.ECODE_ENVIRON)
9531 def _ReadExportParams(self, einfo):
9532 """Use export parameters as defaults.
9534 In case the opcode doesn't specify (as in override) some instance
9535 parameters, then try to use them from the export information, if
9539 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9541 if self.op.disk_template is None:
9542 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9543 self.op.disk_template = einfo.get(constants.INISECT_INS,
9545 if self.op.disk_template not in constants.DISK_TEMPLATES:
9546 raise errors.OpPrereqError("Disk template specified in configuration"
9547 " file is not one of the allowed values:"
9548 " %s" % " ".join(constants.DISK_TEMPLATES))
9550 raise errors.OpPrereqError("No disk template specified and the export"
9551 " is missing the disk_template information",
9554 if not self.op.disks:
9556 # TODO: import the disk iv_name too
9557 for idx in range(constants.MAX_DISKS):
9558 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9559 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9560 disks.append({constants.IDISK_SIZE: disk_sz})
9561 self.op.disks = disks
9562 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9563 raise errors.OpPrereqError("No disk info specified and the export"
9564 " is missing the disk information",
9567 if not self.op.nics:
9569 for idx in range(constants.MAX_NICS):
9570 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9572 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9573 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9580 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9581 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9583 if (self.op.hypervisor is None and
9584 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9585 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9587 if einfo.has_section(constants.INISECT_HYP):
9588 # use the export parameters but do not override the ones
9589 # specified by the user
9590 for name, value in einfo.items(constants.INISECT_HYP):
9591 if name not in self.op.hvparams:
9592 self.op.hvparams[name] = value
9594 if einfo.has_section(constants.INISECT_BEP):
9595 # use the parameters, without overriding
9596 for name, value in einfo.items(constants.INISECT_BEP):
9597 if name not in self.op.beparams:
9598 self.op.beparams[name] = value
9599 # Compatibility for the old "memory" be param
9600 if name == constants.BE_MEMORY:
9601 if constants.BE_MAXMEM not in self.op.beparams:
9602 self.op.beparams[constants.BE_MAXMEM] = value
9603 if constants.BE_MINMEM not in self.op.beparams:
9604 self.op.beparams[constants.BE_MINMEM] = value
9606 # try to read the parameters old style, from the main section
9607 for name in constants.BES_PARAMETERS:
9608 if (name not in self.op.beparams and
9609 einfo.has_option(constants.INISECT_INS, name)):
9610 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9612 if einfo.has_section(constants.INISECT_OSP):
9613 # use the parameters, without overriding
9614 for name, value in einfo.items(constants.INISECT_OSP):
9615 if name not in self.op.osparams:
9616 self.op.osparams[name] = value
9618 def _RevertToDefaults(self, cluster):
9619 """Revert the instance parameters to the default values.
9623 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9624 for name in self.op.hvparams.keys():
9625 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9626 del self.op.hvparams[name]
9628 be_defs = cluster.SimpleFillBE({})
9629 for name in self.op.beparams.keys():
9630 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9631 del self.op.beparams[name]
9633 nic_defs = cluster.SimpleFillNIC({})
9634 for nic in self.op.nics:
9635 for name in constants.NICS_PARAMETERS:
9636 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9639 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9640 for name in self.op.osparams.keys():
9641 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9642 del self.op.osparams[name]
9644 def _CalculateFileStorageDir(self):
9645 """Calculate final instance file storage dir.
9648 # file storage dir calculation/check
9649 self.instance_file_storage_dir = None
9650 if self.op.disk_template in constants.DTS_FILEBASED:
9651 # build the full file storage dir path
9654 if self.op.disk_template == constants.DT_SHARED_FILE:
9655 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9657 get_fsd_fn = self.cfg.GetFileStorageDir
9659 cfg_storagedir = get_fsd_fn()
9660 if not cfg_storagedir:
9661 raise errors.OpPrereqError("Cluster file storage dir not defined")
9662 joinargs.append(cfg_storagedir)
9664 if self.op.file_storage_dir is not None:
9665 joinargs.append(self.op.file_storage_dir)
9667 joinargs.append(self.op.instance_name)
9669 # pylint: disable=W0142
9670 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9672 def CheckPrereq(self): # pylint: disable=R0914
9673 """Check prerequisites.
9676 self._CalculateFileStorageDir()
9678 if self.op.mode == constants.INSTANCE_IMPORT:
9679 export_info = self._ReadExportInfo()
9680 self._ReadExportParams(export_info)
9681 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9683 self._old_instance_name = None
9685 if (not self.cfg.GetVGName() and
9686 self.op.disk_template not in constants.DTS_NOT_LVM):
9687 raise errors.OpPrereqError("Cluster does not support lvm-based"
9688 " instances", errors.ECODE_STATE)
9690 if (self.op.hypervisor is None or
9691 self.op.hypervisor == constants.VALUE_AUTO):
9692 self.op.hypervisor = self.cfg.GetHypervisorType()
9694 cluster = self.cfg.GetClusterInfo()
9695 enabled_hvs = cluster.enabled_hypervisors
9696 if self.op.hypervisor not in enabled_hvs:
9697 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9698 " cluster (%s)" % (self.op.hypervisor,
9699 ",".join(enabled_hvs)),
9702 # Check tag validity
9703 for tag in self.op.tags:
9704 objects.TaggableObject.ValidateTag(tag)
9706 # check hypervisor parameter syntax (locally)
9707 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9708 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9710 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9711 hv_type.CheckParameterSyntax(filled_hvp)
9712 self.hv_full = filled_hvp
9713 # check that we don't specify global parameters on an instance
9714 _CheckGlobalHvParams(self.op.hvparams)
9716 # fill and remember the beparams dict
9717 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9718 for param, value in self.op.beparams.iteritems():
9719 if value == constants.VALUE_AUTO:
9720 self.op.beparams[param] = default_beparams[param]
9721 objects.UpgradeBeParams(self.op.beparams)
9722 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9723 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9725 # build os parameters
9726 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9728 # now that hvp/bep are in final format, let's reset to defaults,
9730 if self.op.identify_defaults:
9731 self._RevertToDefaults(cluster)
9735 for idx, nic in enumerate(self.op.nics):
9736 nic_mode_req = nic.get(constants.INIC_MODE, None)
9737 nic_mode = nic_mode_req
9738 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9739 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9741 # in routed mode, for the first nic, the default ip is 'auto'
9742 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9743 default_ip_mode = constants.VALUE_AUTO
9745 default_ip_mode = constants.VALUE_NONE
9747 # ip validity checks
9748 ip = nic.get(constants.INIC_IP, default_ip_mode)
9749 if ip is None or ip.lower() == constants.VALUE_NONE:
9751 elif ip.lower() == constants.VALUE_AUTO:
9752 if not self.op.name_check:
9753 raise errors.OpPrereqError("IP address set to auto but name checks"
9754 " have been skipped",
9756 nic_ip = self.hostname1.ip
9758 if not netutils.IPAddress.IsValid(ip):
9759 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9763 # TODO: check the ip address for uniqueness
9764 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9765 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9768 # MAC address verification
9769 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9770 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9771 mac = utils.NormalizeAndValidateMac(mac)
9774 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9775 except errors.ReservationError:
9776 raise errors.OpPrereqError("MAC address %s already in use"
9777 " in cluster" % mac,
9778 errors.ECODE_NOTUNIQUE)
9780 # Build nic parameters
9781 link = nic.get(constants.INIC_LINK, None)
9782 if link == constants.VALUE_AUTO:
9783 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9786 nicparams[constants.NIC_MODE] = nic_mode
9788 nicparams[constants.NIC_LINK] = link
9790 check_params = cluster.SimpleFillNIC(nicparams)
9791 objects.NIC.CheckParameterSyntax(check_params)
9792 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9794 # disk checks/pre-build
9795 default_vg = self.cfg.GetVGName()
9797 for disk in self.op.disks:
9798 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9799 if mode not in constants.DISK_ACCESS_SET:
9800 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9801 mode, errors.ECODE_INVAL)
9802 size = disk.get(constants.IDISK_SIZE, None)
9804 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9807 except (TypeError, ValueError):
9808 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9811 data_vg = disk.get(constants.IDISK_VG, default_vg)
9813 constants.IDISK_SIZE: size,
9814 constants.IDISK_MODE: mode,
9815 constants.IDISK_VG: data_vg,
9817 if constants.IDISK_METAVG in disk:
9818 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9819 if constants.IDISK_ADOPT in disk:
9820 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9821 self.disks.append(new_disk)
9823 if self.op.mode == constants.INSTANCE_IMPORT:
9825 for idx in range(len(self.disks)):
9826 option = "disk%d_dump" % idx
9827 if export_info.has_option(constants.INISECT_INS, option):
9828 # FIXME: are the old os-es, disk sizes, etc. useful?
9829 export_name = export_info.get(constants.INISECT_INS, option)
9830 image = utils.PathJoin(self.op.src_path, export_name)
9831 disk_images.append(image)
9833 disk_images.append(False)
9835 self.src_images = disk_images
9837 if self.op.instance_name == self._old_instance_name:
9838 for idx, nic in enumerate(self.nics):
9839 if nic.mac == constants.VALUE_AUTO:
9840 nic_mac_ini = "nic%d_mac" % idx
9841 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9843 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9845 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9846 if self.op.ip_check:
9847 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9848 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9849 (self.check_ip, self.op.instance_name),
9850 errors.ECODE_NOTUNIQUE)
9852 #### mac address generation
9853 # By generating here the mac address both the allocator and the hooks get
9854 # the real final mac address rather than the 'auto' or 'generate' value.
9855 # There is a race condition between the generation and the instance object
9856 # creation, which means that we know the mac is valid now, but we're not
9857 # sure it will be when we actually add the instance. If things go bad
9858 # adding the instance will abort because of a duplicate mac, and the
9859 # creation job will fail.
9860 for nic in self.nics:
9861 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9862 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9866 if self.op.iallocator is not None:
9867 self._RunAllocator()
9869 # Release all unneeded node locks
9870 _ReleaseLocks(self, locking.LEVEL_NODE,
9871 keep=filter(None, [self.op.pnode, self.op.snode,
9873 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9874 keep=filter(None, [self.op.pnode, self.op.snode,
9877 #### node related checks
9879 # check primary node
9880 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9881 assert self.pnode is not None, \
9882 "Cannot retrieve locked node %s" % self.op.pnode
9884 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9885 pnode.name, errors.ECODE_STATE)
9887 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9888 pnode.name, errors.ECODE_STATE)
9889 if not pnode.vm_capable:
9890 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9891 " '%s'" % pnode.name, errors.ECODE_STATE)
9893 self.secondaries = []
9895 # mirror node verification
9896 if self.op.disk_template in constants.DTS_INT_MIRROR:
9897 if self.op.snode == pnode.name:
9898 raise errors.OpPrereqError("The secondary node cannot be the"
9899 " primary node", errors.ECODE_INVAL)
9900 _CheckNodeOnline(self, self.op.snode)
9901 _CheckNodeNotDrained(self, self.op.snode)
9902 _CheckNodeVmCapable(self, self.op.snode)
9903 self.secondaries.append(self.op.snode)
9905 snode = self.cfg.GetNodeInfo(self.op.snode)
9906 if pnode.group != snode.group:
9907 self.LogWarning("The primary and secondary nodes are in two"
9908 " different node groups; the disk parameters"
9909 " from the first disk's node group will be"
9912 nodenames = [pnode.name] + self.secondaries
9914 # Verify instance specs
9915 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
9917 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9918 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9919 constants.ISPEC_DISK_COUNT: len(self.disks),
9920 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9921 constants.ISPEC_NIC_COUNT: len(self.nics),
9922 constants.ISPEC_SPINDLE_USE: spindle_use,
9925 group_info = self.cfg.GetNodeGroup(pnode.group)
9926 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9927 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9928 if not self.op.ignore_ipolicy and res:
9929 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9930 " policy: %s") % (pnode.group,
9931 utils.CommaJoin(res)),
9934 if not self.adopt_disks:
9935 if self.op.disk_template == constants.DT_RBD:
9936 # _CheckRADOSFreeSpace() is just a placeholder.
9937 # Any function that checks prerequisites can be placed here.
9938 # Check if there is enough space on the RADOS cluster.
9939 _CheckRADOSFreeSpace()
9941 # Check lv size requirements, if not adopting
9942 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9943 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9945 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9946 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9947 disk[constants.IDISK_ADOPT])
9948 for disk in self.disks])
9949 if len(all_lvs) != len(self.disks):
9950 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9952 for lv_name in all_lvs:
9954 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9955 # to ReserveLV uses the same syntax
9956 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9957 except errors.ReservationError:
9958 raise errors.OpPrereqError("LV named %s used by another instance" %
9959 lv_name, errors.ECODE_NOTUNIQUE)
9961 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9962 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9964 node_lvs = self.rpc.call_lv_list([pnode.name],
9965 vg_names.payload.keys())[pnode.name]
9966 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9967 node_lvs = node_lvs.payload
9969 delta = all_lvs.difference(node_lvs.keys())
9971 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9972 utils.CommaJoin(delta),
9974 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9976 raise errors.OpPrereqError("Online logical volumes found, cannot"
9977 " adopt: %s" % utils.CommaJoin(online_lvs),
9979 # update the size of disk based on what is found
9980 for dsk in self.disks:
9981 dsk[constants.IDISK_SIZE] = \
9982 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9983 dsk[constants.IDISK_ADOPT])][0]))
9985 elif self.op.disk_template == constants.DT_BLOCK:
9986 # Normalize and de-duplicate device paths
9987 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9988 for disk in self.disks])
9989 if len(all_disks) != len(self.disks):
9990 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9992 baddisks = [d for d in all_disks
9993 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9995 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9996 " cannot be adopted" %
9997 (", ".join(baddisks),
9998 constants.ADOPTABLE_BLOCKDEV_ROOT),
10001 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10002 list(all_disks))[pnode.name]
10003 node_disks.Raise("Cannot get block device information from node %s" %
10005 node_disks = node_disks.payload
10006 delta = all_disks.difference(node_disks.keys())
10008 raise errors.OpPrereqError("Missing block device(s): %s" %
10009 utils.CommaJoin(delta),
10010 errors.ECODE_INVAL)
10011 for dsk in self.disks:
10012 dsk[constants.IDISK_SIZE] = \
10013 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10015 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10017 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10018 # check OS parameters (remotely)
10019 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10021 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10023 # memory check on primary node
10024 #TODO(dynmem): use MINMEM for checking
10026 _CheckNodeFreeMemory(self, self.pnode.name,
10027 "creating instance %s" % self.op.instance_name,
10028 self.be_full[constants.BE_MAXMEM],
10029 self.op.hypervisor)
10031 self.dry_run_result = list(nodenames)
10033 def Exec(self, feedback_fn):
10034 """Create and add the instance to the cluster.
10037 instance = self.op.instance_name
10038 pnode_name = self.pnode.name
10040 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10041 self.owned_locks(locking.LEVEL_NODE)), \
10042 "Node locks differ from node resource locks"
10044 ht_kind = self.op.hypervisor
10045 if ht_kind in constants.HTS_REQ_PORT:
10046 network_port = self.cfg.AllocatePort()
10048 network_port = None
10050 # This is ugly but we got a chicken-egg problem here
10051 # We can only take the group disk parameters, as the instance
10052 # has no disks yet (we are generating them right here).
10053 node = self.cfg.GetNodeInfo(pnode_name)
10054 nodegroup = self.cfg.GetNodeGroup(node.group)
10055 disks = _GenerateDiskTemplate(self,
10056 self.op.disk_template,
10057 instance, pnode_name,
10060 self.instance_file_storage_dir,
10061 self.op.file_driver,
10064 self.cfg.GetGroupDiskParams(nodegroup))
10066 iobj = objects.Instance(name=instance, os=self.op.os_type,
10067 primary_node=pnode_name,
10068 nics=self.nics, disks=disks,
10069 disk_template=self.op.disk_template,
10070 admin_state=constants.ADMINST_DOWN,
10071 network_port=network_port,
10072 beparams=self.op.beparams,
10073 hvparams=self.op.hvparams,
10074 hypervisor=self.op.hypervisor,
10075 osparams=self.op.osparams,
10079 for tag in self.op.tags:
10082 if self.adopt_disks:
10083 if self.op.disk_template == constants.DT_PLAIN:
10084 # rename LVs to the newly-generated names; we need to construct
10085 # 'fake' LV disks with the old data, plus the new unique_id
10086 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10088 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10089 rename_to.append(t_dsk.logical_id)
10090 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10091 self.cfg.SetDiskID(t_dsk, pnode_name)
10092 result = self.rpc.call_blockdev_rename(pnode_name,
10093 zip(tmp_disks, rename_to))
10094 result.Raise("Failed to rename adoped LVs")
10096 feedback_fn("* creating instance disks...")
10098 _CreateDisks(self, iobj)
10099 except errors.OpExecError:
10100 self.LogWarning("Device creation failed, reverting...")
10102 _RemoveDisks(self, iobj)
10104 self.cfg.ReleaseDRBDMinors(instance)
10107 feedback_fn("adding instance %s to cluster config" % instance)
10109 self.cfg.AddInstance(iobj, self.proc.GetECId())
10111 # Declare that we don't want to remove the instance lock anymore, as we've
10112 # added the instance to the config
10113 del self.remove_locks[locking.LEVEL_INSTANCE]
10115 if self.op.mode == constants.INSTANCE_IMPORT:
10116 # Release unused nodes
10117 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10119 # Release all nodes
10120 _ReleaseLocks(self, locking.LEVEL_NODE)
10123 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10124 feedback_fn("* wiping instance disks...")
10126 _WipeDisks(self, iobj)
10127 except errors.OpExecError, err:
10128 logging.exception("Wiping disks failed")
10129 self.LogWarning("Wiping instance disks failed (%s)", err)
10133 # Something is already wrong with the disks, don't do anything else
10135 elif self.op.wait_for_sync:
10136 disk_abort = not _WaitForSync(self, iobj)
10137 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10138 # make sure the disks are not degraded (still sync-ing is ok)
10139 feedback_fn("* checking mirrors status")
10140 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10145 _RemoveDisks(self, iobj)
10146 self.cfg.RemoveInstance(iobj.name)
10147 # Make sure the instance lock gets removed
10148 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10149 raise errors.OpExecError("There are some degraded disks for"
10152 # Release all node resource locks
10153 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10155 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10156 # we need to set the disks ID to the primary node, since the
10157 # preceding code might or might have not done it, depending on
10158 # disk template and other options
10159 for disk in iobj.disks:
10160 self.cfg.SetDiskID(disk, pnode_name)
10161 if self.op.mode == constants.INSTANCE_CREATE:
10162 if not self.op.no_install:
10163 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10164 not self.op.wait_for_sync)
10166 feedback_fn("* pausing disk sync to install instance OS")
10167 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10170 for idx, success in enumerate(result.payload):
10172 logging.warn("pause-sync of instance %s for disk %d failed",
10175 feedback_fn("* running the instance OS create scripts...")
10176 # FIXME: pass debug option from opcode to backend
10178 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10179 self.op.debug_level)
10181 feedback_fn("* resuming disk sync")
10182 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10185 for idx, success in enumerate(result.payload):
10187 logging.warn("resume-sync of instance %s for disk %d failed",
10190 os_add_result.Raise("Could not add os for instance %s"
10191 " on node %s" % (instance, pnode_name))
10194 if self.op.mode == constants.INSTANCE_IMPORT:
10195 feedback_fn("* running the instance OS import scripts...")
10199 for idx, image in enumerate(self.src_images):
10203 # FIXME: pass debug option from opcode to backend
10204 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10205 constants.IEIO_FILE, (image, ),
10206 constants.IEIO_SCRIPT,
10207 (iobj.disks[idx], idx),
10209 transfers.append(dt)
10212 masterd.instance.TransferInstanceData(self, feedback_fn,
10213 self.op.src_node, pnode_name,
10214 self.pnode.secondary_ip,
10216 if not compat.all(import_result):
10217 self.LogWarning("Some disks for instance %s on node %s were not"
10218 " imported successfully" % (instance, pnode_name))
10220 rename_from = self._old_instance_name
10222 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10223 feedback_fn("* preparing remote import...")
10224 # The source cluster will stop the instance before attempting to make
10225 # a connection. In some cases stopping an instance can take a long
10226 # time, hence the shutdown timeout is added to the connection
10228 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10229 self.op.source_shutdown_timeout)
10230 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10232 assert iobj.primary_node == self.pnode.name
10234 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10235 self.source_x509_ca,
10236 self._cds, timeouts)
10237 if not compat.all(disk_results):
10238 # TODO: Should the instance still be started, even if some disks
10239 # failed to import (valid for local imports, too)?
10240 self.LogWarning("Some disks for instance %s on node %s were not"
10241 " imported successfully" % (instance, pnode_name))
10243 rename_from = self.source_instance_name
10246 # also checked in the prereq part
10247 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10250 # Run rename script on newly imported instance
10251 assert iobj.name == instance
10252 feedback_fn("Running rename script for %s" % instance)
10253 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10255 self.op.debug_level)
10256 if result.fail_msg:
10257 self.LogWarning("Failed to run rename script for %s on node"
10258 " %s: %s" % (instance, pnode_name, result.fail_msg))
10260 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10263 iobj.admin_state = constants.ADMINST_UP
10264 self.cfg.Update(iobj, feedback_fn)
10265 logging.info("Starting instance %s on node %s", instance, pnode_name)
10266 feedback_fn("* starting instance...")
10267 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10269 result.Raise("Could not start instance")
10271 return list(iobj.all_nodes)
10274 def _CheckRADOSFreeSpace():
10275 """Compute disk size requirements inside the RADOS cluster.
10278 # For the RADOS cluster we assume there is always enough space.
10282 class LUInstanceConsole(NoHooksLU):
10283 """Connect to an instance's console.
10285 This is somewhat special in that it returns the command line that
10286 you need to run on the master node in order to connect to the
10292 def ExpandNames(self):
10293 self.share_locks = _ShareAll()
10294 self._ExpandAndLockInstance()
10296 def CheckPrereq(self):
10297 """Check prerequisites.
10299 This checks that the instance is in the cluster.
10302 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10303 assert self.instance is not None, \
10304 "Cannot retrieve locked instance %s" % self.op.instance_name
10305 _CheckNodeOnline(self, self.instance.primary_node)
10307 def Exec(self, feedback_fn):
10308 """Connect to the console of an instance
10311 instance = self.instance
10312 node = instance.primary_node
10314 node_insts = self.rpc.call_instance_list([node],
10315 [instance.hypervisor])[node]
10316 node_insts.Raise("Can't get node information from %s" % node)
10318 if instance.name not in node_insts.payload:
10319 if instance.admin_state == constants.ADMINST_UP:
10320 state = constants.INSTST_ERRORDOWN
10321 elif instance.admin_state == constants.ADMINST_DOWN:
10322 state = constants.INSTST_ADMINDOWN
10324 state = constants.INSTST_ADMINOFFLINE
10325 raise errors.OpExecError("Instance %s is not running (state %s)" %
10326 (instance.name, state))
10328 logging.debug("Connecting to console of %s on %s", instance.name, node)
10330 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10333 def _GetInstanceConsole(cluster, instance):
10334 """Returns console information for an instance.
10336 @type cluster: L{objects.Cluster}
10337 @type instance: L{objects.Instance}
10341 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10342 # beparams and hvparams are passed separately, to avoid editing the
10343 # instance and then saving the defaults in the instance itself.
10344 hvparams = cluster.FillHV(instance)
10345 beparams = cluster.FillBE(instance)
10346 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10348 assert console.instance == instance.name
10349 assert console.Validate()
10351 return console.ToDict()
10354 class LUInstanceReplaceDisks(LogicalUnit):
10355 """Replace the disks of an instance.
10358 HPATH = "mirrors-replace"
10359 HTYPE = constants.HTYPE_INSTANCE
10362 def CheckArguments(self):
10363 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10364 self.op.iallocator)
10366 def ExpandNames(self):
10367 self._ExpandAndLockInstance()
10369 assert locking.LEVEL_NODE not in self.needed_locks
10370 assert locking.LEVEL_NODE_RES not in self.needed_locks
10371 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10373 assert self.op.iallocator is None or self.op.remote_node is None, \
10374 "Conflicting options"
10376 if self.op.remote_node is not None:
10377 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10379 # Warning: do not remove the locking of the new secondary here
10380 # unless DRBD8.AddChildren is changed to work in parallel;
10381 # currently it doesn't since parallel invocations of
10382 # FindUnusedMinor will conflict
10383 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10384 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10386 self.needed_locks[locking.LEVEL_NODE] = []
10387 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10389 if self.op.iallocator is not None:
10390 # iallocator will select a new node in the same group
10391 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10393 self.needed_locks[locking.LEVEL_NODE_RES] = []
10395 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10396 self.op.iallocator, self.op.remote_node,
10397 self.op.disks, False, self.op.early_release,
10398 self.op.ignore_ipolicy)
10400 self.tasklets = [self.replacer]
10402 def DeclareLocks(self, level):
10403 if level == locking.LEVEL_NODEGROUP:
10404 assert self.op.remote_node is None
10405 assert self.op.iallocator is not None
10406 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10408 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10409 # Lock all groups used by instance optimistically; this requires going
10410 # via the node before it's locked, requiring verification later on
10411 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10412 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10414 elif level == locking.LEVEL_NODE:
10415 if self.op.iallocator is not None:
10416 assert self.op.remote_node is None
10417 assert not self.needed_locks[locking.LEVEL_NODE]
10419 # Lock member nodes of all locked groups
10420 self.needed_locks[locking.LEVEL_NODE] = [node_name
10421 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10422 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10424 self._LockInstancesNodes()
10425 elif level == locking.LEVEL_NODE_RES:
10427 self.needed_locks[locking.LEVEL_NODE_RES] = \
10428 self.needed_locks[locking.LEVEL_NODE]
10430 def BuildHooksEnv(self):
10431 """Build hooks env.
10433 This runs on the master, the primary and all the secondaries.
10436 instance = self.replacer.instance
10438 "MODE": self.op.mode,
10439 "NEW_SECONDARY": self.op.remote_node,
10440 "OLD_SECONDARY": instance.secondary_nodes[0],
10442 env.update(_BuildInstanceHookEnvByObject(self, instance))
10445 def BuildHooksNodes(self):
10446 """Build hooks nodes.
10449 instance = self.replacer.instance
10451 self.cfg.GetMasterNode(),
10452 instance.primary_node,
10454 if self.op.remote_node is not None:
10455 nl.append(self.op.remote_node)
10458 def CheckPrereq(self):
10459 """Check prerequisites.
10462 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10463 self.op.iallocator is None)
10465 # Verify if node group locks are still correct
10466 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10468 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10470 return LogicalUnit.CheckPrereq(self)
10473 class TLReplaceDisks(Tasklet):
10474 """Replaces disks for an instance.
10476 Note: Locking is not within the scope of this class.
10479 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10480 disks, delay_iallocator, early_release, ignore_ipolicy):
10481 """Initializes this class.
10484 Tasklet.__init__(self, lu)
10487 self.instance_name = instance_name
10489 self.iallocator_name = iallocator_name
10490 self.remote_node = remote_node
10492 self.delay_iallocator = delay_iallocator
10493 self.early_release = early_release
10494 self.ignore_ipolicy = ignore_ipolicy
10497 self.instance = None
10498 self.new_node = None
10499 self.target_node = None
10500 self.other_node = None
10501 self.remote_node_info = None
10502 self.node_secondary_ip = None
10505 def CheckArguments(mode, remote_node, iallocator):
10506 """Helper function for users of this class.
10509 # check for valid parameter combination
10510 if mode == constants.REPLACE_DISK_CHG:
10511 if remote_node is None and iallocator is None:
10512 raise errors.OpPrereqError("When changing the secondary either an"
10513 " iallocator script must be used or the"
10514 " new node given", errors.ECODE_INVAL)
10516 if remote_node is not None and iallocator is not None:
10517 raise errors.OpPrereqError("Give either the iallocator or the new"
10518 " secondary, not both", errors.ECODE_INVAL)
10520 elif remote_node is not None or iallocator is not None:
10521 # Not replacing the secondary
10522 raise errors.OpPrereqError("The iallocator and new node options can"
10523 " only be used when changing the"
10524 " secondary node", errors.ECODE_INVAL)
10527 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10528 """Compute a new secondary node using an IAllocator.
10531 ial = IAllocator(lu.cfg, lu.rpc,
10532 mode=constants.IALLOCATOR_MODE_RELOC,
10533 name=instance_name,
10534 relocate_from=list(relocate_from))
10536 ial.Run(iallocator_name)
10538 if not ial.success:
10539 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10540 " %s" % (iallocator_name, ial.info),
10541 errors.ECODE_NORES)
10543 if len(ial.result) != ial.required_nodes:
10544 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10545 " of nodes (%s), required %s" %
10547 len(ial.result), ial.required_nodes),
10548 errors.ECODE_FAULT)
10550 remote_node_name = ial.result[0]
10552 lu.LogInfo("Selected new secondary for instance '%s': %s",
10553 instance_name, remote_node_name)
10555 return remote_node_name
10557 def _FindFaultyDisks(self, node_name):
10558 """Wrapper for L{_FindFaultyInstanceDisks}.
10561 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10564 def _CheckDisksActivated(self, instance):
10565 """Checks if the instance disks are activated.
10567 @param instance: The instance to check disks
10568 @return: True if they are activated, False otherwise
10571 nodes = instance.all_nodes
10573 for idx, dev in enumerate(instance.disks):
10575 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10576 self.cfg.SetDiskID(dev, node)
10578 result = _BlockdevFind(self, node, dev, instance)
10582 elif result.fail_msg or not result.payload:
10587 def CheckPrereq(self):
10588 """Check prerequisites.
10590 This checks that the instance is in the cluster.
10593 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10594 assert instance is not None, \
10595 "Cannot retrieve locked instance %s" % self.instance_name
10597 if instance.disk_template != constants.DT_DRBD8:
10598 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10599 " instances", errors.ECODE_INVAL)
10601 if len(instance.secondary_nodes) != 1:
10602 raise errors.OpPrereqError("The instance has a strange layout,"
10603 " expected one secondary but found %d" %
10604 len(instance.secondary_nodes),
10605 errors.ECODE_FAULT)
10607 if not self.delay_iallocator:
10608 self._CheckPrereq2()
10610 def _CheckPrereq2(self):
10611 """Check prerequisites, second part.
10613 This function should always be part of CheckPrereq. It was separated and is
10614 now called from Exec because during node evacuation iallocator was only
10615 called with an unmodified cluster model, not taking planned changes into
10619 instance = self.instance
10620 secondary_node = instance.secondary_nodes[0]
10622 if self.iallocator_name is None:
10623 remote_node = self.remote_node
10625 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10626 instance.name, instance.secondary_nodes)
10628 if remote_node is None:
10629 self.remote_node_info = None
10631 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10632 "Remote node '%s' is not locked" % remote_node
10634 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10635 assert self.remote_node_info is not None, \
10636 "Cannot retrieve locked node %s" % remote_node
10638 if remote_node == self.instance.primary_node:
10639 raise errors.OpPrereqError("The specified node is the primary node of"
10640 " the instance", errors.ECODE_INVAL)
10642 if remote_node == secondary_node:
10643 raise errors.OpPrereqError("The specified node is already the"
10644 " secondary node of the instance",
10645 errors.ECODE_INVAL)
10647 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10648 constants.REPLACE_DISK_CHG):
10649 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10650 errors.ECODE_INVAL)
10652 if self.mode == constants.REPLACE_DISK_AUTO:
10653 if not self._CheckDisksActivated(instance):
10654 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10655 " first" % self.instance_name,
10656 errors.ECODE_STATE)
10657 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10658 faulty_secondary = self._FindFaultyDisks(secondary_node)
10660 if faulty_primary and faulty_secondary:
10661 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10662 " one node and can not be repaired"
10663 " automatically" % self.instance_name,
10664 errors.ECODE_STATE)
10667 self.disks = faulty_primary
10668 self.target_node = instance.primary_node
10669 self.other_node = secondary_node
10670 check_nodes = [self.target_node, self.other_node]
10671 elif faulty_secondary:
10672 self.disks = faulty_secondary
10673 self.target_node = secondary_node
10674 self.other_node = instance.primary_node
10675 check_nodes = [self.target_node, self.other_node]
10681 # Non-automatic modes
10682 if self.mode == constants.REPLACE_DISK_PRI:
10683 self.target_node = instance.primary_node
10684 self.other_node = secondary_node
10685 check_nodes = [self.target_node, self.other_node]
10687 elif self.mode == constants.REPLACE_DISK_SEC:
10688 self.target_node = secondary_node
10689 self.other_node = instance.primary_node
10690 check_nodes = [self.target_node, self.other_node]
10692 elif self.mode == constants.REPLACE_DISK_CHG:
10693 self.new_node = remote_node
10694 self.other_node = instance.primary_node
10695 self.target_node = secondary_node
10696 check_nodes = [self.new_node, self.other_node]
10698 _CheckNodeNotDrained(self.lu, remote_node)
10699 _CheckNodeVmCapable(self.lu, remote_node)
10701 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10702 assert old_node_info is not None
10703 if old_node_info.offline and not self.early_release:
10704 # doesn't make sense to delay the release
10705 self.early_release = True
10706 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10707 " early-release mode", secondary_node)
10710 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10713 # If not specified all disks should be replaced
10715 self.disks = range(len(self.instance.disks))
10717 # TODO: This is ugly, but right now we can't distinguish between internal
10718 # submitted opcode and external one. We should fix that.
10719 if self.remote_node_info:
10720 # We change the node, lets verify it still meets instance policy
10721 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10722 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10724 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10725 ignore=self.ignore_ipolicy)
10727 for node in check_nodes:
10728 _CheckNodeOnline(self.lu, node)
10730 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10733 if node_name is not None)
10735 # Release unneeded node and node resource locks
10736 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10737 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10739 # Release any owned node group
10740 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10741 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10743 # Check whether disks are valid
10744 for disk_idx in self.disks:
10745 instance.FindDisk(disk_idx)
10747 # Get secondary node IP addresses
10748 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10749 in self.cfg.GetMultiNodeInfo(touched_nodes))
10751 def Exec(self, feedback_fn):
10752 """Execute disk replacement.
10754 This dispatches the disk replacement to the appropriate handler.
10757 if self.delay_iallocator:
10758 self._CheckPrereq2()
10761 # Verify owned locks before starting operation
10762 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10763 assert set(owned_nodes) == set(self.node_secondary_ip), \
10764 ("Incorrect node locks, owning %s, expected %s" %
10765 (owned_nodes, self.node_secondary_ip.keys()))
10766 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10767 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10769 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10770 assert list(owned_instances) == [self.instance_name], \
10771 "Instance '%s' not locked" % self.instance_name
10773 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10774 "Should not own any node group lock at this point"
10777 feedback_fn("No disks need replacement")
10780 feedback_fn("Replacing disk(s) %s for %s" %
10781 (utils.CommaJoin(self.disks), self.instance.name))
10783 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10785 # Activate the instance disks if we're replacing them on a down instance
10787 _StartInstanceDisks(self.lu, self.instance, True)
10790 # Should we replace the secondary node?
10791 if self.new_node is not None:
10792 fn = self._ExecDrbd8Secondary
10794 fn = self._ExecDrbd8DiskOnly
10796 result = fn(feedback_fn)
10798 # Deactivate the instance disks if we're replacing them on a
10801 _SafeShutdownInstanceDisks(self.lu, self.instance)
10803 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10806 # Verify owned locks
10807 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10808 nodes = frozenset(self.node_secondary_ip)
10809 assert ((self.early_release and not owned_nodes) or
10810 (not self.early_release and not (set(owned_nodes) - nodes))), \
10811 ("Not owning the correct locks, early_release=%s, owned=%r,"
10812 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10816 def _CheckVolumeGroup(self, nodes):
10817 self.lu.LogInfo("Checking volume groups")
10819 vgname = self.cfg.GetVGName()
10821 # Make sure volume group exists on all involved nodes
10822 results = self.rpc.call_vg_list(nodes)
10824 raise errors.OpExecError("Can't list volume groups on the nodes")
10827 res = results[node]
10828 res.Raise("Error checking node %s" % node)
10829 if vgname not in res.payload:
10830 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10833 def _CheckDisksExistence(self, nodes):
10834 # Check disk existence
10835 for idx, dev in enumerate(self.instance.disks):
10836 if idx not in self.disks:
10840 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10841 self.cfg.SetDiskID(dev, node)
10843 result = _BlockdevFind(self, node, dev, self.instance)
10845 msg = result.fail_msg
10846 if msg or not result.payload:
10848 msg = "disk not found"
10849 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10852 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10853 for idx, dev in enumerate(self.instance.disks):
10854 if idx not in self.disks:
10857 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10860 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10861 on_primary, ldisk=ldisk):
10862 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10863 " replace disks for instance %s" %
10864 (node_name, self.instance.name))
10866 def _CreateNewStorage(self, node_name):
10867 """Create new storage on the primary or secondary node.
10869 This is only used for same-node replaces, not for changing the
10870 secondary node, hence we don't want to modify the existing disk.
10875 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10876 for idx, dev in enumerate(disks):
10877 if idx not in self.disks:
10880 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10882 self.cfg.SetDiskID(dev, node_name)
10884 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10885 names = _GenerateUniqueNames(self.lu, lv_names)
10887 (data_disk, meta_disk) = dev.children
10888 vg_data = data_disk.logical_id[0]
10889 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10890 logical_id=(vg_data, names[0]),
10891 params=data_disk.params)
10892 vg_meta = meta_disk.logical_id[0]
10893 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10894 logical_id=(vg_meta, names[1]),
10895 params=meta_disk.params)
10897 new_lvs = [lv_data, lv_meta]
10898 old_lvs = [child.Copy() for child in dev.children]
10899 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10901 # we pass force_create=True to force the LVM creation
10902 for new_lv in new_lvs:
10903 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10904 _GetInstanceInfoText(self.instance), False)
10908 def _CheckDevices(self, node_name, iv_names):
10909 for name, (dev, _, _) in iv_names.iteritems():
10910 self.cfg.SetDiskID(dev, node_name)
10912 result = _BlockdevFind(self, node_name, dev, self.instance)
10914 msg = result.fail_msg
10915 if msg or not result.payload:
10917 msg = "disk not found"
10918 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10921 if result.payload.is_degraded:
10922 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10924 def _RemoveOldStorage(self, node_name, iv_names):
10925 for name, (_, old_lvs, _) in iv_names.iteritems():
10926 self.lu.LogInfo("Remove logical volumes for %s" % name)
10929 self.cfg.SetDiskID(lv, node_name)
10931 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10933 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10934 hint="remove unused LVs manually")
10936 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10937 """Replace a disk on the primary or secondary for DRBD 8.
10939 The algorithm for replace is quite complicated:
10941 1. for each disk to be replaced:
10943 1. create new LVs on the target node with unique names
10944 1. detach old LVs from the drbd device
10945 1. rename old LVs to name_replaced.<time_t>
10946 1. rename new LVs to old LVs
10947 1. attach the new LVs (with the old names now) to the drbd device
10949 1. wait for sync across all devices
10951 1. for each modified disk:
10953 1. remove old LVs (which have the name name_replaces.<time_t>)
10955 Failures are not very well handled.
10960 # Step: check device activation
10961 self.lu.LogStep(1, steps_total, "Check device existence")
10962 self._CheckDisksExistence([self.other_node, self.target_node])
10963 self._CheckVolumeGroup([self.target_node, self.other_node])
10965 # Step: check other node consistency
10966 self.lu.LogStep(2, steps_total, "Check peer consistency")
10967 self._CheckDisksConsistency(self.other_node,
10968 self.other_node == self.instance.primary_node,
10971 # Step: create new storage
10972 self.lu.LogStep(3, steps_total, "Allocate new storage")
10973 iv_names = self._CreateNewStorage(self.target_node)
10975 # Step: for each lv, detach+rename*2+attach
10976 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10977 for dev, old_lvs, new_lvs in iv_names.itervalues():
10978 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10980 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10982 result.Raise("Can't detach drbd from local storage on node"
10983 " %s for device %s" % (self.target_node, dev.iv_name))
10985 #cfg.Update(instance)
10987 # ok, we created the new LVs, so now we know we have the needed
10988 # storage; as such, we proceed on the target node to rename
10989 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10990 # using the assumption that logical_id == physical_id (which in
10991 # turn is the unique_id on that node)
10993 # FIXME(iustin): use a better name for the replaced LVs
10994 temp_suffix = int(time.time())
10995 ren_fn = lambda d, suff: (d.physical_id[0],
10996 d.physical_id[1] + "_replaced-%s" % suff)
10998 # Build the rename list based on what LVs exist on the node
10999 rename_old_to_new = []
11000 for to_ren in old_lvs:
11001 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11002 if not result.fail_msg and result.payload:
11004 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11006 self.lu.LogInfo("Renaming the old LVs on the target node")
11007 result = self.rpc.call_blockdev_rename(self.target_node,
11009 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11011 # Now we rename the new LVs to the old LVs
11012 self.lu.LogInfo("Renaming the new LVs on the target node")
11013 rename_new_to_old = [(new, old.physical_id)
11014 for old, new in zip(old_lvs, new_lvs)]
11015 result = self.rpc.call_blockdev_rename(self.target_node,
11017 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11019 # Intermediate steps of in memory modifications
11020 for old, new in zip(old_lvs, new_lvs):
11021 new.logical_id = old.logical_id
11022 self.cfg.SetDiskID(new, self.target_node)
11024 # We need to modify old_lvs so that removal later removes the
11025 # right LVs, not the newly added ones; note that old_lvs is a
11027 for disk in old_lvs:
11028 disk.logical_id = ren_fn(disk, temp_suffix)
11029 self.cfg.SetDiskID(disk, self.target_node)
11031 # Now that the new lvs have the old name, we can add them to the device
11032 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11033 result = self.rpc.call_blockdev_addchildren(self.target_node,
11034 (dev, self.instance), new_lvs)
11035 msg = result.fail_msg
11037 for new_lv in new_lvs:
11038 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11041 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11042 hint=("cleanup manually the unused logical"
11044 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11046 cstep = itertools.count(5)
11048 if self.early_release:
11049 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11050 self._RemoveOldStorage(self.target_node, iv_names)
11051 # TODO: Check if releasing locks early still makes sense
11052 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11054 # Release all resource locks except those used by the instance
11055 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11056 keep=self.node_secondary_ip.keys())
11058 # Release all node locks while waiting for sync
11059 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11061 # TODO: Can the instance lock be downgraded here? Take the optional disk
11062 # shutdown in the caller into consideration.
11065 # This can fail as the old devices are degraded and _WaitForSync
11066 # does a combined result over all disks, so we don't check its return value
11067 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11068 _WaitForSync(self.lu, self.instance)
11070 # Check all devices manually
11071 self._CheckDevices(self.instance.primary_node, iv_names)
11073 # Step: remove old storage
11074 if not self.early_release:
11075 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11076 self._RemoveOldStorage(self.target_node, iv_names)
11078 def _ExecDrbd8Secondary(self, feedback_fn):
11079 """Replace the secondary node for DRBD 8.
11081 The algorithm for replace is quite complicated:
11082 - for all disks of the instance:
11083 - create new LVs on the new node with same names
11084 - shutdown the drbd device on the old secondary
11085 - disconnect the drbd network on the primary
11086 - create the drbd device on the new secondary
11087 - network attach the drbd on the primary, using an artifice:
11088 the drbd code for Attach() will connect to the network if it
11089 finds a device which is connected to the good local disks but
11090 not network enabled
11091 - wait for sync across all devices
11092 - remove all disks from the old secondary
11094 Failures are not very well handled.
11099 pnode = self.instance.primary_node
11101 # Step: check device activation
11102 self.lu.LogStep(1, steps_total, "Check device existence")
11103 self._CheckDisksExistence([self.instance.primary_node])
11104 self._CheckVolumeGroup([self.instance.primary_node])
11106 # Step: check other node consistency
11107 self.lu.LogStep(2, steps_total, "Check peer consistency")
11108 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11110 # Step: create new storage
11111 self.lu.LogStep(3, steps_total, "Allocate new storage")
11112 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11113 for idx, dev in enumerate(disks):
11114 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11115 (self.new_node, idx))
11116 # we pass force_create=True to force LVM creation
11117 for new_lv in dev.children:
11118 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11119 True, _GetInstanceInfoText(self.instance), False)
11121 # Step 4: dbrd minors and drbd setups changes
11122 # after this, we must manually remove the drbd minors on both the
11123 # error and the success paths
11124 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11125 minors = self.cfg.AllocateDRBDMinor([self.new_node
11126 for dev in self.instance.disks],
11127 self.instance.name)
11128 logging.debug("Allocated minors %r", minors)
11131 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11132 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11133 (self.new_node, idx))
11134 # create new devices on new_node; note that we create two IDs:
11135 # one without port, so the drbd will be activated without
11136 # networking information on the new node at this stage, and one
11137 # with network, for the latter activation in step 4
11138 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11139 if self.instance.primary_node == o_node1:
11142 assert self.instance.primary_node == o_node2, "Three-node instance?"
11145 new_alone_id = (self.instance.primary_node, self.new_node, None,
11146 p_minor, new_minor, o_secret)
11147 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11148 p_minor, new_minor, o_secret)
11150 iv_names[idx] = (dev, dev.children, new_net_id)
11151 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11153 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11154 logical_id=new_alone_id,
11155 children=dev.children,
11158 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11161 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11163 _GetInstanceInfoText(self.instance), False)
11164 except errors.GenericError:
11165 self.cfg.ReleaseDRBDMinors(self.instance.name)
11168 # We have new devices, shutdown the drbd on the old secondary
11169 for idx, dev in enumerate(self.instance.disks):
11170 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11171 self.cfg.SetDiskID(dev, self.target_node)
11172 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11173 (dev, self.instance)).fail_msg
11175 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11176 "node: %s" % (idx, msg),
11177 hint=("Please cleanup this device manually as"
11178 " soon as possible"))
11180 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11181 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11182 self.instance.disks)[pnode]
11184 msg = result.fail_msg
11186 # detaches didn't succeed (unlikely)
11187 self.cfg.ReleaseDRBDMinors(self.instance.name)
11188 raise errors.OpExecError("Can't detach the disks from the network on"
11189 " old node: %s" % (msg,))
11191 # if we managed to detach at least one, we update all the disks of
11192 # the instance to point to the new secondary
11193 self.lu.LogInfo("Updating instance configuration")
11194 for dev, _, new_logical_id in iv_names.itervalues():
11195 dev.logical_id = new_logical_id
11196 self.cfg.SetDiskID(dev, self.instance.primary_node)
11198 self.cfg.Update(self.instance, feedback_fn)
11200 # Release all node locks (the configuration has been updated)
11201 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11203 # and now perform the drbd attach
11204 self.lu.LogInfo("Attaching primary drbds to new secondary"
11205 " (standalone => connected)")
11206 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11208 self.node_secondary_ip,
11209 (self.instance.disks, self.instance),
11210 self.instance.name,
11212 for to_node, to_result in result.items():
11213 msg = to_result.fail_msg
11215 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11217 hint=("please do a gnt-instance info to see the"
11218 " status of disks"))
11220 cstep = itertools.count(5)
11222 if self.early_release:
11223 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11224 self._RemoveOldStorage(self.target_node, iv_names)
11225 # TODO: Check if releasing locks early still makes sense
11226 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11228 # Release all resource locks except those used by the instance
11229 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11230 keep=self.node_secondary_ip.keys())
11232 # TODO: Can the instance lock be downgraded here? Take the optional disk
11233 # shutdown in the caller into consideration.
11236 # This can fail as the old devices are degraded and _WaitForSync
11237 # does a combined result over all disks, so we don't check its return value
11238 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11239 _WaitForSync(self.lu, self.instance)
11241 # Check all devices manually
11242 self._CheckDevices(self.instance.primary_node, iv_names)
11244 # Step: remove old storage
11245 if not self.early_release:
11246 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11247 self._RemoveOldStorage(self.target_node, iv_names)
11250 class LURepairNodeStorage(NoHooksLU):
11251 """Repairs the volume group on a node.
11256 def CheckArguments(self):
11257 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11259 storage_type = self.op.storage_type
11261 if (constants.SO_FIX_CONSISTENCY not in
11262 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11263 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11264 " repaired" % storage_type,
11265 errors.ECODE_INVAL)
11267 def ExpandNames(self):
11268 self.needed_locks = {
11269 locking.LEVEL_NODE: [self.op.node_name],
11272 def _CheckFaultyDisks(self, instance, node_name):
11273 """Ensure faulty disks abort the opcode or at least warn."""
11275 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11277 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11278 " node '%s'" % (instance.name, node_name),
11279 errors.ECODE_STATE)
11280 except errors.OpPrereqError, err:
11281 if self.op.ignore_consistency:
11282 self.proc.LogWarning(str(err.args[0]))
11286 def CheckPrereq(self):
11287 """Check prerequisites.
11290 # Check whether any instance on this node has faulty disks
11291 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11292 if inst.admin_state != constants.ADMINST_UP:
11294 check_nodes = set(inst.all_nodes)
11295 check_nodes.discard(self.op.node_name)
11296 for inst_node_name in check_nodes:
11297 self._CheckFaultyDisks(inst, inst_node_name)
11299 def Exec(self, feedback_fn):
11300 feedback_fn("Repairing storage unit '%s' on %s ..." %
11301 (self.op.name, self.op.node_name))
11303 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11304 result = self.rpc.call_storage_execute(self.op.node_name,
11305 self.op.storage_type, st_args,
11307 constants.SO_FIX_CONSISTENCY)
11308 result.Raise("Failed to repair storage unit '%s' on %s" %
11309 (self.op.name, self.op.node_name))
11312 class LUNodeEvacuate(NoHooksLU):
11313 """Evacuates instances off a list of nodes.
11318 _MODE2IALLOCATOR = {
11319 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11320 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11321 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11323 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11324 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11325 constants.IALLOCATOR_NEVAC_MODES)
11327 def CheckArguments(self):
11328 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11330 def ExpandNames(self):
11331 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11333 if self.op.remote_node is not None:
11334 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11335 assert self.op.remote_node
11337 if self.op.remote_node == self.op.node_name:
11338 raise errors.OpPrereqError("Can not use evacuated node as a new"
11339 " secondary node", errors.ECODE_INVAL)
11341 if self.op.mode != constants.NODE_EVAC_SEC:
11342 raise errors.OpPrereqError("Without the use of an iallocator only"
11343 " secondary instances can be evacuated",
11344 errors.ECODE_INVAL)
11347 self.share_locks = _ShareAll()
11348 self.needed_locks = {
11349 locking.LEVEL_INSTANCE: [],
11350 locking.LEVEL_NODEGROUP: [],
11351 locking.LEVEL_NODE: [],
11354 # Determine nodes (via group) optimistically, needs verification once locks
11355 # have been acquired
11356 self.lock_nodes = self._DetermineNodes()
11358 def _DetermineNodes(self):
11359 """Gets the list of nodes to operate on.
11362 if self.op.remote_node is None:
11363 # Iallocator will choose any node(s) in the same group
11364 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11366 group_nodes = frozenset([self.op.remote_node])
11368 # Determine nodes to be locked
11369 return set([self.op.node_name]) | group_nodes
11371 def _DetermineInstances(self):
11372 """Builds list of instances to operate on.
11375 assert self.op.mode in constants.NODE_EVAC_MODES
11377 if self.op.mode == constants.NODE_EVAC_PRI:
11378 # Primary instances only
11379 inst_fn = _GetNodePrimaryInstances
11380 assert self.op.remote_node is None, \
11381 "Evacuating primary instances requires iallocator"
11382 elif self.op.mode == constants.NODE_EVAC_SEC:
11383 # Secondary instances only
11384 inst_fn = _GetNodeSecondaryInstances
11387 assert self.op.mode == constants.NODE_EVAC_ALL
11388 inst_fn = _GetNodeInstances
11389 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11391 raise errors.OpPrereqError("Due to an issue with the iallocator"
11392 " interface it is not possible to evacuate"
11393 " all instances at once; specify explicitly"
11394 " whether to evacuate primary or secondary"
11396 errors.ECODE_INVAL)
11398 return inst_fn(self.cfg, self.op.node_name)
11400 def DeclareLocks(self, level):
11401 if level == locking.LEVEL_INSTANCE:
11402 # Lock instances optimistically, needs verification once node and group
11403 # locks have been acquired
11404 self.needed_locks[locking.LEVEL_INSTANCE] = \
11405 set(i.name for i in self._DetermineInstances())
11407 elif level == locking.LEVEL_NODEGROUP:
11408 # Lock node groups for all potential target nodes optimistically, needs
11409 # verification once nodes have been acquired
11410 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11411 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11413 elif level == locking.LEVEL_NODE:
11414 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11416 def CheckPrereq(self):
11418 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11419 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11420 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11422 need_nodes = self._DetermineNodes()
11424 if not owned_nodes.issuperset(need_nodes):
11425 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11426 " locks were acquired, current nodes are"
11427 " are '%s', used to be '%s'; retry the"
11429 (self.op.node_name,
11430 utils.CommaJoin(need_nodes),
11431 utils.CommaJoin(owned_nodes)),
11432 errors.ECODE_STATE)
11434 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11435 if owned_groups != wanted_groups:
11436 raise errors.OpExecError("Node groups changed since locks were acquired,"
11437 " current groups are '%s', used to be '%s';"
11438 " retry the operation" %
11439 (utils.CommaJoin(wanted_groups),
11440 utils.CommaJoin(owned_groups)))
11442 # Determine affected instances
11443 self.instances = self._DetermineInstances()
11444 self.instance_names = [i.name for i in self.instances]
11446 if set(self.instance_names) != owned_instances:
11447 raise errors.OpExecError("Instances on node '%s' changed since locks"
11448 " were acquired, current instances are '%s',"
11449 " used to be '%s'; retry the operation" %
11450 (self.op.node_name,
11451 utils.CommaJoin(self.instance_names),
11452 utils.CommaJoin(owned_instances)))
11454 if self.instance_names:
11455 self.LogInfo("Evacuating instances from node '%s': %s",
11457 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11459 self.LogInfo("No instances to evacuate from node '%s'",
11462 if self.op.remote_node is not None:
11463 for i in self.instances:
11464 if i.primary_node == self.op.remote_node:
11465 raise errors.OpPrereqError("Node %s is the primary node of"
11466 " instance %s, cannot use it as"
11468 (self.op.remote_node, i.name),
11469 errors.ECODE_INVAL)
11471 def Exec(self, feedback_fn):
11472 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11474 if not self.instance_names:
11475 # No instances to evacuate
11478 elif self.op.iallocator is not None:
11479 # TODO: Implement relocation to other group
11480 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11481 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11482 instances=list(self.instance_names))
11484 ial.Run(self.op.iallocator)
11486 if not ial.success:
11487 raise errors.OpPrereqError("Can't compute node evacuation using"
11488 " iallocator '%s': %s" %
11489 (self.op.iallocator, ial.info),
11490 errors.ECODE_NORES)
11492 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11494 elif self.op.remote_node is not None:
11495 assert self.op.mode == constants.NODE_EVAC_SEC
11497 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11498 remote_node=self.op.remote_node,
11500 mode=constants.REPLACE_DISK_CHG,
11501 early_release=self.op.early_release)]
11502 for instance_name in self.instance_names
11506 raise errors.ProgrammerError("No iallocator or remote node")
11508 return ResultWithJobs(jobs)
11511 def _SetOpEarlyRelease(early_release, op):
11512 """Sets C{early_release} flag on opcodes if available.
11516 op.early_release = early_release
11517 except AttributeError:
11518 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11523 def _NodeEvacDest(use_nodes, group, nodes):
11524 """Returns group or nodes depending on caller's choice.
11528 return utils.CommaJoin(nodes)
11533 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11534 """Unpacks the result of change-group and node-evacuate iallocator requests.
11536 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11537 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11539 @type lu: L{LogicalUnit}
11540 @param lu: Logical unit instance
11541 @type alloc_result: tuple/list
11542 @param alloc_result: Result from iallocator
11543 @type early_release: bool
11544 @param early_release: Whether to release locks early if possible
11545 @type use_nodes: bool
11546 @param use_nodes: Whether to display node names instead of groups
11549 (moved, failed, jobs) = alloc_result
11552 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11553 for (name, reason) in failed)
11554 lu.LogWarning("Unable to evacuate instances %s", failreason)
11555 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11558 lu.LogInfo("Instances to be moved: %s",
11559 utils.CommaJoin("%s (to %s)" %
11560 (name, _NodeEvacDest(use_nodes, group, nodes))
11561 for (name, group, nodes) in moved))
11563 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11564 map(opcodes.OpCode.LoadOpCode, ops))
11568 class LUInstanceGrowDisk(LogicalUnit):
11569 """Grow a disk of an instance.
11572 HPATH = "disk-grow"
11573 HTYPE = constants.HTYPE_INSTANCE
11576 def ExpandNames(self):
11577 self._ExpandAndLockInstance()
11578 self.needed_locks[locking.LEVEL_NODE] = []
11579 self.needed_locks[locking.LEVEL_NODE_RES] = []
11580 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11581 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11583 def DeclareLocks(self, level):
11584 if level == locking.LEVEL_NODE:
11585 self._LockInstancesNodes()
11586 elif level == locking.LEVEL_NODE_RES:
11588 self.needed_locks[locking.LEVEL_NODE_RES] = \
11589 self.needed_locks[locking.LEVEL_NODE][:]
11591 def BuildHooksEnv(self):
11592 """Build hooks env.
11594 This runs on the master, the primary and all the secondaries.
11598 "DISK": self.op.disk,
11599 "AMOUNT": self.op.amount,
11600 "ABSOLUTE": self.op.absolute,
11602 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11605 def BuildHooksNodes(self):
11606 """Build hooks nodes.
11609 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11612 def CheckPrereq(self):
11613 """Check prerequisites.
11615 This checks that the instance is in the cluster.
11618 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11619 assert instance is not None, \
11620 "Cannot retrieve locked instance %s" % self.op.instance_name
11621 nodenames = list(instance.all_nodes)
11622 for node in nodenames:
11623 _CheckNodeOnline(self, node)
11625 self.instance = instance
11627 if instance.disk_template not in constants.DTS_GROWABLE:
11628 raise errors.OpPrereqError("Instance's disk layout does not support"
11629 " growing", errors.ECODE_INVAL)
11631 self.disk = instance.FindDisk(self.op.disk)
11633 if self.op.absolute:
11634 self.target = self.op.amount
11635 self.delta = self.target - self.disk.size
11637 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11638 "current disk size (%s)" %
11639 (utils.FormatUnit(self.target, "h"),
11640 utils.FormatUnit(self.disk.size, "h")),
11641 errors.ECODE_STATE)
11643 self.delta = self.op.amount
11644 self.target = self.disk.size + self.delta
11646 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11647 utils.FormatUnit(self.delta, "h"),
11648 errors.ECODE_INVAL)
11650 if instance.disk_template not in (constants.DT_FILE,
11651 constants.DT_SHARED_FILE,
11653 # TODO: check the free disk space for file, when that feature will be
11655 _CheckNodesFreeDiskPerVG(self, nodenames,
11656 self.disk.ComputeGrowth(self.delta))
11658 def Exec(self, feedback_fn):
11659 """Execute disk grow.
11662 instance = self.instance
11665 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11666 assert (self.owned_locks(locking.LEVEL_NODE) ==
11667 self.owned_locks(locking.LEVEL_NODE_RES))
11669 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11671 raise errors.OpExecError("Cannot activate block device to grow")
11673 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11674 (self.op.disk, instance.name,
11675 utils.FormatUnit(self.delta, "h"),
11676 utils.FormatUnit(self.target, "h")))
11678 # First run all grow ops in dry-run mode
11679 for node in instance.all_nodes:
11680 self.cfg.SetDiskID(disk, node)
11681 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11683 result.Raise("Grow request failed to node %s" % node)
11685 # We know that (as far as we can test) operations across different
11686 # nodes will succeed, time to run it for real
11687 for node in instance.all_nodes:
11688 self.cfg.SetDiskID(disk, node)
11689 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11691 result.Raise("Grow request failed to node %s" % node)
11693 # TODO: Rewrite code to work properly
11694 # DRBD goes into sync mode for a short amount of time after executing the
11695 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11696 # calling "resize" in sync mode fails. Sleeping for a short amount of
11697 # time is a work-around.
11700 disk.RecordGrow(self.delta)
11701 self.cfg.Update(instance, feedback_fn)
11703 # Changes have been recorded, release node lock
11704 _ReleaseLocks(self, locking.LEVEL_NODE)
11706 # Downgrade lock while waiting for sync
11707 self.glm.downgrade(locking.LEVEL_INSTANCE)
11709 if self.op.wait_for_sync:
11710 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11712 self.proc.LogWarning("Disk sync-ing has not returned a good"
11713 " status; please check the instance")
11714 if instance.admin_state != constants.ADMINST_UP:
11715 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11716 elif instance.admin_state != constants.ADMINST_UP:
11717 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11718 " not supposed to be running because no wait for"
11719 " sync mode was requested")
11721 assert self.owned_locks(locking.LEVEL_NODE_RES)
11722 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11725 class LUInstanceQueryData(NoHooksLU):
11726 """Query runtime instance data.
11731 def ExpandNames(self):
11732 self.needed_locks = {}
11734 # Use locking if requested or when non-static information is wanted
11735 if not (self.op.static or self.op.use_locking):
11736 self.LogWarning("Non-static data requested, locks need to be acquired")
11737 self.op.use_locking = True
11739 if self.op.instances or not self.op.use_locking:
11740 # Expand instance names right here
11741 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11743 # Will use acquired locks
11744 self.wanted_names = None
11746 if self.op.use_locking:
11747 self.share_locks = _ShareAll()
11749 if self.wanted_names is None:
11750 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11752 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11754 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11755 self.needed_locks[locking.LEVEL_NODE] = []
11756 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11758 def DeclareLocks(self, level):
11759 if self.op.use_locking:
11760 if level == locking.LEVEL_NODEGROUP:
11761 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11763 # Lock all groups used by instances optimistically; this requires going
11764 # via the node before it's locked, requiring verification later on
11765 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11766 frozenset(group_uuid
11767 for instance_name in owned_instances
11769 self.cfg.GetInstanceNodeGroups(instance_name))
11771 elif level == locking.LEVEL_NODE:
11772 self._LockInstancesNodes()
11774 def CheckPrereq(self):
11775 """Check prerequisites.
11777 This only checks the optional instance list against the existing names.
11780 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11781 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11782 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11784 if self.wanted_names is None:
11785 assert self.op.use_locking, "Locking was not used"
11786 self.wanted_names = owned_instances
11788 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11790 if self.op.use_locking:
11791 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11794 assert not (owned_instances or owned_groups or owned_nodes)
11796 self.wanted_instances = instances.values()
11798 def _ComputeBlockdevStatus(self, node, instance, dev):
11799 """Returns the status of a block device
11802 if self.op.static or not node:
11805 self.cfg.SetDiskID(dev, node)
11807 result = self.rpc.call_blockdev_find(node, dev)
11811 result.Raise("Can't compute disk status for %s" % instance.name)
11813 status = result.payload
11817 return (status.dev_path, status.major, status.minor,
11818 status.sync_percent, status.estimated_time,
11819 status.is_degraded, status.ldisk_status)
11821 def _ComputeDiskStatus(self, instance, snode, dev):
11822 """Compute block device status.
11825 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11827 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11829 def _ComputeDiskStatusInner(self, instance, snode, dev):
11830 """Compute block device status.
11832 @attention: The device has to be annotated already.
11835 if dev.dev_type in constants.LDS_DRBD:
11836 # we change the snode then (otherwise we use the one passed in)
11837 if dev.logical_id[0] == instance.primary_node:
11838 snode = dev.logical_id[1]
11840 snode = dev.logical_id[0]
11842 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11844 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11847 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11854 "iv_name": dev.iv_name,
11855 "dev_type": dev.dev_type,
11856 "logical_id": dev.logical_id,
11857 "physical_id": dev.physical_id,
11858 "pstatus": dev_pstatus,
11859 "sstatus": dev_sstatus,
11860 "children": dev_children,
11865 def Exec(self, feedback_fn):
11866 """Gather and return data"""
11869 cluster = self.cfg.GetClusterInfo()
11871 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11872 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
11874 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11875 for node in nodes.values()))
11877 group2name_fn = lambda uuid: groups[uuid].name
11879 for instance in self.wanted_instances:
11880 pnode = nodes[instance.primary_node]
11882 if self.op.static or pnode.offline:
11883 remote_state = None
11885 self.LogWarning("Primary node %s is marked offline, returning static"
11886 " information only for instance %s" %
11887 (pnode.name, instance.name))
11889 remote_info = self.rpc.call_instance_info(instance.primary_node,
11891 instance.hypervisor)
11892 remote_info.Raise("Error checking node %s" % instance.primary_node)
11893 remote_info = remote_info.payload
11894 if remote_info and "state" in remote_info:
11895 remote_state = "up"
11897 if instance.admin_state == constants.ADMINST_UP:
11898 remote_state = "down"
11900 remote_state = instance.admin_state
11902 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11905 snodes_group_uuids = [nodes[snode_name].group
11906 for snode_name in instance.secondary_nodes]
11908 result[instance.name] = {
11909 "name": instance.name,
11910 "config_state": instance.admin_state,
11911 "run_state": remote_state,
11912 "pnode": instance.primary_node,
11913 "pnode_group_uuid": pnode.group,
11914 "pnode_group_name": group2name_fn(pnode.group),
11915 "snodes": instance.secondary_nodes,
11916 "snodes_group_uuids": snodes_group_uuids,
11917 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
11919 # this happens to be the same format used for hooks
11920 "nics": _NICListToTuple(self, instance.nics),
11921 "disk_template": instance.disk_template,
11923 "hypervisor": instance.hypervisor,
11924 "network_port": instance.network_port,
11925 "hv_instance": instance.hvparams,
11926 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11927 "be_instance": instance.beparams,
11928 "be_actual": cluster.FillBE(instance),
11929 "os_instance": instance.osparams,
11930 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11931 "serial_no": instance.serial_no,
11932 "mtime": instance.mtime,
11933 "ctime": instance.ctime,
11934 "uuid": instance.uuid,
11940 def PrepareContainerMods(mods, private_fn):
11941 """Prepares a list of container modifications by adding a private data field.
11943 @type mods: list of tuples; (operation, index, parameters)
11944 @param mods: List of modifications
11945 @type private_fn: callable or None
11946 @param private_fn: Callable for constructing a private data field for a
11951 if private_fn is None:
11956 return [(op, idx, params, fn()) for (op, idx, params) in mods]
11959 #: Type description for changes as returned by L{ApplyContainerMods}'s
11961 _TApplyContModsCbChanges = \
11962 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
11963 ht.TNonEmptyString,
11968 def ApplyContainerMods(kind, container, chgdesc, mods,
11969 create_fn, modify_fn, remove_fn):
11970 """Applies descriptions in C{mods} to C{container}.
11973 @param kind: One-word item description
11974 @type container: list
11975 @param container: Container to modify
11976 @type chgdesc: None or list
11977 @param chgdesc: List of applied changes
11979 @param mods: Modifications as returned by L{PrepareContainerMods}
11980 @type create_fn: callable
11981 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
11982 receives absolute item index, parameters and private data object as added
11983 by L{PrepareContainerMods}, returns tuple containing new item and changes
11985 @type modify_fn: callable
11986 @param modify_fn: Callback for modifying an existing item
11987 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
11988 and private data object as added by L{PrepareContainerMods}, returns
11990 @type remove_fn: callable
11991 @param remove_fn: Callback on removing item; receives absolute item index,
11992 item and private data object as added by L{PrepareContainerMods}
11995 for (op, idx, params, private) in mods:
11998 absidx = len(container) - 1
12000 raise IndexError("Not accepting negative indices other than -1")
12001 elif idx > len(container):
12002 raise IndexError("Got %s index %s, but there are only %s" %
12003 (kind, idx, len(container)))
12009 if op == constants.DDM_ADD:
12010 # Calculate where item will be added
12012 addidx = len(container)
12016 if create_fn is None:
12019 (item, changes) = create_fn(addidx, params, private)
12022 container.append(item)
12025 assert idx <= len(container)
12026 # list.insert does so before the specified index
12027 container.insert(idx, item)
12029 # Retrieve existing item
12031 item = container[absidx]
12033 raise IndexError("Invalid %s index %s" % (kind, idx))
12035 if op == constants.DDM_REMOVE:
12038 if remove_fn is not None:
12039 remove_fn(absidx, item, private)
12041 changes = [("%s/%s" % (kind, absidx), "remove")]
12043 assert container[absidx] == item
12044 del container[absidx]
12045 elif op == constants.DDM_MODIFY:
12046 if modify_fn is not None:
12047 changes = modify_fn(absidx, item, params, private)
12049 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12051 assert _TApplyContModsCbChanges(changes)
12053 if not (chgdesc is None or changes is None):
12054 chgdesc.extend(changes)
12057 def _UpdateIvNames(base_index, disks):
12058 """Updates the C{iv_name} attribute of disks.
12060 @type disks: list of L{objects.Disk}
12063 for (idx, disk) in enumerate(disks):
12064 disk.iv_name = "disk/%s" % (base_index + idx, )
12067 class _InstNicModPrivate:
12068 """Data structure for network interface modifications.
12070 Used by L{LUInstanceSetParams}.
12073 def __init__(self):
12078 class LUInstanceSetParams(LogicalUnit):
12079 """Modifies an instances's parameters.
12082 HPATH = "instance-modify"
12083 HTYPE = constants.HTYPE_INSTANCE
12087 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12088 assert ht.TList(mods)
12089 assert not mods or len(mods[0]) in (2, 3)
12091 if mods and len(mods[0]) == 2:
12095 for op, params in mods:
12096 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12097 result.append((op, -1, params))
12101 raise errors.OpPrereqError("Only one %s add or remove operation is"
12102 " supported at a time" % kind,
12103 errors.ECODE_INVAL)
12105 result.append((constants.DDM_MODIFY, op, params))
12107 assert verify_fn(result)
12114 def _CheckMods(kind, mods, key_types, item_fn):
12115 """Ensures requested disk/NIC modifications are valid.
12118 for (op, _, params) in mods:
12119 assert ht.TDict(params)
12121 utils.ForceDictType(params, key_types)
12123 if op == constants.DDM_REMOVE:
12125 raise errors.OpPrereqError("No settings should be passed when"
12126 " removing a %s" % kind,
12127 errors.ECODE_INVAL)
12128 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12129 item_fn(op, params)
12131 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12134 def _VerifyDiskModification(op, params):
12135 """Verifies a disk modification.
12138 if op == constants.DDM_ADD:
12139 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12140 if mode not in constants.DISK_ACCESS_SET:
12141 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12142 errors.ECODE_INVAL)
12144 size = params.get(constants.IDISK_SIZE, None)
12146 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12147 constants.IDISK_SIZE, errors.ECODE_INVAL)
12151 except (TypeError, ValueError), err:
12152 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12153 errors.ECODE_INVAL)
12155 params[constants.IDISK_SIZE] = size
12157 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12158 raise errors.OpPrereqError("Disk size change not possible, use"
12159 " grow-disk", errors.ECODE_INVAL)
12162 def _VerifyNicModification(op, params):
12163 """Verifies a network interface modification.
12166 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12167 ip = params.get(constants.INIC_IP, None)
12170 elif ip.lower() == constants.VALUE_NONE:
12171 params[constants.INIC_IP] = None
12172 elif not netutils.IPAddress.IsValid(ip):
12173 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12174 errors.ECODE_INVAL)
12176 bridge = params.get("bridge", None)
12177 link = params.get(constants.INIC_LINK, None)
12178 if bridge and link:
12179 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12180 " at the same time", errors.ECODE_INVAL)
12181 elif bridge and bridge.lower() == constants.VALUE_NONE:
12182 params["bridge"] = None
12183 elif link and link.lower() == constants.VALUE_NONE:
12184 params[constants.INIC_LINK] = None
12186 if op == constants.DDM_ADD:
12187 macaddr = params.get(constants.INIC_MAC, None)
12188 if macaddr is None:
12189 params[constants.INIC_MAC] = constants.VALUE_AUTO
12191 if constants.INIC_MAC in params:
12192 macaddr = params[constants.INIC_MAC]
12193 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12194 macaddr = utils.NormalizeAndValidateMac(macaddr)
12196 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12197 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12198 " modifying an existing NIC",
12199 errors.ECODE_INVAL)
12201 def CheckArguments(self):
12202 if not (self.op.nics or self.op.disks or self.op.disk_template or
12203 self.op.hvparams or self.op.beparams or self.op.os_name or
12204 self.op.offline is not None or self.op.runtime_mem):
12205 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12207 if self.op.hvparams:
12208 _CheckGlobalHvParams(self.op.hvparams)
12211 self._UpgradeDiskNicMods("disk", self.op.disks,
12212 opcodes.OpInstanceSetParams.TestDiskModifications)
12214 self._UpgradeDiskNicMods("NIC", self.op.nics,
12215 opcodes.OpInstanceSetParams.TestNicModifications)
12217 # Check disk modifications
12218 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12219 self._VerifyDiskModification)
12221 if self.op.disks and self.op.disk_template is not None:
12222 raise errors.OpPrereqError("Disk template conversion and other disk"
12223 " changes not supported at the same time",
12224 errors.ECODE_INVAL)
12226 if (self.op.disk_template and
12227 self.op.disk_template in constants.DTS_INT_MIRROR and
12228 self.op.remote_node is None):
12229 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12230 " one requires specifying a secondary node",
12231 errors.ECODE_INVAL)
12233 # Check NIC modifications
12234 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12235 self._VerifyNicModification)
12237 def ExpandNames(self):
12238 self._ExpandAndLockInstance()
12239 # Can't even acquire node locks in shared mode as upcoming changes in
12240 # Ganeti 2.6 will start to modify the node object on disk conversion
12241 self.needed_locks[locking.LEVEL_NODE] = []
12242 self.needed_locks[locking.LEVEL_NODE_RES] = []
12243 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12245 def DeclareLocks(self, level):
12246 # TODO: Acquire group lock in shared mode (disk parameters)
12247 if level == locking.LEVEL_NODE:
12248 self._LockInstancesNodes()
12249 if self.op.disk_template and self.op.remote_node:
12250 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12251 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12252 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12254 self.needed_locks[locking.LEVEL_NODE_RES] = \
12255 self.needed_locks[locking.LEVEL_NODE][:]
12257 def BuildHooksEnv(self):
12258 """Build hooks env.
12260 This runs on the master, primary and secondaries.
12264 if constants.BE_MINMEM in self.be_new:
12265 args["minmem"] = self.be_new[constants.BE_MINMEM]
12266 if constants.BE_MAXMEM in self.be_new:
12267 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12268 if constants.BE_VCPUS in self.be_new:
12269 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12270 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12271 # information at all.
12273 if self._new_nics is not None:
12276 for nic in self._new_nics:
12277 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12278 mode = nicparams[constants.NIC_MODE]
12279 link = nicparams[constants.NIC_LINK]
12280 nics.append((nic.ip, nic.mac, mode, link))
12282 args["nics"] = nics
12284 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12285 if self.op.disk_template:
12286 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12287 if self.op.runtime_mem:
12288 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12292 def BuildHooksNodes(self):
12293 """Build hooks nodes.
12296 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12299 def _PrepareNicModification(self, params, private, old_ip, old_params,
12301 update_params_dict = dict([(key, params[key])
12302 for key in constants.NICS_PARAMETERS
12305 if "bridge" in params:
12306 update_params_dict[constants.NIC_LINK] = params["bridge"]
12308 new_params = _GetUpdatedParams(old_params, update_params_dict)
12309 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12311 new_filled_params = cluster.SimpleFillNIC(new_params)
12312 objects.NIC.CheckParameterSyntax(new_filled_params)
12314 new_mode = new_filled_params[constants.NIC_MODE]
12315 if new_mode == constants.NIC_MODE_BRIDGED:
12316 bridge = new_filled_params[constants.NIC_LINK]
12317 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12319 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12321 self.warn.append(msg)
12323 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12325 elif new_mode == constants.NIC_MODE_ROUTED:
12326 ip = params.get(constants.INIC_IP, old_ip)
12328 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12329 " on a routed NIC", errors.ECODE_INVAL)
12331 if constants.INIC_MAC in params:
12332 mac = params[constants.INIC_MAC]
12334 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12335 errors.ECODE_INVAL)
12336 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12337 # otherwise generate the MAC address
12338 params[constants.INIC_MAC] = \
12339 self.cfg.GenerateMAC(self.proc.GetECId())
12341 # or validate/reserve the current one
12343 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12344 except errors.ReservationError:
12345 raise errors.OpPrereqError("MAC address '%s' already in use"
12346 " in cluster" % mac,
12347 errors.ECODE_NOTUNIQUE)
12349 private.params = new_params
12350 private.filled = new_filled_params
12352 def CheckPrereq(self):
12353 """Check prerequisites.
12355 This only checks the instance list against the existing names.
12358 # checking the new params on the primary/secondary nodes
12360 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12361 cluster = self.cluster = self.cfg.GetClusterInfo()
12362 assert self.instance is not None, \
12363 "Cannot retrieve locked instance %s" % self.op.instance_name
12364 pnode = instance.primary_node
12365 nodelist = list(instance.all_nodes)
12366 pnode_info = self.cfg.GetNodeInfo(pnode)
12367 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12369 # Prepare disk/NIC modifications
12370 self.diskmod = PrepareContainerMods(self.op.disks, None)
12371 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12374 if self.op.os_name and not self.op.force:
12375 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12376 self.op.force_variant)
12377 instance_os = self.op.os_name
12379 instance_os = instance.os
12381 assert not (self.op.disk_template and self.op.disks), \
12382 "Can't modify disk template and apply disk changes at the same time"
12384 if self.op.disk_template:
12385 if instance.disk_template == self.op.disk_template:
12386 raise errors.OpPrereqError("Instance already has disk template %s" %
12387 instance.disk_template, errors.ECODE_INVAL)
12389 if (instance.disk_template,
12390 self.op.disk_template) not in self._DISK_CONVERSIONS:
12391 raise errors.OpPrereqError("Unsupported disk template conversion from"
12392 " %s to %s" % (instance.disk_template,
12393 self.op.disk_template),
12394 errors.ECODE_INVAL)
12395 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12396 msg="cannot change disk template")
12397 if self.op.disk_template in constants.DTS_INT_MIRROR:
12398 if self.op.remote_node == pnode:
12399 raise errors.OpPrereqError("Given new secondary node %s is the same"
12400 " as the primary node of the instance" %
12401 self.op.remote_node, errors.ECODE_STATE)
12402 _CheckNodeOnline(self, self.op.remote_node)
12403 _CheckNodeNotDrained(self, self.op.remote_node)
12404 # FIXME: here we assume that the old instance type is DT_PLAIN
12405 assert instance.disk_template == constants.DT_PLAIN
12406 disks = [{constants.IDISK_SIZE: d.size,
12407 constants.IDISK_VG: d.logical_id[0]}
12408 for d in instance.disks]
12409 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12410 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12412 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12413 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12414 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12415 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12416 ignore=self.op.ignore_ipolicy)
12417 if pnode_info.group != snode_info.group:
12418 self.LogWarning("The primary and secondary nodes are in two"
12419 " different node groups; the disk parameters"
12420 " from the first disk's node group will be"
12423 # hvparams processing
12424 if self.op.hvparams:
12425 hv_type = instance.hypervisor
12426 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12427 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12428 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12431 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12432 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12433 self.hv_proposed = self.hv_new = hv_new # the new actual values
12434 self.hv_inst = i_hvdict # the new dict (without defaults)
12436 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12438 self.hv_new = self.hv_inst = {}
12440 # beparams processing
12441 if self.op.beparams:
12442 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12444 objects.UpgradeBeParams(i_bedict)
12445 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12446 be_new = cluster.SimpleFillBE(i_bedict)
12447 self.be_proposed = self.be_new = be_new # the new actual values
12448 self.be_inst = i_bedict # the new dict (without defaults)
12450 self.be_new = self.be_inst = {}
12451 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12452 be_old = cluster.FillBE(instance)
12454 # CPU param validation -- checking every time a parameter is
12455 # changed to cover all cases where either CPU mask or vcpus have
12457 if (constants.BE_VCPUS in self.be_proposed and
12458 constants.HV_CPU_MASK in self.hv_proposed):
12460 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12461 # Verify mask is consistent with number of vCPUs. Can skip this
12462 # test if only 1 entry in the CPU mask, which means same mask
12463 # is applied to all vCPUs.
12464 if (len(cpu_list) > 1 and
12465 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12466 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12468 (self.be_proposed[constants.BE_VCPUS],
12469 self.hv_proposed[constants.HV_CPU_MASK]),
12470 errors.ECODE_INVAL)
12472 # Only perform this test if a new CPU mask is given
12473 if constants.HV_CPU_MASK in self.hv_new:
12474 # Calculate the largest CPU number requested
12475 max_requested_cpu = max(map(max, cpu_list))
12476 # Check that all of the instance's nodes have enough physical CPUs to
12477 # satisfy the requested CPU mask
12478 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12479 max_requested_cpu + 1, instance.hypervisor)
12481 # osparams processing
12482 if self.op.osparams:
12483 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12484 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12485 self.os_inst = i_osdict # the new dict (without defaults)
12491 #TODO(dynmem): do the appropriate check involving MINMEM
12492 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12493 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12494 mem_check_list = [pnode]
12495 if be_new[constants.BE_AUTO_BALANCE]:
12496 # either we changed auto_balance to yes or it was from before
12497 mem_check_list.extend(instance.secondary_nodes)
12498 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12499 instance.hypervisor)
12500 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12501 [instance.hypervisor])
12502 pninfo = nodeinfo[pnode]
12503 msg = pninfo.fail_msg
12505 # Assume the primary node is unreachable and go ahead
12506 self.warn.append("Can't get info from primary node %s: %s" %
12509 (_, _, (pnhvinfo, )) = pninfo.payload
12510 if not isinstance(pnhvinfo.get("memory_free", None), int):
12511 self.warn.append("Node data from primary node %s doesn't contain"
12512 " free memory information" % pnode)
12513 elif instance_info.fail_msg:
12514 self.warn.append("Can't get instance runtime information: %s" %
12515 instance_info.fail_msg)
12517 if instance_info.payload:
12518 current_mem = int(instance_info.payload["memory"])
12520 # Assume instance not running
12521 # (there is a slight race condition here, but it's not very
12522 # probable, and we have no other way to check)
12523 # TODO: Describe race condition
12525 #TODO(dynmem): do the appropriate check involving MINMEM
12526 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12527 pnhvinfo["memory_free"])
12529 raise errors.OpPrereqError("This change will prevent the instance"
12530 " from starting, due to %d MB of memory"
12531 " missing on its primary node" %
12533 errors.ECODE_NORES)
12535 if be_new[constants.BE_AUTO_BALANCE]:
12536 for node, nres in nodeinfo.items():
12537 if node not in instance.secondary_nodes:
12539 nres.Raise("Can't get info from secondary node %s" % node,
12540 prereq=True, ecode=errors.ECODE_STATE)
12541 (_, _, (nhvinfo, )) = nres.payload
12542 if not isinstance(nhvinfo.get("memory_free", None), int):
12543 raise errors.OpPrereqError("Secondary node %s didn't return free"
12544 " memory information" % node,
12545 errors.ECODE_STATE)
12546 #TODO(dynmem): do the appropriate check involving MINMEM
12547 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12548 raise errors.OpPrereqError("This change will prevent the instance"
12549 " from failover to its secondary node"
12550 " %s, due to not enough memory" % node,
12551 errors.ECODE_STATE)
12553 if self.op.runtime_mem:
12554 remote_info = self.rpc.call_instance_info(instance.primary_node,
12556 instance.hypervisor)
12557 remote_info.Raise("Error checking node %s" % instance.primary_node)
12558 if not remote_info.payload: # not running already
12559 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12560 errors.ECODE_STATE)
12562 current_memory = remote_info.payload["memory"]
12563 if (not self.op.force and
12564 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12565 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12566 raise errors.OpPrereqError("Instance %s must have memory between %d"
12567 " and %d MB of memory unless --force is"
12568 " given" % (instance.name,
12569 self.be_proposed[constants.BE_MINMEM],
12570 self.be_proposed[constants.BE_MAXMEM]),
12571 errors.ECODE_INVAL)
12573 if self.op.runtime_mem > current_memory:
12574 _CheckNodeFreeMemory(self, instance.primary_node,
12575 "ballooning memory for instance %s" %
12577 self.op.memory - current_memory,
12578 instance.hypervisor)
12580 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12581 raise errors.OpPrereqError("Disk operations not supported for"
12582 " diskless instances",
12583 errors.ECODE_INVAL)
12585 def _PrepareNicCreate(_, params, private):
12586 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12587 return (None, None)
12589 def _PrepareNicMod(_, nic, params, private):
12590 self._PrepareNicModification(params, private, nic.ip,
12591 nic.nicparams, cluster, pnode)
12594 # Verify NIC changes (operating on copy)
12595 nics = instance.nics[:]
12596 ApplyContainerMods("NIC", nics, None, self.nicmod,
12597 _PrepareNicCreate, _PrepareNicMod, None)
12598 if len(nics) > constants.MAX_NICS:
12599 raise errors.OpPrereqError("Instance has too many network interfaces"
12600 " (%d), cannot add more" % constants.MAX_NICS,
12601 errors.ECODE_STATE)
12603 # Verify disk changes (operating on a copy)
12604 disks = instance.disks[:]
12605 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12606 if len(disks) > constants.MAX_DISKS:
12607 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12608 " more" % constants.MAX_DISKS,
12609 errors.ECODE_STATE)
12611 if self.op.offline is not None:
12612 if self.op.offline:
12613 msg = "can't change to offline"
12615 msg = "can't change to online"
12616 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12618 # Pre-compute NIC changes (necessary to use result in hooks)
12619 self._nic_chgdesc = []
12621 # Operate on copies as this is still in prereq
12622 nics = [nic.Copy() for nic in instance.nics]
12623 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12624 self._CreateNewNic, self._ApplyNicMods, None)
12625 self._new_nics = nics
12627 self._new_nics = None
12629 def _ConvertPlainToDrbd(self, feedback_fn):
12630 """Converts an instance from plain to drbd.
12633 feedback_fn("Converting template to drbd")
12634 instance = self.instance
12635 pnode = instance.primary_node
12636 snode = self.op.remote_node
12638 assert instance.disk_template == constants.DT_PLAIN
12640 # create a fake disk info for _GenerateDiskTemplate
12641 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12642 constants.IDISK_VG: d.logical_id[0]}
12643 for d in instance.disks]
12644 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12645 instance.name, pnode, [snode],
12646 disk_info, None, None, 0, feedback_fn,
12648 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12650 info = _GetInstanceInfoText(instance)
12651 feedback_fn("Creating additional volumes...")
12652 # first, create the missing data and meta devices
12653 for disk in anno_disks:
12654 # unfortunately this is... not too nice
12655 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12657 for child in disk.children:
12658 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12659 # at this stage, all new LVs have been created, we can rename the
12661 feedback_fn("Renaming original volumes...")
12662 rename_list = [(o, n.children[0].logical_id)
12663 for (o, n) in zip(instance.disks, new_disks)]
12664 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12665 result.Raise("Failed to rename original LVs")
12667 feedback_fn("Initializing DRBD devices...")
12668 # all child devices are in place, we can now create the DRBD devices
12669 for disk in anno_disks:
12670 for node in [pnode, snode]:
12671 f_create = node == pnode
12672 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12674 # at this point, the instance has been modified
12675 instance.disk_template = constants.DT_DRBD8
12676 instance.disks = new_disks
12677 self.cfg.Update(instance, feedback_fn)
12679 # Release node locks while waiting for sync
12680 _ReleaseLocks(self, locking.LEVEL_NODE)
12682 # disks are created, waiting for sync
12683 disk_abort = not _WaitForSync(self, instance,
12684 oneshot=not self.op.wait_for_sync)
12686 raise errors.OpExecError("There are some degraded disks for"
12687 " this instance, please cleanup manually")
12689 # Node resource locks will be released by caller
12691 def _ConvertDrbdToPlain(self, feedback_fn):
12692 """Converts an instance from drbd to plain.
12695 instance = self.instance
12697 assert len(instance.secondary_nodes) == 1
12698 assert instance.disk_template == constants.DT_DRBD8
12700 pnode = instance.primary_node
12701 snode = instance.secondary_nodes[0]
12702 feedback_fn("Converting template to plain")
12704 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12705 new_disks = [d.children[0] for d in instance.disks]
12707 # copy over size and mode
12708 for parent, child in zip(old_disks, new_disks):
12709 child.size = parent.size
12710 child.mode = parent.mode
12712 # this is a DRBD disk, return its port to the pool
12713 # NOTE: this must be done right before the call to cfg.Update!
12714 for disk in old_disks:
12715 tcp_port = disk.logical_id[2]
12716 self.cfg.AddTcpUdpPort(tcp_port)
12718 # update instance structure
12719 instance.disks = new_disks
12720 instance.disk_template = constants.DT_PLAIN
12721 self.cfg.Update(instance, feedback_fn)
12723 # Release locks in case removing disks takes a while
12724 _ReleaseLocks(self, locking.LEVEL_NODE)
12726 feedback_fn("Removing volumes on the secondary node...")
12727 for disk in old_disks:
12728 self.cfg.SetDiskID(disk, snode)
12729 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12731 self.LogWarning("Could not remove block device %s on node %s,"
12732 " continuing anyway: %s", disk.iv_name, snode, msg)
12734 feedback_fn("Removing unneeded volumes on the primary node...")
12735 for idx, disk in enumerate(old_disks):
12736 meta = disk.children[1]
12737 self.cfg.SetDiskID(meta, pnode)
12738 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12740 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12741 " continuing anyway: %s", idx, pnode, msg)
12743 def _CreateNewDisk(self, idx, params, _):
12744 """Creates a new disk.
12747 instance = self.instance
12750 if instance.disk_template in constants.DTS_FILEBASED:
12751 (file_driver, file_path) = instance.disks[0].logical_id
12752 file_path = os.path.dirname(file_path)
12754 file_driver = file_path = None
12757 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12758 instance.primary_node, instance.secondary_nodes,
12759 [params], file_path, file_driver, idx,
12760 self.Log, self.diskparams)[0]
12762 info = _GetInstanceInfoText(instance)
12764 logging.info("Creating volume %s for instance %s",
12765 disk.iv_name, instance.name)
12766 # Note: this needs to be kept in sync with _CreateDisks
12768 for node in instance.all_nodes:
12769 f_create = (node == instance.primary_node)
12771 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12772 except errors.OpExecError, err:
12773 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12774 disk.iv_name, disk, node, err)
12777 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12781 def _ModifyDisk(idx, disk, params, _):
12782 """Modifies a disk.
12785 disk.mode = params[constants.IDISK_MODE]
12788 ("disk.mode/%d" % idx, disk.mode),
12791 def _RemoveDisk(self, idx, root, _):
12795 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
12796 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
12797 self.cfg.SetDiskID(disk, node)
12798 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12800 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12801 " continuing anyway", idx, node, msg)
12803 # if this is a DRBD disk, return its port to the pool
12804 if root.dev_type in constants.LDS_DRBD:
12805 self.cfg.AddTcpUdpPort(root.logical_id[2])
12808 def _CreateNewNic(idx, params, private):
12809 """Creates data structure for a new network interface.
12812 mac = params[constants.INIC_MAC]
12813 ip = params.get(constants.INIC_IP, None)
12814 nicparams = private.params
12816 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12818 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12819 (mac, ip, private.filled[constants.NIC_MODE],
12820 private.filled[constants.NIC_LINK])),
12824 def _ApplyNicMods(idx, nic, params, private):
12825 """Modifies a network interface.
12830 for key in [constants.INIC_MAC, constants.INIC_IP]:
12832 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12833 setattr(nic, key, params[key])
12836 nic.nicparams = private.params
12838 for (key, val) in params.items():
12839 changes.append(("nic.%s/%d" % (key, idx), val))
12843 def Exec(self, feedback_fn):
12844 """Modifies an instance.
12846 All parameters take effect only at the next restart of the instance.
12849 # Process here the warnings from CheckPrereq, as we don't have a
12850 # feedback_fn there.
12851 # TODO: Replace with self.LogWarning
12852 for warn in self.warn:
12853 feedback_fn("WARNING: %s" % warn)
12855 assert ((self.op.disk_template is None) ^
12856 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12857 "Not owning any node resource locks"
12860 instance = self.instance
12863 if self.op.runtime_mem:
12864 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12866 self.op.runtime_mem)
12867 rpcres.Raise("Cannot modify instance runtime memory")
12868 result.append(("runtime_memory", self.op.runtime_mem))
12870 # Apply disk changes
12871 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12872 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12873 _UpdateIvNames(0, instance.disks)
12875 if self.op.disk_template:
12877 check_nodes = set(instance.all_nodes)
12878 if self.op.remote_node:
12879 check_nodes.add(self.op.remote_node)
12880 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12881 owned = self.owned_locks(level)
12882 assert not (check_nodes - owned), \
12883 ("Not owning the correct locks, owning %r, expected at least %r" %
12884 (owned, check_nodes))
12886 r_shut = _ShutdownInstanceDisks(self, instance)
12888 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12889 " proceed with disk template conversion")
12890 mode = (instance.disk_template, self.op.disk_template)
12892 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12894 self.cfg.ReleaseDRBDMinors(instance.name)
12896 result.append(("disk_template", self.op.disk_template))
12898 assert instance.disk_template == self.op.disk_template, \
12899 ("Expected disk template '%s', found '%s'" %
12900 (self.op.disk_template, instance.disk_template))
12902 # Release node and resource locks if there are any (they might already have
12903 # been released during disk conversion)
12904 _ReleaseLocks(self, locking.LEVEL_NODE)
12905 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12907 # Apply NIC changes
12908 if self._new_nics is not None:
12909 instance.nics = self._new_nics
12910 result.extend(self._nic_chgdesc)
12913 if self.op.hvparams:
12914 instance.hvparams = self.hv_inst
12915 for key, val in self.op.hvparams.iteritems():
12916 result.append(("hv/%s" % key, val))
12919 if self.op.beparams:
12920 instance.beparams = self.be_inst
12921 for key, val in self.op.beparams.iteritems():
12922 result.append(("be/%s" % key, val))
12925 if self.op.os_name:
12926 instance.os = self.op.os_name
12929 if self.op.osparams:
12930 instance.osparams = self.os_inst
12931 for key, val in self.op.osparams.iteritems():
12932 result.append(("os/%s" % key, val))
12934 if self.op.offline is None:
12937 elif self.op.offline:
12938 # Mark instance as offline
12939 self.cfg.MarkInstanceOffline(instance.name)
12940 result.append(("admin_state", constants.ADMINST_OFFLINE))
12942 # Mark instance as online, but stopped
12943 self.cfg.MarkInstanceDown(instance.name)
12944 result.append(("admin_state", constants.ADMINST_DOWN))
12946 self.cfg.Update(instance, feedback_fn)
12948 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12949 self.owned_locks(locking.LEVEL_NODE)), \
12950 "All node locks should have been released by now"
12954 _DISK_CONVERSIONS = {
12955 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12956 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12960 class LUInstanceChangeGroup(LogicalUnit):
12961 HPATH = "instance-change-group"
12962 HTYPE = constants.HTYPE_INSTANCE
12965 def ExpandNames(self):
12966 self.share_locks = _ShareAll()
12967 self.needed_locks = {
12968 locking.LEVEL_NODEGROUP: [],
12969 locking.LEVEL_NODE: [],
12972 self._ExpandAndLockInstance()
12974 if self.op.target_groups:
12975 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12976 self.op.target_groups)
12978 self.req_target_uuids = None
12980 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12982 def DeclareLocks(self, level):
12983 if level == locking.LEVEL_NODEGROUP:
12984 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12986 if self.req_target_uuids:
12987 lock_groups = set(self.req_target_uuids)
12989 # Lock all groups used by instance optimistically; this requires going
12990 # via the node before it's locked, requiring verification later on
12991 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12992 lock_groups.update(instance_groups)
12994 # No target groups, need to lock all of them
12995 lock_groups = locking.ALL_SET
12997 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12999 elif level == locking.LEVEL_NODE:
13000 if self.req_target_uuids:
13001 # Lock all nodes used by instances
13002 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13003 self._LockInstancesNodes()
13005 # Lock all nodes in all potential target groups
13006 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13007 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13008 member_nodes = [node_name
13009 for group in lock_groups
13010 for node_name in self.cfg.GetNodeGroup(group).members]
13011 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13013 # Lock all nodes as all groups are potential targets
13014 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13016 def CheckPrereq(self):
13017 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13018 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13019 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13021 assert (self.req_target_uuids is None or
13022 owned_groups.issuperset(self.req_target_uuids))
13023 assert owned_instances == set([self.op.instance_name])
13025 # Get instance information
13026 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13028 # Check if node groups for locked instance are still correct
13029 assert owned_nodes.issuperset(self.instance.all_nodes), \
13030 ("Instance %s's nodes changed while we kept the lock" %
13031 self.op.instance_name)
13033 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13036 if self.req_target_uuids:
13037 # User requested specific target groups
13038 self.target_uuids = frozenset(self.req_target_uuids)
13040 # All groups except those used by the instance are potential targets
13041 self.target_uuids = owned_groups - inst_groups
13043 conflicting_groups = self.target_uuids & inst_groups
13044 if conflicting_groups:
13045 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13046 " used by the instance '%s'" %
13047 (utils.CommaJoin(conflicting_groups),
13048 self.op.instance_name),
13049 errors.ECODE_INVAL)
13051 if not self.target_uuids:
13052 raise errors.OpPrereqError("There are no possible target groups",
13053 errors.ECODE_INVAL)
13055 def BuildHooksEnv(self):
13056 """Build hooks env.
13059 assert self.target_uuids
13062 "TARGET_GROUPS": " ".join(self.target_uuids),
13065 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13069 def BuildHooksNodes(self):
13070 """Build hooks nodes.
13073 mn = self.cfg.GetMasterNode()
13074 return ([mn], [mn])
13076 def Exec(self, feedback_fn):
13077 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13079 assert instances == [self.op.instance_name], "Instance not locked"
13081 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13082 instances=instances, target_groups=list(self.target_uuids))
13084 ial.Run(self.op.iallocator)
13086 if not ial.success:
13087 raise errors.OpPrereqError("Can't compute solution for changing group of"
13088 " instance '%s' using iallocator '%s': %s" %
13089 (self.op.instance_name, self.op.iallocator,
13091 errors.ECODE_NORES)
13093 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13095 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13096 " instance '%s'", len(jobs), self.op.instance_name)
13098 return ResultWithJobs(jobs)
13101 class LUBackupQuery(NoHooksLU):
13102 """Query the exports list
13107 def CheckArguments(self):
13108 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13109 ["node", "export"], self.op.use_locking)
13111 def ExpandNames(self):
13112 self.expq.ExpandNames(self)
13114 def DeclareLocks(self, level):
13115 self.expq.DeclareLocks(self, level)
13117 def Exec(self, feedback_fn):
13120 for (node, expname) in self.expq.OldStyleQuery(self):
13121 if expname is None:
13122 result[node] = False
13124 result.setdefault(node, []).append(expname)
13129 class _ExportQuery(_QueryBase):
13130 FIELDS = query.EXPORT_FIELDS
13132 #: The node name is not a unique key for this query
13133 SORT_FIELD = "node"
13135 def ExpandNames(self, lu):
13136 lu.needed_locks = {}
13138 # The following variables interact with _QueryBase._GetNames
13140 self.wanted = _GetWantedNodes(lu, self.names)
13142 self.wanted = locking.ALL_SET
13144 self.do_locking = self.use_locking
13146 if self.do_locking:
13147 lu.share_locks = _ShareAll()
13148 lu.needed_locks = {
13149 locking.LEVEL_NODE: self.wanted,
13152 def DeclareLocks(self, lu, level):
13155 def _GetQueryData(self, lu):
13156 """Computes the list of nodes and their attributes.
13159 # Locking is not used
13161 assert not (compat.any(lu.glm.is_owned(level)
13162 for level in locking.LEVELS
13163 if level != locking.LEVEL_CLUSTER) or
13164 self.do_locking or self.use_locking)
13166 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13170 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13172 result.append((node, None))
13174 result.extend((node, expname) for expname in nres.payload)
13179 class LUBackupPrepare(NoHooksLU):
13180 """Prepares an instance for an export and returns useful information.
13185 def ExpandNames(self):
13186 self._ExpandAndLockInstance()
13188 def CheckPrereq(self):
13189 """Check prerequisites.
13192 instance_name = self.op.instance_name
13194 self.instance = self.cfg.GetInstanceInfo(instance_name)
13195 assert self.instance is not None, \
13196 "Cannot retrieve locked instance %s" % self.op.instance_name
13197 _CheckNodeOnline(self, self.instance.primary_node)
13199 self._cds = _GetClusterDomainSecret()
13201 def Exec(self, feedback_fn):
13202 """Prepares an instance for an export.
13205 instance = self.instance
13207 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13208 salt = utils.GenerateSecret(8)
13210 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13211 result = self.rpc.call_x509_cert_create(instance.primary_node,
13212 constants.RIE_CERT_VALIDITY)
13213 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13215 (name, cert_pem) = result.payload
13217 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13221 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13222 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13224 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13230 class LUBackupExport(LogicalUnit):
13231 """Export an instance to an image in the cluster.
13234 HPATH = "instance-export"
13235 HTYPE = constants.HTYPE_INSTANCE
13238 def CheckArguments(self):
13239 """Check the arguments.
13242 self.x509_key_name = self.op.x509_key_name
13243 self.dest_x509_ca_pem = self.op.destination_x509_ca
13245 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13246 if not self.x509_key_name:
13247 raise errors.OpPrereqError("Missing X509 key name for encryption",
13248 errors.ECODE_INVAL)
13250 if not self.dest_x509_ca_pem:
13251 raise errors.OpPrereqError("Missing destination X509 CA",
13252 errors.ECODE_INVAL)
13254 def ExpandNames(self):
13255 self._ExpandAndLockInstance()
13257 # Lock all nodes for local exports
13258 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13259 # FIXME: lock only instance primary and destination node
13261 # Sad but true, for now we have do lock all nodes, as we don't know where
13262 # the previous export might be, and in this LU we search for it and
13263 # remove it from its current node. In the future we could fix this by:
13264 # - making a tasklet to search (share-lock all), then create the
13265 # new one, then one to remove, after
13266 # - removing the removal operation altogether
13267 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13269 def DeclareLocks(self, level):
13270 """Last minute lock declaration."""
13271 # All nodes are locked anyway, so nothing to do here.
13273 def BuildHooksEnv(self):
13274 """Build hooks env.
13276 This will run on the master, primary node and target node.
13280 "EXPORT_MODE": self.op.mode,
13281 "EXPORT_NODE": self.op.target_node,
13282 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13283 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13284 # TODO: Generic function for boolean env variables
13285 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13288 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13292 def BuildHooksNodes(self):
13293 """Build hooks nodes.
13296 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13298 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13299 nl.append(self.op.target_node)
13303 def CheckPrereq(self):
13304 """Check prerequisites.
13306 This checks that the instance and node names are valid.
13309 instance_name = self.op.instance_name
13311 self.instance = self.cfg.GetInstanceInfo(instance_name)
13312 assert self.instance is not None, \
13313 "Cannot retrieve locked instance %s" % self.op.instance_name
13314 _CheckNodeOnline(self, self.instance.primary_node)
13316 if (self.op.remove_instance and
13317 self.instance.admin_state == constants.ADMINST_UP and
13318 not self.op.shutdown):
13319 raise errors.OpPrereqError("Can not remove instance without shutting it"
13322 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13323 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13324 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13325 assert self.dst_node is not None
13327 _CheckNodeOnline(self, self.dst_node.name)
13328 _CheckNodeNotDrained(self, self.dst_node.name)
13331 self.dest_disk_info = None
13332 self.dest_x509_ca = None
13334 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13335 self.dst_node = None
13337 if len(self.op.target_node) != len(self.instance.disks):
13338 raise errors.OpPrereqError(("Received destination information for %s"
13339 " disks, but instance %s has %s disks") %
13340 (len(self.op.target_node), instance_name,
13341 len(self.instance.disks)),
13342 errors.ECODE_INVAL)
13344 cds = _GetClusterDomainSecret()
13346 # Check X509 key name
13348 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13349 except (TypeError, ValueError), err:
13350 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13352 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13353 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13354 errors.ECODE_INVAL)
13356 # Load and verify CA
13358 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13359 except OpenSSL.crypto.Error, err:
13360 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13361 (err, ), errors.ECODE_INVAL)
13363 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13364 if errcode is not None:
13365 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13366 (msg, ), errors.ECODE_INVAL)
13368 self.dest_x509_ca = cert
13370 # Verify target information
13372 for idx, disk_data in enumerate(self.op.target_node):
13374 (host, port, magic) = \
13375 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13376 except errors.GenericError, err:
13377 raise errors.OpPrereqError("Target info for disk %s: %s" %
13378 (idx, err), errors.ECODE_INVAL)
13380 disk_info.append((host, port, magic))
13382 assert len(disk_info) == len(self.op.target_node)
13383 self.dest_disk_info = disk_info
13386 raise errors.ProgrammerError("Unhandled export mode %r" %
13389 # instance disk type verification
13390 # TODO: Implement export support for file-based disks
13391 for disk in self.instance.disks:
13392 if disk.dev_type == constants.LD_FILE:
13393 raise errors.OpPrereqError("Export not supported for instances with"
13394 " file-based disks", errors.ECODE_INVAL)
13396 def _CleanupExports(self, feedback_fn):
13397 """Removes exports of current instance from all other nodes.
13399 If an instance in a cluster with nodes A..D was exported to node C, its
13400 exports will be removed from the nodes A, B and D.
13403 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13405 nodelist = self.cfg.GetNodeList()
13406 nodelist.remove(self.dst_node.name)
13408 # on one-node clusters nodelist will be empty after the removal
13409 # if we proceed the backup would be removed because OpBackupQuery
13410 # substitutes an empty list with the full cluster node list.
13411 iname = self.instance.name
13413 feedback_fn("Removing old exports for instance %s" % iname)
13414 exportlist = self.rpc.call_export_list(nodelist)
13415 for node in exportlist:
13416 if exportlist[node].fail_msg:
13418 if iname in exportlist[node].payload:
13419 msg = self.rpc.call_export_remove(node, iname).fail_msg
13421 self.LogWarning("Could not remove older export for instance %s"
13422 " on node %s: %s", iname, node, msg)
13424 def Exec(self, feedback_fn):
13425 """Export an instance to an image in the cluster.
13428 assert self.op.mode in constants.EXPORT_MODES
13430 instance = self.instance
13431 src_node = instance.primary_node
13433 if self.op.shutdown:
13434 # shutdown the instance, but not the disks
13435 feedback_fn("Shutting down instance %s" % instance.name)
13436 result = self.rpc.call_instance_shutdown(src_node, instance,
13437 self.op.shutdown_timeout)
13438 # TODO: Maybe ignore failures if ignore_remove_failures is set
13439 result.Raise("Could not shutdown instance %s on"
13440 " node %s" % (instance.name, src_node))
13442 # set the disks ID correctly since call_instance_start needs the
13443 # correct drbd minor to create the symlinks
13444 for disk in instance.disks:
13445 self.cfg.SetDiskID(disk, src_node)
13447 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13450 # Activate the instance disks if we'exporting a stopped instance
13451 feedback_fn("Activating disks for %s" % instance.name)
13452 _StartInstanceDisks(self, instance, None)
13455 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13458 helper.CreateSnapshots()
13460 if (self.op.shutdown and
13461 instance.admin_state == constants.ADMINST_UP and
13462 not self.op.remove_instance):
13463 assert not activate_disks
13464 feedback_fn("Starting instance %s" % instance.name)
13465 result = self.rpc.call_instance_start(src_node,
13466 (instance, None, None), False)
13467 msg = result.fail_msg
13469 feedback_fn("Failed to start instance: %s" % msg)
13470 _ShutdownInstanceDisks(self, instance)
13471 raise errors.OpExecError("Could not start instance: %s" % msg)
13473 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13474 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13475 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13476 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13477 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13479 (key_name, _, _) = self.x509_key_name
13482 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13485 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13486 key_name, dest_ca_pem,
13491 # Check for backwards compatibility
13492 assert len(dresults) == len(instance.disks)
13493 assert compat.all(isinstance(i, bool) for i in dresults), \
13494 "Not all results are boolean: %r" % dresults
13498 feedback_fn("Deactivating disks for %s" % instance.name)
13499 _ShutdownInstanceDisks(self, instance)
13501 if not (compat.all(dresults) and fin_resu):
13504 failures.append("export finalization")
13505 if not compat.all(dresults):
13506 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13508 failures.append("disk export: disk(s) %s" % fdsk)
13510 raise errors.OpExecError("Export failed, errors in %s" %
13511 utils.CommaJoin(failures))
13513 # At this point, the export was successful, we can cleanup/finish
13515 # Remove instance if requested
13516 if self.op.remove_instance:
13517 feedback_fn("Removing instance %s" % instance.name)
13518 _RemoveInstance(self, feedback_fn, instance,
13519 self.op.ignore_remove_failures)
13521 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13522 self._CleanupExports(feedback_fn)
13524 return fin_resu, dresults
13527 class LUBackupRemove(NoHooksLU):
13528 """Remove exports related to the named instance.
13533 def ExpandNames(self):
13534 self.needed_locks = {}
13535 # We need all nodes to be locked in order for RemoveExport to work, but we
13536 # don't need to lock the instance itself, as nothing will happen to it (and
13537 # we can remove exports also for a removed instance)
13538 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13540 def Exec(self, feedback_fn):
13541 """Remove any export.
13544 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13545 # If the instance was not found we'll try with the name that was passed in.
13546 # This will only work if it was an FQDN, though.
13548 if not instance_name:
13550 instance_name = self.op.instance_name
13552 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13553 exportlist = self.rpc.call_export_list(locked_nodes)
13555 for node in exportlist:
13556 msg = exportlist[node].fail_msg
13558 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13560 if instance_name in exportlist[node].payload:
13562 result = self.rpc.call_export_remove(node, instance_name)
13563 msg = result.fail_msg
13565 logging.error("Could not remove export for instance %s"
13566 " on node %s: %s", instance_name, node, msg)
13568 if fqdn_warn and not found:
13569 feedback_fn("Export not found. If trying to remove an export belonging"
13570 " to a deleted instance please use its Fully Qualified"
13574 class LUGroupAdd(LogicalUnit):
13575 """Logical unit for creating node groups.
13578 HPATH = "group-add"
13579 HTYPE = constants.HTYPE_GROUP
13582 def ExpandNames(self):
13583 # We need the new group's UUID here so that we can create and acquire the
13584 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13585 # that it should not check whether the UUID exists in the configuration.
13586 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13587 self.needed_locks = {}
13588 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13590 def CheckPrereq(self):
13591 """Check prerequisites.
13593 This checks that the given group name is not an existing node group
13598 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13599 except errors.OpPrereqError:
13602 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13603 " node group (UUID: %s)" %
13604 (self.op.group_name, existing_uuid),
13605 errors.ECODE_EXISTS)
13607 if self.op.ndparams:
13608 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13610 if self.op.hv_state:
13611 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13613 self.new_hv_state = None
13615 if self.op.disk_state:
13616 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13618 self.new_disk_state = None
13620 if self.op.diskparams:
13621 for templ in constants.DISK_TEMPLATES:
13622 if templ in self.op.diskparams:
13623 utils.ForceDictType(self.op.diskparams[templ],
13624 constants.DISK_DT_TYPES)
13625 self.new_diskparams = self.op.diskparams
13627 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13628 except errors.OpPrereqError, err:
13629 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
13630 errors.ECODE_INVAL)
13632 self.new_diskparams = {}
13634 if self.op.ipolicy:
13635 cluster = self.cfg.GetClusterInfo()
13636 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13638 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13639 except errors.ConfigurationError, err:
13640 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13641 errors.ECODE_INVAL)
13643 def BuildHooksEnv(self):
13644 """Build hooks env.
13648 "GROUP_NAME": self.op.group_name,
13651 def BuildHooksNodes(self):
13652 """Build hooks nodes.
13655 mn = self.cfg.GetMasterNode()
13656 return ([mn], [mn])
13658 def Exec(self, feedback_fn):
13659 """Add the node group to the cluster.
13662 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13663 uuid=self.group_uuid,
13664 alloc_policy=self.op.alloc_policy,
13665 ndparams=self.op.ndparams,
13666 diskparams=self.new_diskparams,
13667 ipolicy=self.op.ipolicy,
13668 hv_state_static=self.new_hv_state,
13669 disk_state_static=self.new_disk_state)
13671 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13672 del self.remove_locks[locking.LEVEL_NODEGROUP]
13675 class LUGroupAssignNodes(NoHooksLU):
13676 """Logical unit for assigning nodes to groups.
13681 def ExpandNames(self):
13682 # These raise errors.OpPrereqError on their own:
13683 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13684 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13686 # We want to lock all the affected nodes and groups. We have readily
13687 # available the list of nodes, and the *destination* group. To gather the
13688 # list of "source" groups, we need to fetch node information later on.
13689 self.needed_locks = {
13690 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13691 locking.LEVEL_NODE: self.op.nodes,
13694 def DeclareLocks(self, level):
13695 if level == locking.LEVEL_NODEGROUP:
13696 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13698 # Try to get all affected nodes' groups without having the group or node
13699 # lock yet. Needs verification later in the code flow.
13700 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13702 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13704 def CheckPrereq(self):
13705 """Check prerequisites.
13708 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13709 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13710 frozenset(self.op.nodes))
13712 expected_locks = (set([self.group_uuid]) |
13713 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13714 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13715 if actual_locks != expected_locks:
13716 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13717 " current groups are '%s', used to be '%s'" %
13718 (utils.CommaJoin(expected_locks),
13719 utils.CommaJoin(actual_locks)))
13721 self.node_data = self.cfg.GetAllNodesInfo()
13722 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13723 instance_data = self.cfg.GetAllInstancesInfo()
13725 if self.group is None:
13726 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13727 (self.op.group_name, self.group_uuid))
13729 (new_splits, previous_splits) = \
13730 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13731 for node in self.op.nodes],
13732 self.node_data, instance_data)
13735 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13737 if not self.op.force:
13738 raise errors.OpExecError("The following instances get split by this"
13739 " change and --force was not given: %s" %
13742 self.LogWarning("This operation will split the following instances: %s",
13745 if previous_splits:
13746 self.LogWarning("In addition, these already-split instances continue"
13747 " to be split across groups: %s",
13748 utils.CommaJoin(utils.NiceSort(previous_splits)))
13750 def Exec(self, feedback_fn):
13751 """Assign nodes to a new group.
13754 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13756 self.cfg.AssignGroupNodes(mods)
13759 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13760 """Check for split instances after a node assignment.
13762 This method considers a series of node assignments as an atomic operation,
13763 and returns information about split instances after applying the set of
13766 In particular, it returns information about newly split instances, and
13767 instances that were already split, and remain so after the change.
13769 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13772 @type changes: list of (node_name, new_group_uuid) pairs.
13773 @param changes: list of node assignments to consider.
13774 @param node_data: a dict with data for all nodes
13775 @param instance_data: a dict with all instances to consider
13776 @rtype: a two-tuple
13777 @return: a list of instances that were previously okay and result split as a
13778 consequence of this change, and a list of instances that were previously
13779 split and this change does not fix.
13782 changed_nodes = dict((node, group) for node, group in changes
13783 if node_data[node].group != group)
13785 all_split_instances = set()
13786 previously_split_instances = set()
13788 def InstanceNodes(instance):
13789 return [instance.primary_node] + list(instance.secondary_nodes)
13791 for inst in instance_data.values():
13792 if inst.disk_template not in constants.DTS_INT_MIRROR:
13795 instance_nodes = InstanceNodes(inst)
13797 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13798 previously_split_instances.add(inst.name)
13800 if len(set(changed_nodes.get(node, node_data[node].group)
13801 for node in instance_nodes)) > 1:
13802 all_split_instances.add(inst.name)
13804 return (list(all_split_instances - previously_split_instances),
13805 list(previously_split_instances & all_split_instances))
13808 class _GroupQuery(_QueryBase):
13809 FIELDS = query.GROUP_FIELDS
13811 def ExpandNames(self, lu):
13812 lu.needed_locks = {}
13814 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13815 self._cluster = lu.cfg.GetClusterInfo()
13816 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13819 self.wanted = [name_to_uuid[name]
13820 for name in utils.NiceSort(name_to_uuid.keys())]
13822 # Accept names to be either names or UUIDs.
13825 all_uuid = frozenset(self._all_groups.keys())
13827 for name in self.names:
13828 if name in all_uuid:
13829 self.wanted.append(name)
13830 elif name in name_to_uuid:
13831 self.wanted.append(name_to_uuid[name])
13833 missing.append(name)
13836 raise errors.OpPrereqError("Some groups do not exist: %s" %
13837 utils.CommaJoin(missing),
13838 errors.ECODE_NOENT)
13840 def DeclareLocks(self, lu, level):
13843 def _GetQueryData(self, lu):
13844 """Computes the list of node groups and their attributes.
13847 do_nodes = query.GQ_NODE in self.requested_data
13848 do_instances = query.GQ_INST in self.requested_data
13850 group_to_nodes = None
13851 group_to_instances = None
13853 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13854 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13855 # latter GetAllInstancesInfo() is not enough, for we have to go through
13856 # instance->node. Hence, we will need to process nodes even if we only need
13857 # instance information.
13858 if do_nodes or do_instances:
13859 all_nodes = lu.cfg.GetAllNodesInfo()
13860 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13863 for node in all_nodes.values():
13864 if node.group in group_to_nodes:
13865 group_to_nodes[node.group].append(node.name)
13866 node_to_group[node.name] = node.group
13869 all_instances = lu.cfg.GetAllInstancesInfo()
13870 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13872 for instance in all_instances.values():
13873 node = instance.primary_node
13874 if node in node_to_group:
13875 group_to_instances[node_to_group[node]].append(instance.name)
13878 # Do not pass on node information if it was not requested.
13879 group_to_nodes = None
13881 return query.GroupQueryData(self._cluster,
13882 [self._all_groups[uuid]
13883 for uuid in self.wanted],
13884 group_to_nodes, group_to_instances,
13885 query.GQ_DISKPARAMS in self.requested_data)
13888 class LUGroupQuery(NoHooksLU):
13889 """Logical unit for querying node groups.
13894 def CheckArguments(self):
13895 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13896 self.op.output_fields, False)
13898 def ExpandNames(self):
13899 self.gq.ExpandNames(self)
13901 def DeclareLocks(self, level):
13902 self.gq.DeclareLocks(self, level)
13904 def Exec(self, feedback_fn):
13905 return self.gq.OldStyleQuery(self)
13908 class LUGroupSetParams(LogicalUnit):
13909 """Modifies the parameters of a node group.
13912 HPATH = "group-modify"
13913 HTYPE = constants.HTYPE_GROUP
13916 def CheckArguments(self):
13919 self.op.diskparams,
13920 self.op.alloc_policy,
13922 self.op.disk_state,
13926 if all_changes.count(None) == len(all_changes):
13927 raise errors.OpPrereqError("Please pass at least one modification",
13928 errors.ECODE_INVAL)
13930 def ExpandNames(self):
13931 # This raises errors.OpPrereqError on its own:
13932 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13934 self.needed_locks = {
13935 locking.LEVEL_INSTANCE: [],
13936 locking.LEVEL_NODEGROUP: [self.group_uuid],
13939 self.share_locks[locking.LEVEL_INSTANCE] = 1
13941 def DeclareLocks(self, level):
13942 if level == locking.LEVEL_INSTANCE:
13943 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13945 # Lock instances optimistically, needs verification once group lock has
13947 self.needed_locks[locking.LEVEL_INSTANCE] = \
13948 self.cfg.GetNodeGroupInstances(self.group_uuid)
13951 def _UpdateAndVerifyDiskParams(old, new):
13952 """Updates and verifies disk parameters.
13955 new_params = _GetUpdatedParams(old, new)
13956 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
13959 def CheckPrereq(self):
13960 """Check prerequisites.
13963 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13965 # Check if locked instances are still correct
13966 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13968 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13969 cluster = self.cfg.GetClusterInfo()
13971 if self.group is None:
13972 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13973 (self.op.group_name, self.group_uuid))
13975 if self.op.ndparams:
13976 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13977 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
13978 self.new_ndparams = new_ndparams
13980 if self.op.diskparams:
13981 diskparams = self.group.diskparams
13982 uavdp = self._UpdateAndVerifyDiskParams
13983 # For each disktemplate subdict update and verify the values
13984 new_diskparams = dict((dt,
13985 uavdp(diskparams.get(dt, {}),
13986 self.op.diskparams[dt]))
13987 for dt in constants.DISK_TEMPLATES
13988 if dt in self.op.diskparams)
13989 # As we've all subdicts of diskparams ready, lets merge the actual
13990 # dict with all updated subdicts
13991 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
13993 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13994 except errors.OpPrereqError, err:
13995 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
13996 errors.ECODE_INVAL)
13998 if self.op.hv_state:
13999 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14000 self.group.hv_state_static)
14002 if self.op.disk_state:
14003 self.new_disk_state = \
14004 _MergeAndVerifyDiskState(self.op.disk_state,
14005 self.group.disk_state_static)
14007 if self.op.ipolicy:
14008 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14012 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14013 inst_filter = lambda inst: inst.name in owned_instances
14014 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14016 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
14018 new_ipolicy, instances)
14021 self.LogWarning("After the ipolicy change the following instances"
14022 " violate them: %s",
14023 utils.CommaJoin(violations))
14025 def BuildHooksEnv(self):
14026 """Build hooks env.
14030 "GROUP_NAME": self.op.group_name,
14031 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14034 def BuildHooksNodes(self):
14035 """Build hooks nodes.
14038 mn = self.cfg.GetMasterNode()
14039 return ([mn], [mn])
14041 def Exec(self, feedback_fn):
14042 """Modifies the node group.
14047 if self.op.ndparams:
14048 self.group.ndparams = self.new_ndparams
14049 result.append(("ndparams", str(self.group.ndparams)))
14051 if self.op.diskparams:
14052 self.group.diskparams = self.new_diskparams
14053 result.append(("diskparams", str(self.group.diskparams)))
14055 if self.op.alloc_policy:
14056 self.group.alloc_policy = self.op.alloc_policy
14058 if self.op.hv_state:
14059 self.group.hv_state_static = self.new_hv_state
14061 if self.op.disk_state:
14062 self.group.disk_state_static = self.new_disk_state
14064 if self.op.ipolicy:
14065 self.group.ipolicy = self.new_ipolicy
14067 self.cfg.Update(self.group, feedback_fn)
14071 class LUGroupRemove(LogicalUnit):
14072 HPATH = "group-remove"
14073 HTYPE = constants.HTYPE_GROUP
14076 def ExpandNames(self):
14077 # This will raises errors.OpPrereqError on its own:
14078 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14079 self.needed_locks = {
14080 locking.LEVEL_NODEGROUP: [self.group_uuid],
14083 def CheckPrereq(self):
14084 """Check prerequisites.
14086 This checks that the given group name exists as a node group, that is
14087 empty (i.e., contains no nodes), and that is not the last group of the
14091 # Verify that the group is empty.
14092 group_nodes = [node.name
14093 for node in self.cfg.GetAllNodesInfo().values()
14094 if node.group == self.group_uuid]
14097 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14099 (self.op.group_name,
14100 utils.CommaJoin(utils.NiceSort(group_nodes))),
14101 errors.ECODE_STATE)
14103 # Verify the cluster would not be left group-less.
14104 if len(self.cfg.GetNodeGroupList()) == 1:
14105 raise errors.OpPrereqError("Group '%s' is the only group,"
14106 " cannot be removed" %
14107 self.op.group_name,
14108 errors.ECODE_STATE)
14110 def BuildHooksEnv(self):
14111 """Build hooks env.
14115 "GROUP_NAME": self.op.group_name,
14118 def BuildHooksNodes(self):
14119 """Build hooks nodes.
14122 mn = self.cfg.GetMasterNode()
14123 return ([mn], [mn])
14125 def Exec(self, feedback_fn):
14126 """Remove the node group.
14130 self.cfg.RemoveNodeGroup(self.group_uuid)
14131 except errors.ConfigurationError:
14132 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14133 (self.op.group_name, self.group_uuid))
14135 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14138 class LUGroupRename(LogicalUnit):
14139 HPATH = "group-rename"
14140 HTYPE = constants.HTYPE_GROUP
14143 def ExpandNames(self):
14144 # This raises errors.OpPrereqError on its own:
14145 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14147 self.needed_locks = {
14148 locking.LEVEL_NODEGROUP: [self.group_uuid],
14151 def CheckPrereq(self):
14152 """Check prerequisites.
14154 Ensures requested new name is not yet used.
14158 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14159 except errors.OpPrereqError:
14162 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14163 " node group (UUID: %s)" %
14164 (self.op.new_name, new_name_uuid),
14165 errors.ECODE_EXISTS)
14167 def BuildHooksEnv(self):
14168 """Build hooks env.
14172 "OLD_NAME": self.op.group_name,
14173 "NEW_NAME": self.op.new_name,
14176 def BuildHooksNodes(self):
14177 """Build hooks nodes.
14180 mn = self.cfg.GetMasterNode()
14182 all_nodes = self.cfg.GetAllNodesInfo()
14183 all_nodes.pop(mn, None)
14186 run_nodes.extend(node.name for node in all_nodes.values()
14187 if node.group == self.group_uuid)
14189 return (run_nodes, run_nodes)
14191 def Exec(self, feedback_fn):
14192 """Rename the node group.
14195 group = self.cfg.GetNodeGroup(self.group_uuid)
14198 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14199 (self.op.group_name, self.group_uuid))
14201 group.name = self.op.new_name
14202 self.cfg.Update(group, feedback_fn)
14204 return self.op.new_name
14207 class LUGroupEvacuate(LogicalUnit):
14208 HPATH = "group-evacuate"
14209 HTYPE = constants.HTYPE_GROUP
14212 def ExpandNames(self):
14213 # This raises errors.OpPrereqError on its own:
14214 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14216 if self.op.target_groups:
14217 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14218 self.op.target_groups)
14220 self.req_target_uuids = []
14222 if self.group_uuid in self.req_target_uuids:
14223 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14224 " as a target group (targets are %s)" %
14226 utils.CommaJoin(self.req_target_uuids)),
14227 errors.ECODE_INVAL)
14229 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14231 self.share_locks = _ShareAll()
14232 self.needed_locks = {
14233 locking.LEVEL_INSTANCE: [],
14234 locking.LEVEL_NODEGROUP: [],
14235 locking.LEVEL_NODE: [],
14238 def DeclareLocks(self, level):
14239 if level == locking.LEVEL_INSTANCE:
14240 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14242 # Lock instances optimistically, needs verification once node and group
14243 # locks have been acquired
14244 self.needed_locks[locking.LEVEL_INSTANCE] = \
14245 self.cfg.GetNodeGroupInstances(self.group_uuid)
14247 elif level == locking.LEVEL_NODEGROUP:
14248 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14250 if self.req_target_uuids:
14251 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14253 # Lock all groups used by instances optimistically; this requires going
14254 # via the node before it's locked, requiring verification later on
14255 lock_groups.update(group_uuid
14256 for instance_name in
14257 self.owned_locks(locking.LEVEL_INSTANCE)
14259 self.cfg.GetInstanceNodeGroups(instance_name))
14261 # No target groups, need to lock all of them
14262 lock_groups = locking.ALL_SET
14264 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14266 elif level == locking.LEVEL_NODE:
14267 # This will only lock the nodes in the group to be evacuated which
14268 # contain actual instances
14269 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14270 self._LockInstancesNodes()
14272 # Lock all nodes in group to be evacuated and target groups
14273 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14274 assert self.group_uuid in owned_groups
14275 member_nodes = [node_name
14276 for group in owned_groups
14277 for node_name in self.cfg.GetNodeGroup(group).members]
14278 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14280 def CheckPrereq(self):
14281 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14282 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14283 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14285 assert owned_groups.issuperset(self.req_target_uuids)
14286 assert self.group_uuid in owned_groups
14288 # Check if locked instances are still correct
14289 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14291 # Get instance information
14292 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14294 # Check if node groups for locked instances are still correct
14295 _CheckInstancesNodeGroups(self.cfg, self.instances,
14296 owned_groups, owned_nodes, self.group_uuid)
14298 if self.req_target_uuids:
14299 # User requested specific target groups
14300 self.target_uuids = self.req_target_uuids
14302 # All groups except the one to be evacuated are potential targets
14303 self.target_uuids = [group_uuid for group_uuid in owned_groups
14304 if group_uuid != self.group_uuid]
14306 if not self.target_uuids:
14307 raise errors.OpPrereqError("There are no possible target groups",
14308 errors.ECODE_INVAL)
14310 def BuildHooksEnv(self):
14311 """Build hooks env.
14315 "GROUP_NAME": self.op.group_name,
14316 "TARGET_GROUPS": " ".join(self.target_uuids),
14319 def BuildHooksNodes(self):
14320 """Build hooks nodes.
14323 mn = self.cfg.GetMasterNode()
14325 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14327 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14329 return (run_nodes, run_nodes)
14331 def Exec(self, feedback_fn):
14332 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14334 assert self.group_uuid not in self.target_uuids
14336 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14337 instances=instances, target_groups=self.target_uuids)
14339 ial.Run(self.op.iallocator)
14341 if not ial.success:
14342 raise errors.OpPrereqError("Can't compute group evacuation using"
14343 " iallocator '%s': %s" %
14344 (self.op.iallocator, ial.info),
14345 errors.ECODE_NORES)
14347 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14349 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14350 len(jobs), self.op.group_name)
14352 return ResultWithJobs(jobs)
14355 class TagsLU(NoHooksLU): # pylint: disable=W0223
14356 """Generic tags LU.
14358 This is an abstract class which is the parent of all the other tags LUs.
14361 def ExpandNames(self):
14362 self.group_uuid = None
14363 self.needed_locks = {}
14365 if self.op.kind == constants.TAG_NODE:
14366 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14367 lock_level = locking.LEVEL_NODE
14368 lock_name = self.op.name
14369 elif self.op.kind == constants.TAG_INSTANCE:
14370 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14371 lock_level = locking.LEVEL_INSTANCE
14372 lock_name = self.op.name
14373 elif self.op.kind == constants.TAG_NODEGROUP:
14374 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14375 lock_level = locking.LEVEL_NODEGROUP
14376 lock_name = self.group_uuid
14381 if lock_level and getattr(self.op, "use_locking", True):
14382 self.needed_locks[lock_level] = lock_name
14384 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14385 # not possible to acquire the BGL based on opcode parameters)
14387 def CheckPrereq(self):
14388 """Check prerequisites.
14391 if self.op.kind == constants.TAG_CLUSTER:
14392 self.target = self.cfg.GetClusterInfo()
14393 elif self.op.kind == constants.TAG_NODE:
14394 self.target = self.cfg.GetNodeInfo(self.op.name)
14395 elif self.op.kind == constants.TAG_INSTANCE:
14396 self.target = self.cfg.GetInstanceInfo(self.op.name)
14397 elif self.op.kind == constants.TAG_NODEGROUP:
14398 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14400 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14401 str(self.op.kind), errors.ECODE_INVAL)
14404 class LUTagsGet(TagsLU):
14405 """Returns the tags of a given object.
14410 def ExpandNames(self):
14411 TagsLU.ExpandNames(self)
14413 # Share locks as this is only a read operation
14414 self.share_locks = _ShareAll()
14416 def Exec(self, feedback_fn):
14417 """Returns the tag list.
14420 return list(self.target.GetTags())
14423 class LUTagsSearch(NoHooksLU):
14424 """Searches the tags for a given pattern.
14429 def ExpandNames(self):
14430 self.needed_locks = {}
14432 def CheckPrereq(self):
14433 """Check prerequisites.
14435 This checks the pattern passed for validity by compiling it.
14439 self.re = re.compile(self.op.pattern)
14440 except re.error, err:
14441 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14442 (self.op.pattern, err), errors.ECODE_INVAL)
14444 def Exec(self, feedback_fn):
14445 """Returns the tag list.
14449 tgts = [("/cluster", cfg.GetClusterInfo())]
14450 ilist = cfg.GetAllInstancesInfo().values()
14451 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14452 nlist = cfg.GetAllNodesInfo().values()
14453 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14454 tgts.extend(("/nodegroup/%s" % n.name, n)
14455 for n in cfg.GetAllNodeGroupsInfo().values())
14457 for path, target in tgts:
14458 for tag in target.GetTags():
14459 if self.re.search(tag):
14460 results.append((path, tag))
14464 class LUTagsSet(TagsLU):
14465 """Sets a tag on a given object.
14470 def CheckPrereq(self):
14471 """Check prerequisites.
14473 This checks the type and length of the tag name and value.
14476 TagsLU.CheckPrereq(self)
14477 for tag in self.op.tags:
14478 objects.TaggableObject.ValidateTag(tag)
14480 def Exec(self, feedback_fn):
14485 for tag in self.op.tags:
14486 self.target.AddTag(tag)
14487 except errors.TagError, err:
14488 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14489 self.cfg.Update(self.target, feedback_fn)
14492 class LUTagsDel(TagsLU):
14493 """Delete a list of tags from a given object.
14498 def CheckPrereq(self):
14499 """Check prerequisites.
14501 This checks that we have the given tag.
14504 TagsLU.CheckPrereq(self)
14505 for tag in self.op.tags:
14506 objects.TaggableObject.ValidateTag(tag)
14507 del_tags = frozenset(self.op.tags)
14508 cur_tags = self.target.GetTags()
14510 diff_tags = del_tags - cur_tags
14512 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14513 raise errors.OpPrereqError("Tag(s) %s not found" %
14514 (utils.CommaJoin(diff_names), ),
14515 errors.ECODE_NOENT)
14517 def Exec(self, feedback_fn):
14518 """Remove the tag from the object.
14521 for tag in self.op.tags:
14522 self.target.RemoveTag(tag)
14523 self.cfg.Update(self.target, feedback_fn)
14526 class LUTestDelay(NoHooksLU):
14527 """Sleep for a specified amount of time.
14529 This LU sleeps on the master and/or nodes for a specified amount of
14535 def ExpandNames(self):
14536 """Expand names and set required locks.
14538 This expands the node list, if any.
14541 self.needed_locks = {}
14542 if self.op.on_nodes:
14543 # _GetWantedNodes can be used here, but is not always appropriate to use
14544 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14545 # more information.
14546 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14547 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14549 def _TestDelay(self):
14550 """Do the actual sleep.
14553 if self.op.on_master:
14554 if not utils.TestDelay(self.op.duration):
14555 raise errors.OpExecError("Error during master delay test")
14556 if self.op.on_nodes:
14557 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14558 for node, node_result in result.items():
14559 node_result.Raise("Failure during rpc call to node %s" % node)
14561 def Exec(self, feedback_fn):
14562 """Execute the test delay opcode, with the wanted repetitions.
14565 if self.op.repeat == 0:
14568 top_value = self.op.repeat - 1
14569 for i in range(self.op.repeat):
14570 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14574 class LUTestJqueue(NoHooksLU):
14575 """Utility LU to test some aspects of the job queue.
14580 # Must be lower than default timeout for WaitForJobChange to see whether it
14581 # notices changed jobs
14582 _CLIENT_CONNECT_TIMEOUT = 20.0
14583 _CLIENT_CONFIRM_TIMEOUT = 60.0
14586 def _NotifyUsingSocket(cls, cb, errcls):
14587 """Opens a Unix socket and waits for another program to connect.
14590 @param cb: Callback to send socket name to client
14591 @type errcls: class
14592 @param errcls: Exception class to use for errors
14595 # Using a temporary directory as there's no easy way to create temporary
14596 # sockets without writing a custom loop around tempfile.mktemp and
14598 tmpdir = tempfile.mkdtemp()
14600 tmpsock = utils.PathJoin(tmpdir, "sock")
14602 logging.debug("Creating temporary socket at %s", tmpsock)
14603 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14608 # Send details to client
14611 # Wait for client to connect before continuing
14612 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14614 (conn, _) = sock.accept()
14615 except socket.error, err:
14616 raise errcls("Client didn't connect in time (%s)" % err)
14620 # Remove as soon as client is connected
14621 shutil.rmtree(tmpdir)
14623 # Wait for client to close
14626 # pylint: disable=E1101
14627 # Instance of '_socketobject' has no ... member
14628 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14630 except socket.error, err:
14631 raise errcls("Client failed to confirm notification (%s)" % err)
14635 def _SendNotification(self, test, arg, sockname):
14636 """Sends a notification to the client.
14639 @param test: Test name
14640 @param arg: Test argument (depends on test)
14641 @type sockname: string
14642 @param sockname: Socket path
14645 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14647 def _Notify(self, prereq, test, arg):
14648 """Notifies the client of a test.
14651 @param prereq: Whether this is a prereq-phase test
14653 @param test: Test name
14654 @param arg: Test argument (depends on test)
14658 errcls = errors.OpPrereqError
14660 errcls = errors.OpExecError
14662 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14666 def CheckArguments(self):
14667 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14668 self.expandnames_calls = 0
14670 def ExpandNames(self):
14671 checkargs_calls = getattr(self, "checkargs_calls", 0)
14672 if checkargs_calls < 1:
14673 raise errors.ProgrammerError("CheckArguments was not called")
14675 self.expandnames_calls += 1
14677 if self.op.notify_waitlock:
14678 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14680 self.LogInfo("Expanding names")
14682 # Get lock on master node (just to get a lock, not for a particular reason)
14683 self.needed_locks = {
14684 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14687 def Exec(self, feedback_fn):
14688 if self.expandnames_calls < 1:
14689 raise errors.ProgrammerError("ExpandNames was not called")
14691 if self.op.notify_exec:
14692 self._Notify(False, constants.JQT_EXEC, None)
14694 self.LogInfo("Executing")
14696 if self.op.log_messages:
14697 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14698 for idx, msg in enumerate(self.op.log_messages):
14699 self.LogInfo("Sending log message %s", idx + 1)
14700 feedback_fn(constants.JQT_MSGPREFIX + msg)
14701 # Report how many test messages have been sent
14702 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14705 raise errors.OpExecError("Opcode failure was requested")
14710 class IAllocator(object):
14711 """IAllocator framework.
14713 An IAllocator instance has three sets of attributes:
14714 - cfg that is needed to query the cluster
14715 - input data (all members of the _KEYS class attribute are required)
14716 - four buffer attributes (in|out_data|text), that represent the
14717 input (to the external script) in text and data structure format,
14718 and the output from it, again in two formats
14719 - the result variables from the script (success, info, nodes) for
14723 # pylint: disable=R0902
14724 # lots of instance attributes
14726 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14728 self.rpc = rpc_runner
14729 # init buffer variables
14730 self.in_text = self.out_text = self.in_data = self.out_data = None
14731 # init all input fields so that pylint is happy
14733 self.memory = self.disks = self.disk_template = self.spindle_use = None
14734 self.os = self.tags = self.nics = self.vcpus = None
14735 self.hypervisor = None
14736 self.relocate_from = None
14738 self.instances = None
14739 self.evac_mode = None
14740 self.target_groups = []
14742 self.required_nodes = None
14743 # init result fields
14744 self.success = self.info = self.result = None
14747 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14749 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14750 " IAllocator" % self.mode)
14752 keyset = [n for (n, _) in keydata]
14755 if key not in keyset:
14756 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14757 " IAllocator" % key)
14758 setattr(self, key, kwargs[key])
14761 if key not in kwargs:
14762 raise errors.ProgrammerError("Missing input parameter '%s' to"
14763 " IAllocator" % key)
14764 self._BuildInputData(compat.partial(fn, self), keydata)
14766 def _ComputeClusterData(self):
14767 """Compute the generic allocator input data.
14769 This is the data that is independent of the actual operation.
14773 cluster_info = cfg.GetClusterInfo()
14776 "version": constants.IALLOCATOR_VERSION,
14777 "cluster_name": cfg.GetClusterName(),
14778 "cluster_tags": list(cluster_info.GetTags()),
14779 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14780 "ipolicy": cluster_info.ipolicy,
14782 ninfo = cfg.GetAllNodesInfo()
14783 iinfo = cfg.GetAllInstancesInfo().values()
14784 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14787 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14789 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14790 hypervisor_name = self.hypervisor
14791 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14792 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14794 hypervisor_name = cluster_info.primary_hypervisor
14796 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14799 self.rpc.call_all_instances_info(node_list,
14800 cluster_info.enabled_hypervisors)
14802 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14804 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14805 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14806 i_list, config_ndata)
14807 assert len(data["nodes"]) == len(ninfo), \
14808 "Incomplete node data computed"
14810 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14812 self.in_data = data
14815 def _ComputeNodeGroupData(cfg):
14816 """Compute node groups data.
14819 cluster = cfg.GetClusterInfo()
14820 ng = dict((guuid, {
14821 "name": gdata.name,
14822 "alloc_policy": gdata.alloc_policy,
14823 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14825 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14830 def _ComputeBasicNodeData(cfg, node_cfg):
14831 """Compute global node data.
14834 @returns: a dict of name: (node dict, node config)
14837 # fill in static (config-based) values
14838 node_results = dict((ninfo.name, {
14839 "tags": list(ninfo.GetTags()),
14840 "primary_ip": ninfo.primary_ip,
14841 "secondary_ip": ninfo.secondary_ip,
14842 "offline": ninfo.offline,
14843 "drained": ninfo.drained,
14844 "master_candidate": ninfo.master_candidate,
14845 "group": ninfo.group,
14846 "master_capable": ninfo.master_capable,
14847 "vm_capable": ninfo.vm_capable,
14848 "ndparams": cfg.GetNdParams(ninfo),
14850 for ninfo in node_cfg.values())
14852 return node_results
14855 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14857 """Compute global node data.
14859 @param node_results: the basic node structures as filled from the config
14862 #TODO(dynmem): compute the right data on MAX and MIN memory
14863 # make a copy of the current dict
14864 node_results = dict(node_results)
14865 for nname, nresult in node_data.items():
14866 assert nname in node_results, "Missing basic data for node %s" % nname
14867 ninfo = node_cfg[nname]
14869 if not (ninfo.offline or ninfo.drained):
14870 nresult.Raise("Can't get data for node %s" % nname)
14871 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14873 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14875 for attr in ["memory_total", "memory_free", "memory_dom0",
14876 "vg_size", "vg_free", "cpu_total"]:
14877 if attr not in remote_info:
14878 raise errors.OpExecError("Node '%s' didn't return attribute"
14879 " '%s'" % (nname, attr))
14880 if not isinstance(remote_info[attr], int):
14881 raise errors.OpExecError("Node '%s' returned invalid value"
14883 (nname, attr, remote_info[attr]))
14884 # compute memory used by primary instances
14885 i_p_mem = i_p_up_mem = 0
14886 for iinfo, beinfo in i_list:
14887 if iinfo.primary_node == nname:
14888 i_p_mem += beinfo[constants.BE_MAXMEM]
14889 if iinfo.name not in node_iinfo[nname].payload:
14892 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14893 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14894 remote_info["memory_free"] -= max(0, i_mem_diff)
14896 if iinfo.admin_state == constants.ADMINST_UP:
14897 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14899 # compute memory used by instances
14901 "total_memory": remote_info["memory_total"],
14902 "reserved_memory": remote_info["memory_dom0"],
14903 "free_memory": remote_info["memory_free"],
14904 "total_disk": remote_info["vg_size"],
14905 "free_disk": remote_info["vg_free"],
14906 "total_cpus": remote_info["cpu_total"],
14907 "i_pri_memory": i_p_mem,
14908 "i_pri_up_memory": i_p_up_mem,
14910 pnr_dyn.update(node_results[nname])
14911 node_results[nname] = pnr_dyn
14913 return node_results
14916 def _ComputeInstanceData(cluster_info, i_list):
14917 """Compute global instance data.
14921 for iinfo, beinfo in i_list:
14923 for nic in iinfo.nics:
14924 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14928 "mode": filled_params[constants.NIC_MODE],
14929 "link": filled_params[constants.NIC_LINK],
14931 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14932 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14933 nic_data.append(nic_dict)
14935 "tags": list(iinfo.GetTags()),
14936 "admin_state": iinfo.admin_state,
14937 "vcpus": beinfo[constants.BE_VCPUS],
14938 "memory": beinfo[constants.BE_MAXMEM],
14939 "spindle_use": beinfo[constants.BE_SPINDLE_USE],
14941 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14943 "disks": [{constants.IDISK_SIZE: dsk.size,
14944 constants.IDISK_MODE: dsk.mode}
14945 for dsk in iinfo.disks],
14946 "disk_template": iinfo.disk_template,
14947 "hypervisor": iinfo.hypervisor,
14949 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14951 instance_data[iinfo.name] = pir
14953 return instance_data
14955 def _AddNewInstance(self):
14956 """Add new instance data to allocator structure.
14958 This in combination with _AllocatorGetClusterData will create the
14959 correct structure needed as input for the allocator.
14961 The checks for the completeness of the opcode must have already been
14965 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14967 if self.disk_template in constants.DTS_INT_MIRROR:
14968 self.required_nodes = 2
14970 self.required_nodes = 1
14974 "disk_template": self.disk_template,
14977 "vcpus": self.vcpus,
14978 "memory": self.memory,
14979 "spindle_use": self.spindle_use,
14980 "disks": self.disks,
14981 "disk_space_total": disk_space,
14983 "required_nodes": self.required_nodes,
14984 "hypervisor": self.hypervisor,
14989 def _AddRelocateInstance(self):
14990 """Add relocate instance data to allocator structure.
14992 This in combination with _IAllocatorGetClusterData will create the
14993 correct structure needed as input for the allocator.
14995 The checks for the completeness of the opcode must have already been
14999 instance = self.cfg.GetInstanceInfo(self.name)
15000 if instance is None:
15001 raise errors.ProgrammerError("Unknown instance '%s' passed to"
15002 " IAllocator" % self.name)
15004 if instance.disk_template not in constants.DTS_MIRRORED:
15005 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
15006 errors.ECODE_INVAL)
15008 if instance.disk_template in constants.DTS_INT_MIRROR and \
15009 len(instance.secondary_nodes) != 1:
15010 raise errors.OpPrereqError("Instance has not exactly one secondary node",
15011 errors.ECODE_STATE)
15013 self.required_nodes = 1
15014 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
15015 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
15019 "disk_space_total": disk_space,
15020 "required_nodes": self.required_nodes,
15021 "relocate_from": self.relocate_from,
15025 def _AddNodeEvacuate(self):
15026 """Get data for node-evacuate requests.
15030 "instances": self.instances,
15031 "evac_mode": self.evac_mode,
15034 def _AddChangeGroup(self):
15035 """Get data for node-evacuate requests.
15039 "instances": self.instances,
15040 "target_groups": self.target_groups,
15043 def _BuildInputData(self, fn, keydata):
15044 """Build input data structures.
15047 self._ComputeClusterData()
15050 request["type"] = self.mode
15051 for keyname, keytype in keydata:
15052 if keyname not in request:
15053 raise errors.ProgrammerError("Request parameter %s is missing" %
15055 val = request[keyname]
15056 if not keytype(val):
15057 raise errors.ProgrammerError("Request parameter %s doesn't pass"
15058 " validation, value %s, expected"
15059 " type %s" % (keyname, val, keytype))
15060 self.in_data["request"] = request
15062 self.in_text = serializer.Dump(self.in_data)
15064 _STRING_LIST = ht.TListOf(ht.TString)
15065 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
15066 # pylint: disable=E1101
15067 # Class '...' has no 'OP_ID' member
15068 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
15069 opcodes.OpInstanceMigrate.OP_ID,
15070 opcodes.OpInstanceReplaceDisks.OP_ID])
15074 ht.TListOf(ht.TAnd(ht.TIsLength(3),
15075 ht.TItems([ht.TNonEmptyString,
15076 ht.TNonEmptyString,
15077 ht.TListOf(ht.TNonEmptyString),
15080 ht.TListOf(ht.TAnd(ht.TIsLength(2),
15081 ht.TItems([ht.TNonEmptyString,
15084 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
15085 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
15088 constants.IALLOCATOR_MODE_ALLOC:
15091 ("name", ht.TString),
15092 ("memory", ht.TInt),
15093 ("spindle_use", ht.TInt),
15094 ("disks", ht.TListOf(ht.TDict)),
15095 ("disk_template", ht.TString),
15096 ("os", ht.TString),
15097 ("tags", _STRING_LIST),
15098 ("nics", ht.TListOf(ht.TDict)),
15099 ("vcpus", ht.TInt),
15100 ("hypervisor", ht.TString),
15102 constants.IALLOCATOR_MODE_RELOC:
15103 (_AddRelocateInstance,
15104 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
15106 constants.IALLOCATOR_MODE_NODE_EVAC:
15107 (_AddNodeEvacuate, [
15108 ("instances", _STRING_LIST),
15109 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
15111 constants.IALLOCATOR_MODE_CHG_GROUP:
15112 (_AddChangeGroup, [
15113 ("instances", _STRING_LIST),
15114 ("target_groups", _STRING_LIST),
15118 def Run(self, name, validate=True, call_fn=None):
15119 """Run an instance allocator and return the results.
15122 if call_fn is None:
15123 call_fn = self.rpc.call_iallocator_runner
15125 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
15126 result.Raise("Failure while running the iallocator script")
15128 self.out_text = result.payload
15130 self._ValidateResult()
15132 def _ValidateResult(self):
15133 """Process the allocator results.
15135 This will process and if successful save the result in
15136 self.out_data and the other parameters.
15140 rdict = serializer.Load(self.out_text)
15141 except Exception, err:
15142 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
15144 if not isinstance(rdict, dict):
15145 raise errors.OpExecError("Can't parse iallocator results: not a dict")
15147 # TODO: remove backwards compatiblity in later versions
15148 if "nodes" in rdict and "result" not in rdict:
15149 rdict["result"] = rdict["nodes"]
15152 for key in "success", "info", "result":
15153 if key not in rdict:
15154 raise errors.OpExecError("Can't parse iallocator results:"
15155 " missing key '%s'" % key)
15156 setattr(self, key, rdict[key])
15158 if not self._result_check(self.result):
15159 raise errors.OpExecError("Iallocator returned invalid result,"
15160 " expected %s, got %s" %
15161 (self._result_check, self.result),
15162 errors.ECODE_INVAL)
15164 if self.mode == constants.IALLOCATOR_MODE_RELOC:
15165 assert self.relocate_from is not None
15166 assert self.required_nodes == 1
15168 node2group = dict((name, ndata["group"])
15169 for (name, ndata) in self.in_data["nodes"].items())
15171 fn = compat.partial(self._NodesToGroups, node2group,
15172 self.in_data["nodegroups"])
15174 instance = self.cfg.GetInstanceInfo(self.name)
15175 request_groups = fn(self.relocate_from + [instance.primary_node])
15176 result_groups = fn(rdict["result"] + [instance.primary_node])
15178 if self.success and not set(result_groups).issubset(request_groups):
15179 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15180 " differ from original groups (%s)" %
15181 (utils.CommaJoin(result_groups),
15182 utils.CommaJoin(request_groups)))
15184 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15185 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15187 self.out_data = rdict
15190 def _NodesToGroups(node2group, groups, nodes):
15191 """Returns a list of unique group names for a list of nodes.
15193 @type node2group: dict
15194 @param node2group: Map from node name to group UUID
15196 @param groups: Group information
15198 @param nodes: Node names
15205 group_uuid = node2group[node]
15207 # Ignore unknown node
15211 group = groups[group_uuid]
15213 # Can't find group, let's use UUID
15214 group_name = group_uuid
15216 group_name = group["name"]
15218 result.add(group_name)
15220 return sorted(result)
15223 class LUTestAllocator(NoHooksLU):
15224 """Run allocator tests.
15226 This LU runs the allocator tests
15229 def CheckPrereq(self):
15230 """Check prerequisites.
15232 This checks the opcode parameters depending on the director and mode test.
15235 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15236 for attr in ["memory", "disks", "disk_template",
15237 "os", "tags", "nics", "vcpus"]:
15238 if not hasattr(self.op, attr):
15239 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15240 attr, errors.ECODE_INVAL)
15241 iname = self.cfg.ExpandInstanceName(self.op.name)
15242 if iname is not None:
15243 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15244 iname, errors.ECODE_EXISTS)
15245 if not isinstance(self.op.nics, list):
15246 raise errors.OpPrereqError("Invalid parameter 'nics'",
15247 errors.ECODE_INVAL)
15248 if not isinstance(self.op.disks, list):
15249 raise errors.OpPrereqError("Invalid parameter 'disks'",
15250 errors.ECODE_INVAL)
15251 for row in self.op.disks:
15252 if (not isinstance(row, dict) or
15253 constants.IDISK_SIZE not in row or
15254 not isinstance(row[constants.IDISK_SIZE], int) or
15255 constants.IDISK_MODE not in row or
15256 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15257 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15258 " parameter", errors.ECODE_INVAL)
15259 if self.op.hypervisor is None:
15260 self.op.hypervisor = self.cfg.GetHypervisorType()
15261 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15262 fname = _ExpandInstanceName(self.cfg, self.op.name)
15263 self.op.name = fname
15264 self.relocate_from = \
15265 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15266 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15267 constants.IALLOCATOR_MODE_NODE_EVAC):
15268 if not self.op.instances:
15269 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15270 self.op.instances = _GetWantedInstances(self, self.op.instances)
15272 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15273 self.op.mode, errors.ECODE_INVAL)
15275 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15276 if self.op.allocator is None:
15277 raise errors.OpPrereqError("Missing allocator name",
15278 errors.ECODE_INVAL)
15279 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15280 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15281 self.op.direction, errors.ECODE_INVAL)
15283 def Exec(self, feedback_fn):
15284 """Run the allocator test.
15287 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15288 ial = IAllocator(self.cfg, self.rpc,
15291 memory=self.op.memory,
15292 disks=self.op.disks,
15293 disk_template=self.op.disk_template,
15297 vcpus=self.op.vcpus,
15298 hypervisor=self.op.hypervisor,
15300 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15301 ial = IAllocator(self.cfg, self.rpc,
15304 relocate_from=list(self.relocate_from),
15306 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15307 ial = IAllocator(self.cfg, self.rpc,
15309 instances=self.op.instances,
15310 target_groups=self.op.target_groups)
15311 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15312 ial = IAllocator(self.cfg, self.rpc,
15314 instances=self.op.instances,
15315 evac_mode=self.op.evac_mode)
15317 raise errors.ProgrammerError("Uncatched mode %s in"
15318 " LUTestAllocator.Exec", self.op.mode)
15320 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15321 result = ial.in_text
15323 ial.Run(self.op.allocator, validate=False)
15324 result = ial.out_text
15328 #: Query type implementations
15330 constants.QR_CLUSTER: _ClusterQuery,
15331 constants.QR_INSTANCE: _InstanceQuery,
15332 constants.QR_NODE: _NodeQuery,
15333 constants.QR_GROUP: _GroupQuery,
15334 constants.QR_OS: _OsQuery,
15335 constants.QR_EXPORT: _ExportQuery,
15338 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15341 def _GetQueryImplementation(name):
15342 """Returns the implemtnation for a query type.
15344 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15348 return _QUERY_IMPL[name]
15350 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15351 errors.ECODE_INVAL)