4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
61 from ganeti import rpc
62 from ganeti import runtime
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77   constants.ADMINST_OFFLINE,
78   ]))
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86   contained in the C{jobs} attribute and include the job IDs in the opcode result.
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95     @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
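    For example (an illustrative sketch only; the opcode chosen below is
    arbitrary and the extra keyword argument is hypothetical)::

      return ResultWithJobs([[opcodes.OpClusterVerifyConfig()]],
                            other_result="done")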
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129     This needs to be overridden in derived classes in order to check op validity.
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
141 # Dicts used to declare locking needs to mcpu
142 self.needed_locks = None
143 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.remove_locks = {}
146 # Used to force good behavior when calling helper functions
147 self.recalculate_locks = {}
149 self.Log = processor.Log # pylint: disable=C0103
150 self.LogWarning = processor.LogWarning # pylint: disable=C0103
151 self.LogInfo = processor.LogInfo # pylint: disable=C0103
152 self.LogStep = processor.LogStep # pylint: disable=C0103
153 # support for dry-run
154 self.dry_run_result = None
155 # support for generic debug attribute
156 if (not hasattr(self.op, "debug_level") or
157 not isinstance(self.op.debug_level, int)):
158 self.op.debug_level = 0
163 # Validate opcode parameters and set defaults
164 self.op.Validate(True)
166 self.CheckArguments()
168 def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments.
171     This method is for doing a simple syntactic check and ensuring the
172     validity of opcode parameters, without any cluster-related
173     checks. While the same can be accomplished in ExpandNames and/or
174     CheckPrereq, doing these separately is better because:
176       - ExpandNames is left as purely a lock-related function
177       - CheckPrereq is run after we have acquired locks (and possibly waited for them)
180 The function is allowed to change the self.op attribute so that
181     later methods need no longer worry about missing parameters.
186 def ExpandNames(self):
187 """Expand names for this LU.
189 This method is called before starting to execute the opcode, and it should
190 update all the parameters of the opcode to their canonical form (e.g. a
191 short node name must be fully expanded after this method has successfully
192 completed). This way locking, hooks, logging, etc. can work correctly.
194 LUs which implement this method must also populate the self.needed_locks
195 member, as a dict with lock levels as keys, and a list of needed lock names
198 - use an empty dict if you don't need any lock
199 - if you don't need any lock at a particular level omit that
200 level (note that in this case C{DeclareLocks} won't be called
201 at all for that level)
202 - if you need locks at a level, but you can't calculate it in
203 this function, initialise that level with an empty list and do
204 further processing in L{LogicalUnit.DeclareLocks} (see that
205 function's docstring)
206 - don't put anything for the BGL level
207 - if you want all locks at a level use L{locking.ALL_SET} as a value
209 If you need to share locks (rather than acquire them exclusively) at one
210 level you can modify self.share_locks, setting a true value (usually 1) for
211 that level. By default locks are not shared.
213 This function can also define a list of tasklets, which then will be
214 executed in order instead of the usual LU-level CheckPrereq and Exec
215 functions, if those are not defined by the LU.
219 # Acquire all nodes and one instance
220 self.needed_locks = {
221 locking.LEVEL_NODE: locking.ALL_SET,
222 locking.LEVEL_INSTANCE: ['instance1.example.com'],
224 # Acquire just two nodes
225 self.needed_locks = {
226 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
229 self.needed_locks = {} # No, you can't leave it to the default value None
232 # The implementation of this method is mandatory only if the new LU is
233 # concurrent, so that old LUs don't need to be changed all at the same
236 self.needed_locks = {} # Exclusive LUs don't need locks.
238 raise NotImplementedError
240 def DeclareLocks(self, level):
241 """Declare LU locking needs for a level
243 While most LUs can just declare their locking needs at ExpandNames time,
244 sometimes there's the need to calculate some locks after having acquired
245 the ones before. This function is called just before acquiring locks at a
246 particular level, but after acquiring the ones at lower levels, and permits
247 such calculations. It can be used to modify self.needed_locks, and by
248 default it does nothing.
250 This function is only called if you have something already set in
251 self.needed_locks for the level.
253 @param level: Locking level which is going to be locked
254 @type level: member of L{ganeti.locking.LEVELS}
258 def CheckPrereq(self):
259 """Check prerequisites for this LU.
261 This method should check that the prerequisites for the execution
262 of this LU are fulfilled. It can do internode communication, but
263     it should be idempotent - no cluster or system changes are allowed.
266 The method should raise errors.OpPrereqError in case something is
267 not fulfilled. Its return value is ignored.
269 This method should also update all the parameters of the opcode to
270 their canonical form if it hasn't been done by ExpandNames before.
273 if self.tasklets is not None:
274 for (idx, tl) in enumerate(self.tasklets):
275 logging.debug("Checking prerequisites for tasklet %s/%s",
276 idx + 1, len(self.tasklets))
281 def Exec(self, feedback_fn):
284 This method should implement the actual work. It should raise
285 errors.OpExecError for failures that are somewhat dealt with in
289 if self.tasklets is not None:
290 for (idx, tl) in enumerate(self.tasklets):
291 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
294 raise NotImplementedError
296 def BuildHooksEnv(self):
297 """Build hooks environment for this LU.
300 @return: Dictionary containing the environment that will be used for
301 running the hooks for this LU. The keys of the dict must not be prefixed
302 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
303 will extend the environment with additional variables. If no environment
304 should be defined, an empty dictionary should be returned (not C{None}).
305 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
309 raise NotImplementedError
311 def BuildHooksNodes(self):
312 """Build list of nodes to run LU's hooks.
314 @rtype: tuple; (list, list)
315 @return: Tuple containing a list of node names on which the hook
316 should run before the execution and a list of node names on which the
317     hook should run after the execution. "No nodes" should be expressed as an
318     empty list (and not None).
319 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
323 raise NotImplementedError
325 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
326 """Notify the LU about the results of its hooks.
328 This method is called every time a hooks phase is executed, and notifies
329 the Logical Unit about the hooks' result. The LU can then use it to alter
330 its result based on the hooks. By default the method does nothing and the
331 previous result is passed back unchanged but any LU can define it if it
332 wants to use the local cluster hook-scripts somehow.
334 @param phase: one of L{constants.HOOKS_PHASE_POST} or
335 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
336 @param hook_results: the results of the multi-node hooks rpc call
337     @param feedback_fn: function used to send feedback back to the caller
338 @param lu_result: the previous Exec result this LU had, or None
340 @return: the new Exec result, based on the previous result
344     # API must be kept, thus we ignore the "unused argument" and "could be
345     # a function" warnings
346 # pylint: disable=W0613,R0201
349 def _ExpandAndLockInstance(self):
350 """Helper function to expand and lock an instance.
352 Many LUs that work on an instance take its name in self.op.instance_name
353 and need to expand it and then declare the expanded name for locking. This
354 function does it, and then updates self.op.instance_name to the expanded
355     name. It also initializes needed_locks as a dict, if this hasn't been done before.
359 if self.needed_locks is None:
360 self.needed_locks = {}
362 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
363 "_ExpandAndLockInstance called with instance-level locks set"
364 self.op.instance_name = _ExpandInstanceName(self.cfg,
365 self.op.instance_name)
366 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
368 def _LockInstancesNodes(self, primary_only=False,
369 level=locking.LEVEL_NODE):
370 """Helper function to declare instances' nodes for locking.
372 This function should be called after locking one or more instances to lock
373 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
374 with all primary or secondary nodes for instances already locked and
375 present in self.needed_locks[locking.LEVEL_INSTANCE].
377 It should be called from DeclareLocks, and for safety only works if
378 self.recalculate_locks[locking.LEVEL_NODE] is set.
380 In the future it may grow parameters to just lock some instance's nodes, or
381 to just lock primaries or secondary nodes, if needed.
383     It should be called in DeclareLocks in a way similar to::
385 if level == locking.LEVEL_NODE:
386 self._LockInstancesNodes()
388 @type primary_only: boolean
389 @param primary_only: only lock primary nodes of locked instances
390 @param level: Which lock level to use for locking nodes
393 assert level in self.recalculate_locks, \
394 "_LockInstancesNodes helper function called with no nodes to recalculate"
396     # TODO: check if we've really been called with the instance locks held
398 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
399 # future we might want to have different behaviors depending on the value
400 # of self.recalculate_locks[locking.LEVEL_NODE]
401     wanted_nodes = []
402     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
403     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
404       wanted_nodes.append(instance.primary_node)
405       if not primary_only:
406         wanted_nodes.extend(instance.secondary_nodes)
408 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
409 self.needed_locks[level] = wanted_nodes
410 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
411 self.needed_locks[level].extend(wanted_nodes)
412     else:
413       raise errors.ProgrammerError("Unknown recalculation mode")
415 del self.recalculate_locks[level]
418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
419 """Simple LU which runs no hooks.
421 This LU is intended as a parent for other LogicalUnits which will
422 run no hooks, in order to reduce duplicate code.
428 def BuildHooksEnv(self):
429 """Empty BuildHooksEnv for NoHooksLu.
431 This just raises an error.
434 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
436 def BuildHooksNodes(self):
437 """Empty BuildHooksNodes for NoHooksLU.
440 raise AssertionError("BuildHooksNodes called for NoHooksLU")
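
# Illustrative sketch, not part of the original code: a minimal concurrent LU
# following the rules documented in LogicalUnit.  It locks one instance plus
# its nodes and returns the instance's primary node; the class itself and the
# opcode slot it reads are hypothetical.
class _ExampleInstanceInfoLU(NoHooksLU):
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

  def Exec(self, feedback_fn):
    return self.instance.primary_node
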
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU,
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
463     """Check prerequisites for this tasklet.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or
486 raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
499 def __init__(self, qfilter, fields, use_locking):
500 """Initializes this class.
503 self.use_locking = use_locking
505 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
506 namefield=self.SORT_FIELD)
507 self.requested_data = self.query.RequestedData()
508 self.names = self.query.RequestedNames()
510 # Sort only if no names were requested
511 self.sort_by_name = not self.names
513 self.do_locking = None
516 def _GetNames(self, lu, all_names, lock_level):
517 """Helper function to determine names asked for in the query.
521 names = lu.owned_locks(lock_level)
525 if self.wanted == locking.ALL_SET:
526 assert not self.names
527 # caller didn't specify names, so ordering is not important
528 return utils.NiceSort(names)
530 # caller specified names and we must keep the same order
532 assert not self.do_locking or lu.glm.is_owned(lock_level)
534 missing = set(self.wanted).difference(names)
536 raise errors.OpExecError("Some items were removed before retrieving"
537 " their data: %s" % missing)
539 # Return expanded names
542 def ExpandNames(self, lu):
543 """Expand names for this query.
545 See L{LogicalUnit.ExpandNames}.
548 raise NotImplementedError()
550 def DeclareLocks(self, lu, level):
551 """Declare locks for this query.
553 See L{LogicalUnit.DeclareLocks}.
556 raise NotImplementedError()
558 def _GetQueryData(self, lu):
559 """Collects all data for this query.
561 @return: Query data object
564 raise NotImplementedError()
566 def NewStyleQuery(self, lu):
567 """Collect data and execute query.
570 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
571 sort_by_name=self.sort_by_name)
573 def OldStyleQuery(self, lu):
574 """Collect data and execute query.
577 return self.query.OldStyleQuery(self._GetQueryData(lu),
578 sort_by_name=self.sort_by_name)
582 """Returns a dict declaring all lock levels shared.
585 return dict.fromkeys(locking.LEVELS, 1)
588 def _MakeLegacyNodeInfo(data):
589 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
591 Converts the data into a single dictionary. This is fine for most use cases,
592 but some require information from more than one volume group or hypervisor.
595 (bootid, (vg_info, ), (hv_info, )) = data
597 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
602 def _AnnotateDiskParams(instance, devs, cfg):
603 """Little helper wrapper to the rpc annotation method.
605 @param instance: The instance object
606 @type devs: List of L{objects.Disk}
607 @param devs: The root devices (not any of its children!)
608 @param cfg: The config object
609   @return: The annotated disk copies
610   @see: L{rpc.AnnotateDiskParams}
613 return rpc.AnnotateDiskParams(instance.disk_template, devs,
614 cfg.GetInstanceDiskParams(instance))
617 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
619 """Checks if node groups for locked instances are still correct.
621 @type cfg: L{config.ConfigWriter}
622 @param cfg: Cluster configuration
623 @type instances: dict; string as key, L{objects.Instance} as value
624 @param instances: Dictionary, instance name as key, instance object as value
625 @type owned_groups: iterable of string
626 @param owned_groups: List of owned groups
627 @type owned_nodes: iterable of string
628 @param owned_nodes: List of owned nodes
629 @type cur_group_uuid: string or None
630 @param cur_group_uuid: Optional group UUID to check against instance's groups
633 for (name, inst) in instances.items():
634 assert owned_nodes.issuperset(inst.all_nodes), \
635 "Instance %s's nodes changed while we kept the lock" % name
637 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
639 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
640 "Instance %s has no node in group %s" % (name, cur_group_uuid)
643 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
644 """Checks if the owned node groups are still correct for an instance.
646 @type cfg: L{config.ConfigWriter}
647 @param cfg: The cluster configuration
648 @type instance_name: string
649 @param instance_name: Instance name
650 @type owned_groups: set or frozenset
651 @param owned_groups: List of currently owned node groups
654 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
656 if not owned_groups.issuperset(inst_groups):
657 raise errors.OpPrereqError("Instance %s's node groups changed since"
658 " locks were acquired, current groups are"
659                                " '%s', owning groups '%s'; retry the"
662 utils.CommaJoin(inst_groups),
663 utils.CommaJoin(owned_groups)),
669 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
670 """Checks if the instances in a node group are still correct.
672 @type cfg: L{config.ConfigWriter}
673 @param cfg: The cluster configuration
674 @type group_uuid: string
675 @param group_uuid: Node group UUID
676 @type owned_instances: set or frozenset
677 @param owned_instances: List of currently owned instances
680 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
681 if owned_instances != wanted_instances:
682 raise errors.OpPrereqError("Instances in node group '%s' changed since"
683 " locks were acquired, wanted '%s', have '%s';"
684 " retry the operation" %
686 utils.CommaJoin(wanted_instances),
687 utils.CommaJoin(owned_instances)),
690 return wanted_instances
693 def _SupportsOob(cfg, node):
694 """Tells if node supports OOB.
696 @type cfg: L{config.ConfigWriter}
697 @param cfg: The cluster configuration
698 @type node: L{objects.Node}
699 @param node: The node
700 @return: The OOB script if supported or an empty string otherwise
703 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
706 def _GetWantedNodes(lu, nodes):
707 """Returns list of checked and expanded node names.
709 @type lu: L{LogicalUnit}
710 @param lu: the logical unit on whose behalf we execute
712 @param nodes: list of node names or None for all nodes
714 @return: the list of nodes, sorted
715 @raise errors.ProgrammerError: if the nodes parameter is wrong type
719 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
721 return utils.NiceSort(lu.cfg.GetNodeList())
724 def _GetWantedInstances(lu, instances):
725 """Returns list of checked and expanded instance names.
727 @type lu: L{LogicalUnit}
728 @param lu: the logical unit on whose behalf we execute
729 @type instances: list
730 @param instances: list of instance names or None for all instances
732 @return: the list of instances, sorted
733 @raise errors.OpPrereqError: if the instances parameter is wrong type
734 @raise errors.OpPrereqError: if any of the passed instances is not found
738 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
740 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
744 def _GetUpdatedParams(old_params, update_dict,
745 use_default=True, use_none=False):
746 """Return the new version of a parameter dictionary.
748 @type old_params: dict
749 @param old_params: old parameters
750 @type update_dict: dict
751 @param update_dict: dict containing new parameter values, or
752 constants.VALUE_DEFAULT to reset the parameter to its default
754   @type use_default: boolean
755   @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
756       values as 'to be deleted' values
757   @type use_none: boolean
758   @param use_none: whether to recognise C{None} values as 'to be
759       removed' values
761 @return: the new parameter dictionary
764 params_copy = copy.deepcopy(old_params)
765 for key, val in update_dict.iteritems():
766 if ((use_default and val == constants.VALUE_DEFAULT) or
767 (use_none and val is None)):
773 params_copy[key] = val
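
# Example only (illustrative sketch): typical use of _GetUpdatedParams to merge
# user-supplied overrides into an instance's backend parameters; the helper
# name and the "overrides" dict are hypothetical.
def _ExampleMergeBeparams(instance, overrides):
  """Example only: VALUE_DEFAULT entries reset a parameter to its default."""
  return _GetUpdatedParams(instance.beparams, overrides, use_default=True)
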
777 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
778   """Return the new version of an instance policy.
780 @param group_policy: whether this policy applies to a group and thus
781 we should support removal of policy entries
784 use_none = use_default = group_policy
785 ipolicy = copy.deepcopy(old_ipolicy)
786 for key, value in new_ipolicy.items():
787 if key not in constants.IPOLICY_ALL_KEYS:
788 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
790 if key in constants.IPOLICY_ISPECS:
791 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
792 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
794 use_default=use_default)
796 if (not value or value == [constants.VALUE_DEFAULT] or
797 value == constants.VALUE_DEFAULT):
801 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
802                                    " on the cluster" % key,
805 if key in constants.IPOLICY_PARAMETERS:
806 # FIXME: we assume all such values are float
808 ipolicy[key] = float(value)
809 except (TypeError, ValueError), err:
810 raise errors.OpPrereqError("Invalid value for attribute"
811 " '%s': '%s', error: %s" %
812 (key, value, err), errors.ECODE_INVAL)
814 # FIXME: we assume all others are lists; this should be redone
816 ipolicy[key] = list(value)
818 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
819 except errors.ConfigurationError, err:
820 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
825 def _UpdateAndVerifySubDict(base, updates, type_check):
826 """Updates and verifies a dict with sub dicts of the same type.
828 @param base: The dict with the old data
829 @param updates: The dict with the new data
830 @param type_check: Dict suitable to ForceDictType to verify correct types
831 @returns: A new dict with updated and verified values
834   def fn(old, value):
835     new = _GetUpdatedParams(old, value)
836 utils.ForceDictType(new, type_check)
839 ret = copy.deepcopy(base)
840 ret.update(dict((key, fn(base.get(key, {}), value))
841 for key, value in updates.items()))
845 def _MergeAndVerifyHvState(op_input, obj_input):
846   """Combines the hv state from an opcode with that of the object.
848 @param op_input: The input dict from the opcode
849 @param obj_input: The input dict from the objects
850 @return: The verified and updated dict
854 invalid_hvs = set(op_input) - constants.HYPER_TYPES
856 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
857 " %s" % utils.CommaJoin(invalid_hvs),
859 if obj_input is None:
861 type_check = constants.HVSTS_PARAMETER_TYPES
862 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
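
# Example only (illustrative sketch): merging hypervisor state overrides from
# an opcode into the currently configured state.  The op.hv_state slot and the
# cluster.hv_state_static attribute are assumptions made for this example.
def _ExampleMergeHvState(lu, cluster):
  """Example only: combine opcode hv_state overrides with the current state."""
  if lu.op.hv_state:
    return _MergeAndVerifyHvState(lu.op.hv_state, cluster.hv_state_static)
  return cluster.hv_state_static
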
867 def _MergeAndVerifyDiskState(op_input, obj_input):
868   """Combines the disk state from an opcode with that of the object.
870 @param op_input: The input dict from the opcode
871 @param obj_input: The input dict from the objects
872 @return: The verified and updated dict
875 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
877 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
878 utils.CommaJoin(invalid_dst),
880 type_check = constants.DSS_PARAMETER_TYPES
881 if obj_input is None:
883 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
885 for key, value in op_input.items())
890 def _ReleaseLocks(lu, level, names=None, keep=None):
891 """Releases locks owned by an LU.
893 @type lu: L{LogicalUnit}
894 @param level: Lock level
895 @type names: list or None
896 @param names: Names of locks to release
897 @type keep: list or None
898 @param keep: Names of locks to retain
901 assert not (keep is not None and names is not None), \
902 "Only one of the 'names' and the 'keep' parameters can be given"
904 if names is not None:
905     should_release = names.__contains__
906   elif keep is not None:
907     should_release = lambda name: name not in keep
908   else:
909     should_release = None
911 owned = lu.owned_locks(level)
913 # Not owning any lock at this level, do nothing
920 # Determine which locks to release
922 if should_release(name):
927 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
929 # Release just some locks
930 lu.glm.release(level, names=release)
932 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
935 lu.glm.release(level)
937 assert not lu.glm.is_owned(level), "No locks should be owned"
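
# Example only (illustrative sketch): once an LU has settled on a single
# target node it can drop the other node locks it still owns; the helper name
# is hypothetical.
def _ExampleNarrowNodeLocks(lu, node_name):
  """Example only: keep one node lock, release all the others."""
  _ReleaseLocks(lu, locking.LEVEL_NODE, keep=[node_name])
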
940 def _MapInstanceDisksToNodes(instances):
941 """Creates a map from (node, volume) to instance name.
943 @type instances: list of L{objects.Instance}
944 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
947 return dict(((node, vol), inst.name)
948 for inst in instances
949               for (node, vols) in inst.MapLVsByNode().items()
950               for vol in vols)
953 def _RunPostHook(lu, node_name):
954 """Runs the post-hook for an opcode on a single node.
957 hm = lu.proc.BuildHooksManager(lu)
958   try:
959     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
960   except:
961     # pylint: disable=W0702
962 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
965 def _CheckOutputFields(static, dynamic, selected):
966 """Checks whether all selected fields are valid.
968 @type static: L{utils.FieldSet}
969 @param static: static fields set
970 @type dynamic: L{utils.FieldSet}
971 @param dynamic: dynamic fields set
978 delta = f.NonMatching(selected)
980 raise errors.OpPrereqError("Unknown output fields selected: %s"
981 % ",".join(delta), errors.ECODE_INVAL)
984 def _CheckGlobalHvParams(params):
985 """Validates that given hypervisor params are not global ones.
987   This will ensure that instances don't get customised versions of global parameters.
991 used_globals = constants.HVC_GLOBALS.intersection(params)
993 msg = ("The following hypervisor parameters are global and cannot"
994 " be customized at instance level, please modify them at"
995 " cluster level: %s" % utils.CommaJoin(used_globals))
996 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
999 def _CheckNodeOnline(lu, node, msg=None):
1000 """Ensure that a given node is online.
1002 @param lu: the LU on behalf of which we make the check
1003 @param node: the node to check
1004 @param msg: if passed, should be a message to replace the default one
1005 @raise errors.OpPrereqError: if the node is offline
1009 msg = "Can't use offline node"
1010 if lu.cfg.GetNodeInfo(node).offline:
1011 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1014 def _CheckNodeNotDrained(lu, node):
1015 """Ensure that a given node is not drained.
1017 @param lu: the LU on behalf of which we make the check
1018 @param node: the node to check
1019 @raise errors.OpPrereqError: if the node is drained
1022 if lu.cfg.GetNodeInfo(node).drained:
1023 raise errors.OpPrereqError("Can't use drained node %s" % node,
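
# Example only (illustrative sketch): the usual prerequisite combination before
# starting work on a target node; the helper name is hypothetical.
def _ExampleCheckTargetNode(lu, node_name):
  """Example only: ensure a target node is online and not drained."""
  _CheckNodeOnline(lu, node_name)
  _CheckNodeNotDrained(lu, node_name)
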
1027 def _CheckNodeVmCapable(lu, node):
1028 """Ensure that a given node is vm capable.
1030 @param lu: the LU on behalf of which we make the check
1031 @param node: the node to check
1032 @raise errors.OpPrereqError: if the node is not vm capable
1035 if not lu.cfg.GetNodeInfo(node).vm_capable:
1036 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1040 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1041 """Ensure that a node supports a given OS.
1043 @param lu: the LU on behalf of which we make the check
1044 @param node: the node to check
1045 @param os_name: the OS to query about
1046 @param force_variant: whether to ignore variant errors
1047 @raise errors.OpPrereqError: if the node is not supporting the OS
1050 result = lu.rpc.call_os_get(node, os_name)
1051 result.Raise("OS '%s' not in supported OS list for node %s" %
1053 prereq=True, ecode=errors.ECODE_INVAL)
1054 if not force_variant:
1055 _CheckOSVariant(result.payload, os_name)
1058 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1059 """Ensure that a node has the given secondary ip.
1061 @type lu: L{LogicalUnit}
1062 @param lu: the LU on behalf of which we make the check
1064 @param node: the node to check
1065 @type secondary_ip: string
1066 @param secondary_ip: the ip to check
1067 @type prereq: boolean
1068 @param prereq: whether to throw a prerequisite or an execute error
1069 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1070 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1073 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1074 result.Raise("Failure checking secondary ip on node %s" % node,
1075 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1076 if not result.payload:
1077 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1078 " please fix and re-run this command" % secondary_ip)
1080 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1082 raise errors.OpExecError(msg)
1085 def _GetClusterDomainSecret():
1086 """Reads the cluster domain secret.
1089 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1093 def _CheckInstanceState(lu, instance, req_states, msg=None):
1094 """Ensure that an instance is in one of the required states.
1096 @param lu: the LU on behalf of which we make the check
1097 @param instance: the instance to check
1098 @param msg: if passed, should be a message to replace the default one
1099 @raise errors.OpPrereqError: if the instance is not in the required state
1103 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1104 if instance.admin_state not in req_states:
1105 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1106 (instance.name, instance.admin_state, msg),
1109 if constants.ADMINST_UP not in req_states:
1110 pnode = instance.primary_node
1111 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1112 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1113 prereq=True, ecode=errors.ECODE_ENVIRON)
1115 if instance.name in ins_l.payload:
1116 raise errors.OpPrereqError("Instance %s is running, %s" %
1117 (instance.name, msg), errors.ECODE_STATE)
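
# Example only (illustrative sketch): a typical CheckPrereq fragment that
# refuses to touch a running instance; the helper name and message are
# hypothetical.
def _ExampleRequireInstanceDown(lu, instance):
  """Example only: raise OpPrereqError unless the instance is stopped."""
  _CheckInstanceState(lu, instance, INSTANCE_DOWN,
                      msg="cannot modify the disks of a running instance")
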
1120 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1121 """Computes if value is in the desired range.
1123 @param name: name of the parameter for which we perform the check
1124 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1126 @param ipolicy: dictionary containing min, max and std values
1127 @param value: actual value that we want to use
1128 @return: None or element not meeting the criteria
1132 if value in [None, constants.VALUE_AUTO]:
1134 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1135 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1136 if value > max_v or min_v > value:
1138 fqn = "%s/%s" % (name, qualifier)
1141 return ("%s value %s is not in range [%s, %s]" %
1142 (fqn, value, min_v, max_v))
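
# Example only (illustrative sketch): range-checking a single memory value
# against an ipolicy; returns None when the value is acceptable and an error
# string otherwise.  The helper name is hypothetical.
def _ExampleCheckMemSize(ipolicy, mem_size):
  """Example only: check mem_size against the policy's min/max specs."""
  return _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, ipolicy, mem_size)
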
1146 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1147 nic_count, disk_sizes, spindle_use,
1148 _compute_fn=_ComputeMinMaxSpec):
1149 """Verifies ipolicy against provided specs.
1152 @param ipolicy: The ipolicy
1154 @param mem_size: The memory size
1155 @type cpu_count: int
1156 @param cpu_count: Used cpu cores
1157 @type disk_count: int
1158 @param disk_count: Number of disks used
1159 @type nic_count: int
1160 @param nic_count: Number of nics used
1161 @type disk_sizes: list of ints
1162 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1163 @type spindle_use: int
1164 @param spindle_use: The number of spindles this instance uses
1165 @param _compute_fn: The compute function (unittest only)
1166   @return: A list of violations, or an empty list if no violations are found
1169 assert disk_count == len(disk_sizes)
1172 (constants.ISPEC_MEM_SIZE, "", mem_size),
1173 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1174 (constants.ISPEC_DISK_COUNT, "", disk_count),
1175 (constants.ISPEC_NIC_COUNT, "", nic_count),
1176 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1177 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1178 for idx, d in enumerate(disk_sizes)]
1181 (_compute_fn(name, qualifier, ipolicy, value)
1182 for (name, qualifier, value) in test_settings))
1185 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1186 _compute_fn=_ComputeIPolicySpecViolation):
1187 """Compute if instance meets the specs of ipolicy.
1190 @param ipolicy: The ipolicy to verify against
1191 @type instance: L{objects.Instance}
1192 @param instance: The instance to verify
1193 @param _compute_fn: The function to verify ipolicy (unittest only)
1194 @see: L{_ComputeIPolicySpecViolation}
1197 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1198 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1199 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1200 disk_count = len(instance.disks)
1201 disk_sizes = [disk.size for disk in instance.disks]
1202 nic_count = len(instance.nics)
1204 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1205 disk_sizes, spindle_use)
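
# Example only (illustrative sketch): how an LU might report every way a
# locked instance violates its node group's policy; the helper name is
# hypothetical.
def _ExampleWarnOnPolicyViolations(lu, instance, group):
  """Example only: log one warning per ipolicy violation."""
  ipolicy = _CalculateGroupIPolicy(lu.cfg.GetClusterInfo(), group)
  for violation in _ComputeIPolicyInstanceViolation(ipolicy, instance):
    lu.LogWarning("Instance %s: %s", instance.name, violation)
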
1208 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1209 _compute_fn=_ComputeIPolicySpecViolation):
1210 """Compute if instance specs meets the specs of ipolicy.
1213 @param ipolicy: The ipolicy to verify against
1214 @param instance_spec: dict
1215 @param instance_spec: The instance spec to verify
1216 @param _compute_fn: The function to verify ipolicy (unittest only)
1217 @see: L{_ComputeIPolicySpecViolation}
1220 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1221 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1222 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1223 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1224 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1225 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1227 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1228 disk_sizes, spindle_use)
1231 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1233 _compute_fn=_ComputeIPolicyInstanceViolation):
1234 """Compute if instance meets the specs of the new target group.
1236 @param ipolicy: The ipolicy to verify
1237 @param instance: The instance object to verify
1238 @param current_group: The current group of the instance
1239 @param target_group: The new group of the instance
1240 @param _compute_fn: The function to verify ipolicy (unittest only)
1241 @see: L{_ComputeIPolicySpecViolation}
1244 if current_group == target_group:
1247 return _compute_fn(ipolicy, instance)
1250 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1251 _compute_fn=_ComputeIPolicyNodeViolation):
1252 """Checks that the target node is correct in terms of instance policy.
1254 @param ipolicy: The ipolicy to verify
1255 @param instance: The instance object to verify
1256 @param node: The new node to relocate
1257 @param ignore: Ignore violations of the ipolicy
1258 @param _compute_fn: The function to verify ipolicy (unittest only)
1259 @see: L{_ComputeIPolicySpecViolation}
1262 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1263 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1266 msg = ("Instance does not meet target node group's (%s) instance"
1267 " policy: %s") % (node.group, utils.CommaJoin(res))
1271 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1274 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1275 """Computes a set of any instances that would violate the new ipolicy.
1277 @param old_ipolicy: The current (still in-place) ipolicy
1278 @param new_ipolicy: The new (to become) ipolicy
1279 @param instances: List of instances to verify
1280   @return: A list of instances which violate the new ipolicy but did not violate the old one
1284 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1285 _ComputeViolatingInstances(old_ipolicy, instances))
1288 def _ExpandItemName(fn, name, kind):
1289 """Expand an item name.
1291 @param fn: the function to use for expansion
1292 @param name: requested item name
1293 @param kind: text description ('Node' or 'Instance')
1294 @return: the resolved (full) name
1295 @raise errors.OpPrereqError: if the item is not found
1298 full_name = fn(name)
1299 if full_name is None:
1300 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1305 def _ExpandNodeName(cfg, name):
1306 """Wrapper over L{_ExpandItemName} for nodes."""
1307 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1310 def _ExpandInstanceName(cfg, name):
1311 """Wrapper over L{_ExpandItemName} for instance."""
1312 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
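
# Example only (illustrative sketch): canonicalising a user-supplied short
# name; "inst1" is a made-up name and would raise OpPrereqError if it is not
# known to the configuration.
def _ExampleExpandName(cfg):
  """Example only: expand a short instance name to its full form."""
  return _ExpandInstanceName(cfg, "inst1")
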
1315 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1316 minmem, maxmem, vcpus, nics, disk_template, disks,
1317 bep, hvp, hypervisor_name, tags):
1318 """Builds instance related env variables for hooks
1320 This builds the hook environment from individual variables.
1323 @param name: the name of the instance
1324 @type primary_node: string
1325 @param primary_node: the name of the instance's primary node
1326 @type secondary_nodes: list
1327 @param secondary_nodes: list of secondary nodes as strings
1328 @type os_type: string
1329 @param os_type: the name of the instance's OS
1330 @type status: string
1331 @param status: the desired status of the instance
1332 @type minmem: string
1333 @param minmem: the minimum memory size of the instance
1334 @type maxmem: string
1335 @param maxmem: the maximum memory size of the instance
1337 @param vcpus: the count of VCPUs the instance has
1339 @param nics: list of tuples (ip, mac, mode, link) representing
1340 the NICs the instance has
1341 @type disk_template: string
1342 @param disk_template: the disk template of the instance
1344 @param disks: the list of (size, mode) pairs
1346 @param bep: the backend parameters for the instance
1348 @param hvp: the hypervisor parameters for the instance
1349 @type hypervisor_name: string
1350 @param hypervisor_name: the hypervisor for the instance
1352 @param tags: list of instance tags as strings
1354 @return: the hook environment for this instance
1359 "INSTANCE_NAME": name,
1360 "INSTANCE_PRIMARY": primary_node,
1361 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1362 "INSTANCE_OS_TYPE": os_type,
1363 "INSTANCE_STATUS": status,
1364 "INSTANCE_MINMEM": minmem,
1365 "INSTANCE_MAXMEM": maxmem,
1366 # TODO(2.7) remove deprecated "memory" value
1367 "INSTANCE_MEMORY": maxmem,
1368 "INSTANCE_VCPUS": vcpus,
1369 "INSTANCE_DISK_TEMPLATE": disk_template,
1370 "INSTANCE_HYPERVISOR": hypervisor_name,
1373 nic_count = len(nics)
1374 for idx, (ip, mac, mode, link) in enumerate(nics):
1377 env["INSTANCE_NIC%d_IP" % idx] = ip
1378 env["INSTANCE_NIC%d_MAC" % idx] = mac
1379 env["INSTANCE_NIC%d_MODE" % idx] = mode
1380 env["INSTANCE_NIC%d_LINK" % idx] = link
1381 if mode == constants.NIC_MODE_BRIDGED:
1382 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1386 env["INSTANCE_NIC_COUNT"] = nic_count
1389 disk_count = len(disks)
1390 for idx, (size, mode) in enumerate(disks):
1391 env["INSTANCE_DISK%d_SIZE" % idx] = size
1392 env["INSTANCE_DISK%d_MODE" % idx] = mode
1396 env["INSTANCE_DISK_COUNT"] = disk_count
1401 env["INSTANCE_TAGS"] = " ".join(tags)
1403 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1404 for key, value in source.items():
1405 env["INSTANCE_%s_%s" % (kind, key)] = value
1410 def _NICListToTuple(lu, nics):
1411 """Build a list of nic information tuples.
1413 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1414 value in LUInstanceQueryData.
1416 @type lu: L{LogicalUnit}
1417 @param lu: the logical unit on whose behalf we execute
1418 @type nics: list of L{objects.NIC}
1419 @param nics: list of nics to convert to hooks tuples
1423 cluster = lu.cfg.GetClusterInfo()
1427 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1428 mode = filled_params[constants.NIC_MODE]
1429 link = filled_params[constants.NIC_LINK]
1430 hooks_nics.append((ip, mac, mode, link))
1434 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1435 """Builds instance related env variables for hooks from an object.
1437 @type lu: L{LogicalUnit}
1438 @param lu: the logical unit on whose behalf we execute
1439 @type instance: L{objects.Instance}
1440 @param instance: the instance for which we should build the
1442 @type override: dict
1443 @param override: dictionary with key/values that will override
1446 @return: the hook environment dictionary
1449 cluster = lu.cfg.GetClusterInfo()
1450 bep = cluster.FillBE(instance)
1451 hvp = cluster.FillHV(instance)
1453 "name": instance.name,
1454 "primary_node": instance.primary_node,
1455 "secondary_nodes": instance.secondary_nodes,
1456 "os_type": instance.os,
1457 "status": instance.admin_state,
1458 "maxmem": bep[constants.BE_MAXMEM],
1459 "minmem": bep[constants.BE_MINMEM],
1460 "vcpus": bep[constants.BE_VCPUS],
1461 "nics": _NICListToTuple(lu, instance.nics),
1462 "disk_template": instance.disk_template,
1463 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1466 "hypervisor_name": instance.hypervisor,
1467 "tags": instance.tags,
1470 args.update(override)
1471 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
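
# Example only (illustrative sketch): a typical BuildHooksEnv for an
# instance-level LU, extending the standard environment with one extra
# variable.  The "FORCE" key and the op.force slot are assumptions.
def _ExampleBuildHooksEnv(lu, instance):
  """Example only: standard instance hook env plus a FORCE flag."""
  env = _BuildInstanceHookEnvByObject(lu, instance)
  env["FORCE"] = lu.op.force
  return env
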
1474 def _AdjustCandidatePool(lu, exceptions):
1475 """Adjust the candidate pool after node operations.
1478 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1480 lu.LogInfo("Promoted nodes to master candidate role: %s",
1481 utils.CommaJoin(node.name for node in mod_list))
1482 for name in mod_list:
1483 lu.context.ReaddNode(name)
1484 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1486 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1490 def _DecideSelfPromotion(lu, exceptions=None):
1491 """Decide whether I should promote myself as a master candidate.
1494 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1495 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1496   # the new node will increase mc_max by one, so:
1497 mc_should = min(mc_should + 1, cp_size)
1498 return mc_now < mc_should
1501 def _CalculateGroupIPolicy(cluster, group):
1502 """Calculate instance policy for group.
1505 return cluster.SimpleFillIPolicy(group.ipolicy)
1508 def _ComputeViolatingInstances(ipolicy, instances):
1509   """Computes the set of instances that violate the given ipolicy.
1511   @param ipolicy: The ipolicy to verify
1512   @type instances: iterable of L{objects.Instance}
1513   @param instances: List of instances to verify
1514 @return: A frozenset of instance names violating the ipolicy
1517 return frozenset([inst.name for inst in instances
1518 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1521 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1522   """Check that the bridges needed by a list of NICs exist.
1525 cluster = lu.cfg.GetClusterInfo()
1526 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1527 brlist = [params[constants.NIC_LINK] for params in paramslist
1528 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1530 result = lu.rpc.call_bridges_exist(target_node, brlist)
1531 result.Raise("Error checking bridges on destination node '%s'" %
1532 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1535 def _CheckInstanceBridgesExist(lu, instance, node=None):
1536   """Check that the bridges needed by an instance exist.
1540 node = instance.primary_node
1541 _CheckNicsBridgesExist(lu, instance.nics, node)
1544 def _CheckOSVariant(os_obj, name):
1545 """Check whether an OS name conforms to the os variants specification.
1547 @type os_obj: L{objects.OS}
1548 @param os_obj: OS object to check
1550 @param name: OS name passed by the user, to check for validity
1553 variant = objects.OS.GetVariant(name)
1554 if not os_obj.supported_variants:
1556 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1557 " passed)" % (os_obj.name, variant),
1561 raise errors.OpPrereqError("OS name must include a variant",
1564 if variant not in os_obj.supported_variants:
1565 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1568 def _GetNodeInstancesInner(cfg, fn):
1569 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1572 def _GetNodeInstances(cfg, node_name):
1573 """Returns a list of all primary and secondary instances on a node.
1577 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1580 def _GetNodePrimaryInstances(cfg, node_name):
1581 """Returns primary instances on a node.
1584 return _GetNodeInstancesInner(cfg,
1585 lambda inst: node_name == inst.primary_node)
1588 def _GetNodeSecondaryInstances(cfg, node_name):
1589 """Returns secondary instances on a node.
1592 return _GetNodeInstancesInner(cfg,
1593 lambda inst: node_name in inst.secondary_nodes)
1596 def _GetStorageTypeArgs(cfg, storage_type):
1597 """Returns the arguments for a storage type.
1600 # Special case for file storage
1601 if storage_type == constants.ST_FILE:
1602 # storage.FileStorage wants a list of storage directories
1603 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1608 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1611 for dev in instance.disks:
1612 cfg.SetDiskID(dev, node_name)
1614 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1615 result.Raise("Failed to get disk status from node %s" % node_name,
1616 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1618 for idx, bdev_status in enumerate(result.payload):
1619 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1625 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1626 """Check the sanity of iallocator and node arguments and use the
1627 cluster-wide iallocator if appropriate.
1629 Check that at most one of (iallocator, node) is specified. If none is
1630 specified, then the LU's opcode's iallocator slot is filled with the
1631 cluster-wide default iallocator.
1633 @type iallocator_slot: string
1634 @param iallocator_slot: the name of the opcode iallocator slot
1635 @type node_slot: string
1636 @param node_slot: the name of the opcode target node slot
1639 node = getattr(lu.op, node_slot, None)
1640 iallocator = getattr(lu.op, iallocator_slot, None)
1642 if node is not None and iallocator is not None:
1643     raise errors.OpPrereqError("Do not specify both an iallocator and a node",
1645 elif node is None and iallocator is None:
1646 default_iallocator = lu.cfg.GetDefaultIAllocator()
1647 if default_iallocator:
1648 setattr(lu.op, iallocator_slot, default_iallocator)
1650 raise errors.OpPrereqError("No iallocator or node given and no"
1651 " cluster-wide default iallocator found;"
1652 " please specify either an iallocator or a"
1653 " node, or set a cluster-wide default"
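
# Example only (illustrative sketch): LUs that accept either an explicit node
# or an iallocator usually validate the pair from CheckArguments; the opcode
# slot names below are assumptions.
def _ExampleCheckArguments(lu):
  """Example only: fall back to the cluster default iallocator if needed."""
  _CheckIAllocatorOrNode(lu, "iallocator", "target_node")
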
1657 def _GetDefaultIAllocator(cfg, iallocator):
1658 """Decides on which iallocator to use.
1660 @type cfg: L{config.ConfigWriter}
1661 @param cfg: Cluster configuration object
1662 @type iallocator: string or None
1663 @param iallocator: Iallocator specified in opcode
1665 @return: Iallocator name
1669 # Use default iallocator
1670 iallocator = cfg.GetDefaultIAllocator()
1673 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1674 " opcode nor as a cluster-wide default",
1680 class LUClusterPostInit(LogicalUnit):
1681 """Logical unit for running hooks after cluster initialization.
1684 HPATH = "cluster-init"
1685 HTYPE = constants.HTYPE_CLUSTER
1687 def BuildHooksEnv(self):
1692 "OP_TARGET": self.cfg.GetClusterName(),
1695 def BuildHooksNodes(self):
1696 """Build hooks nodes.
1699 return ([], [self.cfg.GetMasterNode()])
1701 def Exec(self, feedback_fn):
1708 class LUClusterDestroy(LogicalUnit):
1709 """Logical unit for destroying the cluster.
1712 HPATH = "cluster-destroy"
1713 HTYPE = constants.HTYPE_CLUSTER
1715 def BuildHooksEnv(self):
1720 "OP_TARGET": self.cfg.GetClusterName(),
1723 def BuildHooksNodes(self):
1724 """Build hooks nodes.
1729 def CheckPrereq(self):
1730 """Check prerequisites.
1732 This checks whether the cluster is empty.
1734 Any errors are signaled by raising errors.OpPrereqError.
1737 master = self.cfg.GetMasterNode()
1739 nodelist = self.cfg.GetNodeList()
1740 if len(nodelist) != 1 or nodelist[0] != master:
1741 raise errors.OpPrereqError("There are still %d node(s) in"
1742 " this cluster." % (len(nodelist) - 1),
1744 instancelist = self.cfg.GetInstanceList()
1746 raise errors.OpPrereqError("There are still %d instance(s) in"
1747 " this cluster." % len(instancelist),
1750 def Exec(self, feedback_fn):
1751 """Destroys the cluster.
1754 master_params = self.cfg.GetMasterNetworkParameters()
1756 # Run post hooks on master node before it's removed
1757 _RunPostHook(self, master_params.name)
1759 ems = self.cfg.GetUseExternalMipScript()
1760 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1763 self.LogWarning("Error disabling the master IP address: %s",
1766 return master_params.name
1769 def _VerifyCertificate(filename):
1770 """Verifies a certificate for L{LUClusterVerifyConfig}.
1772 @type filename: string
1773 @param filename: Path to PEM file
1777 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1778 utils.ReadFile(filename))
1779 except Exception, err: # pylint: disable=W0703
1780 return (LUClusterVerifyConfig.ETYPE_ERROR,
1781 "Failed to load X509 certificate %s: %s" % (filename, err))
1784 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1785 constants.SSL_CERT_EXPIRATION_ERROR)
1788 fnamemsg = "While verifying %s: %s" % (filename, msg)
1793 return (None, fnamemsg)
1794 elif errcode == utils.CERT_WARNING:
1795 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1796 elif errcode == utils.CERT_ERROR:
1797 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1799 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1802 def _GetAllHypervisorParameters(cluster, instances):
1803 """Compute the set of all hypervisor parameters.
1805 @type cluster: L{objects.Cluster}
1806 @param cluster: the cluster object
1807 @param instances: list of L{objects.Instance}
1808 @param instances: additional instances from which to obtain parameters
1809 @rtype: list of (origin, hypervisor, parameters)
1810 @return: a list with all parameters found, indicating the hypervisor they
1811 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1816 for hv_name in cluster.enabled_hypervisors:
1817 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1819 for os_name, os_hvp in cluster.os_hvp.items():
1820 for hv_name, hv_params in os_hvp.items():
1822 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1823 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1825 # TODO: collapse identical parameter values in a single one
1826 for instance in instances:
1827 if instance.hvparams:
1828 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1829 cluster.FillHV(instance)))
1834 class _VerifyErrors(object):
1835 """Mix-in for cluster/group verify LUs.
1837 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1838 self.op and self._feedback_fn to be available.)
1842 ETYPE_FIELD = "code"
1843 ETYPE_ERROR = "ERROR"
1844 ETYPE_WARNING = "WARNING"
1846 def _Error(self, ecode, item, msg, *args, **kwargs):
1847 """Format an error message.
1849 Based on the opcode's error_codes parameter, either format a
1850 parseable error code, or a simpler error string.
1852 This must be called only from Exec and functions called from Exec.
1855 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1856 itype, etxt, _ = ecode
1857 # first complete the msg
1860 # then format the whole message
1861 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1862 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1868 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1869 # and finally report it via the feedback_fn
1870 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1872 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1873 """Log an error message if the passed condition is True.
1877 or self.op.debug_simulate_errors) # pylint: disable=E1101
1879     # If the error code is in the list of ignored errors, demote the error to a warning
1881 (_, etxt, _) = ecode
1882 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1883 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1886 self._Error(ecode, *args, **kwargs)
1888 # do not mark the operation as failed for WARN cases only
1889 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1890 self.bad = self.bad or cond
1893 class LUClusterVerify(NoHooksLU):
1894 """Submits all jobs necessary to verify the cluster.
1899 def ExpandNames(self):
1900 self.needed_locks = {}
1902 def Exec(self, feedback_fn):
1905 if self.op.group_name:
1906 groups = [self.op.group_name]
1907 depends_fn = lambda: None
1909 groups = self.cfg.GetNodeGroupList()
1911 # Verify global configuration
1913 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1916 # Always depend on global verification
1917 depends_fn = lambda: [(-len(jobs), [])]
1919 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1920 ignore_errors=self.op.ignore_errors,
1921 depends=depends_fn())]
1922 for group in groups)
1924 # Fix up all parameters
1925 for op in itertools.chain(*jobs): # pylint: disable=W0142
1926 op.debug_simulate_errors = self.op.debug_simulate_errors
1927 op.verbose = self.op.verbose
1928 op.error_codes = self.op.error_codes
1930 op.skip_checks = self.op.skip_checks
1931 except AttributeError:
1932 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1934 return ResultWithJobs(jobs)
1937 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1938 """Verifies the cluster config.
1943 def _VerifyHVP(self, hvp_data):
1944 """Verifies locally the syntax of the hypervisor parameters.
1947 for item, hv_name, hv_params in hvp_data:
1948 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1951 hv_class = hypervisor.GetHypervisor(hv_name)
1952 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1953 hv_class.CheckParameterSyntax(hv_params)
1954 except errors.GenericError, err:
1955 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1957 def ExpandNames(self):
1958 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1959 self.share_locks = _ShareAll()
1961 def CheckPrereq(self):
1962 """Check prerequisites.
1965 # Retrieve all information
1966 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1967 self.all_node_info = self.cfg.GetAllNodesInfo()
1968 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1970 def Exec(self, feedback_fn):
1971 """Verify integrity of cluster, performing various test on nodes.
1975 self._feedback_fn = feedback_fn
1977 feedback_fn("* Verifying cluster config")
1979 for msg in self.cfg.VerifyConfig():
1980 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1982 feedback_fn("* Verifying cluster certificate files")
1984 for cert_filename in constants.ALL_CERT_FILES:
1985 (errcode, msg) = _VerifyCertificate(cert_filename)
1986 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1988 feedback_fn("* Verifying hypervisor parameters")
1990 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1991 self.all_inst_info.values()))
1993 feedback_fn("* Verifying all nodes belong to an existing group")
1995 # We do this verification here because, should this bogus circumstance
1996 # occur, it would never be caught by VerifyGroup, which only acts on
1997 # nodes/instances reachable from existing node groups.
1999 dangling_nodes = set(node.name for node in self.all_node_info.values()
2000 if node.group not in self.all_group_info)
2002 dangling_instances = {}
2003 no_node_instances = []
2005 for inst in self.all_inst_info.values():
2006 if inst.primary_node in dangling_nodes:
2007 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2008 elif inst.primary_node not in self.all_node_info:
2009 no_node_instances.append(inst.name)
2014 utils.CommaJoin(dangling_instances.get(node.name,
2016 for node in dangling_nodes]
2018 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2020 "the following nodes (and their instances) belong to a non"
2021 " existing group: %s", utils.CommaJoin(pretty_dangling))
2023 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2025 "the following instances have a non-existing primary-node:"
2026 " %s", utils.CommaJoin(no_node_instances))
2031 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2032 """Verifies the status of a node group.
2035 HPATH = "cluster-verify"
2036 HTYPE = constants.HTYPE_CLUSTER
2039 _HOOKS_INDENT_RE = re.compile("^", re.M)
2041 class NodeImage(object):
2042 """A class representing the logical and physical status of a node.
2045 @ivar name: the node name to which this object refers
2046 @ivar volumes: a structure as returned from
2047 L{ganeti.backend.GetVolumeList} (runtime)
2048 @ivar instances: a list of running instances (runtime)
2049 @ivar pinst: list of configured primary instances (config)
2050 @ivar sinst: list of configured secondary instances (config)
2051 @ivar sbp: dictionary of {primary-node: list of instances} for all
2052 instances for which this node is secondary (config)
2053 @ivar mfree: free memory, as reported by hypervisor (runtime)
2054 @ivar dfree: free disk, as reported by the node (runtime)
2055 @ivar offline: the offline status (config)
2056 @type rpc_fail: boolean
2057 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2058 not whether the individual keys were correct) (runtime)
2059 @type lvm_fail: boolean
2060 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2061 @type hyp_fail: boolean
2062 @ivar hyp_fail: whether the RPC call didn't return the instance list
2063 @type ghost: boolean
2064 @ivar ghost: whether this is a known node or not (config)
2065 @type os_fail: boolean
2066 @ivar os_fail: whether the RPC call didn't return valid OS data
2068 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2069 @type vm_capable: boolean
2070 @ivar vm_capable: whether the node can host instances
2073 def __init__(self, offline=False, name=None, vm_capable=True):
2082 self.offline = offline
2083 self.vm_capable = vm_capable
2084 self.rpc_fail = False
2085 self.lvm_fail = False
2086 self.hyp_fail = False
2088 self.os_fail = False
2091 def ExpandNames(self):
2092 # This raises errors.OpPrereqError on its own:
2093 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2095 # Get instances in node group; this is unsafe and needs verification later
2097 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2099 self.needed_locks = {
2100 locking.LEVEL_INSTANCE: inst_names,
2101 locking.LEVEL_NODEGROUP: [self.group_uuid],
2102 locking.LEVEL_NODE: [],
2105 self.share_locks = _ShareAll()
2107 def DeclareLocks(self, level):
2108 if level == locking.LEVEL_NODE:
2109 # Get members of node group; this is unsafe and needs verification later
2110 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2112 all_inst_info = self.cfg.GetAllInstancesInfo()
2114 # In Exec(), we warn about mirrored instances that have primary and
2115 # secondary living in separate node groups. To fully verify that
2116 # volumes for these instances are healthy, we will need to do an
2117 # extra call to their secondaries. We ensure here those nodes will
2119 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2120 # Important: access only the instances whose lock is owned
2121 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2122 nodes.update(all_inst_info[inst].secondary_nodes)
2124 self.needed_locks[locking.LEVEL_NODE] = nodes
2126 def CheckPrereq(self):
2127 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2128 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2130 group_nodes = set(self.group_info.members)
2132 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2135 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2137 unlocked_instances = \
2138 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2141 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2142 utils.CommaJoin(unlocked_nodes),
2145 if unlocked_instances:
2146 raise errors.OpPrereqError("Missing lock for instances: %s" %
2147 utils.CommaJoin(unlocked_instances),
2150 self.all_node_info = self.cfg.GetAllNodesInfo()
2151 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2153 self.my_node_names = utils.NiceSort(group_nodes)
2154 self.my_inst_names = utils.NiceSort(group_instances)
2156 self.my_node_info = dict((name, self.all_node_info[name])
2157 for name in self.my_node_names)
2159 self.my_inst_info = dict((name, self.all_inst_info[name])
2160 for name in self.my_inst_names)
2162 # We detect here the nodes that will need the extra RPC calls for verifying
2163 # split LV volumes; they should be locked.
2164 extra_lv_nodes = set()
2166 for inst in self.my_inst_info.values():
2167 if inst.disk_template in constants.DTS_INT_MIRROR:
2168 for nname in inst.all_nodes:
2169 if self.all_node_info[nname].group != self.group_uuid:
2170 extra_lv_nodes.add(nname)
2172 unlocked_lv_nodes = \
2173 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2175 if unlocked_lv_nodes:
2176 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2177 utils.CommaJoin(unlocked_lv_nodes),
2179 self.extra_lv_nodes = list(extra_lv_nodes)
2181 def _VerifyNode(self, ninfo, nresult):
2182 """Perform some basic validation on data returned from a node.
2184 - check the result data structure is well formed and has all the mandatory fields
2186 - check ganeti version
2188 @type ninfo: L{objects.Node}
2189 @param ninfo: the node to check
2190 @param nresult: the results from the node
2192 @return: whether overall this call was successful (and we can expect
2193 reasonable values in the response)
2197 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2199 # main result, nresult should be a non-empty dict
2200 test = not nresult or not isinstance(nresult, dict)
2201 _ErrorIf(test, constants.CV_ENODERPC, node,
2202 "unable to verify node: no data returned")
2206 # compares ganeti version
2207 local_version = constants.PROTOCOL_VERSION
2208 remote_version = nresult.get("version", None)
2209 test = not (remote_version and
2210 isinstance(remote_version, (list, tuple)) and
2211 len(remote_version) == 2)
2212 _ErrorIf(test, constants.CV_ENODERPC, node,
2213 "connection to node returned invalid data")
2217 test = local_version != remote_version[0]
2218 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2219 "incompatible protocol versions: master %s,"
2220 " node %s", local_version, remote_version[0])
2224 # node seems compatible, we can actually try to look into its results
2226 # full package version
2227 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2228 constants.CV_ENODEVERSION, node,
2229 "software version mismatch: master %s, node %s",
2230 constants.RELEASE_VERSION, remote_version[1],
2231 code=self.ETYPE_WARNING)
2233 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2234 if ninfo.vm_capable and isinstance(hyp_result, dict):
2235 for hv_name, hv_result in hyp_result.iteritems():
2236 test = hv_result is not None
2237 _ErrorIf(test, constants.CV_ENODEHV, node,
2238 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2240 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2241 if ninfo.vm_capable and isinstance(hvp_result, list):
2242 for item, hv_name, hv_result in hvp_result:
2243 _ErrorIf(True, constants.CV_ENODEHV, node,
2244 "hypervisor %s parameter verify failure (source %s): %s",
2245 hv_name, item, hv_result)
2247 test = nresult.get(constants.NV_NODESETUP,
2248 ["Missing NODESETUP results"])
2249 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2254 def _VerifyNodeTime(self, ninfo, nresult,
2255 nvinfo_starttime, nvinfo_endtime):
2256 """Check the node time.
2258 @type ninfo: L{objects.Node}
2259 @param ninfo: the node to check
2260 @param nresult: the remote results for the node
2261 @param nvinfo_starttime: the start time of the RPC call
2262 @param nvinfo_endtime: the end time of the RPC call
2266 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2268 ntime = nresult.get(constants.NV_TIME, None)
2270 ntime_merged = utils.MergeTime(ntime)
2271 except (ValueError, TypeError):
2272 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2275 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2276 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2277 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2278 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2282 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2283 "Node time diverges by at least %s from master node time",
2286 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2287 """Check the node LVM results.
2289 @type ninfo: L{objects.Node}
2290 @param ninfo: the node to check
2291 @param nresult: the remote results for the node
2292 @param vg_name: the configured VG name
2299 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2301 # checks vg existence and size > 20G
2302 vglist = nresult.get(constants.NV_VGLIST, None)
2304 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2306 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2307 constants.MIN_VG_SIZE)
2308 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2311 pvlist = nresult.get(constants.NV_PVLIST, None)
2312 test = pvlist is None
2313 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2315 # check that ':' is not present in PV names, since it's a
2316 # special character for lvcreate (denotes the range of PEs to be used on this PV)
2318 for _, pvname, owner_vg in pvlist:
2319 test = ":" in pvname
2320 _ErrorIf(test, constants.CV_ENODELVM, node,
2321 "Invalid character ':' in PV '%s' of VG '%s'",
2324 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2325 """Check the node bridges.
2327 @type ninfo: L{objects.Node}
2328 @param ninfo: the node to check
2329 @param nresult: the remote results for the node
2330 @param bridges: the expected list of bridges
2337 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2339 missing = nresult.get(constants.NV_BRIDGES, None)
2340 test = not isinstance(missing, list)
2341 _ErrorIf(test, constants.CV_ENODENET, node,
2342 "did not return valid bridge information")
2344 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2345 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2347 def _VerifyNodeUserScripts(self, ninfo, nresult):
2348 """Check the results of user scripts presence and executability on the node
2350 @type ninfo: L{objects.Node}
2351 @param ninfo: the node to check
2352 @param nresult: the remote results for the node
2357 test = constants.NV_USERSCRIPTS not in nresult
2358 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2359 "did not return user scripts information")
2361 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2363 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2364 "user scripts not present or not executable: %s" %
2365 utils.CommaJoin(sorted(broken_scripts)))
2367 def _VerifyNodeNetwork(self, ninfo, nresult):
2368 """Check the node network connectivity results.
2370 @type ninfo: L{objects.Node}
2371 @param ninfo: the node to check
2372 @param nresult: the remote results for the node
2376 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2378 test = constants.NV_NODELIST not in nresult
2379 _ErrorIf(test, constants.CV_ENODESSH, node,
2380 "node hasn't returned node ssh connectivity data")
2382 if nresult[constants.NV_NODELIST]:
2383 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2384 _ErrorIf(True, constants.CV_ENODESSH, node,
2385 "ssh communication with node '%s': %s", a_node, a_msg)
2387 test = constants.NV_NODENETTEST not in nresult
2388 _ErrorIf(test, constants.CV_ENODENET, node,
2389 "node hasn't returned node tcp connectivity data")
2391 if nresult[constants.NV_NODENETTEST]:
2392 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2394 _ErrorIf(True, constants.CV_ENODENET, node,
2395 "tcp communication with node '%s': %s",
2396 anode, nresult[constants.NV_NODENETTEST][anode])
2398 test = constants.NV_MASTERIP not in nresult
2399 _ErrorIf(test, constants.CV_ENODENET, node,
2400 "node hasn't returned node master IP reachability data")
2402 if not nresult[constants.NV_MASTERIP]:
2403 if node == self.master_node:
2404 msg = "the master node cannot reach the master IP (not configured?)"
2406 msg = "cannot reach the master IP"
2407 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2409 def _VerifyInstance(self, instance, instanceconfig, node_image,
2411 """Verify an instance.
2413 This function checks to see if the required block devices are
2414 available on the instance's node.
2417 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2418 node_current = instanceconfig.primary_node
2420 node_vol_should = {}
2421 instanceconfig.MapLVsByNode(node_vol_should)
2423 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2424 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2425 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2427 for node in node_vol_should:
2428 n_img = node_image[node]
2429 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2430 # ignore missing volumes on offline or broken nodes
2432 for volume in node_vol_should[node]:
2433 test = volume not in n_img.volumes
2434 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2435 "volume %s missing on node %s", volume, node)
2437 if instanceconfig.admin_state == constants.ADMINST_UP:
2438 pri_img = node_image[node_current]
2439 test = instance not in pri_img.instances and not pri_img.offline
2440 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2441 "instance not running on its primary node %s",
2444 diskdata = [(nname, success, status, idx)
2445 for (nname, disks) in diskstatus.items()
2446 for idx, (success, status) in enumerate(disks)]
2448 for nname, success, bdev_status, idx in diskdata:
2449 # the 'ghost node' construction in Exec() ensures that we have a node here
2451 snode = node_image[nname]
2452 bad_snode = snode.ghost or snode.offline
2453 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2454 not success and not bad_snode,
2455 constants.CV_EINSTANCEFAULTYDISK, instance,
2456 "couldn't retrieve status for disk/%s on %s: %s",
2457 idx, nname, bdev_status)
2458 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2459 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2460 constants.CV_EINSTANCEFAULTYDISK, instance,
2461 "disk/%s on %s is faulty", idx, nname)
2463 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2464 """Verify if there are any unknown volumes in the cluster.
2466 The .os, .swap and backup volumes are ignored. All other volumes are
2467 reported as unknown.
2469 @type reserved: L{ganeti.utils.FieldSet}
2470 @param reserved: a FieldSet of reserved volume names
2473 for node, n_img in node_image.items():
2474 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2475 self.all_node_info[node].group != self.group_uuid):
2476 # skip non-healthy nodes
2478 for volume in n_img.volumes:
2479 test = ((node not in node_vol_should or
2480 volume not in node_vol_should[node]) and
2481 not reserved.Matches(volume))
2482 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2483 "volume %s is unknown", volume)
2485 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2486 """Verify N+1 Memory Resilience.
2488 Check that if one single node dies we can still start all the
2489 instances it was primary for.
2492 cluster_info = self.cfg.GetClusterInfo()
2493 for node, n_img in node_image.items():
2494 # This code checks that every node which is now listed as
2495 # secondary has enough memory to host all instances it is
2496 # supposed to, should a single other node in the cluster fail.
2497 # FIXME: not ready for failover to an arbitrary node
2498 # FIXME: does not support file-backed instances
2499 # WARNING: we currently take into account down instances as well
2500 # as up ones, considering that even if they're down someone
2501 # might want to start them even in the event of a node failure.
2502 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2503 # we're skipping nodes marked offline and nodes in other groups from
2504 # the N+1 warning, since most likely we don't have good memory
2505 # information from them; we already list instances living on such
2506 # nodes, and that's enough warning
2508 #TODO(dynmem): also consider ballooning out other instances
2509 for prinode, instances in n_img.sbp.items():
2511 for instance in instances:
2512 bep = cluster_info.FillBE(instance_cfg[instance])
2513 if bep[constants.BE_AUTO_BALANCE]:
2514 needed_mem += bep[constants.BE_MINMEM]
2515 test = n_img.mfree < needed_mem
2516 self._ErrorIf(test, constants.CV_ENODEN1, node,
2517 "not enough memory to accomodate instance failovers"
2518 " should node %s fail (%dMiB needed, %dMiB available)",
2519 prinode, needed_mem, n_img.mfree)
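# Worked example (all figures hypothetical): if this node is secondary for
# instances whose primary is node B, and their auto-balanced BE_MINMEM
# values add up to 4096 MiB while only 2048 MiB are reported free, the
# check above raises CV_ENODEN1 for this node, naming node B as the
# primary whose failure could not be absorbed.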
2522 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2523 (files_all, files_opt, files_mc, files_vm)):
2524 """Verifies file checksums collected from all nodes.
2526 @param errorif: Callback for reporting errors
2527 @param nodeinfo: List of L{objects.Node} objects
2528 @param master_node: Name of master node
2529 @param all_nvinfo: RPC results
2532 # Define functions determining which nodes to consider for a file
2535 (files_mc, lambda node: (node.master_candidate or
2536 node.name == master_node)),
2537 (files_vm, lambda node: node.vm_capable),
2540 # Build mapping from filename to list of nodes which should have the file
2542 for (files, fn) in files2nodefn:
2544 filenodes = nodeinfo
2546 filenodes = filter(fn, nodeinfo)
2547 nodefiles.update((filename,
2548 frozenset(map(operator.attrgetter("name"), filenodes)))
2549 for filename in files)
2551 assert set(nodefiles) == (files_all | files_mc | files_vm)
2553 fileinfo = dict((filename, {}) for filename in nodefiles)
2554 ignore_nodes = set()
2556 for node in nodeinfo:
2558 ignore_nodes.add(node.name)
2561 nresult = all_nvinfo[node.name]
2563 if nresult.fail_msg or not nresult.payload:
2566 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2568 test = not (node_files and isinstance(node_files, dict))
2569 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2570 "Node did not return file checksum data")
2572 ignore_nodes.add(node.name)
2575 # Build per-checksum mapping from filename to nodes having it
2576 for (filename, checksum) in node_files.items():
2577 assert filename in nodefiles
2578 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2580 for (filename, checksums) in fileinfo.items():
2581 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2583 # Nodes having the file
2584 with_file = frozenset(node_name
2585 for nodes in fileinfo[filename].values()
2586 for node_name in nodes) - ignore_nodes
2588 expected_nodes = nodefiles[filename] - ignore_nodes
2590 # Nodes missing file
2591 missing_file = expected_nodes - with_file
2593 if filename in files_opt:
2595 errorif(missing_file and missing_file != expected_nodes,
2596 constants.CV_ECLUSTERFILECHECK, None,
2597 "File %s is optional, but it must exist on all or no"
2598 " nodes (not found on %s)",
2599 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2601 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2602 "File %s is missing from node(s) %s", filename,
2603 utils.CommaJoin(utils.NiceSort(missing_file)))
2605 # Warn if a node has a file it shouldn't
2606 unexpected = with_file - expected_nodes
2608 constants.CV_ECLUSTERFILECHECK, None,
2609 "File %s should not exist on node(s) %s",
2610 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2612 # See if there are multiple versions of the file
2613 test = len(checksums) > 1
2615 variants = ["variant %s on %s" %
2616 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2617 for (idx, (checksum, nodes)) in
2618 enumerate(sorted(checksums.items()))]
2622 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2623 "File %s found with %s different checksums (%s)",
2624 filename, len(checksums), "; ".join(variants))
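# Shape of the bookkeeping used above, with invented values:
#   nodefiles = {"/etc/example.conf": frozenset(["node1", "node2"])}
#   fileinfo  = {"/etc/example.conf": {"<checksum-a>": set(["node1"]),
#                                      "<checksum-b>": set(["node2"])}}
# Two different checksums for the same file trigger the
# CV_ECLUSTERFILECHECK "different checksums" error, with one "variant"
# listed per checksum.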
2626 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2628 """Verifies and the node DRBD status.
2630 @type ninfo: L{objects.Node}
2631 @param ninfo: the node to check
2632 @param nresult: the remote results for the node
2633 @param instanceinfo: the dict of instances
2634 @param drbd_helper: the configured DRBD usermode helper
2635 @param drbd_map: the DRBD map as returned by
2636 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2640 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2643 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2644 test = (helper_result is None)
2645 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2646 "no drbd usermode helper returned")
2648 status, payload = helper_result
2650 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2651 "drbd usermode helper check unsuccessful: %s", payload)
2652 test = status and (payload != drbd_helper)
2653 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2654 "wrong drbd usermode helper: %s", payload)
2656 # compute the DRBD minors
2658 for minor, instance in drbd_map[node].items():
2659 test = instance not in instanceinfo
2660 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2661 "ghost instance '%s' in temporary DRBD map", instance)
2662 # ghost instance should not be running, but otherwise we
2663 # don't give double warnings (both ghost instance and
2664 # unallocated minor in use)
2666 node_drbd[minor] = (instance, False)
2668 instance = instanceinfo[instance]
2669 node_drbd[minor] = (instance.name,
2670 instance.admin_state == constants.ADMINST_UP)
2672 # and now check them
2673 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2674 test = not isinstance(used_minors, (tuple, list))
2675 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2676 "cannot parse drbd status file: %s", str(used_minors))
2678 # we cannot check drbd status
2681 for minor, (iname, must_exist) in node_drbd.items():
2682 test = minor not in used_minors and must_exist
2683 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2684 "drbd minor %d of instance %s is not active", minor, iname)
2685 for minor in used_minors:
2686 test = minor not in node_drbd
2687 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2688 "unallocated drbd minor %d is in use", minor)
2690 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2691 """Builds the node OS structures.
2693 @type ninfo: L{objects.Node}
2694 @param ninfo: the node to check
2695 @param nresult: the remote results for the node
2696 @param nimg: the node image object
2700 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2702 remote_os = nresult.get(constants.NV_OSLIST, None)
2703 test = (not isinstance(remote_os, list) or
2704 not compat.all(isinstance(v, list) and len(v) == 7
2705 for v in remote_os))
2707 _ErrorIf(test, constants.CV_ENODEOS, node,
2708 "node hasn't returned valid OS data")
2717 for (name, os_path, status, diagnose,
2718 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2720 if name not in os_dict:
2723 # parameters is a list of lists instead of list of tuples due to
2724 # JSON lacking a real tuple type, fix it:
2725 parameters = [tuple(v) for v in parameters]
2726 os_dict[name].append((os_path, status, diagnose,
2727 set(variants), set(parameters), set(api_ver)))
2729 nimg.oslist = os_dict
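# Resulting structure (entries invented for illustration):
#   nimg.oslist = {"debootstrap": [(os_path, status, diagnose,
#                                   set(variants), set(params), set(api))]}
# One list entry is kept per occurrence of the OS name on the node; more
# than one entry is later reported as shadowing by _VerifyNodeOS.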
2731 def _VerifyNodeOS(self, ninfo, nimg, base):
2732 """Verifies the node OS list.
2734 @type ninfo: L{objects.Node}
2735 @param ninfo: the node to check
2736 @param nimg: the node image object
2737 @param base: the 'template' node we match against (e.g. from the master)
2741 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2743 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2745 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2746 for os_name, os_data in nimg.oslist.items():
2747 assert os_data, "Empty OS status for OS %s?!" % os_name
2748 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2749 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2750 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2751 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2752 "OS '%s' has multiple entries (first one shadows the rest): %s",
2753 os_name, utils.CommaJoin([v[0] for v in os_data]))
2754 # comparisons with the 'base' image
2755 test = os_name not in base.oslist
2756 _ErrorIf(test, constants.CV_ENODEOS, node,
2757 "Extra OS %s not present on reference node (%s)",
2761 assert base.oslist[os_name], "Base node has empty OS status?"
2762 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2764 # base OS is invalid, skipping
2766 for kind, a, b in [("API version", f_api, b_api),
2767 ("variants list", f_var, b_var),
2768 ("parameters", beautify_params(f_param),
2769 beautify_params(b_param))]:
2770 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2771 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2772 kind, os_name, base.name,
2773 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2775 # check any missing OSes
2776 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2777 _ErrorIf(missing, constants.CV_ENODEOS, node,
2778 "OSes present on reference node %s but missing on this node: %s",
2779 base.name, utils.CommaJoin(missing))
2781 def _VerifyOob(self, ninfo, nresult):
2782 """Verifies out of band functionality of a node.
2784 @type ninfo: L{objects.Node}
2785 @param ninfo: the node to check
2786 @param nresult: the remote results for the node
2790 # We just have to verify the paths on master and/or master candidates
2791 # as the oob helper is invoked on the master
2792 if ((ninfo.master_candidate or ninfo.master_capable) and
2793 constants.NV_OOB_PATHS in nresult):
2794 for path_result in nresult[constants.NV_OOB_PATHS]:
2795 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2797 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2798 """Verifies and updates the node volume data.
2800 This function will update a L{NodeImage}'s internal structures
2801 with data from the remote call.
2803 @type ninfo: L{objects.Node}
2804 @param ninfo: the node to check
2805 @param nresult: the remote results for the node
2806 @param nimg: the node image object
2807 @param vg_name: the configured VG name
2811 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2813 nimg.lvm_fail = True
2814 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2817 elif isinstance(lvdata, basestring):
2818 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2819 utils.SafeEncode(lvdata))
2820 elif not isinstance(lvdata, dict):
2821 _ErrorIf(True, constants.CV_ENODELVM, node,
2822 "rpc call to node failed (lvlist)")
2824 nimg.volumes = lvdata
2825 nimg.lvm_fail = False
2827 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2828 """Verifies and updates the node instance list.
2830 If the listing was successful, then updates this node's instance
2831 list. Otherwise, it marks the RPC call as failed for the instance list.
2834 @type ninfo: L{objects.Node}
2835 @param ninfo: the node to check
2836 @param nresult: the remote results for the node
2837 @param nimg: the node image object
2840 idata = nresult.get(constants.NV_INSTANCELIST, None)
2841 test = not isinstance(idata, list)
2842 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2843 "rpc call to node failed (instancelist): %s",
2844 utils.SafeEncode(str(idata)))
2846 nimg.hyp_fail = True
2848 nimg.instances = idata
2850 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2851 """Verifies and computes a node information map
2853 @type ninfo: L{objects.Node}
2854 @param ninfo: the node to check
2855 @param nresult: the remote results for the node
2856 @param nimg: the node image object
2857 @param vg_name: the configured VG name
2861 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2863 # try to read free memory (from the hypervisor)
2864 hv_info = nresult.get(constants.NV_HVINFO, None)
2865 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2866 _ErrorIf(test, constants.CV_ENODEHV, node,
2867 "rpc call to node failed (hvinfo)")
2870 nimg.mfree = int(hv_info["memory_free"])
2871 except (ValueError, TypeError):
2872 _ErrorIf(True, constants.CV_ENODERPC, node,
2873 "node returned invalid nodeinfo, check hypervisor")
2875 # FIXME: devise a free space model for file based instances as well
2876 if vg_name is not None:
2877 test = (constants.NV_VGLIST not in nresult or
2878 vg_name not in nresult[constants.NV_VGLIST])
2879 _ErrorIf(test, constants.CV_ENODELVM, node,
2880 "node didn't return data for the volume group '%s'"
2881 " - it is either missing or broken", vg_name)
2884 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2885 except (ValueError, TypeError):
2886 _ErrorIf(True, constants.CV_ENODERPC, node,
2887 "node returned invalid LVM info, check LVM status")
2889 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2890 """Gets per-disk status information for all instances.
2892 @type nodelist: list of strings
2893 @param nodelist: Node names
2894 @type node_image: dict of (name, L{objects.Node})
2895 @param node_image: Node objects
2896 @type instanceinfo: dict of (name, L{objects.Instance})
2897 @param instanceinfo: Instance objects
2898 @rtype: {instance: {node: [(success, payload)]}}
2899 @return: a dictionary of per-instance dictionaries with nodes as
2900 keys and disk information as values; the disk information is a
2901 list of tuples (success, payload)
2904 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2907 node_disks_devonly = {}
2908 diskless_instances = set()
2909 diskless = constants.DT_DISKLESS
2911 for nname in nodelist:
2912 node_instances = list(itertools.chain(node_image[nname].pinst,
2913 node_image[nname].sinst))
2914 diskless_instances.update(inst for inst in node_instances
2915 if instanceinfo[inst].disk_template == diskless)
2916 disks = [(inst, disk)
2917 for inst in node_instances
2918 for disk in instanceinfo[inst].disks]
2921 # No need to collect data
2924 node_disks[nname] = disks
2926 # Creating copies as SetDiskID below will modify the objects and that can
2927 # lead to incorrect data returned from nodes
2928 devonly = [dev.Copy() for (_, dev) in disks]
2931 self.cfg.SetDiskID(dev, nname)
2933 node_disks_devonly[nname] = devonly
2935 assert len(node_disks) == len(node_disks_devonly)
2937 # Collect data from all nodes with disks
2938 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2941 assert len(result) == len(node_disks)
2945 for (nname, nres) in result.items():
2946 disks = node_disks[nname]
2949 # No data from this node
2950 data = len(disks) * [(False, "node offline")]
2953 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2954 "while getting disk information: %s", msg)
2956 # No data from this node
2957 data = len(disks) * [(False, msg)]
2960 for idx, i in enumerate(nres.payload):
2961 if isinstance(i, (tuple, list)) and len(i) == 2:
2964 logging.warning("Invalid result from node %s, entry %d: %s",
2966 data.append((False, "Invalid result from the remote node"))
2968 for ((inst, _), status) in zip(disks, data):
2969 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2971 # Add empty entries for diskless instances.
2972 for inst in diskless_instances:
2973 assert inst not in instdisk
2976 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2977 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2978 compat.all(isinstance(s, (tuple, list)) and
2979 len(s) == 2 for s in statuses)
2980 for inst, nnames in instdisk.items()
2981 for nname, statuses in nnames.items())
2982 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
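# Example of the mapping assembled above (instance/node names invented):
#   instdisk = {"inst1": {"node1": [(True, status0), (True, status1)],
#                         "node2": [(False, "node offline")]}}
# Diskless instances end up with an empty inner dictionary, which the
# consistency assert above allows for.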
2987 def _SshNodeSelector(group_uuid, all_nodes):
2988 """Create endless iterators for all potential SSH check hosts.
2991 nodes = [node for node in all_nodes
2992 if (node.group != group_uuid and
2994 keyfunc = operator.attrgetter("group")
2996 return map(itertools.cycle,
2997 [sorted(map(operator.attrgetter("name"), names))
2998 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3002 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3003 """Choose which nodes should talk to which other nodes.
3005 We will make nodes contact all nodes in their group, and one node from every other group.
3008 @warning: This algorithm has a known issue if one node group is much
3009 smaller than others (e.g. just one node). In such a case all other
3010 nodes will talk to the single node.
3013 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3014 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3016 return (online_nodes,
3017 dict((name, sorted([i.next() for i in sel]))
3018 for name in online_nodes))
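# Illustrative outcome (group and node names invented): with online group
# members ["n1", "n2"] and a single other group containing ["m1", "m2"],
# the returned mapping is roughly {"n1": ["m1"], "n2": ["m2"]}; each node
# checks every peer of its own group plus one node per foreign group, the
# foreign pick rotating thanks to the itertools.cycle iterators.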
3020 def BuildHooksEnv(self):
3023 Cluster-Verify hooks run only in the post phase; their failure is
3024 logged in the verify output and makes the verification fail.
3028 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3031 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3032 for node in self.my_node_info.values())
3036 def BuildHooksNodes(self):
3037 """Build hooks nodes.
3040 return ([], self.my_node_names)
3042 def Exec(self, feedback_fn):
3043 """Verify integrity of the node group, performing various test on nodes.
3046 # This method has too many local variables. pylint: disable=R0914
3047 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3049 if not self.my_node_names:
3051 feedback_fn("* Empty node group, skipping verification")
3055 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3056 verbose = self.op.verbose
3057 self._feedback_fn = feedback_fn
3059 vg_name = self.cfg.GetVGName()
3060 drbd_helper = self.cfg.GetDRBDHelper()
3061 cluster = self.cfg.GetClusterInfo()
3062 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3063 hypervisors = cluster.enabled_hypervisors
3064 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3066 i_non_redundant = [] # Non redundant instances
3067 i_non_a_balanced = [] # Non auto-balanced instances
3068 i_offline = 0 # Count of offline instances
3069 n_offline = 0 # Count of offline nodes
3070 n_drained = 0 # Count of nodes being drained
3071 node_vol_should = {}
3073 # FIXME: verify OS list
3076 filemap = _ComputeAncillaryFiles(cluster, False)
3078 # do local checksums
3079 master_node = self.master_node = self.cfg.GetMasterNode()
3080 master_ip = self.cfg.GetMasterIP()
3082 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3085 if self.cfg.GetUseExternalMipScript():
3086 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3088 node_verify_param = {
3089 constants.NV_FILELIST:
3090 utils.UniqueSequence(filename
3091 for files in filemap
3092 for filename in files),
3093 constants.NV_NODELIST:
3094 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3095 self.all_node_info.values()),
3096 constants.NV_HYPERVISOR: hypervisors,
3097 constants.NV_HVPARAMS:
3098 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3099 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3100 for node in node_data_list
3101 if not node.offline],
3102 constants.NV_INSTANCELIST: hypervisors,
3103 constants.NV_VERSION: None,
3104 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3105 constants.NV_NODESETUP: None,
3106 constants.NV_TIME: None,
3107 constants.NV_MASTERIP: (master_node, master_ip),
3108 constants.NV_OSLIST: None,
3109 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3110 constants.NV_USERSCRIPTS: user_scripts,
3113 if vg_name is not None:
3114 node_verify_param[constants.NV_VGLIST] = None
3115 node_verify_param[constants.NV_LVLIST] = vg_name
3116 node_verify_param[constants.NV_PVLIST] = [vg_name]
3117 node_verify_param[constants.NV_DRBDLIST] = None
3120 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3123 # FIXME: this needs to be changed per node-group, not cluster-wide
3125 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3126 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3127 bridges.add(default_nicpp[constants.NIC_LINK])
3128 for instance in self.my_inst_info.values():
3129 for nic in instance.nics:
3130 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3131 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3132 bridges.add(full_nic[constants.NIC_LINK])
3135 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3137 # Build our expected cluster state
3138 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3140 vm_capable=node.vm_capable))
3141 for node in node_data_list)
3145 for node in self.all_node_info.values():
3146 path = _SupportsOob(self.cfg, node)
3147 if path and path not in oob_paths:
3148 oob_paths.append(path)
3151 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3153 for instance in self.my_inst_names:
3154 inst_config = self.my_inst_info[instance]
3156 for nname in inst_config.all_nodes:
3157 if nname not in node_image:
3158 gnode = self.NodeImage(name=nname)
3159 gnode.ghost = (nname not in self.all_node_info)
3160 node_image[nname] = gnode
3162 inst_config.MapLVsByNode(node_vol_should)
3164 pnode = inst_config.primary_node
3165 node_image[pnode].pinst.append(instance)
3167 for snode in inst_config.secondary_nodes:
3168 nimg = node_image[snode]
3169 nimg.sinst.append(instance)
3170 if pnode not in nimg.sbp:
3171 nimg.sbp[pnode] = []
3172 nimg.sbp[pnode].append(instance)
3174 # At this point, we have the in-memory data structures complete,
3175 # except for the runtime information, which we'll gather next
3177 # Due to the way our RPC system works, exact response times cannot be
3178 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3179 # time before and after executing the request, we can at least have a time window.
3181 nvinfo_starttime = time.time()
3182 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3184 self.cfg.GetClusterName())
3185 nvinfo_endtime = time.time()
3187 if self.extra_lv_nodes and vg_name is not None:
3189 self.rpc.call_node_verify(self.extra_lv_nodes,
3190 {constants.NV_LVLIST: vg_name},
3191 self.cfg.GetClusterName())
3193 extra_lv_nvinfo = {}
3195 all_drbd_map = self.cfg.ComputeDRBDMap()
3197 feedback_fn("* Gathering disk information (%s nodes)" %
3198 len(self.my_node_names))
3199 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3202 feedback_fn("* Verifying configuration file consistency")
3204 # If not all nodes are being checked, we need to make sure the master node
3205 # and a non-checked vm_capable node are in the list.
3206 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3208 vf_nvinfo = all_nvinfo.copy()
3209 vf_node_info = list(self.my_node_info.values())
3210 additional_nodes = []
3211 if master_node not in self.my_node_info:
3212 additional_nodes.append(master_node)
3213 vf_node_info.append(self.all_node_info[master_node])
3214 # Add the first vm_capable node we find which is not included
3215 for node in absent_nodes:
3216 nodeinfo = self.all_node_info[node]
3217 if nodeinfo.vm_capable and not nodeinfo.offline:
3218 additional_nodes.append(node)
3219 vf_node_info.append(self.all_node_info[node])
3221 key = constants.NV_FILELIST
3222 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3223 {key: node_verify_param[key]},
3224 self.cfg.GetClusterName()))
3226 vf_nvinfo = all_nvinfo
3227 vf_node_info = self.my_node_info.values()
3229 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3231 feedback_fn("* Verifying node status")
3235 for node_i in node_data_list:
3237 nimg = node_image[node]
3241 feedback_fn("* Skipping offline node %s" % (node,))
3245 if node == master_node:
3247 elif node_i.master_candidate:
3248 ntype = "master candidate"
3249 elif node_i.drained:
3255 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3257 msg = all_nvinfo[node].fail_msg
3258 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3261 nimg.rpc_fail = True
3264 nresult = all_nvinfo[node].payload
3266 nimg.call_ok = self._VerifyNode(node_i, nresult)
3267 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3268 self._VerifyNodeNetwork(node_i, nresult)
3269 self._VerifyNodeUserScripts(node_i, nresult)
3270 self._VerifyOob(node_i, nresult)
3273 self._VerifyNodeLVM(node_i, nresult, vg_name)
3274 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3277 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3278 self._UpdateNodeInstances(node_i, nresult, nimg)
3279 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3280 self._UpdateNodeOS(node_i, nresult, nimg)
3282 if not nimg.os_fail:
3283 if refos_img is None:
3285 self._VerifyNodeOS(node_i, nimg, refos_img)
3286 self._VerifyNodeBridges(node_i, nresult, bridges)
3288 # Check whether all running instances are primary for the node. (This
3289 # can no longer be done from _VerifyInstance below, since some of the
3290 # wrong instances could be from other node groups.)
3291 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3293 for inst in non_primary_inst:
3294 # FIXME: investigate best way to handle offline insts
3295 if inst.admin_state == constants.ADMINST_OFFLINE:
3297 feedback_fn("* Skipping offline instance %s" % inst.name)
3300 test = inst in self.all_inst_info
3301 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3302 "instance should not run on node %s", node_i.name)
3303 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3304 "node is running unknown instance %s", inst)
3306 for node, result in extra_lv_nvinfo.items():
3307 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3308 node_image[node], vg_name)
3310 feedback_fn("* Verifying instance status")
3311 for instance in self.my_inst_names:
3313 feedback_fn("* Verifying instance %s" % instance)
3314 inst_config = self.my_inst_info[instance]
3315 self._VerifyInstance(instance, inst_config, node_image,
3317 inst_nodes_offline = []
3319 pnode = inst_config.primary_node
3320 pnode_img = node_image[pnode]
3321 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3322 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3323 " primary node failed", instance)
3325 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3327 constants.CV_EINSTANCEBADNODE, instance,
3328 "instance is marked as running and lives on offline node %s",
3329 inst_config.primary_node)
3331 # If the instance is non-redundant we cannot survive losing its primary
3332 # node, so we are not N+1 compliant. On the other hand we have no disk
3333 # templates with more than one secondary, so that situation is not well handled either.
3335 # FIXME: does not support file-backed instances
3336 if not inst_config.secondary_nodes:
3337 i_non_redundant.append(instance)
3339 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3340 constants.CV_EINSTANCELAYOUT,
3341 instance, "instance has multiple secondary nodes: %s",
3342 utils.CommaJoin(inst_config.secondary_nodes),
3343 code=self.ETYPE_WARNING)
3345 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3346 pnode = inst_config.primary_node
3347 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3348 instance_groups = {}
3350 for node in instance_nodes:
3351 instance_groups.setdefault(self.all_node_info[node].group,
3355 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3356 # Sort so that we always list the primary node first.
3357 for group, nodes in sorted(instance_groups.items(),
3358 key=lambda (_, nodes): pnode in nodes,
3361 self._ErrorIf(len(instance_groups) > 1,
3362 constants.CV_EINSTANCESPLITGROUPS,
3363 instance, "instance has primary and secondary nodes in"
3364 " different groups: %s", utils.CommaJoin(pretty_list),
3365 code=self.ETYPE_WARNING)
3367 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3368 i_non_a_balanced.append(instance)
3370 for snode in inst_config.secondary_nodes:
3371 s_img = node_image[snode]
3372 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3373 snode, "instance %s, connection to secondary node failed",
3377 inst_nodes_offline.append(snode)
3379 # warn that the instance lives on offline nodes
3380 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3381 "instance has offline secondary node(s) %s",
3382 utils.CommaJoin(inst_nodes_offline))
3383 # ... or ghost/non-vm_capable nodes
3384 for node in inst_config.all_nodes:
3385 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3386 instance, "instance lives on ghost node %s", node)
3387 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3388 instance, "instance lives on non-vm_capable node %s", node)
3390 feedback_fn("* Verifying orphan volumes")
3391 reserved = utils.FieldSet(*cluster.reserved_lvs)
3393 # We will get spurious "unknown volume" warnings if any node of this group
3394 # is secondary for an instance whose primary is in another group. To avoid
3395 # them, we find these instances and add their volumes to node_vol_should.
3396 for inst in self.all_inst_info.values():
3397 for secondary in inst.secondary_nodes:
3398 if (secondary in self.my_node_info
3399 and inst.name not in self.my_inst_info):
3400 inst.MapLVsByNode(node_vol_should)
3403 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3405 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3406 feedback_fn("* Verifying N+1 Memory redundancy")
3407 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3409 feedback_fn("* Other Notes")
3411 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3412 % len(i_non_redundant))
3414 if i_non_a_balanced:
3415 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3416 % len(i_non_a_balanced))
3419 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3422 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3425 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3429 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3430 """Analyze the post-hooks' result
3432 This method analyses the hook result, handles it, and sends some
3433 nicely-formatted feedback back to the user.
3435 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3436 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3437 @param hooks_results: the results of the multi-node hooks rpc call
3438 @param feedback_fn: function used to send feedback back to the caller
3439 @param lu_result: previous Exec result
3440 @return: the new Exec result, based on the previous result
3444 # We only really run POST phase hooks, only for non-empty groups,
3445 # and are only interested in their results
3446 if not self.my_node_names:
3449 elif phase == constants.HOOKS_PHASE_POST:
3450 # Used to change hooks' output to proper indentation
3451 feedback_fn("* Hooks Results")
3452 assert hooks_results, "invalid result from hooks"
3454 for node_name in hooks_results:
3455 res = hooks_results[node_name]
3457 test = msg and not res.offline
3458 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3459 "Communication failure in hooks execution: %s", msg)
3460 if res.offline or msg:
3461 # No need to investigate payload if node is offline or gave an error
3464 for script, hkr, output in res.payload:
3465 test = hkr == constants.HKR_FAIL
3466 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3467 "Script %s failed, output:", script)
3469 output = self._HOOKS_INDENT_RE.sub(" ", output)
3470 feedback_fn("%s" % output)
3476 class LUClusterVerifyDisks(NoHooksLU):
3477 """Verifies the cluster disks status.
3482 def ExpandNames(self):
3483 self.share_locks = _ShareAll()
3484 self.needed_locks = {
3485 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3488 def Exec(self, feedback_fn):
3489 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3491 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3492 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3493 for group in group_names])
3496 class LUGroupVerifyDisks(NoHooksLU):
3497 """Verifies the status of all disks in a node group.
3502 def ExpandNames(self):
3503 # Raises errors.OpPrereqError on its own if group can't be found
3504 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3506 self.share_locks = _ShareAll()
3507 self.needed_locks = {
3508 locking.LEVEL_INSTANCE: [],
3509 locking.LEVEL_NODEGROUP: [],
3510 locking.LEVEL_NODE: [],
3513 def DeclareLocks(self, level):
3514 if level == locking.LEVEL_INSTANCE:
3515 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3517 # Lock instances optimistically, needs verification once node and group
3518 # locks have been acquired
3519 self.needed_locks[locking.LEVEL_INSTANCE] = \
3520 self.cfg.GetNodeGroupInstances(self.group_uuid)
3522 elif level == locking.LEVEL_NODEGROUP:
3523 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3525 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3526 set([self.group_uuid] +
3527 # Lock all groups used by instances optimistically; this requires
3528 # going via the node before it's locked, requiring verification later on
3531 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3532 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3534 elif level == locking.LEVEL_NODE:
3535 # This will only lock the nodes in the group to be verified which contain actual instances
3537 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3538 self._LockInstancesNodes()
3540 # Lock all nodes in group to be verified
3541 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3542 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3543 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3545 def CheckPrereq(self):
3546 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3547 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3548 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3550 assert self.group_uuid in owned_groups
3552 # Check if locked instances are still correct
3553 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3555 # Get instance information
3556 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3558 # Check if node groups for locked instances are still correct
3559 _CheckInstancesNodeGroups(self.cfg, self.instances,
3560 owned_groups, owned_nodes, self.group_uuid)
3562 def Exec(self, feedback_fn):
3563 """Verify integrity of cluster disks.
3565 @rtype: tuple of three items
3566 @return: a tuple of (dict of node-to-node_error, list of instances
3567 which need activate-disks, dict of instance: (node, volume) for missing volumes)
3572 res_instances = set()
3575 nv_dict = _MapInstanceDisksToNodes([inst
3576 for inst in self.instances.values()
3577 if inst.admin_state == constants.ADMINST_UP])
3580 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3581 set(self.cfg.GetVmCapableNodeList()))
3583 node_lvs = self.rpc.call_lv_list(nodes, [])
3585 for (node, node_res) in node_lvs.items():
3586 if node_res.offline:
3589 msg = node_res.fail_msg
3591 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3592 res_nodes[node] = msg
3595 for lv_name, (_, _, lv_online) in node_res.payload.items():
3596 inst = nv_dict.pop((node, lv_name), None)
3597 if not (lv_online or inst is None):
3598 res_instances.add(inst)
3600 # any leftover items in nv_dict are missing LVs, let's arrange the data
3602 for key, inst in nv_dict.iteritems():
3603 res_missing.setdefault(inst, []).append(list(key))
3605 return (res_nodes, list(res_instances), res_missing)
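# Example return value (all names invented):
#   ({"node3": "Error while connecting"},       # per-node RPC errors
#    ["inst2"],                                  # instances needing activate-disks
#    {"inst5": [["node4", "xenvg/disk0"]]})      # missing LVs per instance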
3608 class LUClusterRepairDiskSizes(NoHooksLU):
3609 """Verifies the cluster disks sizes.
3614 def ExpandNames(self):
3615 if self.op.instances:
3616 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3617 self.needed_locks = {
3618 locking.LEVEL_NODE_RES: [],
3619 locking.LEVEL_INSTANCE: self.wanted_names,
3621 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3623 self.wanted_names = None
3624 self.needed_locks = {
3625 locking.LEVEL_NODE_RES: locking.ALL_SET,
3626 locking.LEVEL_INSTANCE: locking.ALL_SET,
3628 self.share_locks = {
3629 locking.LEVEL_NODE_RES: 1,
3630 locking.LEVEL_INSTANCE: 0,
3633 def DeclareLocks(self, level):
3634 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3635 self._LockInstancesNodes(primary_only=True, level=level)
3637 def CheckPrereq(self):
3638 """Check prerequisites.
3640 This only checks the optional instance list against the existing names.
3643 if self.wanted_names is None:
3644 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3646 self.wanted_instances = \
3647 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3649 def _EnsureChildSizes(self, disk):
3650 """Ensure children of the disk have the needed disk size.
3652 This is valid mainly for DRBD8 and fixes an issue where the
3653 children have smaller disk size.
3655 @param disk: an L{ganeti.objects.Disk} object
3658 if disk.dev_type == constants.LD_DRBD8:
3659 assert disk.children, "Empty children for DRBD8?"
3660 fchild = disk.children[0]
3661 mismatch = fchild.size < disk.size
3663 self.LogInfo("Child disk has size %d, parent %d, fixing",
3664 fchild.size, disk.size)
3665 fchild.size = disk.size
3667 # and we recurse on this child only, not on the metadev
3668 return self._EnsureChildSizes(fchild) or mismatch
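# Example (sizes invented): a DRBD8 disk recorded as 10240 MiB whose data
# child still carries 10236 MiB gets the child grown to 10240, and the
# method returns True so the caller in Exec() knows to write the
# configuration back via self.cfg.Update().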
3672 def Exec(self, feedback_fn):
3673 """Verify the size of cluster disks.
3676 # TODO: check child disks too
3677 # TODO: check differences in size between primary/secondary nodes
3679 for instance in self.wanted_instances:
3680 pnode = instance.primary_node
3681 if pnode not in per_node_disks:
3682 per_node_disks[pnode] = []
3683 for idx, disk in enumerate(instance.disks):
3684 per_node_disks[pnode].append((instance, idx, disk))
3686 assert not (frozenset(per_node_disks.keys()) -
3687 self.owned_locks(locking.LEVEL_NODE_RES)), \
3688 "Not owning correct locks"
3689 assert not self.owned_locks(locking.LEVEL_NODE)
3692 for node, dskl in per_node_disks.items():
3693 newl = [v[2].Copy() for v in dskl]
3695 self.cfg.SetDiskID(dsk, node)
3696 result = self.rpc.call_blockdev_getsize(node, newl)
3698 self.LogWarning("Failure in blockdev_getsize call to node"
3699 " %s, ignoring", node)
3701 if len(result.payload) != len(dskl):
3702 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3703 " result.payload=%s", node, len(dskl), result.payload)
3704 self.LogWarning("Invalid result from node %s, ignoring node results",
3707 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3709 self.LogWarning("Disk %d of instance %s did not return size"
3710 " information, ignoring", idx, instance.name)
3712 if not isinstance(size, (int, long)):
3713 self.LogWarning("Disk %d of instance %s did not return valid"
3714 " size information, ignoring", idx, instance.name)
3717 if size != disk.size:
3718 self.LogInfo("Disk %d of instance %s has mismatched size,"
3719 " correcting: recorded %d, actual %d", idx,
3720 instance.name, disk.size, size)
3722 self.cfg.Update(instance, feedback_fn)
3723 changed.append((instance.name, idx, size))
3724 if self._EnsureChildSizes(disk):
3725 self.cfg.Update(instance, feedback_fn)
3726 changed.append((instance.name, idx, disk.size))
3730 class LUClusterRename(LogicalUnit):
3731 """Rename the cluster.
3734 HPATH = "cluster-rename"
3735 HTYPE = constants.HTYPE_CLUSTER
3737 def BuildHooksEnv(self):
3742 "OP_TARGET": self.cfg.GetClusterName(),
3743 "NEW_NAME": self.op.name,
3746 def BuildHooksNodes(self):
3747 """Build hooks nodes.
3750 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3752 def CheckPrereq(self):
3753 """Verify that the passed name is a valid one.
3756 hostname = netutils.GetHostname(name=self.op.name,
3757 family=self.cfg.GetPrimaryIPFamily())
3759 new_name = hostname.name
3760 self.ip = new_ip = hostname.ip
3761 old_name = self.cfg.GetClusterName()
3762 old_ip = self.cfg.GetMasterIP()
3763 if new_name == old_name and new_ip == old_ip:
3764 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3765 " cluster has changed",
3767 if new_ip != old_ip:
3768 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3769 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3770 " reachable on the network" %
3771 new_ip, errors.ECODE_NOTUNIQUE)
3773 self.op.name = new_name
3775 def Exec(self, feedback_fn):
3776 """Rename the cluster.
3779 clustername = self.op.name
3782 # shutdown the master IP
3783 master_params = self.cfg.GetMasterNetworkParameters()
3784 ems = self.cfg.GetUseExternalMipScript()
3785 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3787 result.Raise("Could not disable the master role")
3790 cluster = self.cfg.GetClusterInfo()
3791 cluster.cluster_name = clustername
3792 cluster.master_ip = new_ip
3793 self.cfg.Update(cluster, feedback_fn)
3795 # update the known hosts file
3796 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3797 node_list = self.cfg.GetOnlineNodeList()
3799 node_list.remove(master_params.name)
3802 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3804 master_params.ip = new_ip
3805 result = self.rpc.call_node_activate_master_ip(master_params.name,
3807 msg = result.fail_msg
3809 self.LogWarning("Could not re-enable the master role on"
3810 " the master, please restart manually: %s", msg)
3815 def _ValidateNetmask(cfg, netmask):
3816 """Checks if a netmask is valid.
3818 @type cfg: L{config.ConfigWriter}
3819 @param cfg: The cluster configuration
3821 @param netmask: the netmask to be verified
3822 @raise errors.OpPrereqError: if the validation fails
3825 ip_family = cfg.GetPrimaryIPFamily()
3827 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3828 except errors.ProgrammerError:
3829 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3831 if not ipcls.ValidateNetmask(netmask):
3832 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
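# Illustrative sketch (editorial addition): how this helper is meant to be
# called from an LU, mirroring its use in LUClusterSetParams.CheckArguments
# below; the example prefix length is made up.
#
#   if self.op.master_netmask is not None:
#     _ValidateNetmask(self.cfg, self.op.master_netmask)   # e.g. 24 for IPv4
#   # raises errors.OpPrereqError if the value is not a valid CIDR netmask
#   # for the cluster's primary IP family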
3836 class LUClusterSetParams(LogicalUnit):
3837 """Change the parameters of the cluster.
3840 HPATH = "cluster-modify"
3841 HTYPE = constants.HTYPE_CLUSTER
3844 def CheckArguments(self):
3848 if self.op.uid_pool:
3849 uidpool.CheckUidPool(self.op.uid_pool)
3851 if self.op.add_uids:
3852 uidpool.CheckUidPool(self.op.add_uids)
3854 if self.op.remove_uids:
3855 uidpool.CheckUidPool(self.op.remove_uids)
3857 if self.op.master_netmask is not None:
3858 _ValidateNetmask(self.cfg, self.op.master_netmask)
3860 if self.op.diskparams:
3861 for dt_params in self.op.diskparams.values():
3862 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3864 def ExpandNames(self):
3865 # FIXME: in the future maybe other cluster params won't require checking on
3866 # all nodes to be modified.
3867 self.needed_locks = {
3868 locking.LEVEL_NODE: locking.ALL_SET,
3869 locking.LEVEL_INSTANCE: locking.ALL_SET,
3870 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3872 self.share_locks = {
3873 locking.LEVEL_NODE: 1,
3874 locking.LEVEL_INSTANCE: 1,
3875 locking.LEVEL_NODEGROUP: 1,
3878 def BuildHooksEnv(self):
3883 "OP_TARGET": self.cfg.GetClusterName(),
3884 "NEW_VG_NAME": self.op.vg_name,
3887 def BuildHooksNodes(self):
3888 """Build hooks nodes.
3891 mn = self.cfg.GetMasterNode()
3894 def CheckPrereq(self):
3895 """Check prerequisites.
3897     This checks that the given parameters don't conflict and that
3898     the given volume group is valid.
3901 if self.op.vg_name is not None and not self.op.vg_name:
3902 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3903 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3904 " instances exist", errors.ECODE_INVAL)
3906 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3907 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3908 raise errors.OpPrereqError("Cannot disable drbd helper while"
3909 " drbd-based instances exist",
3912 node_list = self.owned_locks(locking.LEVEL_NODE)
3914     # if vg_name is not None, check the given volume group on all nodes
3916 vglist = self.rpc.call_vg_list(node_list)
3917 for node in node_list:
3918 msg = vglist[node].fail_msg
3920 # ignoring down node
3921 self.LogWarning("Error while gathering data on node %s"
3922 " (ignoring node): %s", node, msg)
3924 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3926 constants.MIN_VG_SIZE)
3928 raise errors.OpPrereqError("Error on node '%s': %s" %
3929 (node, vgstatus), errors.ECODE_ENVIRON)
3931 if self.op.drbd_helper:
3932 # checks given drbd helper on all nodes
3933 helpers = self.rpc.call_drbd_helper(node_list)
3934 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3936 self.LogInfo("Not checking drbd helper on offline node %s", node)
3938 msg = helpers[node].fail_msg
3940 raise errors.OpPrereqError("Error checking drbd helper on node"
3941 " '%s': %s" % (node, msg),
3942 errors.ECODE_ENVIRON)
3943 node_helper = helpers[node].payload
3944 if node_helper != self.op.drbd_helper:
3945 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3946 (node, node_helper), errors.ECODE_ENVIRON)
3948 self.cluster = cluster = self.cfg.GetClusterInfo()
3949     # validate parameter changes
3950 if self.op.beparams:
3951 objects.UpgradeBeParams(self.op.beparams)
3952 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3953 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3955 if self.op.ndparams:
3956 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3957 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3959 # TODO: we need a more general way to handle resetting
3960 # cluster-level parameters to default values
3961 if self.new_ndparams["oob_program"] == "":
3962 self.new_ndparams["oob_program"] = \
3963 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3965 if self.op.hv_state:
3966 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3967 self.cluster.hv_state_static)
3968 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3969 for hv, values in new_hv_state.items())
3971 if self.op.disk_state:
3972 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3973 self.cluster.disk_state_static)
3974 self.new_disk_state = \
3975 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3976 for name, values in svalues.items()))
3977 for storage, svalues in new_disk_state.items())
3980 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3983 all_instances = self.cfg.GetAllInstancesInfo().values()
3985 for group in self.cfg.GetAllNodeGroupsInfo().values():
3986 instances = frozenset([inst for inst in all_instances
3987 if compat.any(node in group.members
3988 for node in inst.all_nodes)])
3989 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3990 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3992 new_ipolicy, instances)
3994 violations.update(new)
3997 self.LogWarning("After the ipolicy change the following instances"
3998                       " violate it: %s",
3999 utils.CommaJoin(utils.NiceSort(violations)))
4001 if self.op.nicparams:
4002 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4003 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4004 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4007 # check all instances for consistency
4008 for instance in self.cfg.GetAllInstancesInfo().values():
4009 for nic_idx, nic in enumerate(instance.nics):
4010 params_copy = copy.deepcopy(nic.nicparams)
4011 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4013 # check parameter syntax
4015 objects.NIC.CheckParameterSyntax(params_filled)
4016 except errors.ConfigurationError, err:
4017 nic_errors.append("Instance %s, nic/%d: %s" %
4018 (instance.name, nic_idx, err))
4020 # if we're moving instances to routed, check that they have an ip
4021 target_mode = params_filled[constants.NIC_MODE]
4022 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4023 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4024 " address" % (instance.name, nic_idx))
4026 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4027 "\n".join(nic_errors))
4029 # hypervisor list/parameters
4030 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4031 if self.op.hvparams:
4032 for hv_name, hv_dict in self.op.hvparams.items():
4033 if hv_name not in self.new_hvparams:
4034 self.new_hvparams[hv_name] = hv_dict
4036 self.new_hvparams[hv_name].update(hv_dict)
4038 # disk template parameters
4039 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4040 if self.op.diskparams:
4041 for dt_name, dt_params in self.op.diskparams.items():
4042         if dt_name not in self.new_diskparams:
4043 self.new_diskparams[dt_name] = dt_params
4045 self.new_diskparams[dt_name].update(dt_params)
4047 # os hypervisor parameters
4048 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4050 for os_name, hvs in self.op.os_hvp.items():
4051 if os_name not in self.new_os_hvp:
4052 self.new_os_hvp[os_name] = hvs
4054 for hv_name, hv_dict in hvs.items():
4055 if hv_name not in self.new_os_hvp[os_name]:
4056 self.new_os_hvp[os_name][hv_name] = hv_dict
4058 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4061 self.new_osp = objects.FillDict(cluster.osparams, {})
4062 if self.op.osparams:
4063 for os_name, osp in self.op.osparams.items():
4064 if os_name not in self.new_osp:
4065 self.new_osp[os_name] = {}
4067 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4070 if not self.new_osp[os_name]:
4071 # we removed all parameters
4072 del self.new_osp[os_name]
4074 # check the parameter validity (remote check)
4075 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4076 os_name, self.new_osp[os_name])
4078 # changes to the hypervisor list
4079 if self.op.enabled_hypervisors is not None:
4080 self.hv_list = self.op.enabled_hypervisors
4081 for hv in self.hv_list:
4082 # if the hypervisor doesn't already exist in the cluster
4083 # hvparams, we initialize it to empty, and then (in both
4084 # cases) we make sure to fill the defaults, as we might not
4085         # have a complete defaults list if the hypervisor wasn't enabled before
4087 if hv not in new_hvp:
4089 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4090 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4092 self.hv_list = cluster.enabled_hypervisors
4094 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4095 # either the enabled list has changed, or the parameters have, validate
4096 for hv_name, hv_params in self.new_hvparams.items():
4097 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4098 (self.op.enabled_hypervisors and
4099 hv_name in self.op.enabled_hypervisors)):
4100 # either this is a new hypervisor, or its parameters have changed
4101 hv_class = hypervisor.GetHypervisor(hv_name)
4102 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4103 hv_class.CheckParameterSyntax(hv_params)
4104 _CheckHVParams(self, node_list, hv_name, hv_params)
4107 # no need to check any newly-enabled hypervisors, since the
4108 # defaults have already been checked in the above code-block
4109 for os_name, os_hvp in self.new_os_hvp.items():
4110 for hv_name, hv_params in os_hvp.items():
4111 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4112 # we need to fill in the new os_hvp on top of the actual hv_p
4113 cluster_defaults = self.new_hvparams.get(hv_name, {})
4114 new_osp = objects.FillDict(cluster_defaults, hv_params)
4115 hv_class = hypervisor.GetHypervisor(hv_name)
4116 hv_class.CheckParameterSyntax(new_osp)
4117 _CheckHVParams(self, node_list, hv_name, new_osp)
4119 if self.op.default_iallocator:
4120 alloc_script = utils.FindFile(self.op.default_iallocator,
4121 constants.IALLOCATOR_SEARCH_PATH,
4123 if alloc_script is None:
4124 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4125 " specified" % self.op.default_iallocator,
4128 def Exec(self, feedback_fn):
4129 """Change the parameters of the cluster.
4132 if self.op.vg_name is not None:
4133 new_volume = self.op.vg_name
4136 if new_volume != self.cfg.GetVGName():
4137 self.cfg.SetVGName(new_volume)
4139 feedback_fn("Cluster LVM configuration already in desired"
4140 " state, not changing")
4141 if self.op.drbd_helper is not None:
4142 new_helper = self.op.drbd_helper
4145 if new_helper != self.cfg.GetDRBDHelper():
4146 self.cfg.SetDRBDHelper(new_helper)
4148 feedback_fn("Cluster DRBD helper already in desired state,"
4150 if self.op.hvparams:
4151 self.cluster.hvparams = self.new_hvparams
4153 self.cluster.os_hvp = self.new_os_hvp
4154 if self.op.enabled_hypervisors is not None:
4155 self.cluster.hvparams = self.new_hvparams
4156 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4157 if self.op.beparams:
4158 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4159 if self.op.nicparams:
4160 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4162 self.cluster.ipolicy = self.new_ipolicy
4163 if self.op.osparams:
4164 self.cluster.osparams = self.new_osp
4165 if self.op.ndparams:
4166 self.cluster.ndparams = self.new_ndparams
4167 if self.op.diskparams:
4168 self.cluster.diskparams = self.new_diskparams
4169 if self.op.hv_state:
4170 self.cluster.hv_state_static = self.new_hv_state
4171 if self.op.disk_state:
4172 self.cluster.disk_state_static = self.new_disk_state
4174 if self.op.candidate_pool_size is not None:
4175 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4176 # we need to update the pool size here, otherwise the save will fail
4177 _AdjustCandidatePool(self, [])
4179 if self.op.maintain_node_health is not None:
4180 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4181 feedback_fn("Note: CONFD was disabled at build time, node health"
4182 " maintenance is not useful (still enabling it)")
4183 self.cluster.maintain_node_health = self.op.maintain_node_health
4185 if self.op.prealloc_wipe_disks is not None:
4186 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4188 if self.op.add_uids is not None:
4189 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4191 if self.op.remove_uids is not None:
4192 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4194 if self.op.uid_pool is not None:
4195 self.cluster.uid_pool = self.op.uid_pool
4197 if self.op.default_iallocator is not None:
4198 self.cluster.default_iallocator = self.op.default_iallocator
4200 if self.op.reserved_lvs is not None:
4201 self.cluster.reserved_lvs = self.op.reserved_lvs
4203 if self.op.use_external_mip_script is not None:
4204 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4206 def helper_os(aname, mods, desc):
4208 lst = getattr(self.cluster, aname)
4209 for key, val in mods:
4210 if key == constants.DDM_ADD:
4212 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4215 elif key == constants.DDM_REMOVE:
4219 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4221 raise errors.ProgrammerError("Invalid modification '%s'" % key)
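    # Illustrative sketch (editorial addition): "mods" is a list of
    # (action, os_name) pairs taken from the opcode, e.g. a hypothetical
    # request to hide one OS and unhide another would arrive here as
    #
    #   self.op.hidden_os = [(constants.DDM_ADD, "debian-image"),
    #                        (constants.DDM_REMOVE, "lenny-image")]
    #
    # (the OS names are invented for illustration)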
4223 if self.op.hidden_os:
4224 helper_os("hidden_os", self.op.hidden_os, "hidden")
4226 if self.op.blacklisted_os:
4227 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4229 if self.op.master_netdev:
4230 master_params = self.cfg.GetMasterNetworkParameters()
4231 ems = self.cfg.GetUseExternalMipScript()
4232 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4233 self.cluster.master_netdev)
4234 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4236 result.Raise("Could not disable the master ip")
4237 feedback_fn("Changing master_netdev from %s to %s" %
4238 (master_params.netdev, self.op.master_netdev))
4239 self.cluster.master_netdev = self.op.master_netdev
4241 if self.op.master_netmask:
4242 master_params = self.cfg.GetMasterNetworkParameters()
4243 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4244 result = self.rpc.call_node_change_master_netmask(master_params.name,
4245 master_params.netmask,
4246 self.op.master_netmask,
4248 master_params.netdev)
4250 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4253 self.cluster.master_netmask = self.op.master_netmask
4255 self.cfg.Update(self.cluster, feedback_fn)
4257 if self.op.master_netdev:
4258 master_params = self.cfg.GetMasterNetworkParameters()
4259 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4260 self.op.master_netdev)
4261 ems = self.cfg.GetUseExternalMipScript()
4262 result = self.rpc.call_node_activate_master_ip(master_params.name,
4265 self.LogWarning("Could not re-enable the master ip on"
4266 " the master, please restart manually: %s",
4270 def _UploadHelper(lu, nodes, fname):
4271 """Helper for uploading a file and showing warnings.
4274 if os.path.exists(fname):
4275 result = lu.rpc.call_upload_file(nodes, fname)
4276 for to_node, to_result in result.items():
4277 msg = to_result.fail_msg
4279 msg = ("Copy of file %s to node %s failed: %s" %
4280 (fname, to_node, msg))
4281 lu.proc.LogWarning(msg)
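# Illustrative sketch (editorial addition): a single well-known file can be
# pushed to a set of nodes with one call, as LUClusterRename does for the
# known_hosts file; per-node failures are only logged as warnings.
#
#   _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)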
4284 def _ComputeAncillaryFiles(cluster, redist):
4285 """Compute files external to Ganeti which need to be consistent.
4287 @type redist: boolean
4288 @param redist: Whether to include files which need to be redistributed
4291 # Compute files for all nodes
4293 constants.SSH_KNOWN_HOSTS_FILE,
4294 constants.CONFD_HMAC_KEY,
4295 constants.CLUSTER_DOMAIN_SECRET_FILE,
4296 constants.SPICE_CERT_FILE,
4297 constants.SPICE_CACERT_FILE,
4298 constants.RAPI_USERS_FILE,
4302 files_all.update(constants.ALL_CERT_FILES)
4303 files_all.update(ssconf.SimpleStore().GetFileList())
4305 # we need to ship at least the RAPI certificate
4306 files_all.add(constants.RAPI_CERT_FILE)
4308 if cluster.modify_etc_hosts:
4309 files_all.add(constants.ETC_HOSTS)
4311 # Files which are optional, these must:
4312 # - be present in one other category as well
4313 # - either exist or not exist on all nodes of that category (mc, vm all)
4315 constants.RAPI_USERS_FILE,
4318 # Files which should only be on master candidates
4322 files_mc.add(constants.CLUSTER_CONF_FILE)
4324   # FIXME: this should also be replicated but Ganeti doesn't support files_mc replication
4326 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4328 # Files which should only be on VM-capable nodes
4329 files_vm = set(filename
4330 for hv_name in cluster.enabled_hypervisors
4331 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4333 files_opt |= set(filename
4334 for hv_name in cluster.enabled_hypervisors
4335 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4337 # Filenames in each category must be unique
4338 all_files_set = files_all | files_mc | files_vm
4339 assert (len(all_files_set) ==
4340 sum(map(len, [files_all, files_mc, files_vm]))), \
4341 "Found file listed in more than one file list"
4343 # Optional files must be present in one other category
4344 assert all_files_set.issuperset(files_opt), \
4345 "Optional file not in a different required list"
4347 return (files_all, files_opt, files_mc, files_vm)
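# Illustrative sketch (editorial addition): callers unpack the four sets and
# decide per node category what to copy, as _RedistributeAncillaryFiles does
# below:
#
#   (files_all, files_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(cluster, True)
#   # files_all goes to every node, files_mc only to master candidates,
#   # files_vm only to VM-capable nodes; files_opt may legitimately be
#   # missing on some nodes of its category.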
4350 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4351 """Distribute additional files which are part of the cluster configuration.
4353 ConfigWriter takes care of distributing the config and ssconf files, but
4354 there are more files which should be distributed to all nodes. This function
4355 makes sure those are copied.
4357 @param lu: calling logical unit
4358 @param additional_nodes: list of nodes not in the config to distribute to
4359 @type additional_vm: boolean
4360 @param additional_vm: whether the additional nodes are vm-capable or not
4363 # Gather target nodes
4364 cluster = lu.cfg.GetClusterInfo()
4365 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4367 online_nodes = lu.cfg.GetOnlineNodeList()
4368 vm_nodes = lu.cfg.GetVmCapableNodeList()
4370 if additional_nodes is not None:
4371 online_nodes.extend(additional_nodes)
4373 vm_nodes.extend(additional_nodes)
4375 # Never distribute to master node
4376 for nodelist in [online_nodes, vm_nodes]:
4377 if master_info.name in nodelist:
4378 nodelist.remove(master_info.name)
4381 (files_all, _, files_mc, files_vm) = \
4382 _ComputeAncillaryFiles(cluster, True)
4384 # Never re-distribute configuration file from here
4385 assert not (constants.CLUSTER_CONF_FILE in files_all or
4386 constants.CLUSTER_CONF_FILE in files_vm)
4387 assert not files_mc, "Master candidates not handled in this function"
4390 (online_nodes, files_all),
4391 (vm_nodes, files_vm),
4395 for (node_list, files) in filemap:
4397 _UploadHelper(lu, node_list, fname)
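# Illustrative sketch (editorial addition): LUNodeAdd (further down) calls
# this helper with the node that is being added, which is not yet part of the
# configuration:
#
#   _RedistributeAncillaryFiles(self, additional_nodes=[node],
#                               additional_vm=self.op.vm_capable)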
4400 class LUClusterRedistConf(NoHooksLU):
4401 """Force the redistribution of cluster configuration.
4403 This is a very simple LU.
4408 def ExpandNames(self):
4409 self.needed_locks = {
4410 locking.LEVEL_NODE: locking.ALL_SET,
4412 self.share_locks[locking.LEVEL_NODE] = 1
4414 def Exec(self, feedback_fn):
4415 """Redistribute the configuration.
4418 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4419 _RedistributeAncillaryFiles(self)
4422 class LUClusterActivateMasterIp(NoHooksLU):
4423 """Activate the master IP on the master node.
4426 def Exec(self, feedback_fn):
4427 """Activate the master IP.
4430 master_params = self.cfg.GetMasterNetworkParameters()
4431 ems = self.cfg.GetUseExternalMipScript()
4432 result = self.rpc.call_node_activate_master_ip(master_params.name,
4434 result.Raise("Could not activate the master IP")
4437 class LUClusterDeactivateMasterIp(NoHooksLU):
4438 """Deactivate the master IP on the master node.
4441 def Exec(self, feedback_fn):
4442 """Deactivate the master IP.
4445 master_params = self.cfg.GetMasterNetworkParameters()
4446 ems = self.cfg.GetUseExternalMipScript()
4447 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4449 result.Raise("Could not deactivate the master IP")
4452 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4453   """Sleep and poll for an instance's disks to sync.
4456 if not instance.disks or disks is not None and not disks:
4459 disks = _ExpandCheckDisks(instance, disks)
4462 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4464 node = instance.primary_node
4467 lu.cfg.SetDiskID(dev, node)
4469 # TODO: Convert to utils.Retry
4472 degr_retries = 10 # in seconds, as we sleep 1 second each time
4476 cumul_degraded = False
4477 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4478 msg = rstats.fail_msg
4480 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4483 raise errors.RemoteError("Can't contact node %s for mirror data,"
4484 " aborting." % node)
4487 rstats = rstats.payload
4489 for i, mstat in enumerate(rstats):
4491 lu.LogWarning("Can't compute data for node %s/%s",
4492 node, disks[i].iv_name)
4495 cumul_degraded = (cumul_degraded or
4496 (mstat.is_degraded and mstat.sync_percent is None))
4497 if mstat.sync_percent is not None:
4499 if mstat.estimated_time is not None:
4500 rem_time = ("%s remaining (estimated)" %
4501 utils.FormatSeconds(mstat.estimated_time))
4502 max_time = mstat.estimated_time
4504 rem_time = "no time estimate"
4505 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4506 (disks[i].iv_name, mstat.sync_percent, rem_time))
4508 # if we're done but degraded, let's do a few small retries, to
4509     # make sure we see a stable and not a transient situation; therefore
4510     # we force a restart of the loop
4511 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4512 logging.info("Degraded disks found, %d retries left", degr_retries)
4520 time.sleep(min(60, max_time))
4523 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4524 return not cumul_degraded
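# Illustrative sketch (editorial addition): a typical caller waits for the
# mirrors to settle and treats a degraded end state as fatal; the exact error
# handling here is an assumption for illustration only.
#
#   if not _WaitForSync(self, instance):
#     raise errors.OpExecError("Disks of instance %s are degraded" %
#                              instance.name)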
4527 def _BlockdevFind(lu, node, dev, instance):
4528 """Wrapper around call_blockdev_find to annotate diskparams.
4530 @param lu: A reference to the lu object
4531   @param node: The node to call out to
4532 @param dev: The device to find
4533 @param instance: The instance object the device belongs to
4534   @returns: The result of the rpc call
4537 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4538 return lu.rpc.call_blockdev_find(node, disk)
4541 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4542 """Wrapper around L{_CheckDiskConsistencyInner}.
4545 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4546 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4550 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4552 """Check that mirrors are not degraded.
4554 @attention: The device has to be annotated already.
4556 The ldisk parameter, if True, will change the test from the
4557 is_degraded attribute (which represents overall non-ok status for
4558 the device(s)) to the ldisk (representing the local storage status).
4561 lu.cfg.SetDiskID(dev, node)
4565 if on_primary or dev.AssembleOnSecondary():
4566 rstats = lu.rpc.call_blockdev_find(node, dev)
4567 msg = rstats.fail_msg
4569 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4571 elif not rstats.payload:
4572 lu.LogWarning("Can't find disk on node %s", node)
4576 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4578 result = result and not rstats.payload.is_degraded
4581 for child in dev.children:
4582 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
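# Illustrative sketch (editorial addition): before acting on a secondary node
# a caller can require the mirror to be locally healthy there; target_node is
# a hypothetical variable and the error handling is an assumption.
#
#   if not _CheckDiskConsistency(self, instance, dev, target_node,
#                                False, ldisk=True):
#     raise errors.OpExecError("Disk %s is degraded on node %s" %
#                              (dev.iv_name, target_node))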
4588 class LUOobCommand(NoHooksLU):
4589 """Logical unit for OOB handling.
4593 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4595 def ExpandNames(self):
4596 """Gather locks we need.
4599 if self.op.node_names:
4600 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4601 lock_names = self.op.node_names
4603 lock_names = locking.ALL_SET
4605 self.needed_locks = {
4606 locking.LEVEL_NODE: lock_names,
4609 def CheckPrereq(self):
4610 """Check prerequisites.
4613 - the node exists in the configuration
4616 Any errors are signaled by raising errors.OpPrereqError.
4620 self.master_node = self.cfg.GetMasterNode()
4622 assert self.op.power_delay >= 0.0
4624 if self.op.node_names:
4625 if (self.op.command in self._SKIP_MASTER and
4626 self.master_node in self.op.node_names):
4627 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4628 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4630 if master_oob_handler:
4631 additional_text = ("run '%s %s %s' if you want to operate on the"
4632 " master regardless") % (master_oob_handler,
4636 additional_text = "it does not support out-of-band operations"
4638 raise errors.OpPrereqError(("Operating on the master node %s is not"
4639 " allowed for %s; %s") %
4640 (self.master_node, self.op.command,
4641 additional_text), errors.ECODE_INVAL)
4643 self.op.node_names = self.cfg.GetNodeList()
4644 if self.op.command in self._SKIP_MASTER:
4645 self.op.node_names.remove(self.master_node)
4647 if self.op.command in self._SKIP_MASTER:
4648 assert self.master_node not in self.op.node_names
4650 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4652 raise errors.OpPrereqError("Node %s not found" % node_name,
4655 self.nodes.append(node)
4657 if (not self.op.ignore_status and
4658 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4659 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4660 " not marked offline") % node_name,
4663 def Exec(self, feedback_fn):
4664 """Execute OOB and return result if we expect any.
4667 master_node = self.master_node
4670 for idx, node in enumerate(utils.NiceSort(self.nodes,
4671 key=lambda node: node.name)):
4672 node_entry = [(constants.RS_NORMAL, node.name)]
4673 ret.append(node_entry)
4675 oob_program = _SupportsOob(self.cfg, node)
4678 node_entry.append((constants.RS_UNAVAIL, None))
4681 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4682 self.op.command, oob_program, node.name)
4683 result = self.rpc.call_run_oob(master_node, oob_program,
4684 self.op.command, node.name,
4688 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4689 node.name, result.fail_msg)
4690 node_entry.append((constants.RS_NODATA, None))
4693 self._CheckPayload(result)
4694 except errors.OpExecError, err:
4695 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4697 node_entry.append((constants.RS_NODATA, None))
4699 if self.op.command == constants.OOB_HEALTH:
4700 # For health we should log important events
4701 for item, status in result.payload:
4702 if status in [constants.OOB_STATUS_WARNING,
4703 constants.OOB_STATUS_CRITICAL]:
4704 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4705 item, node.name, status)
4707 if self.op.command == constants.OOB_POWER_ON:
4709 elif self.op.command == constants.OOB_POWER_OFF:
4710 node.powered = False
4711 elif self.op.command == constants.OOB_POWER_STATUS:
4712 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4713 if powered != node.powered:
4714 logging.warning(("Recorded power state (%s) of node '%s' does not"
4715 " match actual power state (%s)"), node.powered,
4718 # For configuration changing commands we should update the node
4719 if self.op.command in (constants.OOB_POWER_ON,
4720 constants.OOB_POWER_OFF):
4721 self.cfg.Update(node, feedback_fn)
4723 node_entry.append((constants.RS_NORMAL, result.payload))
4725 if (self.op.command == constants.OOB_POWER_ON and
4726 idx < len(self.nodes) - 1):
4727 time.sleep(self.op.power_delay)
4731 def _CheckPayload(self, result):
4732 """Checks if the payload is valid.
4734 @param result: RPC result
4735 @raises errors.OpExecError: If payload is not valid
4739 if self.op.command == constants.OOB_HEALTH:
4740 if not isinstance(result.payload, list):
4741 errs.append("command 'health' is expected to return a list but got %s" %
4742 type(result.payload))
4744 for item, status in result.payload:
4745 if status not in constants.OOB_STATUSES:
4746 errs.append("health item '%s' has invalid status '%s'" %
4749 if self.op.command == constants.OOB_POWER_STATUS:
4750 if not isinstance(result.payload, dict):
4751 errs.append("power-status is expected to return a dict but got %s" %
4752 type(result.payload))
4754 if self.op.command in [
4755 constants.OOB_POWER_ON,
4756 constants.OOB_POWER_OFF,
4757 constants.OOB_POWER_CYCLE,
4759 if result.payload is not None:
4760 errs.append("%s is expected to not return payload but got '%s'" %
4761 (self.op.command, result.payload))
4764 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4765 utils.CommaJoin(errs))
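  # Illustrative sketch (editorial addition) of the payload shapes the check
  # above accepts, summarised from the branches it implements:
  #
  #   constants.OOB_HEALTH             -> list of (item, status) pairs
  #   constants.OOB_POWER_STATUS       -> dict containing
  #                                       constants.OOB_POWER_STATUS_POWERED
  #   constants.OOB_POWER_ON/OFF/CYCLE -> no payload (None)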
4768 class _OsQuery(_QueryBase):
4769 FIELDS = query.OS_FIELDS
4771 def ExpandNames(self, lu):
4772 # Lock all nodes in shared mode
4773 # Temporary removal of locks, should be reverted later
4774 # TODO: reintroduce locks when they are lighter-weight
4775 lu.needed_locks = {}
4776 #self.share_locks[locking.LEVEL_NODE] = 1
4777 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4779 # The following variables interact with _QueryBase._GetNames
4781 self.wanted = self.names
4783 self.wanted = locking.ALL_SET
4785 self.do_locking = self.use_locking
4787 def DeclareLocks(self, lu, level):
4791 def _DiagnoseByOS(rlist):
4792     """Remaps a per-node return list into a per-os per-node dictionary
4794 @param rlist: a map with node names as keys and OS objects as values
4797 @return: a dictionary with osnames as keys and as value another
4798 map, with nodes as keys and tuples of (path, status, diagnose,
4799 variants, parameters, api_versions) as values, eg::
4801 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4802 (/srv/..., False, "invalid api")],
4803 "node2": [(/srv/..., True, "", [], [])]}
4808 # we build here the list of nodes that didn't fail the RPC (at RPC
4809 # level), so that nodes with a non-responding node daemon don't
4810 # make all OSes invalid
4811 good_nodes = [node_name for node_name in rlist
4812 if not rlist[node_name].fail_msg]
4813 for node_name, nr in rlist.items():
4814 if nr.fail_msg or not nr.payload:
4816 for (name, path, status, diagnose, variants,
4817 params, api_versions) in nr.payload:
4818 if name not in all_os:
4819 # build a list of nodes for this os containing empty lists
4820 # for each node in node_list
4822 for nname in good_nodes:
4823 all_os[name][nname] = []
4824 # convert params from [name, help] to (name, help)
4825 params = [tuple(v) for v in params]
4826 all_os[name][node_name].append((path, status, diagnose,
4827 variants, params, api_versions))
4830 def _GetQueryData(self, lu):
4831     """Computes the list of OSes and their attributes.
4834 # Locking is not used
4835 assert not (compat.any(lu.glm.is_owned(level)
4836 for level in locking.LEVELS
4837 if level != locking.LEVEL_CLUSTER) or
4838 self.do_locking or self.use_locking)
4840 valid_nodes = [node.name
4841 for node in lu.cfg.GetAllNodesInfo().values()
4842 if not node.offline and node.vm_capable]
4843 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4844 cluster = lu.cfg.GetClusterInfo()
4848 for (os_name, os_data) in pol.items():
4849 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4850 hidden=(os_name in cluster.hidden_os),
4851 blacklisted=(os_name in cluster.blacklisted_os))
4855 api_versions = set()
4857 for idx, osl in enumerate(os_data.values()):
4858 info.valid = bool(info.valid and osl and osl[0][1])
4862 (node_variants, node_params, node_api) = osl[0][3:6]
4865 variants.update(node_variants)
4866 parameters.update(node_params)
4867 api_versions.update(node_api)
4869 # Filter out inconsistent values
4870 variants.intersection_update(node_variants)
4871 parameters.intersection_update(node_params)
4872 api_versions.intersection_update(node_api)
4874 info.variants = list(variants)
4875 info.parameters = list(parameters)
4876 info.api_versions = list(api_versions)
4878 data[os_name] = info
4880 # Prepare data in requested order
4881 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4885 class LUOsDiagnose(NoHooksLU):
4886 """Logical unit for OS diagnose/query.
4892 def _BuildFilter(fields, names):
4893 """Builds a filter for querying OSes.
4896 name_filter = qlang.MakeSimpleFilter("name", names)
4898 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4899 # respective field is not requested
4900 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4901 for fname in ["hidden", "blacklisted"]
4902 if fname not in fields]
4903 if "valid" not in fields:
4904 status_filter.append([qlang.OP_TRUE, "valid"])
4907 status_filter.insert(0, qlang.OP_AND)
4909 status_filter = None
4911 if name_filter and status_filter:
4912 return [qlang.OP_AND, name_filter, status_filter]
4916 return status_filter
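  # Illustrative sketch (editorial addition): when neither "hidden",
  # "blacklisted" nor "valid" is among the requested fields, the status part
  # of the filter built above is
  #
  #   [qlang.OP_AND,
  #    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
  #    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
  #    [qlang.OP_TRUE, "valid"]]
  #
  # and it is AND-ed with the name filter whenever OS names were given.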
4918 def CheckArguments(self):
4919 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4920 self.op.output_fields, False)
4922 def ExpandNames(self):
4923 self.oq.ExpandNames(self)
4925 def Exec(self, feedback_fn):
4926 return self.oq.OldStyleQuery(self)
4929 class LUNodeRemove(LogicalUnit):
4930 """Logical unit for removing a node.
4933 HPATH = "node-remove"
4934 HTYPE = constants.HTYPE_NODE
4936 def BuildHooksEnv(self):
4941 "OP_TARGET": self.op.node_name,
4942 "NODE_NAME": self.op.node_name,
4945 def BuildHooksNodes(self):
4946 """Build hooks nodes.
4948 This doesn't run on the target node in the pre phase as a failed
4949 node would then be impossible to remove.
4952 all_nodes = self.cfg.GetNodeList()
4954 all_nodes.remove(self.op.node_name)
4957 return (all_nodes, all_nodes)
4959 def CheckPrereq(self):
4960 """Check prerequisites.
4963 - the node exists in the configuration
4964 - it does not have primary or secondary instances
4965 - it's not the master
4967 Any errors are signaled by raising errors.OpPrereqError.
4970 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4971 node = self.cfg.GetNodeInfo(self.op.node_name)
4972 assert node is not None
4974 masternode = self.cfg.GetMasterNode()
4975 if node.name == masternode:
4976 raise errors.OpPrereqError("Node is the master node, failover to another"
4977 " node is required", errors.ECODE_INVAL)
4979 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4980 if node.name in instance.all_nodes:
4981 raise errors.OpPrereqError("Instance %s is still running on the node,"
4982                                    " please remove it first" % instance_name,
4984 self.op.node_name = node.name
4987 def Exec(self, feedback_fn):
4988 """Removes the node from the cluster.
4992 logging.info("Stopping the node daemon and removing configs from node %s",
4995 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4997 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5000 # Promote nodes to master candidate as needed
5001 _AdjustCandidatePool(self, exceptions=[node.name])
5002 self.context.RemoveNode(node.name)
5004 # Run post hooks on the node before it's removed
5005 _RunPostHook(self, node.name)
5007 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5008 msg = result.fail_msg
5010 self.LogWarning("Errors encountered on the remote node while leaving"
5011 " the cluster: %s", msg)
5013 # Remove node from our /etc/hosts
5014 if self.cfg.GetClusterInfo().modify_etc_hosts:
5015 master_node = self.cfg.GetMasterNode()
5016 result = self.rpc.call_etc_hosts_modify(master_node,
5017 constants.ETC_HOSTS_REMOVE,
5019 result.Raise("Can't update hosts file with new host data")
5020 _RedistributeAncillaryFiles(self)
5023 class _NodeQuery(_QueryBase):
5024 FIELDS = query.NODE_FIELDS
5026 def ExpandNames(self, lu):
5027 lu.needed_locks = {}
5028 lu.share_locks = _ShareAll()
5031 self.wanted = _GetWantedNodes(lu, self.names)
5033 self.wanted = locking.ALL_SET
5035 self.do_locking = (self.use_locking and
5036 query.NQ_LIVE in self.requested_data)
5039 # If any non-static field is requested we need to lock the nodes
5040 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5042 def DeclareLocks(self, lu, level):
5045 def _GetQueryData(self, lu):
5046 """Computes the list of nodes and their attributes.
5049 all_info = lu.cfg.GetAllNodesInfo()
5051 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5053 # Gather data as requested
5054 if query.NQ_LIVE in self.requested_data:
5055 # filter out non-vm_capable nodes
5056 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5058 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5059 [lu.cfg.GetHypervisorType()])
5060 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5061 for (name, nresult) in node_data.items()
5062 if not nresult.fail_msg and nresult.payload)
5066 if query.NQ_INST in self.requested_data:
5067 node_to_primary = dict([(name, set()) for name in nodenames])
5068 node_to_secondary = dict([(name, set()) for name in nodenames])
5070 inst_data = lu.cfg.GetAllInstancesInfo()
5072 for inst in inst_data.values():
5073 if inst.primary_node in node_to_primary:
5074 node_to_primary[inst.primary_node].add(inst.name)
5075 for secnode in inst.secondary_nodes:
5076 if secnode in node_to_secondary:
5077 node_to_secondary[secnode].add(inst.name)
5079 node_to_primary = None
5080 node_to_secondary = None
5082 if query.NQ_OOB in self.requested_data:
5083 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5084 for name, node in all_info.iteritems())
5088 if query.NQ_GROUP in self.requested_data:
5089 groups = lu.cfg.GetAllNodeGroupsInfo()
5093 return query.NodeQueryData([all_info[name] for name in nodenames],
5094 live_data, lu.cfg.GetMasterNode(),
5095 node_to_primary, node_to_secondary, groups,
5096 oob_support, lu.cfg.GetClusterInfo())
5099 class LUNodeQuery(NoHooksLU):
5100 """Logical unit for querying nodes.
5103 # pylint: disable=W0142
5106 def CheckArguments(self):
5107 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5108 self.op.output_fields, self.op.use_locking)
5110 def ExpandNames(self):
5111 self.nq.ExpandNames(self)
5113 def DeclareLocks(self, level):
5114 self.nq.DeclareLocks(self, level)
5116 def Exec(self, feedback_fn):
5117 return self.nq.OldStyleQuery(self)
5120 class LUNodeQueryvols(NoHooksLU):
5121 """Logical unit for getting volumes on node(s).
5125 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5126 _FIELDS_STATIC = utils.FieldSet("node")
5128 def CheckArguments(self):
5129 _CheckOutputFields(static=self._FIELDS_STATIC,
5130 dynamic=self._FIELDS_DYNAMIC,
5131 selected=self.op.output_fields)
5133 def ExpandNames(self):
5134 self.share_locks = _ShareAll()
5135 self.needed_locks = {}
5137 if not self.op.nodes:
5138 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5140 self.needed_locks[locking.LEVEL_NODE] = \
5141 _GetWantedNodes(self, self.op.nodes)
5143 def Exec(self, feedback_fn):
5144     """Computes the list of volumes and their attributes.
5147 nodenames = self.owned_locks(locking.LEVEL_NODE)
5148 volumes = self.rpc.call_node_volumes(nodenames)
5150 ilist = self.cfg.GetAllInstancesInfo()
5151 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5154 for node in nodenames:
5155 nresult = volumes[node]
5158 msg = nresult.fail_msg
5160 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5163 node_vols = sorted(nresult.payload,
5164 key=operator.itemgetter("dev"))
5166 for vol in node_vols:
5168 for field in self.op.output_fields:
5171 elif field == "phys":
5175 elif field == "name":
5177 elif field == "size":
5178 val = int(float(vol["size"]))
5179 elif field == "instance":
5180 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5182 raise errors.ParameterError(field)
5183 node_output.append(str(val))
5185 output.append(node_output)
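      # Illustrative sketch (editorial addition, values invented): with
      # output_fields = ["node", "name", "size", "instance"] one result row
      # could look like
      #
      #   ["node1.example.com", "disk0", "10240", "instance1.example.com"]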
5190 class LUNodeQueryStorage(NoHooksLU):
5191 """Logical unit for getting information on storage units on node(s).
5194 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5197 def CheckArguments(self):
5198 _CheckOutputFields(static=self._FIELDS_STATIC,
5199 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5200 selected=self.op.output_fields)
5202 def ExpandNames(self):
5203 self.share_locks = _ShareAll()
5204 self.needed_locks = {}
5207 self.needed_locks[locking.LEVEL_NODE] = \
5208 _GetWantedNodes(self, self.op.nodes)
5210 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5212 def Exec(self, feedback_fn):
5213     """Computes the list of storage units and their attributes.
5216 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5218 # Always get name to sort by
5219 if constants.SF_NAME in self.op.output_fields:
5220 fields = self.op.output_fields[:]
5222 fields = [constants.SF_NAME] + self.op.output_fields
5224 # Never ask for node or type as it's only known to the LU
5225 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5226 while extra in fields:
5227 fields.remove(extra)
5229 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5230 name_idx = field_idx[constants.SF_NAME]
5232 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5233 data = self.rpc.call_storage_list(self.nodes,
5234 self.op.storage_type, st_args,
5235 self.op.name, fields)
5239 for node in utils.NiceSort(self.nodes):
5240 nresult = data[node]
5244 msg = nresult.fail_msg
5246 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5249 rows = dict([(row[name_idx], row) for row in nresult.payload])
5251 for name in utils.NiceSort(rows.keys()):
5256 for field in self.op.output_fields:
5257 if field == constants.SF_NODE:
5259 elif field == constants.SF_TYPE:
5260 val = self.op.storage_type
5261 elif field in field_idx:
5262 val = row[field_idx[field]]
5264 raise errors.ParameterError(field)
5273 class _InstanceQuery(_QueryBase):
5274 FIELDS = query.INSTANCE_FIELDS
5276 def ExpandNames(self, lu):
5277 lu.needed_locks = {}
5278 lu.share_locks = _ShareAll()
5281 self.wanted = _GetWantedInstances(lu, self.names)
5283 self.wanted = locking.ALL_SET
5285 self.do_locking = (self.use_locking and
5286 query.IQ_LIVE in self.requested_data)
5288 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5289 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5290 lu.needed_locks[locking.LEVEL_NODE] = []
5291 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5293 self.do_grouplocks = (self.do_locking and
5294 query.IQ_NODES in self.requested_data)
5296 def DeclareLocks(self, lu, level):
5298 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5299 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5301 # Lock all groups used by instances optimistically; this requires going
5302 # via the node before it's locked, requiring verification later on
5303 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5305 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5306 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5307 elif level == locking.LEVEL_NODE:
5308 lu._LockInstancesNodes() # pylint: disable=W0212
5311 def _CheckGroupLocks(lu):
5312 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5313 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5315 # Check if node groups for locked instances are still correct
5316 for instance_name in owned_instances:
5317 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5319 def _GetQueryData(self, lu):
5320 """Computes the list of instances and their attributes.
5323 if self.do_grouplocks:
5324 self._CheckGroupLocks(lu)
5326 cluster = lu.cfg.GetClusterInfo()
5327 all_info = lu.cfg.GetAllInstancesInfo()
5329 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5331 instance_list = [all_info[name] for name in instance_names]
5332 nodes = frozenset(itertools.chain(*(inst.all_nodes
5333 for inst in instance_list)))
5334 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5337 wrongnode_inst = set()
5339 # Gather data as requested
5340 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5342 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5344 result = node_data[name]
5346 # offline nodes will be in both lists
5347 assert result.fail_msg
5348 offline_nodes.append(name)
5350 bad_nodes.append(name)
5351 elif result.payload:
5352 for inst in result.payload:
5353 if inst in all_info:
5354 if all_info[inst].primary_node == name:
5355 live_data.update(result.payload)
5357 wrongnode_inst.add(inst)
5359 # orphan instance; we don't list it here as we don't
5360 # handle this case yet in the output of instance listing
5361 logging.warning("Orphan instance '%s' found on node %s",
5363 # else no instance is alive
5367 if query.IQ_DISKUSAGE in self.requested_data:
5368 disk_usage = dict((inst.name,
5369 _ComputeDiskSize(inst.disk_template,
5370 [{constants.IDISK_SIZE: disk.size}
5371 for disk in inst.disks]))
5372 for inst in instance_list)
5376 if query.IQ_CONSOLE in self.requested_data:
5378 for inst in instance_list:
5379 if inst.name in live_data:
5380 # Instance is running
5381 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5383 consinfo[inst.name] = None
5384 assert set(consinfo.keys()) == set(instance_names)
5388 if query.IQ_NODES in self.requested_data:
5389 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5391 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5392 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5393 for uuid in set(map(operator.attrgetter("group"),
5399 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5400 disk_usage, offline_nodes, bad_nodes,
5401 live_data, wrongnode_inst, consinfo,
5405 class LUQuery(NoHooksLU):
5406 """Query for resources/items of a certain kind.
5409 # pylint: disable=W0142
5412 def CheckArguments(self):
5413 qcls = _GetQueryImplementation(self.op.what)
5415 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5417 def ExpandNames(self):
5418 self.impl.ExpandNames(self)
5420 def DeclareLocks(self, level):
5421 self.impl.DeclareLocks(self, level)
5423 def Exec(self, feedback_fn):
5424 return self.impl.NewStyleQuery(self)
5427 class LUQueryFields(NoHooksLU):
5428 """Query for resources/items of a certain kind.
5431 # pylint: disable=W0142
5434 def CheckArguments(self):
5435 self.qcls = _GetQueryImplementation(self.op.what)
5437 def ExpandNames(self):
5438 self.needed_locks = {}
5440 def Exec(self, feedback_fn):
5441 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5444 class LUNodeModifyStorage(NoHooksLU):
5445 """Logical unit for modifying a storage volume on a node.
5450 def CheckArguments(self):
5451 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5453 storage_type = self.op.storage_type
5456 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5458 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5459 " modified" % storage_type,
5462 diff = set(self.op.changes.keys()) - modifiable
5464 raise errors.OpPrereqError("The following fields can not be modified for"
5465 " storage units of type '%s': %r" %
5466 (storage_type, list(diff)),
5469 def ExpandNames(self):
5470 self.needed_locks = {
5471 locking.LEVEL_NODE: self.op.node_name,
5474 def Exec(self, feedback_fn):
5475     """Modifies a storage unit on the given node.
5478 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5479 result = self.rpc.call_storage_modify(self.op.node_name,
5480 self.op.storage_type, st_args,
5481 self.op.name, self.op.changes)
5482 result.Raise("Failed to modify storage unit '%s' on %s" %
5483 (self.op.name, self.op.node_name))
5486 class LUNodeAdd(LogicalUnit):
5487   """Logical unit for adding a node to the cluster.
5491 HTYPE = constants.HTYPE_NODE
5492 _NFLAGS = ["master_capable", "vm_capable"]
5494 def CheckArguments(self):
5495 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5496 # validate/normalize the node name
5497 self.hostname = netutils.GetHostname(name=self.op.node_name,
5498 family=self.primary_ip_family)
5499 self.op.node_name = self.hostname.name
5501 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5502 raise errors.OpPrereqError("Cannot readd the master node",
5505 if self.op.readd and self.op.group:
5506 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5507 " being readded", errors.ECODE_INVAL)
5509 def BuildHooksEnv(self):
5512 This will run on all nodes before, and on all nodes + the new node after.
5516 "OP_TARGET": self.op.node_name,
5517 "NODE_NAME": self.op.node_name,
5518 "NODE_PIP": self.op.primary_ip,
5519 "NODE_SIP": self.op.secondary_ip,
5520 "MASTER_CAPABLE": str(self.op.master_capable),
5521 "VM_CAPABLE": str(self.op.vm_capable),
5524 def BuildHooksNodes(self):
5525 """Build hooks nodes.
5528 # Exclude added node
5529 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5530 post_nodes = pre_nodes + [self.op.node_name, ]
5532 return (pre_nodes, post_nodes)
5534 def CheckPrereq(self):
5535 """Check prerequisites.
5538 - the new node is not already in the config
5540     - its parameters (single/dual homed) match the cluster
5542 Any errors are signaled by raising errors.OpPrereqError.
5546 hostname = self.hostname
5547 node = hostname.name
5548 primary_ip = self.op.primary_ip = hostname.ip
5549 if self.op.secondary_ip is None:
5550 if self.primary_ip_family == netutils.IP6Address.family:
5551         raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5552 " IPv4 address must be given as secondary",
5554 self.op.secondary_ip = primary_ip
5556 secondary_ip = self.op.secondary_ip
5557 if not netutils.IP4Address.IsValid(secondary_ip):
5558 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5559 " address" % secondary_ip, errors.ECODE_INVAL)
5561 node_list = cfg.GetNodeList()
5562 if not self.op.readd and node in node_list:
5563 raise errors.OpPrereqError("Node %s is already in the configuration" %
5564 node, errors.ECODE_EXISTS)
5565 elif self.op.readd and node not in node_list:
5566 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5569 self.changed_primary_ip = False
5571 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5572 if self.op.readd and node == existing_node_name:
5573 if existing_node.secondary_ip != secondary_ip:
5574 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5575 " address configuration as before",
5577 if existing_node.primary_ip != primary_ip:
5578 self.changed_primary_ip = True
5582 if (existing_node.primary_ip == primary_ip or
5583 existing_node.secondary_ip == primary_ip or
5584 existing_node.primary_ip == secondary_ip or
5585 existing_node.secondary_ip == secondary_ip):
5586 raise errors.OpPrereqError("New node ip address(es) conflict with"
5587 " existing node %s" % existing_node.name,
5588 errors.ECODE_NOTUNIQUE)
5590 # After this 'if' block, None is no longer a valid value for the
5591 # _capable op attributes
5593 old_node = self.cfg.GetNodeInfo(node)
5594 assert old_node is not None, "Can't retrieve locked node %s" % node
5595 for attr in self._NFLAGS:
5596 if getattr(self.op, attr) is None:
5597 setattr(self.op, attr, getattr(old_node, attr))
5599 for attr in self._NFLAGS:
5600 if getattr(self.op, attr) is None:
5601 setattr(self.op, attr, True)
5603 if self.op.readd and not self.op.vm_capable:
5604 pri, sec = cfg.GetNodeInstances(node)
5606 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5607 " flag set to false, but it already holds"
5608 " instances" % node,
5611 # check that the type of the node (single versus dual homed) is the
5612 # same as for the master
5613 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5614 master_singlehomed = myself.secondary_ip == myself.primary_ip
5615 newbie_singlehomed = secondary_ip == primary_ip
5616 if master_singlehomed != newbie_singlehomed:
5617 if master_singlehomed:
5618 raise errors.OpPrereqError("The master has no secondary ip but the"
5619 " new node has one",
5622 raise errors.OpPrereqError("The master has a secondary ip but the"
5623 " new node doesn't have one",
5626 # checks reachability
5627 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5628 raise errors.OpPrereqError("Node not reachable by ping",
5629 errors.ECODE_ENVIRON)
5631 if not newbie_singlehomed:
5632 # check reachability from my secondary ip to newbie's secondary ip
5633 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5634 source=myself.secondary_ip):
5635 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5636 " based ping to node daemon port",
5637 errors.ECODE_ENVIRON)
5644 if self.op.master_capable:
5645 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5647 self.master_candidate = False
5650 self.new_node = old_node
5652 node_group = cfg.LookupNodeGroup(self.op.group)
5653 self.new_node = objects.Node(name=node,
5654 primary_ip=primary_ip,
5655 secondary_ip=secondary_ip,
5656 master_candidate=self.master_candidate,
5657 offline=False, drained=False,
5660 if self.op.ndparams:
5661 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5663 if self.op.hv_state:
5664 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5666 if self.op.disk_state:
5667 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5669 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5670 # it a property on the base class.
5671 result = rpc.DnsOnlyRunner().call_version([node])[node]
5672 result.Raise("Can't get version information from node %s" % node)
5673 if constants.PROTOCOL_VERSION == result.payload:
5674 logging.info("Communication to node %s fine, sw version %s match",
5675 node, result.payload)
5676 else:
5677 raise errors.OpPrereqError("Version mismatch master version %s,"
5678 " node version %s" %
5679 (constants.PROTOCOL_VERSION, result.payload),
5680 errors.ECODE_ENVIRON)
5682 def Exec(self, feedback_fn):
5683 """Adds the new node to the cluster.
5686 new_node = self.new_node
5687 node = new_node.name
5689 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5692 # We are adding a new node, so we assume it is powered
5693 new_node.powered = True
5695 # for re-adds, reset the offline/drained/master-candidate flags;
5696 # we need to reset here, otherwise offline would prevent RPC calls
5697 # later in the procedure; this also means that if the re-add
5698 # fails, we are left with a non-offlined, broken node
5699 if self.op.readd:
5700 new_node.drained = new_node.offline = False # pylint: disable=W0201
5701 self.LogInfo("Readding a node, the offline/drained flags were reset")
5702 # if we demote the node, we do cleanup later in the procedure
5703 new_node.master_candidate = self.master_candidate
5704 if self.changed_primary_ip:
5705 new_node.primary_ip = self.op.primary_ip
5707 # copy the master/vm_capable flags
5708 for attr in self._NFLAGS:
5709 setattr(new_node, attr, getattr(self.op, attr))
5711 # notify the user about any possible mc promotion
5712 if new_node.master_candidate:
5713 self.LogInfo("Node will be a master candidate")
5715 if self.op.ndparams:
5716 new_node.ndparams = self.op.ndparams
5718 new_node.ndparams = {}
5720 if self.op.hv_state:
5721 new_node.hv_state_static = self.new_hv_state
5723 if self.op.disk_state:
5724 new_node.disk_state_static = self.new_disk_state
5726 # Add node to our /etc/hosts, and add key to known_hosts
5727 if self.cfg.GetClusterInfo().modify_etc_hosts:
5728 master_node = self.cfg.GetMasterNode()
5729 result = self.rpc.call_etc_hosts_modify(master_node,
5730 constants.ETC_HOSTS_ADD,
5733 result.Raise("Can't update hosts file with new host data")
5735 if new_node.secondary_ip != new_node.primary_ip:
5736 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5739 node_verify_list = [self.cfg.GetMasterNode()]
5740 node_verify_param = {
5741 constants.NV_NODELIST: ([node], {}),
5742 # TODO: do a node-net-test as well?
5743 }
5745 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5746 self.cfg.GetClusterName())
5747 for verifier in node_verify_list:
5748 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5749 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5750 if nl_payload:
5751 for failed in nl_payload:
5752 feedback_fn("ssh/hostname verification failed"
5753 " (checking from %s): %s" %
5754 (verifier, nl_payload[failed]))
5755 raise errors.OpExecError("ssh/hostname verification failed")
5757 if self.op.readd:
5758 _RedistributeAncillaryFiles(self)
5759 self.context.ReaddNode(new_node)
5760 # make sure we redistribute the config
5761 self.cfg.Update(new_node, feedback_fn)
5762 # and make sure the new node will not have old files around
5763 if not new_node.master_candidate:
5764 result = self.rpc.call_node_demote_from_mc(new_node.name)
5765 msg = result.fail_msg
5767 self.LogWarning("Node failed to demote itself from master"
5768 " candidate status: %s" % msg)
5769 else:
5770 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5771 additional_vm=self.op.vm_capable)
5772 self.context.AddNode(new_node, self.proc.GetECId())
5775 class LUNodeSetParams(LogicalUnit):
5776 """Modifies the parameters of a node.
5778 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5779 to the node role (as _ROLE_*)
5780 @cvar _R2F: a dictionary from node role to tuples of flags
5781 @cvar _FLAGS: a list of attribute names corresponding to the flags
5784 HPATH = "node-modify"
5785 HTYPE = constants.HTYPE_NODE
5787 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5789 (True, False, False): _ROLE_CANDIDATE,
5790 (False, True, False): _ROLE_DRAINED,
5791 (False, False, True): _ROLE_OFFLINE,
5792 (False, False, False): _ROLE_REGULAR,
5794 _R2F = dict((v, k) for k, v in _F2R.items())
5795 _FLAGS = ["master_candidate", "drained", "offline"]
5797 def CheckArguments(self):
5798 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5799 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5800 self.op.master_capable, self.op.vm_capable,
5801 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5802 self.op.disk_state]
5803 if all_mods.count(None) == len(all_mods):
5804 raise errors.OpPrereqError("Please pass at least one modification",
5806 if all_mods.count(True) > 1:
5807 raise errors.OpPrereqError("Can't set the node into more than one"
5808 " state at the same time",
5811 # Boolean value that tells us whether we might be demoting from MC
5812 self.might_demote = (self.op.master_candidate == False or
5813 self.op.offline == True or
5814 self.op.drained == True or
5815 self.op.master_capable == False)
5817 if self.op.secondary_ip:
5818 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5819 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5820 " address" % self.op.secondary_ip,
5823 self.lock_all = self.op.auto_promote and self.might_demote
5824 self.lock_instances = self.op.secondary_ip is not None
5826 def _InstanceFilter(self, instance):
5827 """Filter for getting affected instances.
5830 return (instance.disk_template in constants.DTS_INT_MIRROR and
5831 self.op.node_name in instance.all_nodes)
5833 def ExpandNames(self):
5835 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5837 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5839 # Since modifying a node can have severe effects on currently running
5840 # operations, the resource lock is at least acquired in shared mode
5841 self.needed_locks[locking.LEVEL_NODE_RES] = \
5842 self.needed_locks[locking.LEVEL_NODE]
5844 # Get node resource and instance locks in shared mode; they are not used
5845 # for anything but read-only access
5846 self.share_locks[locking.LEVEL_NODE_RES] = 1
5847 self.share_locks[locking.LEVEL_INSTANCE] = 1
5849 if self.lock_instances:
5850 self.needed_locks[locking.LEVEL_INSTANCE] = \
5851 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
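# Editorial note: the instance locks above are only needed when the secondary
# IP is being changed; _InstanceFilter then restricts them to instances that
# use internally mirrored storage (DTS_INT_MIRROR, e.g. DRBD) and have this
# node among their nodes, since those are the instances whose replication
# traffic depends on the secondary IP.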
5853 def BuildHooksEnv(self):
5856 This runs on the master node.
5860 "OP_TARGET": self.op.node_name,
5861 "MASTER_CANDIDATE": str(self.op.master_candidate),
5862 "OFFLINE": str(self.op.offline),
5863 "DRAINED": str(self.op.drained),
5864 "MASTER_CAPABLE": str(self.op.master_capable),
5865 "VM_CAPABLE": str(self.op.vm_capable),
5868 def BuildHooksNodes(self):
5869 """Build hooks nodes.
5872 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5875 def CheckPrereq(self):
5876 """Check prerequisites.
5878 This only checks the instance list against the existing names.
5881 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5883 if self.lock_instances:
5884 affected_instances = \
5885 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5887 # Verify instance locks
5888 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5889 wanted_instances = frozenset(affected_instances.keys())
5890 if wanted_instances - owned_instances:
5891 raise errors.OpPrereqError("Instances affected by changing node %s's"
5892 " secondary IP address have changed since"
5893 " locks were acquired, wanted '%s', have"
5894 " '%s'; retry the operation" %
5895 (self.op.node_name,
5896 utils.CommaJoin(wanted_instances),
5897 utils.CommaJoin(owned_instances)),
5899 else:
5900 affected_instances = None
5902 if (self.op.master_candidate is not None or
5903 self.op.drained is not None or
5904 self.op.offline is not None):
5905 # we can't change the master's node flags
5906 if self.op.node_name == self.cfg.GetMasterNode():
5907 raise errors.OpPrereqError("The master role can be changed"
5908 " only via master-failover",
5911 if self.op.master_candidate and not node.master_capable:
5912 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5913 " it a master candidate" % node.name,
5916 if self.op.vm_capable == False:
5917 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5919 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5920 " the vm_capable flag" % node.name,
5923 if node.master_candidate and self.might_demote and not self.lock_all:
5924 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5925 # check if after removing the current node, we're missing master
5926 # candidates
5927 (mc_remaining, mc_should, _) = \
5928 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5929 if mc_remaining < mc_should:
5930 raise errors.OpPrereqError("Not enough master candidates, please"
5931 " pass auto promote option to allow"
5932 " promotion", errors.ECODE_STATE)
5934 self.old_flags = old_flags = (node.master_candidate,
5935 node.drained, node.offline)
5936 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5937 self.old_role = old_role = self._F2R[old_flags]
5939 # Check for ineffective changes
5940 for attr in self._FLAGS:
5941 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5942 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5943 setattr(self.op, attr, None)
5945 # Past this point, any flag change to False means a transition
5946 # away from the respective state, as only real changes are kept
5948 # TODO: We might query the real power state if it supports OOB
5949 if _SupportsOob(self.cfg, node):
5950 if self.op.offline is False and not (node.powered or
5951 self.op.powered == True):
5952 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5953 " offline status can be reset") %
5955 elif self.op.powered is not None:
5956 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5957 " as it does not support out-of-band"
5958 " handling") % self.op.node_name)
5960 # If we're being deofflined/drained, we'll MC ourself if needed
5961 if (self.op.drained == False or self.op.offline == False or
5962 (self.op.master_capable and not node.master_capable)):
5963 if _DecideSelfPromotion(self):
5964 self.op.master_candidate = True
5965 self.LogInfo("Auto-promoting node to master candidate")
5967 # If we're no longer master capable, we'll demote ourselves from MC
5968 if self.op.master_capable == False and node.master_candidate:
5969 self.LogInfo("Demoting from master candidate")
5970 self.op.master_candidate = False
5973 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5974 if self.op.master_candidate:
5975 new_role = self._ROLE_CANDIDATE
5976 elif self.op.drained:
5977 new_role = self._ROLE_DRAINED
5978 elif self.op.offline:
5979 new_role = self._ROLE_OFFLINE
5980 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5981 # False is still in new flags, which means we're un-setting (the
5982 # current) flags
5983 new_role = self._ROLE_REGULAR
5984 else: # no new flags, nothing, keep old role
5985 new_role = old_role
5987 self.new_role = new_role
5989 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5990 # Trying to transition out of offline status
5991 result = self.rpc.call_version([node.name])[node.name]
5993 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5994 " to report its version: %s" %
5995 (node.name, result.fail_msg),
5998 self.LogWarning("Transitioning node from offline to online state"
5999 " without using re-add. Please make sure the node"
6002 if self.op.secondary_ip:
6003 # Ok even without locking, because this can't be changed by any LU
6004 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6005 master_singlehomed = master.secondary_ip == master.primary_ip
6006 if master_singlehomed and self.op.secondary_ip:
6007 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
6008 " homed cluster", errors.ECODE_INVAL)
6010 assert not (frozenset(affected_instances) -
6011 self.owned_locks(locking.LEVEL_INSTANCE))
6013 if node.offline:
6014 if affected_instances:
6015 raise errors.OpPrereqError("Cannot change secondary IP address:"
6016 " offline node has instances (%s)"
6017 " configured to use it" %
6018 utils.CommaJoin(affected_instances.keys()))
6019 else:
6020 # On online nodes, check that no instances are running, and that
6021 # the node has the new ip and we can reach it.
6022 for instance in affected_instances.values():
6023 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6024 msg="cannot change secondary ip")
6026 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6027 if master.name != node.name:
6028 # check reachability from master secondary ip to new secondary ip
6029 if not netutils.TcpPing(self.op.secondary_ip,
6030 constants.DEFAULT_NODED_PORT,
6031 source=master.secondary_ip):
6032 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6033 " based ping to node daemon port",
6034 errors.ECODE_ENVIRON)
6036 if self.op.ndparams:
6037 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6038 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6039 self.new_ndparams = new_ndparams
6041 if self.op.hv_state:
6042 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6043 self.node.hv_state_static)
6045 if self.op.disk_state:
6046 self.new_disk_state = \
6047 _MergeAndVerifyDiskState(self.op.disk_state,
6048 self.node.disk_state_static)
6050 def Exec(self, feedback_fn):
6054 node = self.node
6055 old_role = self.old_role
6056 new_role = self.new_role
6058 result = []
6060 if self.op.ndparams:
6061 node.ndparams = self.new_ndparams
6063 if self.op.powered is not None:
6064 node.powered = self.op.powered
6066 if self.op.hv_state:
6067 node.hv_state_static = self.new_hv_state
6069 if self.op.disk_state:
6070 node.disk_state_static = self.new_disk_state
6072 for attr in ["master_capable", "vm_capable"]:
6073 val = getattr(self.op, attr)
6074 if val is not None:
6075 setattr(node, attr, val)
6076 result.append((attr, str(val)))
6078 if new_role != old_role:
6079 # Tell the node to demote itself, if no longer MC and not offline
6080 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6081 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6083 self.LogWarning("Node failed to demote itself: %s", msg)
6085 new_flags = self._R2F[new_role]
6086 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6087 if of != nf:
6088 result.append((desc, str(nf)))
6089 (node.master_candidate, node.drained, node.offline) = new_flags
6091 # we locked all nodes, we adjust the CP before updating this node
6092 if self.lock_all:
6093 _AdjustCandidatePool(self, [node.name])
6095 if self.op.secondary_ip:
6096 node.secondary_ip = self.op.secondary_ip
6097 result.append(("secondary_ip", self.op.secondary_ip))
6099 # this will trigger configuration file update, if needed
6100 self.cfg.Update(node, feedback_fn)
6102 # this will trigger job queue propagation or cleanup if the mc
6103 # flag changed
6104 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6105 self.context.ReaddNode(node)
6107 return result
6110 class LUNodePowercycle(NoHooksLU):
6111 """Powercycles a node.
6116 def CheckArguments(self):
6117 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6118 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6119 raise errors.OpPrereqError("The node is the master and the force"
6120 " parameter was not set",
6123 def ExpandNames(self):
6124 """Locking for PowercycleNode.
6126 This is a last-resort option and shouldn't block on other
6127 jobs. Therefore, we grab no locks.
6130 self.needed_locks = {}
6132 def Exec(self, feedback_fn):
6136 result = self.rpc.call_node_powercycle(self.op.node_name,
6137 self.cfg.GetHypervisorType())
6138 result.Raise("Failed to schedule the reboot")
6139 return result.payload
6142 class LUClusterQuery(NoHooksLU):
6143 """Query cluster configuration.
6148 def ExpandNames(self):
6149 self.needed_locks = {}
6151 def Exec(self, feedback_fn):
6152 """Return cluster config.
6155 cluster = self.cfg.GetClusterInfo()
6156 os_hvp = {}
6158 # Filter just for enabled hypervisors
6159 for os_name, hv_dict in cluster.os_hvp.items():
6160 os_hvp[os_name] = {}
6161 for hv_name, hv_params in hv_dict.items():
6162 if hv_name in cluster.enabled_hypervisors:
6163 os_hvp[os_name][hv_name] = hv_params
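# Illustrative example (hypothetical data, editorial comment): if
# cluster.os_hvp were {"debian-image": {"kvm": {...}, "xen-pvm": {...}}} and
# only "kvm" were among the enabled hypervisors, the filtered os_hvp built
# above would be {"debian-image": {"kvm": {...}}}.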
6165 # Convert ip_family to ip_version
6166 primary_ip_version = constants.IP4_VERSION
6167 if cluster.primary_ip_family == netutils.IP6Address.family:
6168 primary_ip_version = constants.IP6_VERSION
6171 "software_version": constants.RELEASE_VERSION,
6172 "protocol_version": constants.PROTOCOL_VERSION,
6173 "config_version": constants.CONFIG_VERSION,
6174 "os_api_version": max(constants.OS_API_VERSIONS),
6175 "export_version": constants.EXPORT_VERSION,
6176 "architecture": runtime.GetArchInfo(),
6177 "name": cluster.cluster_name,
6178 "master": cluster.master_node,
6179 "default_hypervisor": cluster.primary_hypervisor,
6180 "enabled_hypervisors": cluster.enabled_hypervisors,
6181 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6182 for hypervisor_name in cluster.enabled_hypervisors]),
6183 "os_hvp": os_hvp,
6184 "beparams": cluster.beparams,
6185 "osparams": cluster.osparams,
6186 "ipolicy": cluster.ipolicy,
6187 "nicparams": cluster.nicparams,
6188 "ndparams": cluster.ndparams,
6189 "diskparams": cluster.diskparams,
6190 "candidate_pool_size": cluster.candidate_pool_size,
6191 "master_netdev": cluster.master_netdev,
6192 "master_netmask": cluster.master_netmask,
6193 "use_external_mip_script": cluster.use_external_mip_script,
6194 "volume_group_name": cluster.volume_group_name,
6195 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6196 "file_storage_dir": cluster.file_storage_dir,
6197 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6198 "maintain_node_health": cluster.maintain_node_health,
6199 "ctime": cluster.ctime,
6200 "mtime": cluster.mtime,
6201 "uuid": cluster.uuid,
6202 "tags": list(cluster.GetTags()),
6203 "uid_pool": cluster.uid_pool,
6204 "default_iallocator": cluster.default_iallocator,
6205 "reserved_lvs": cluster.reserved_lvs,
6206 "primary_ip_version": primary_ip_version,
6207 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6208 "hidden_os": cluster.hidden_os,
6209 "blacklisted_os": cluster.blacklisted_os,
6215 class LUClusterConfigQuery(NoHooksLU):
6216 """Return configuration values.
6221 def CheckArguments(self):
6222 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6224 def ExpandNames(self):
6225 self.cq.ExpandNames(self)
6227 def DeclareLocks(self, level):
6228 self.cq.DeclareLocks(self, level)
6230 def Exec(self, feedback_fn):
6231 result = self.cq.OldStyleQuery(self)
6233 assert len(result) == 1
6238 class _ClusterQuery(_QueryBase):
6239 FIELDS = query.CLUSTER_FIELDS
6241 #: Do not sort (there is only one item)
6244 def ExpandNames(self, lu):
6245 lu.needed_locks = {}
6247 # The following variables interact with _QueryBase._GetNames
6248 self.wanted = locking.ALL_SET
6249 self.do_locking = self.use_locking
6252 raise errors.OpPrereqError("Can not use locking for cluster queries",
6255 def DeclareLocks(self, lu, level):
6256 pass
6258 def _GetQueryData(self, lu):
6259 """Computes the list of nodes and their attributes.
6262 # Locking is not used
6263 assert not (compat.any(lu.glm.is_owned(level)
6264 for level in locking.LEVELS
6265 if level != locking.LEVEL_CLUSTER) or
6266 self.do_locking or self.use_locking)
6268 if query.CQ_CONFIG in self.requested_data:
6269 cluster = lu.cfg.GetClusterInfo()
6270 else:
6271 cluster = NotImplemented
6273 if query.CQ_QUEUE_DRAINED in self.requested_data:
6274 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6275 else:
6276 drain_flag = NotImplemented
6278 if query.CQ_WATCHER_PAUSE in self.requested_data:
6279 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6280 else:
6281 watcher_pause = NotImplemented
6283 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6286 class LUInstanceActivateDisks(NoHooksLU):
6287 """Bring up an instance's disks.
6292 def ExpandNames(self):
6293 self._ExpandAndLockInstance()
6294 self.needed_locks[locking.LEVEL_NODE] = []
6295 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6297 def DeclareLocks(self, level):
6298 if level == locking.LEVEL_NODE:
6299 self._LockInstancesNodes()
6301 def CheckPrereq(self):
6302 """Check prerequisites.
6304 This checks that the instance is in the cluster.
6307 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6308 assert self.instance is not None, \
6309 "Cannot retrieve locked instance %s" % self.op.instance_name
6310 _CheckNodeOnline(self, self.instance.primary_node)
6312 def Exec(self, feedback_fn):
6313 """Activate the disks.
6316 disks_ok, disks_info = \
6317 _AssembleInstanceDisks(self, self.instance,
6318 ignore_size=self.op.ignore_size)
6319 if not disks_ok:
6320 raise errors.OpExecError("Cannot activate block devices")
6322 return disks_info
6325 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6326 ignore_size=False):
6327 """Prepare the block devices for an instance.
6329 This sets up the block devices on all nodes.
6331 @type lu: L{LogicalUnit}
6332 @param lu: the logical unit on whose behalf we execute
6333 @type instance: L{objects.Instance}
6334 @param instance: the instance for whose disks we assemble
6335 @type disks: list of L{objects.Disk} or None
6336 @param disks: which disks to assemble (or all, if None)
6337 @type ignore_secondaries: boolean
6338 @param ignore_secondaries: if true, errors on secondary nodes
6339 won't result in an error return from the function
6340 @type ignore_size: boolean
6341 @param ignore_size: if true, the current known size of the disk
6342 will not be used during the disk activation, useful for cases
6343 when the size is wrong
6344 @return: False if the operation failed, otherwise a list of
6345 (host, instance_visible_name, node_visible_name)
6346 with the mapping from node devices to instance devices
6349 device_info = []
6350 disks_ok = True
6351 iname = instance.name
6352 disks = _ExpandCheckDisks(instance, disks)
6354 # With the two passes mechanism we try to reduce the window of
6355 # opportunity for the race condition of switching DRBD to primary
6356 # before handshaking occurred, but we do not eliminate it
6358 # The proper fix would be to wait (with some limits) until the
6359 # connection has been made and drbd transitions from WFConnection
6360 # into any other network-connected state (Connected, SyncTarget,
6361 # SyncSource, etc.)
6363 # 1st pass, assemble on all nodes in secondary mode
6364 for idx, inst_disk in enumerate(disks):
6365 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6366 if ignore_size:
6367 node_disk = node_disk.Copy()
6368 node_disk.UnsetSize()
6369 lu.cfg.SetDiskID(node_disk, node)
6370 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6371 False, idx)
6372 msg = result.fail_msg
6373 if msg:
6374 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6375 " (is_primary=False, pass=1): %s",
6376 inst_disk.iv_name, node, msg)
6377 if not ignore_secondaries:
6378 disks_ok = False
6380 # FIXME: race condition on drbd migration to primary
6382 # 2nd pass, do only the primary node
6383 for idx, inst_disk in enumerate(disks):
6384 dev_path = None
6386 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6387 if node != instance.primary_node:
6388 continue
6389 if ignore_size:
6390 node_disk = node_disk.Copy()
6391 node_disk.UnsetSize()
6392 lu.cfg.SetDiskID(node_disk, node)
6393 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6394 True, idx)
6395 msg = result.fail_msg
6396 if msg:
6397 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6398 " (is_primary=True, pass=2): %s",
6399 inst_disk.iv_name, node, msg)
6400 disks_ok = False
6401 else:
6402 dev_path = result.payload
6404 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6406 # leave the disks configured for the primary node
6407 # this is a workaround that would be fixed better by
6408 # improving the logical/physical id handling
6409 for disk in disks:
6410 lu.cfg.SetDiskID(disk, instance.primary_node)
6412 return disks_ok, device_info
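# Illustrative usage sketch (editorial comment; hypothetical caller, not part
# of the original module):
#
#   disks_ok, device_info = _AssembleInstanceDisks(self, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     feedback_fn("%s visible on %s as %s" % (iv_name, node, dev_path))
#
# device_info entries are (node, instance-visible name, node-visible path)
# tuples for the primary node, as documented in the docstring above.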
6415 def _StartInstanceDisks(lu, instance, force):
6416 """Start the disks of an instance.
6419 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6420 ignore_secondaries=force)
6421 if not disks_ok:
6422 _ShutdownInstanceDisks(lu, instance)
6423 if force is not None and not force:
6424 lu.proc.LogWarning("", hint="If the message above refers to a"
6425 " secondary node,"
6426 " you can retry the operation using '--force'.")
6427 raise errors.OpExecError("Disk consistency error")
6430 class LUInstanceDeactivateDisks(NoHooksLU):
6431 """Shutdown an instance's disks.
6436 def ExpandNames(self):
6437 self._ExpandAndLockInstance()
6438 self.needed_locks[locking.LEVEL_NODE] = []
6439 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6441 def DeclareLocks(self, level):
6442 if level == locking.LEVEL_NODE:
6443 self._LockInstancesNodes()
6445 def CheckPrereq(self):
6446 """Check prerequisites.
6448 This checks that the instance is in the cluster.
6451 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6452 assert self.instance is not None, \
6453 "Cannot retrieve locked instance %s" % self.op.instance_name
6455 def Exec(self, feedback_fn):
6456 """Deactivate the disks
6459 instance = self.instance
6460 if self.op.force:
6461 _ShutdownInstanceDisks(self, instance)
6462 else:
6463 _SafeShutdownInstanceDisks(self, instance)
6466 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6467 """Shutdown block devices of an instance.
6469 This function checks if an instance is running, before calling
6470 _ShutdownInstanceDisks.
6473 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6474 _ShutdownInstanceDisks(lu, instance, disks=disks)
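# Editorial note: LUInstanceDeactivateDisks above uses this "safe" variant on
# its normal path, so the disks of an instance that is still marked as running
# are never shut down by accident; the unchecked _ShutdownInstanceDisks is
# called directly only when the opcode's force flag is set.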
6477 def _ExpandCheckDisks(instance, disks):
6478 """Return the instance disks selected by the disks list
6480 @type disks: list of L{objects.Disk} or None
6481 @param disks: selected disks
6482 @rtype: list of L{objects.Disk}
6483 @return: selected instance disks to act on
6486 if disks is None:
6487 return instance.disks
6488 else:
6489 if not set(disks).issubset(instance.disks):
6490 raise errors.ProgrammerError("Can only act on disks belonging to the"
6491 " target instance")
6492 return disks
6495 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6496 """Shutdown block devices of an instance.
6498 This does the shutdown on all nodes of the instance.
6500 If ignore_primary is false, errors on the primary node are
6501 ignored.
6504 all_result = True
6505 disks = _ExpandCheckDisks(instance, disks)
6507 for disk in disks:
6508 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6509 lu.cfg.SetDiskID(top_disk, node)
6510 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6511 msg = result.fail_msg
6512 if msg:
6513 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6514 disk.iv_name, node, msg)
6515 if ((node == instance.primary_node and not ignore_primary) or
6516 (node != instance.primary_node and not result.offline)):
6517 all_result = False
6519 return all_result
6521 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6522 """Checks if a node has enough free memory.
6524 This function checks whether a given node has the needed amount of free
6525 memory. In case the node has less memory or we cannot get the
6526 information from the node, this function raises an OpPrereqError
6527 exception.
6529 @type lu: C{LogicalUnit}
6530 @param lu: a logical unit from which we get configuration data
6532 @param node: the node to check
6533 @type reason: C{str}
6534 @param reason: string to use in the error message
6535 @type requested: C{int}
6536 @param requested: the amount of memory in MiB to check for
6537 @type hypervisor_name: C{str}
6538 @param hypervisor_name: the hypervisor to ask for memory stats
6540 @return: node current free memory
6541 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6542 we cannot check the node
6545 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6546 nodeinfo[node].Raise("Can't get data from node %s" % node,
6547 prereq=True, ecode=errors.ECODE_ENVIRON)
6548 (_, _, (hv_info, )) = nodeinfo[node].payload
6550 free_mem = hv_info.get("memory_free", None)
6551 if not isinstance(free_mem, int):
6552 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6553 " was '%s'" % (node, free_mem),
6554 errors.ECODE_ENVIRON)
6555 if requested > free_mem:
6556 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6557 " needed %s MiB, available %s MiB" %
6558 (node, reason, requested, free_mem),
6559 errors.ECODE_NORES)
6561 return free_mem
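# Illustrative usage sketch (editorial comment, mirroring the call made by
# LUInstanceStartup.CheckPrereq further below):
#
#   free_mem = _CheckNodeFreeMemory(self, instance.primary_node,
#                                   "starting instance %s" % instance.name,
#                                   bep[constants.BE_MINMEM],
#                                   instance.hypervisor)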
6563 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6564 """Checks if nodes have enough free disk space in the all VGs.
6566 This function checks whether all given nodes have the needed amount of
6567 free disk. In case any node has less disk or we cannot get the
6568 information from the node, this function raises an OpPrereqError
6569 exception.
6571 @type lu: C{LogicalUnit}
6572 @param lu: a logical unit from which we get configuration data
6573 @type nodenames: C{list}
6574 @param nodenames: the list of node names to check
6575 @type req_sizes: C{dict}
6576 @param req_sizes: the hash of vg and corresponding amount of disk in
6578 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6579 or we cannot check the node
6582 for vg, req_size in req_sizes.items():
6583 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
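# Illustrative example (hypothetical node and VG names, editorial comment):
# req_sizes maps volume group names to the space required on each node, in
# MiB, e.g.
#
#   _CheckNodesFreeDiskPerVG(self, ["node1", "node2"],
#                            {"xenvg": 10240, "metavg": 128})
#
# checks for 10 GiB free in "xenvg" and 128 MiB free in "metavg" on both
# nodes.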
6586 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6587 """Checks if nodes have enough free disk space in the specified VG.
6589 This function checks whether all given nodes have the needed amount of
6590 free disk. In case any node has less disk or we cannot get the
6591 information from the node, this function raises an OpPrereqError
6592 exception.
6594 @type lu: C{LogicalUnit}
6595 @param lu: a logical unit from which we get configuration data
6596 @type nodenames: C{list}
6597 @param nodenames: the list of node names to check
6599 @param vg: the volume group to check
6600 @type requested: C{int}
6601 @param requested: the amount of disk in MiB to check for
6602 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6603 or we cannot check the node
6606 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6607 for node in nodenames:
6608 info = nodeinfo[node]
6609 info.Raise("Cannot get current information from node %s" % node,
6610 prereq=True, ecode=errors.ECODE_ENVIRON)
6611 (_, (vg_info, ), _) = info.payload
6612 vg_free = vg_info.get("vg_free", None)
6613 if not isinstance(vg_free, int):
6614 raise errors.OpPrereqError("Can't compute free disk space on node"
6615 " %s for vg %s, result was '%s'" %
6616 (node, vg, vg_free), errors.ECODE_ENVIRON)
6617 if requested > vg_free:
6618 raise errors.OpPrereqError("Not enough disk space on target node %s"
6619 " vg %s: required %d MiB, available %d MiB" %
6620 (node, vg, requested, vg_free),
6624 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6625 """Checks if nodes have enough physical CPUs
6627 This function checks if all given nodes have the needed number of
6628 physical CPUs. In case any node has fewer CPUs or we cannot get the
6629 information from the node, this function raises an OpPrereqError
6630 exception.
6632 @type lu: C{LogicalUnit}
6633 @param lu: a logical unit from which we get configuration data
6634 @type nodenames: C{list}
6635 @param nodenames: the list of node names to check
6636 @type requested: C{int}
6637 @param requested: the minimum acceptable number of physical CPUs
6638 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6639 or we cannot check the node
6642 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6643 for node in nodenames:
6644 info = nodeinfo[node]
6645 info.Raise("Cannot get current information from node %s" % node,
6646 prereq=True, ecode=errors.ECODE_ENVIRON)
6647 (_, _, (hv_info, )) = info.payload
6648 num_cpus = hv_info.get("cpu_total", None)
6649 if not isinstance(num_cpus, int):
6650 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6651 " on node %s, result was '%s'" %
6652 (node, num_cpus), errors.ECODE_ENVIRON)
6653 if requested > num_cpus:
6654 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6655 "required" % (node, num_cpus, requested),
6659 class LUInstanceStartup(LogicalUnit):
6660 """Starts an instance.
6663 HPATH = "instance-start"
6664 HTYPE = constants.HTYPE_INSTANCE
6667 def CheckArguments(self):
6669 if self.op.beparams:
6670 # fill the beparams dict
6671 objects.UpgradeBeParams(self.op.beparams)
6672 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6674 def ExpandNames(self):
6675 self._ExpandAndLockInstance()
6676 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6678 def DeclareLocks(self, level):
6679 if level == locking.LEVEL_NODE_RES:
6680 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6682 def BuildHooksEnv(self):
6685 This runs on master, primary and secondary nodes of the instance.
6689 "FORCE": self.op.force,
6692 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6696 def BuildHooksNodes(self):
6697 """Build hooks nodes.
6700 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6703 def CheckPrereq(self):
6704 """Check prerequisites.
6706 This checks that the instance is in the cluster.
6709 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6710 assert self.instance is not None, \
6711 "Cannot retrieve locked instance %s" % self.op.instance_name
6714 if self.op.hvparams:
6715 # check hypervisor parameter syntax (locally)
6716 cluster = self.cfg.GetClusterInfo()
6717 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6718 filled_hvp = cluster.FillHV(instance)
6719 filled_hvp.update(self.op.hvparams)
6720 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6721 hv_type.CheckParameterSyntax(filled_hvp)
6722 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6724 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6726 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6728 if self.primary_offline and self.op.ignore_offline_nodes:
6729 self.proc.LogWarning("Ignoring offline primary node")
6731 if self.op.hvparams or self.op.beparams:
6732 self.proc.LogWarning("Overridden parameters are ignored")
6733 else:
6734 _CheckNodeOnline(self, instance.primary_node)
6736 bep = self.cfg.GetClusterInfo().FillBE(instance)
6737 bep.update(self.op.beparams)
6739 # check bridges existence
6740 _CheckInstanceBridgesExist(self, instance)
6742 remote_info = self.rpc.call_instance_info(instance.primary_node,
6744 instance.hypervisor)
6745 remote_info.Raise("Error checking node %s" % instance.primary_node,
6746 prereq=True, ecode=errors.ECODE_ENVIRON)
6747 if not remote_info.payload: # not running already
6748 _CheckNodeFreeMemory(self, instance.primary_node,
6749 "starting instance %s" % instance.name,
6750 bep[constants.BE_MINMEM], instance.hypervisor)
6752 def Exec(self, feedback_fn):
6753 """Start the instance.
6756 instance = self.instance
6757 force = self.op.force
6759 if not self.op.no_remember:
6760 self.cfg.MarkInstanceUp(instance.name)
6762 if self.primary_offline:
6763 assert self.op.ignore_offline_nodes
6764 self.proc.LogInfo("Primary node offline, marked instance as started")
6765 else:
6766 node_current = instance.primary_node
6768 _StartInstanceDisks(self, instance, force)
6770 result = \
6771 self.rpc.call_instance_start(node_current,
6772 (instance, self.op.hvparams,
6773 self.op.beparams),
6774 self.op.startup_paused)
6775 msg = result.fail_msg
6776 if msg:
6777 _ShutdownInstanceDisks(self, instance)
6778 raise errors.OpExecError("Could not start instance: %s" % msg)
6781 class LUInstanceReboot(LogicalUnit):
6782 """Reboot an instance.
6785 HPATH = "instance-reboot"
6786 HTYPE = constants.HTYPE_INSTANCE
6789 def ExpandNames(self):
6790 self._ExpandAndLockInstance()
6792 def BuildHooksEnv(self):
6795 This runs on master, primary and secondary nodes of the instance.
6799 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6800 "REBOOT_TYPE": self.op.reboot_type,
6801 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6804 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6808 def BuildHooksNodes(self):
6809 """Build hooks nodes.
6812 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6815 def CheckPrereq(self):
6816 """Check prerequisites.
6818 This checks that the instance is in the cluster.
6821 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6822 assert self.instance is not None, \
6823 "Cannot retrieve locked instance %s" % self.op.instance_name
6824 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6825 _CheckNodeOnline(self, instance.primary_node)
6827 # check bridges existence
6828 _CheckInstanceBridgesExist(self, instance)
6830 def Exec(self, feedback_fn):
6831 """Reboot the instance.
6834 instance = self.instance
6835 ignore_secondaries = self.op.ignore_secondaries
6836 reboot_type = self.op.reboot_type
6838 remote_info = self.rpc.call_instance_info(instance.primary_node,
6840 instance.hypervisor)
6841 remote_info.Raise("Error checking node %s" % instance.primary_node)
6842 instance_running = bool(remote_info.payload)
6844 node_current = instance.primary_node
6846 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6847 constants.INSTANCE_REBOOT_HARD]:
6848 for disk in instance.disks:
6849 self.cfg.SetDiskID(disk, node_current)
6850 result = self.rpc.call_instance_reboot(node_current, instance,
6851 reboot_type,
6852 self.op.shutdown_timeout)
6853 result.Raise("Could not reboot instance")
6854 else:
6855 if instance_running:
6856 result = self.rpc.call_instance_shutdown(node_current, instance,
6857 self.op.shutdown_timeout)
6858 result.Raise("Could not shutdown instance for full reboot")
6859 _ShutdownInstanceDisks(self, instance)
6860 else:
6861 self.LogInfo("Instance %s was already stopped, starting now",
6862 instance.name)
6863 _StartInstanceDisks(self, instance, ignore_secondaries)
6864 result = self.rpc.call_instance_start(node_current,
6865 (instance, None, None), False)
6866 msg = result.fail_msg
6868 _ShutdownInstanceDisks(self, instance)
6869 raise errors.OpExecError("Could not start instance for"
6870 " full reboot: %s" % msg)
6872 self.cfg.MarkInstanceUp(instance.name)
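# Editorial summary of the reboot paths above: soft and hard reboots are
# delegated to the node daemon via call_instance_reboot, while a full reboot
# is emulated as a shutdown (including a disk shutdown) followed by a fresh
# disk activation and instance start; in all cases the instance is marked as
# up in the configuration afterwards.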
6875 class LUInstanceShutdown(LogicalUnit):
6876 """Shutdown an instance.
6879 HPATH = "instance-stop"
6880 HTYPE = constants.HTYPE_INSTANCE
6883 def ExpandNames(self):
6884 self._ExpandAndLockInstance()
6886 def BuildHooksEnv(self):
6889 This runs on master, primary and secondary nodes of the instance.
6892 env = _BuildInstanceHookEnvByObject(self, self.instance)
6893 env["TIMEOUT"] = self.op.timeout
6896 def BuildHooksNodes(self):
6897 """Build hooks nodes.
6900 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6903 def CheckPrereq(self):
6904 """Check prerequisites.
6906 This checks that the instance is in the cluster.
6909 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6910 assert self.instance is not None, \
6911 "Cannot retrieve locked instance %s" % self.op.instance_name
6913 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6915 self.primary_offline = \
6916 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6918 if self.primary_offline and self.op.ignore_offline_nodes:
6919 self.proc.LogWarning("Ignoring offline primary node")
6921 _CheckNodeOnline(self, self.instance.primary_node)
6923 def Exec(self, feedback_fn):
6924 """Shutdown the instance.
6927 instance = self.instance
6928 node_current = instance.primary_node
6929 timeout = self.op.timeout
6931 if not self.op.no_remember:
6932 self.cfg.MarkInstanceDown(instance.name)
6934 if self.primary_offline:
6935 assert self.op.ignore_offline_nodes
6936 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6937 else:
6938 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6939 msg = result.fail_msg
6940 if msg:
6941 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6943 _ShutdownInstanceDisks(self, instance)
6946 class LUInstanceReinstall(LogicalUnit):
6947 """Reinstall an instance.
6950 HPATH = "instance-reinstall"
6951 HTYPE = constants.HTYPE_INSTANCE
6954 def ExpandNames(self):
6955 self._ExpandAndLockInstance()
6957 def BuildHooksEnv(self):
6960 This runs on master, primary and secondary nodes of the instance.
6963 return _BuildInstanceHookEnvByObject(self, self.instance)
6965 def BuildHooksNodes(self):
6966 """Build hooks nodes.
6969 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6972 def CheckPrereq(self):
6973 """Check prerequisites.
6975 This checks that the instance is in the cluster and is not running.
6978 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6979 assert instance is not None, \
6980 "Cannot retrieve locked instance %s" % self.op.instance_name
6981 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6982 " offline, cannot reinstall")
6983 for node in instance.secondary_nodes:
6984 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6985 " cannot reinstall")
6987 if instance.disk_template == constants.DT_DISKLESS:
6988 raise errors.OpPrereqError("Instance '%s' has no disks" %
6989 self.op.instance_name,
6991 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6993 if self.op.os_type is not None:
6995 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6996 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6997 instance_os = self.op.os_type
6998 else:
6999 instance_os = instance.os
7001 nodelist = list(instance.all_nodes)
7003 if self.op.osparams:
7004 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7005 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7006 self.os_inst = i_osdict # the new dict (without defaults)
7008 else:
7009 self.os_inst = {}
7010 self.instance = instance
7012 def Exec(self, feedback_fn):
7013 """Reinstall the instance.
7016 inst = self.instance
7018 if self.op.os_type is not None:
7019 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7020 inst.os = self.op.os_type
7021 # Write to configuration
7022 self.cfg.Update(inst, feedback_fn)
7024 _StartInstanceDisks(self, inst, None)
7026 feedback_fn("Running the instance OS create scripts...")
7027 # FIXME: pass debug option from opcode to backend
7028 result = self.rpc.call_instance_os_add(inst.primary_node,
7029 (inst, self.os_inst), True,
7030 self.op.debug_level)
7031 result.Raise("Could not install OS for instance %s on node %s" %
7032 (inst.name, inst.primary_node))
7033 finally:
7034 _ShutdownInstanceDisks(self, inst)
7037 class LUInstanceRecreateDisks(LogicalUnit):
7038 """Recreate an instance's missing disks.
7041 HPATH = "instance-recreate-disks"
7042 HTYPE = constants.HTYPE_INSTANCE
7045 _MODIFYABLE = frozenset([
7046 constants.IDISK_SIZE,
7047 constants.IDISK_MODE,
7050 # New or changed disk parameters may have different semantics
7051 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7052 constants.IDISK_ADOPT,
7054 # TODO: Implement support changing VG while recreating
7056 constants.IDISK_METAVG,
7059 def CheckArguments(self):
7060 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7061 # Normalize and convert deprecated list of disk indices
7062 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7064 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7066 raise errors.OpPrereqError("Some disks have been specified more than"
7067 " once: %s" % utils.CommaJoin(duplicates),
7070 for (idx, params) in self.op.disks:
7071 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7072 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7074 raise errors.OpPrereqError("Parameters for disk %s try to change"
7075 " unmodifyable parameter(s): %s" %
7076 (idx, utils.CommaJoin(unsupported)),
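# Illustrative example (editorial comment): the opcode may specify disks as a
# legacy list of indices, e.g. [0, 2], which CheckArguments above normalizes
# to [(0, {}), (2, {})], or directly as (index, parameters) pairs such as
# [(0, {constants.IDISK_SIZE: 2048})] to recreate disk 0 with a size of
# 2048 MiB.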
7079 def ExpandNames(self):
7080 self._ExpandAndLockInstance()
7081 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7082 if self.op.nodes:
7083 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7084 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7085 else:
7086 self.needed_locks[locking.LEVEL_NODE] = []
7087 self.needed_locks[locking.LEVEL_NODE_RES] = []
7089 def DeclareLocks(self, level):
7090 if level == locking.LEVEL_NODE:
7091 # if we replace the nodes, we only need to lock the old primary,
7092 # otherwise we need to lock all nodes for disk re-creation
7093 primary_only = bool(self.op.nodes)
7094 self._LockInstancesNodes(primary_only=primary_only)
7095 elif level == locking.LEVEL_NODE_RES:
7097 self.needed_locks[locking.LEVEL_NODE_RES] = \
7098 self.needed_locks[locking.LEVEL_NODE][:]
7100 def BuildHooksEnv(self):
7103 This runs on master, primary and secondary nodes of the instance.
7106 return _BuildInstanceHookEnvByObject(self, self.instance)
7108 def BuildHooksNodes(self):
7109 """Build hooks nodes.
7112 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7115 def CheckPrereq(self):
7116 """Check prerequisites.
7118 This checks that the instance is in the cluster and is not running.
7121 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7122 assert instance is not None, \
7123 "Cannot retrieve locked instance %s" % self.op.instance_name
7124 if self.op.nodes:
7125 if len(self.op.nodes) != len(instance.all_nodes):
7126 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7127 " %d replacement nodes were specified" %
7128 (instance.name, len(instance.all_nodes),
7129 len(self.op.nodes)),
7131 assert instance.disk_template != constants.DT_DRBD8 or \
7132 len(self.op.nodes) == 2
7133 assert instance.disk_template != constants.DT_PLAIN or \
7134 len(self.op.nodes) == 1
7135 primary_node = self.op.nodes[0]
7136 else:
7137 primary_node = instance.primary_node
7138 _CheckNodeOnline(self, primary_node)
7140 if instance.disk_template == constants.DT_DISKLESS:
7141 raise errors.OpPrereqError("Instance '%s' has no disks" %
7142 self.op.instance_name, errors.ECODE_INVAL)
7144 # if we replace nodes *and* the old primary is offline, we don't
7145 # check the instance state
7146 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7147 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7148 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7149 if not (self.op.nodes and old_pnode.offline):
7150 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7151 msg="cannot recreate disks")
7153 if self.op.disks:
7154 self.disks = dict(self.op.disks)
7155 else:
7156 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7158 maxidx = max(self.disks.keys())
7159 if maxidx >= len(instance.disks):
7160 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7163 if (self.op.nodes and
7164 sorted(self.disks.keys()) != range(len(instance.disks))):
7165 raise errors.OpPrereqError("Can't recreate disks partially and"
7166 " change the nodes at the same time",
7169 self.instance = instance
7171 def Exec(self, feedback_fn):
7172 """Recreate the disks.
7175 instance = self.instance
7177 assert (self.owned_locks(locking.LEVEL_NODE) ==
7178 self.owned_locks(locking.LEVEL_NODE_RES))
7180 to_skip = []
7181 mods = [] # keeps track of needed changes
7183 for idx, disk in enumerate(instance.disks):
7184 try:
7185 changes = self.disks[idx]
7186 except KeyError:
7187 # Disk should not be recreated
7188 to_skip.append(idx)
7189 continue
7191 # update secondaries for disks, if needed
7192 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7193 # need to update the nodes and minors
7194 assert len(self.op.nodes) == 2
7195 assert len(disk.logical_id) == 6 # otherwise disk internals
7197 (_, _, old_port, _, _, old_secret) = disk.logical_id
7198 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7199 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7200 new_minors[0], new_minors[1], old_secret)
7201 assert len(disk.logical_id) == len(new_id)
7202 else:
7203 new_id = None
7205 mods.append((idx, new_id, changes))
7207 # now that we have passed all asserts above, we can apply the mods
7208 # in a single run (to avoid partial changes)
7209 for idx, new_id, changes in mods:
7210 disk = instance.disks[idx]
7211 if new_id is not None:
7212 assert disk.dev_type == constants.LD_DRBD8
7213 disk.logical_id = new_id
7214 if changes:
7215 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7216 mode=changes.get(constants.IDISK_MODE, None))
7218 # change primary node, if needed
7219 if self.op.nodes:
7220 instance.primary_node = self.op.nodes[0]
7221 self.LogWarning("Changing the instance's nodes, you will have to"
7222 " remove any disks left on the older nodes manually")
7225 self.cfg.Update(instance, feedback_fn)
7227 _CreateDisks(self, instance, to_skip=to_skip)
7230 class LUInstanceRename(LogicalUnit):
7231 """Rename an instance.
7234 HPATH = "instance-rename"
7235 HTYPE = constants.HTYPE_INSTANCE
7237 def CheckArguments(self):
7241 if self.op.ip_check and not self.op.name_check:
7242 # TODO: make the ip check more flexible and not depend on the name check
7243 raise errors.OpPrereqError("IP address check requires a name check",
7246 def BuildHooksEnv(self):
7249 This runs on master, primary and secondary nodes of the instance.
7252 env = _BuildInstanceHookEnvByObject(self, self.instance)
7253 env["INSTANCE_NEW_NAME"] = self.op.new_name
7256 def BuildHooksNodes(self):
7257 """Build hooks nodes.
7260 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7263 def CheckPrereq(self):
7264 """Check prerequisites.
7266 This checks that the instance is in the cluster and is not running.
7269 self.op.instance_name = _ExpandInstanceName(self.cfg,
7270 self.op.instance_name)
7271 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7272 assert instance is not None
7273 _CheckNodeOnline(self, instance.primary_node)
7274 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7275 msg="cannot rename")
7276 self.instance = instance
7278 new_name = self.op.new_name
7279 if self.op.name_check:
7280 hostname = netutils.GetHostname(name=new_name)
7281 if hostname.name != new_name:
7282 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7284 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7285 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7286 " same as given hostname '%s'") %
7287 (hostname.name, self.op.new_name),
7289 new_name = self.op.new_name = hostname.name
7290 if (self.op.ip_check and
7291 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7292 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7293 (hostname.ip, new_name),
7294 errors.ECODE_NOTUNIQUE)
7296 instance_list = self.cfg.GetInstanceList()
7297 if new_name in instance_list and new_name != instance.name:
7298 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7299 new_name, errors.ECODE_EXISTS)
7301 def Exec(self, feedback_fn):
7302 """Rename the instance.
7305 inst = self.instance
7306 old_name = inst.name
7308 rename_file_storage = False
7309 if (inst.disk_template in constants.DTS_FILEBASED and
7310 self.op.new_name != inst.name):
7311 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7312 rename_file_storage = True
7314 self.cfg.RenameInstance(inst.name, self.op.new_name)
7315 # Change the instance lock. This is definitely safe while we hold the BGL.
7316 # Otherwise the new lock would have to be added in acquired mode.
7318 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7319 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7321 # re-read the instance from the configuration after rename
7322 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7324 if rename_file_storage:
7325 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7326 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7327 old_file_storage_dir,
7328 new_file_storage_dir)
7329 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7330 " (but the instance has been renamed in Ganeti)" %
7331 (inst.primary_node, old_file_storage_dir,
7332 new_file_storage_dir))
7334 _StartInstanceDisks(self, inst, None)
7335 try:
7336 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7337 old_name, self.op.debug_level)
7338 msg = result.fail_msg
7339 if msg:
7340 msg = ("Could not run OS rename script for instance %s on node %s"
7341 " (but the instance has been renamed in Ganeti): %s" %
7342 (inst.name, inst.primary_node, msg))
7343 self.proc.LogWarning(msg)
7344 finally:
7345 _ShutdownInstanceDisks(self, inst)
7347 return inst.name
7350 class LUInstanceRemove(LogicalUnit):
7351 """Remove an instance.
7354 HPATH = "instance-remove"
7355 HTYPE = constants.HTYPE_INSTANCE
7358 def ExpandNames(self):
7359 self._ExpandAndLockInstance()
7360 self.needed_locks[locking.LEVEL_NODE] = []
7361 self.needed_locks[locking.LEVEL_NODE_RES] = []
7362 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7364 def DeclareLocks(self, level):
7365 if level == locking.LEVEL_NODE:
7366 self._LockInstancesNodes()
7367 elif level == locking.LEVEL_NODE_RES:
7369 self.needed_locks[locking.LEVEL_NODE_RES] = \
7370 self.needed_locks[locking.LEVEL_NODE][:]
7372 def BuildHooksEnv(self):
7375 This runs on master, primary and secondary nodes of the instance.
7378 env = _BuildInstanceHookEnvByObject(self, self.instance)
7379 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7382 def BuildHooksNodes(self):
7383 """Build hooks nodes.
7386 nl = [self.cfg.GetMasterNode()]
7387 nl_post = list(self.instance.all_nodes) + nl
7388 return (nl, nl_post)
7390 def CheckPrereq(self):
7391 """Check prerequisites.
7393 This checks that the instance is in the cluster.
7396 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7397 assert self.instance is not None, \
7398 "Cannot retrieve locked instance %s" % self.op.instance_name
7400 def Exec(self, feedback_fn):
7401 """Remove the instance.
7404 instance = self.instance
7405 logging.info("Shutting down instance %s on node %s",
7406 instance.name, instance.primary_node)
7408 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7409 self.op.shutdown_timeout)
7410 msg = result.fail_msg
7411 if msg:
7412 if self.op.ignore_failures:
7413 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7414 else:
7415 raise errors.OpExecError("Could not shutdown instance %s on"
7416 " node %s: %s" %
7417 (instance.name, instance.primary_node, msg))
7419 assert (self.owned_locks(locking.LEVEL_NODE) ==
7420 self.owned_locks(locking.LEVEL_NODE_RES))
7421 assert not (set(instance.all_nodes) -
7422 self.owned_locks(locking.LEVEL_NODE)), \
7423 "Not owning correct locks"
7425 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7428 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7429 """Utility function to remove an instance.
7432 logging.info("Removing block devices for instance %s", instance.name)
7434 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7435 if not ignore_failures:
7436 raise errors.OpExecError("Can't remove instance's disks")
7437 feedback_fn("Warning: can't remove instance's disks")
7439 logging.info("Removing instance %s out of cluster config", instance.name)
7441 lu.cfg.RemoveInstance(instance.name)
7443 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7444 "Instance lock removal conflict"
7446 # Remove lock for the instance
7447 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7450 class LUInstanceQuery(NoHooksLU):
7451 """Logical unit for querying instances.
7454 # pylint: disable=W0142
7457 def CheckArguments(self):
7458 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7459 self.op.output_fields, self.op.use_locking)
7461 def ExpandNames(self):
7462 self.iq.ExpandNames(self)
7464 def DeclareLocks(self, level):
7465 self.iq.DeclareLocks(self, level)
7467 def Exec(self, feedback_fn):
7468 return self.iq.OldStyleQuery(self)
7471 class LUInstanceFailover(LogicalUnit):
7472 """Failover an instance.
7475 HPATH = "instance-failover"
7476 HTYPE = constants.HTYPE_INSTANCE
7479 def CheckArguments(self):
7480 """Check the arguments.
7483 self.iallocator = getattr(self.op, "iallocator", None)
7484 self.target_node = getattr(self.op, "target_node", None)
7486 def ExpandNames(self):
7487 self._ExpandAndLockInstance()
7489 if self.op.target_node is not None:
7490 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7492 self.needed_locks[locking.LEVEL_NODE] = []
7493 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7495 self.needed_locks[locking.LEVEL_NODE_RES] = []
7496 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7498 ignore_consistency = self.op.ignore_consistency
7499 shutdown_timeout = self.op.shutdown_timeout
7500 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7503 ignore_consistency=ignore_consistency,
7504 shutdown_timeout=shutdown_timeout,
7505 ignore_ipolicy=self.op.ignore_ipolicy)
7506 self.tasklets = [self._migrater]
7508 def DeclareLocks(self, level):
7509 if level == locking.LEVEL_NODE:
7510 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7511 if instance.disk_template in constants.DTS_EXT_MIRROR:
7512 if self.op.target_node is None:
7513 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7514 else:
7515 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7516 self.op.target_node]
7517 del self.recalculate_locks[locking.LEVEL_NODE]
7518 else:
7519 self._LockInstancesNodes()
7520 elif level == locking.LEVEL_NODE_RES:
7522 self.needed_locks[locking.LEVEL_NODE_RES] = \
7523 self.needed_locks[locking.LEVEL_NODE][:]
7525 def BuildHooksEnv(self):
7526 """Build hooks env.
7528 This runs on master, primary and secondary nodes of the instance.
7530 """
7531 instance = self._migrater.instance
7532 source_node = instance.primary_node
7533 target_node = self.op.target_node
7534 env = {
7535 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7536 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7537 "OLD_PRIMARY": source_node,
7538 "NEW_PRIMARY": target_node,
7539 }
7541 if instance.disk_template in constants.DTS_INT_MIRROR:
7542 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7543 env["NEW_SECONDARY"] = source_node
7544 else:
7545 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7547 env.update(_BuildInstanceHookEnvByObject(self, instance))
7549 return env
7551 def BuildHooksNodes(self):
7552 """Build hooks nodes.
7555 instance = self._migrater.instance
7556 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7557 return (nl, nl + [instance.primary_node])
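# The first list of the returned tuple is used for the pre-hooks and the
# second for the post-hooks; the (old) primary node appears only in the
# post-hook list, presumably because it may be unreachable when a failover
# starts and is no longer the primary afterwards.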
7560 class LUInstanceMigrate(LogicalUnit):
7561 """Migrate an instance.
7563 This is migration without shutting down, compared to the failover,
7564 which is done with shutdown.
7567 HPATH = "instance-migrate"
7568 HTYPE = constants.HTYPE_INSTANCE
7570 REQ_BGL = False
7571 def ExpandNames(self):
7572 self._ExpandAndLockInstance()
7574 if self.op.target_node is not None:
7575 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7577 self.needed_locks[locking.LEVEL_NODE] = []
7578 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7580 self.needed_locks[locking.LEVEL_NODE_RES] = []
7581 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7583 self._migrater = \
7584 TLMigrateInstance(self, self.op.instance_name,
7585 cleanup=self.op.cleanup,
7586 failover=False,
7587 fallback=self.op.allow_failover,
7588 allow_runtime_changes=self.op.allow_runtime_changes,
7589 ignore_ipolicy=self.op.ignore_ipolicy)
7590 self.tasklets = [self._migrater]
7592 def DeclareLocks(self, level):
7593 if level == locking.LEVEL_NODE:
7594 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7595 if instance.disk_template in constants.DTS_EXT_MIRROR:
7596 if self.op.target_node is None:
7597 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7598 else:
7599 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7600 self.op.target_node]
7601 del self.recalculate_locks[locking.LEVEL_NODE]
7602 else:
7603 self._LockInstancesNodes()
7604 elif level == locking.LEVEL_NODE_RES:
7606 self.needed_locks[locking.LEVEL_NODE_RES] = \
7607 self.needed_locks[locking.LEVEL_NODE][:]
7609 def BuildHooksEnv(self):
7610 """Build hooks env.
7612 This runs on master, primary and secondary nodes of the instance.
7614 """
7615 instance = self._migrater.instance
7616 source_node = instance.primary_node
7617 target_node = self.op.target_node
7618 env = _BuildInstanceHookEnvByObject(self, instance)
7619 env.update({
7620 "MIGRATE_LIVE": self._migrater.live,
7621 "MIGRATE_CLEANUP": self.op.cleanup,
7622 "OLD_PRIMARY": source_node,
7623 "NEW_PRIMARY": target_node,
7624 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7625 })
7627 if instance.disk_template in constants.DTS_INT_MIRROR:
7628 env["OLD_SECONDARY"] = target_node
7629 env["NEW_SECONDARY"] = source_node
7630 else:
7631 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7633 return env
7635 def BuildHooksNodes(self):
7636 """Build hooks nodes.
7639 instance = self._migrater.instance
7640 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7641 return (nl, nl + [instance.primary_node])
7644 class LUInstanceMove(LogicalUnit):
7645 """Move an instance by data-copying.
7648 HPATH = "instance-move"
7649 HTYPE = constants.HTYPE_INSTANCE
7650 REQ_BGL = False
7652 def ExpandNames(self):
7653 self._ExpandAndLockInstance()
7654 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7655 self.op.target_node = target_node
7656 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7657 self.needed_locks[locking.LEVEL_NODE_RES] = []
7658 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7660 def DeclareLocks(self, level):
7661 if level == locking.LEVEL_NODE:
7662 self._LockInstancesNodes(primary_only=True)
7663 elif level == locking.LEVEL_NODE_RES:
7665 self.needed_locks[locking.LEVEL_NODE_RES] = \
7666 self.needed_locks[locking.LEVEL_NODE][:]
7668 def BuildHooksEnv(self):
7669 """Build hooks env.
7671 This runs on master, primary and secondary nodes of the instance.
7673 """
7674 env = {
7675 "TARGET_NODE": self.op.target_node,
7676 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7677 }
7678 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7679 return env
7681 def BuildHooksNodes(self):
7682 """Build hooks nodes.
7684 """
7685 nl = [
7686 self.cfg.GetMasterNode(),
7687 self.instance.primary_node,
7688 self.op.target_node,
7689 ]
7690 return (nl, nl)
7692 def CheckPrereq(self):
7693 """Check prerequisites.
7695 This checks that the instance is in the cluster.
7698 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7699 assert self.instance is not None, \
7700 "Cannot retrieve locked instance %s" % self.op.instance_name
7702 node = self.cfg.GetNodeInfo(self.op.target_node)
7703 assert node is not None, \
7704 "Cannot retrieve locked node %s" % self.op.target_node
7706 self.target_node = target_node = node.name
7708 if target_node == instance.primary_node:
7709 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7710 (instance.name, target_node),
7711 errors.ECODE_STATE)
7713 bep = self.cfg.GetClusterInfo().FillBE(instance)
7715 for idx, dsk in enumerate(instance.disks):
7716 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7717 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7718 " cannot copy" % idx, errors.ECODE_STATE)
7720 _CheckNodeOnline(self, target_node)
7721 _CheckNodeNotDrained(self, target_node)
7722 _CheckNodeVmCapable(self, target_node)
7723 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7724 self.cfg.GetNodeGroup(node.group))
7725 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7726 ignore=self.op.ignore_ipolicy)
7728 if instance.admin_state == constants.ADMINST_UP:
7729 # check memory requirements on the secondary node
7730 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7731 instance.name, bep[constants.BE_MAXMEM],
7732 instance.hypervisor)
7733 else:
7734 self.LogInfo("Not checking memory on the secondary node as"
7735 " instance will not be started")
7737 # check bridge existence
7738 _CheckInstanceBridgesExist(self, instance, node=target_node)
7740 def Exec(self, feedback_fn):
7741 """Move an instance.
7743 The move is done by shutting it down on its present node, copying
7744 the data over (slow) and starting it on the new node.
7747 instance = self.instance
7749 source_node = instance.primary_node
7750 target_node = self.target_node
7752 self.LogInfo("Shutting down instance %s on source node %s",
7753 instance.name, source_node)
7755 assert (self.owned_locks(locking.LEVEL_NODE) ==
7756 self.owned_locks(locking.LEVEL_NODE_RES))
7758 result = self.rpc.call_instance_shutdown(source_node, instance,
7759 self.op.shutdown_timeout)
7760 msg = result.fail_msg
7761 if msg:
7762 if self.op.ignore_consistency:
7763 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7764 " Proceeding anyway. Please make sure node"
7765 " %s is down. Error details: %s",
7766 instance.name, source_node, source_node, msg)
7767 else:
7768 raise errors.OpExecError("Could not shutdown instance %s on"
7769 " node %s: %s" %
7770 (instance.name, source_node, msg))
7772 # create the target disks
7773 try:
7774 _CreateDisks(self, instance, target_node=target_node)
7775 except errors.OpExecError:
7776 self.LogWarning("Device creation failed, reverting...")
7777 try:
7778 _RemoveDisks(self, instance, target_node=target_node)
7779 finally:
7780 self.cfg.ReleaseDRBDMinors(instance.name)
7781 raise
7783 cluster_name = self.cfg.GetClusterInfo().cluster_name
7785 errs = []
7786 # activate, get path, copy the data over
7787 for idx, disk in enumerate(instance.disks):
7788 self.LogInfo("Copying data for disk %d", idx)
7789 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7790 instance.name, True, idx)
7791 if result.fail_msg:
7792 self.LogWarning("Can't assemble newly created disk %d: %s",
7793 idx, result.fail_msg)
7794 errs.append(result.fail_msg)
7795 break
7796 dev_path = result.payload
7797 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7798 target_node, dev_path,
7799 cluster_name)
7800 if result.fail_msg:
7801 self.LogWarning("Can't copy data over for disk %d: %s",
7802 idx, result.fail_msg)
7803 errs.append(result.fail_msg)
7804 break
7806 if errs:
7807 self.LogWarning("Some disks failed to copy, aborting")
7808 try:
7809 _RemoveDisks(self, instance, target_node=target_node)
7810 finally:
7811 self.cfg.ReleaseDRBDMinors(instance.name)
7812 raise errors.OpExecError("Errors during disk copy: %s" %
7813 ",".join(errs))
7815 instance.primary_node = target_node
7816 self.cfg.Update(instance, feedback_fn)
7818 self.LogInfo("Removing the disks on the original node")
7819 _RemoveDisks(self, instance, target_node=source_node)
7821 # Only start the instance if it's marked as up
7822 if instance.admin_state == constants.ADMINST_UP:
7823 self.LogInfo("Starting instance %s on node %s",
7824 instance.name, target_node)
7826 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7827 ignore_secondaries=True)
7828 if not disks_ok:
7829 _ShutdownInstanceDisks(self, instance)
7830 raise errors.OpExecError("Can't activate the instance's disks")
7832 result = self.rpc.call_instance_start(target_node,
7833 (instance, None, None), False)
7834 msg = result.fail_msg
7835 if msg:
7836 _ShutdownInstanceDisks(self, instance)
7837 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7838 (instance.name, target_node, msg))
7841 class LUNodeMigrate(LogicalUnit):
7842 """Migrate all instances from a node.
7845 HPATH = "node-migrate"
7846 HTYPE = constants.HTYPE_NODE
7847 REQ_BGL = False
7849 def CheckArguments(self):
7850 pass
7852 def ExpandNames(self):
7853 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7855 self.share_locks = _ShareAll()
7856 self.needed_locks = {
7857 locking.LEVEL_NODE: [self.op.node_name],
7858 }
7860 def BuildHooksEnv(self):
7861 """Build hooks env.
7863 This runs on the master, the primary and all the secondaries.
7865 """
7866 return {
7867 "NODE_NAME": self.op.node_name,
7868 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7869 }
7871 def BuildHooksNodes(self):
7872 """Build hooks nodes.
7875 nl = [self.cfg.GetMasterNode()]
7876 return (nl, nl)
7878 def CheckPrereq(self):
7879 pass
7881 def Exec(self, feedback_fn):
7882 # Prepare jobs for migration instances
7883 allow_runtime_changes = self.op.allow_runtime_changes
7884 jobs = [
7885 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7886 mode=self.op.mode,
7887 live=self.op.live,
7888 iallocator=self.op.iallocator,
7889 target_node=self.op.target_node,
7890 allow_runtime_changes=allow_runtime_changes,
7891 ignore_ipolicy=self.op.ignore_ipolicy)]
7892 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7893 ]
7895 # TODO: Run iallocator in this opcode and pass correct placement options to
7896 # OpInstanceMigrate. Since other jobs can modify the cluster between
7897 # running the iallocator and the actual migration, a good consistency model
7898 # will have to be found.
7900 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7901 frozenset([self.op.node_name]))
7903 return ResultWithJobs(jobs)
7906 class TLMigrateInstance(Tasklet):
7907 """Tasklet class for instance migration.
7909 @type live: boolean
7910 @ivar live: whether the migration will be done live or non-live;
7911 this variable is initialized only after CheckPrereq has run
7912 @type cleanup: boolean
7913 @ivar cleanup: Whether we cleanup from a failed migration
7914 @type iallocator: string
7915 @ivar iallocator: The iallocator used to determine target_node
7916 @type target_node: string
7917 @ivar target_node: If given, the target_node to reallocate the instance to
7918 @type failover: boolean
7919 @ivar failover: Whether operation results in failover or migration
7920 @type fallback: boolean
7921 @ivar fallback: Whether fallback to failover is allowed if migration not
7922 possible
7923 @type ignore_consistency: boolean
7924 @ivar ignore_consistency: Whether we should ignore consistency between source
7925 and target node
7926 @type shutdown_timeout: int
7927 @ivar shutdown_timeout: In case of failover timeout of the shutdown
7928 @type ignore_ipolicy: bool
7929 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7931 """
7934 _MIGRATION_POLL_INTERVAL = 1 # seconds
7935 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
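# In other words: the migration status is polled roughly once per second,
# while progress feedback is emitted at most every ten seconds (see the
# _MIGRATION_FEEDBACK_INTERVAL check in _ExecMigration below).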
7937 def __init__(self, lu, instance_name, cleanup=False,
7938 failover=False, fallback=False,
7939 ignore_consistency=False,
7940 allow_runtime_changes=True,
7941 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7942 ignore_ipolicy=False):
7943 """Initializes this class.
7946 Tasklet.__init__(self, lu)
7949 self.instance_name = instance_name
7950 self.cleanup = cleanup
7951 self.live = False # will be overridden later
7952 self.failover = failover
7953 self.fallback = fallback
7954 self.ignore_consistency = ignore_consistency
7955 self.shutdown_timeout = shutdown_timeout
7956 self.ignore_ipolicy = ignore_ipolicy
7957 self.allow_runtime_changes = allow_runtime_changes
7959 def CheckPrereq(self):
7960 """Check prerequisites.
7962 This checks that the instance is in the cluster.
7965 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7966 instance = self.cfg.GetInstanceInfo(instance_name)
7967 assert instance is not None
7968 self.instance = instance
7969 cluster = self.cfg.GetClusterInfo()
7971 if (not self.cleanup and
7972 not instance.admin_state == constants.ADMINST_UP and
7973 not self.failover and self.fallback):
7974 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7975 " switching to failover")
7976 self.failover = True
7978 if instance.disk_template not in constants.DTS_MIRRORED:
7979 if self.failover:
7980 text = "failovers"
7981 else:
7982 text = "migrations"
7983 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7984 " %s" % (instance.disk_template, text),
7985 errors.ECODE_STATE)
7987 if instance.disk_template in constants.DTS_EXT_MIRROR:
7988 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7990 if self.lu.op.iallocator:
7991 self._RunAllocator()
7992 else:
7993 # We set self.target_node as it is required by
7994 # BuildHooksEnv
7995 self.target_node = self.lu.op.target_node
7997 # Check that the target node is correct in terms of instance policy
7998 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7999 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8000 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8001 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8002 ignore=self.ignore_ipolicy)
8004 # self.target_node is already populated, either directly or by the
8006 target_node = self.target_node
8007 if self.target_node == instance.primary_node:
8008 raise errors.OpPrereqError("Cannot migrate instance %s"
8009 " to its primary (%s)" %
8010 (instance.name, instance.primary_node))
8012 if len(self.lu.tasklets) == 1:
8013 # It is safe to release locks only when we're the only tasklet
8015 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8016 keep=[instance.primary_node, self.target_node])
8018 else:
8019 secondary_nodes = instance.secondary_nodes
8020 if not secondary_nodes:
8021 raise errors.ConfigurationError("No secondary node but using"
8022 " %s disk template" %
8023 instance.disk_template)
8024 target_node = secondary_nodes[0]
8025 if self.lu.op.iallocator or (self.lu.op.target_node and
8026 self.lu.op.target_node != target_node):
8027 if self.failover:
8028 text = "failed over"
8029 else:
8030 text = "migrated"
8031 raise errors.OpPrereqError("Instances with disk template %s cannot"
8032 " be %s to arbitrary nodes"
8033 " (neither an iallocator nor a target"
8034 " node can be passed)" %
8035 (instance.disk_template, text),
8036 errors.ECODE_INVAL)
8037 nodeinfo = self.cfg.GetNodeInfo(target_node)
8038 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8039 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8040 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8041 ignore=self.ignore_ipolicy)
8043 i_be = cluster.FillBE(instance)
8045 # check memory requirements on the secondary node
8046 if (not self.cleanup and
8047 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8048 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8049 "migrating instance %s" %
8050 instance.name,
8051 i_be[constants.BE_MINMEM],
8052 instance.hypervisor)
8053 else:
8054 self.lu.LogInfo("Not checking memory on the secondary node as"
8055 " instance will not be started")
8057 # check if failover must be forced instead of migration
8058 if (not self.cleanup and not self.failover and
8059 i_be[constants.BE_ALWAYS_FAILOVER]):
8060 if self.fallback:
8061 self.lu.LogInfo("Instance configured to always failover; fallback"
8062 " to failover")
8063 self.failover = True
8064 else:
8065 raise errors.OpPrereqError("This instance has been configured to"
8066 " always failover, please allow failover",
8067 errors.ECODE_STATE)
8069 # check bridge existence
8070 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8072 if not self.cleanup:
8073 _CheckNodeNotDrained(self.lu, target_node)
8074 if not self.failover:
8075 result = self.rpc.call_instance_migratable(instance.primary_node,
8076 instance)
8077 if result.fail_msg and self.fallback:
8078 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8079 " failover")
8080 self.failover = True
8081 else:
8082 result.Raise("Can't migrate, please use failover",
8083 prereq=True, ecode=errors.ECODE_STATE)
8085 assert not (self.failover and self.cleanup)
8087 if not self.failover:
8088 if self.lu.op.live is not None and self.lu.op.mode is not None:
8089 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8090 " parameters are accepted",
8091 errors.ECODE_INVAL)
8092 if self.lu.op.live is not None:
8093 if self.lu.op.live:
8094 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8095 else:
8096 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8097 # reset the 'live' parameter to None so that repeated
8098 # invocations of CheckPrereq do not raise an exception
8099 self.lu.op.live = None
8100 elif self.lu.op.mode is None:
8101 # read the default value from the hypervisor
8102 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8103 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8105 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8106 else:
8107 # Failover is never live
8108 self.live = False
8110 if not (self.failover or self.cleanup):
8111 remote_info = self.rpc.call_instance_info(instance.primary_node,
8112 instance.name,
8113 instance.hypervisor)
8114 remote_info.Raise("Error checking instance on node %s" %
8115 instance.primary_node)
8116 instance_running = bool(remote_info.payload)
8117 if instance_running:
8118 self.current_mem = int(remote_info.payload["memory"])
8120 def _RunAllocator(self):
8121 """Run the allocator based on input opcode.
8124 # FIXME: add a self.ignore_ipolicy option
8125 ial = IAllocator(self.cfg, self.rpc,
8126 mode=constants.IALLOCATOR_MODE_RELOC,
8127 name=self.instance_name,
8128 relocate_from=[self.instance.primary_node],
8129 )
8131 ial.Run(self.lu.op.iallocator)
8133 if not ial.success:
8134 raise errors.OpPrereqError("Can't compute nodes using"
8135 " iallocator '%s': %s" %
8136 (self.lu.op.iallocator, ial.info),
8138 if len(ial.result) != ial.required_nodes:
8139 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8140 " of nodes (%s), required %s" %
8141 (self.lu.op.iallocator, len(ial.result),
8142 ial.required_nodes), errors.ECODE_FAULT)
8143 self.target_node = ial.result[0]
8144 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8145 self.instance_name, self.lu.op.iallocator,
8146 utils.CommaJoin(ial.result))
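# For IALLOCATOR_MODE_RELOC the allocator script is expected to return
# exactly ial.required_nodes entries (a single node for this one-instance
# relocation), so ial.result[0] is the chosen migration/failover target.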
8148 def _WaitUntilSync(self):
8149 """Poll with custom rpc for disk sync.
8151 This uses our own step-based rpc call.
8153 """
8154 self.feedback_fn("* wait until resync is done")
8155 all_done = False
8156 while not all_done:
8157 all_done = True
8158 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8159 self.nodes_ip,
8160 (self.instance.disks,
8161 self.instance))
8162 min_percent = 100
8163 for node, nres in result.items():
8164 nres.Raise("Cannot resync disks on node %s" % node)
8165 node_done, node_percent = nres.payload
8166 all_done = all_done and node_done
8167 if node_percent is not None:
8168 min_percent = min(min_percent, node_percent)
8169 if not all_done:
8170 if min_percent < 100:
8171 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8172 time.sleep(2)
8174 def _EnsureSecondary(self, node):
8175 """Demote a node to secondary.
8178 self.feedback_fn("* switching node %s to secondary mode" % node)
8180 for dev in self.instance.disks:
8181 self.cfg.SetDiskID(dev, node)
8183 result = self.rpc.call_blockdev_close(node, self.instance.name,
8184 self.instance.disks)
8185 result.Raise("Cannot change disk to secondary on node %s" % node)
8187 def _GoStandalone(self):
8188 """Disconnect from the network.
8191 self.feedback_fn("* changing into standalone mode")
8192 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8193 self.instance.disks)
8194 for node, nres in result.items():
8195 nres.Raise("Cannot disconnect disks node %s" % node)
8197 def _GoReconnect(self, multimaster):
8198 """Reconnect to the network.
8200 """
8201 if multimaster:
8202 msg = "dual-master"
8203 else:
8204 msg = "single-master"
8205 self.feedback_fn("* changing disks into %s mode" % msg)
8206 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8207 (self.instance.disks, self.instance),
8208 self.instance.name, multimaster)
8209 for node, nres in result.items():
8210 nres.Raise("Cannot change disks config on node %s" % node)
8212 def _ExecCleanup(self):
8213 """Try to cleanup after a failed migration.
8215 The cleanup is done by:
8216 - check that the instance is running only on one node
8217 (and update the config if needed)
8218 - change disks on its secondary node to secondary
8219 - wait until disks are fully synchronized
8220 - disconnect from the network
8221 - change disks into single-master mode
8222 - wait again until disks are fully synchronized
8225 instance = self.instance
8226 target_node = self.target_node
8227 source_node = self.source_node
8229 # check running on only one node
8230 self.feedback_fn("* checking where the instance actually runs"
8231 " (if this hangs, the hypervisor might be in"
8232 " a bad state)")
8233 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8234 for node, result in ins_l.items():
8235 result.Raise("Can't contact node %s" % node)
8237 runningon_source = instance.name in ins_l[source_node].payload
8238 runningon_target = instance.name in ins_l[target_node].payload
8240 if runningon_source and runningon_target:
8241 raise errors.OpExecError("Instance seems to be running on two nodes,"
8242 " or the hypervisor is confused; you will have"
8243 " to ensure manually that it runs only on one"
8244 " and restart this operation")
8246 if not (runningon_source or runningon_target):
8247 raise errors.OpExecError("Instance does not seem to be running at all;"
8248 " in this case it's safer to repair by"
8249 " running 'gnt-instance stop' to ensure disk"
8250 " shutdown, and then restarting it")
8252 if runningon_target:
8253 # the migration has actually succeeded, we need to update the config
8254 self.feedback_fn("* instance running on secondary node (%s),"
8255 " updating config" % target_node)
8256 instance.primary_node = target_node
8257 self.cfg.Update(instance, self.feedback_fn)
8258 demoted_node = source_node
8260 self.feedback_fn("* instance confirmed to be running on its"
8261 " primary node (%s)" % source_node)
8262 demoted_node = target_node
8264 if instance.disk_template in constants.DTS_INT_MIRROR:
8265 self._EnsureSecondary(demoted_node)
8266 try:
8267 self._WaitUntilSync()
8268 except errors.OpExecError:
8269 # we ignore here errors, since if the device is standalone, it
8270 # won't be able to sync
8271 pass
8272 self._GoStandalone()
8273 self._GoReconnect(False)
8274 self._WaitUntilSync()
8276 self.feedback_fn("* done")
8278 def _RevertDiskStatus(self):
8279 """Try to revert the disk status after a failed migration.
8282 target_node = self.target_node
8283 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8284 return
8286 try:
8287 self._EnsureSecondary(target_node)
8288 self._GoStandalone()
8289 self._GoReconnect(False)
8290 self._WaitUntilSync()
8291 except errors.OpExecError, err:
8292 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8293 " please try to recover the instance manually;"
8294 " error '%s'" % str(err))
8296 def _AbortMigration(self):
8297 """Call the hypervisor code to abort a started migration.
8300 instance = self.instance
8301 target_node = self.target_node
8302 source_node = self.source_node
8303 migration_info = self.migration_info
8305 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8306 instance,
8307 migration_info,
8308 False)
8309 abort_msg = abort_result.fail_msg
8310 if abort_msg:
8311 logging.error("Aborting migration failed on target node %s: %s",
8312 target_node, abort_msg)
8313 # Don't raise an exception here, as we still have to try to revert the
8314 # disk status, even if this step failed.
8316 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8317 instance, False, self.live)
8318 abort_msg = abort_result.fail_msg
8319 if abort_msg:
8320 logging.error("Aborting migration failed on source node %s: %s",
8321 source_node, abort_msg)
8323 def _ExecMigration(self):
8324 """Migrate an instance.
8326 The migrate is done by:
8327 - change the disks into dual-master mode
8328 - wait until disks are fully synchronized again
8329 - migrate the instance
8330 - change disks on the new secondary node (the old primary) to secondary
8331 - wait until disks are fully synchronized
8332 - change disks into single-master mode
8335 instance = self.instance
8336 target_node = self.target_node
8337 source_node = self.source_node
8339 # Check for hypervisor version mismatch and warn the user.
8340 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8341 None, [self.instance.hypervisor])
8342 for ninfo in nodeinfo.values():
8343 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8344 ninfo.node)
8345 (_, _, (src_info, )) = nodeinfo[source_node].payload
8346 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8348 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8349 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8350 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8351 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8352 if src_version != dst_version:
8353 self.feedback_fn("* warning: hypervisor version mismatch between"
8354 " source (%s) and target (%s) node" %
8355 (src_version, dst_version))
8357 self.feedback_fn("* checking disk consistency between source and target")
8358 for (idx, dev) in enumerate(instance.disks):
8359 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8360 raise errors.OpExecError("Disk %s is degraded or not fully"
8361 " synchronized on target node,"
8362 " aborting migration" % idx)
8364 if self.current_mem > self.tgt_free_mem:
8365 if not self.allow_runtime_changes:
8366 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8367 " free memory to fit instance %s on target"
8368 " node %s (have %dMB, need %dMB)" %
8369 (instance.name, target_node,
8370 self.tgt_free_mem, self.current_mem))
8371 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8372 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8373 instance,
8374 self.tgt_free_mem)
8375 rpcres.Raise("Cannot modify instance runtime memory")
8377 # First get the migration information from the remote node
8378 result = self.rpc.call_migration_info(source_node, instance)
8379 msg = result.fail_msg
8380 if msg:
8381 log_err = ("Failed fetching source migration information from %s: %s" %
8382 (source_node, msg))
8383 logging.error(log_err)
8384 raise errors.OpExecError(log_err)
8386 self.migration_info = migration_info = result.payload
8388 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8389 # Then switch the disks to master/master mode
8390 self._EnsureSecondary(target_node)
8391 self._GoStandalone()
8392 self._GoReconnect(True)
8393 self._WaitUntilSync()
8395 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8396 result = self.rpc.call_accept_instance(target_node,
8397 instance,
8398 migration_info,
8399 self.nodes_ip[target_node])
8401 msg = result.fail_msg
8402 if msg:
8403 logging.error("Instance pre-migration failed, trying to revert"
8404 " disk status: %s", msg)
8405 self.feedback_fn("Pre-migration failed, aborting")
8406 self._AbortMigration()
8407 self._RevertDiskStatus()
8408 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8409 (instance.name, msg))
8411 self.feedback_fn("* migrating instance to %s" % target_node)
8412 result = self.rpc.call_instance_migrate(source_node, instance,
8413 self.nodes_ip[target_node],
8414 self.live)
8415 msg = result.fail_msg
8416 if msg:
8417 logging.error("Instance migration failed, trying to revert"
8418 " disk status: %s", msg)
8419 self.feedback_fn("Migration failed, aborting")
8420 self._AbortMigration()
8421 self._RevertDiskStatus()
8422 raise errors.OpExecError("Could not migrate instance %s: %s" %
8423 (instance.name, msg))
8425 self.feedback_fn("* starting memory transfer")
8426 last_feedback = time.time()
8427 while True:
8428 result = self.rpc.call_instance_get_migration_status(source_node,
8429 instance)
8430 msg = result.fail_msg
8431 ms = result.payload # MigrationStatus instance
8432 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8433 logging.error("Instance migration failed, trying to revert"
8434 " disk status: %s", msg)
8435 self.feedback_fn("Migration failed, aborting")
8436 self._AbortMigration()
8437 self._RevertDiskStatus()
8438 raise errors.OpExecError("Could not migrate instance %s: %s" %
8439 (instance.name, msg))
8441 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8442 self.feedback_fn("* memory transfer complete")
8443 break
8445 if (utils.TimeoutExpired(last_feedback,
8446 self._MIGRATION_FEEDBACK_INTERVAL) and
8447 ms.transferred_ram is not None):
8448 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8449 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8450 last_feedback = time.time()
8452 time.sleep(self._MIGRATION_POLL_INTERVAL)
8454 result = self.rpc.call_instance_finalize_migration_src(source_node,
8455 instance,
8456 True,
8457 self.live)
8458 msg = result.fail_msg
8459 if msg:
8460 logging.error("Instance migration succeeded, but finalization failed"
8461 " on the source node: %s", msg)
8462 raise errors.OpExecError("Could not finalize instance migration: %s" %
8463 msg)
8465 instance.primary_node = target_node
8467 # distribute new instance config to the other nodes
8468 self.cfg.Update(instance, self.feedback_fn)
8470 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8471 instance,
8472 migration_info,
8473 True)
8474 msg = result.fail_msg
8475 if msg:
8476 logging.error("Instance migration succeeded, but finalization failed"
8477 " on the target node: %s", msg)
8478 raise errors.OpExecError("Could not finalize instance migration: %s" %
8479 msg)
8481 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8482 self._EnsureSecondary(source_node)
8483 self._WaitUntilSync()
8484 self._GoStandalone()
8485 self._GoReconnect(False)
8486 self._WaitUntilSync()
8488 # If the instance's disk template is `rbd' and there was a successful
8489 # migration, unmap the device from the source node.
8490 if self.instance.disk_template == constants.DT_RBD:
8491 disks = _ExpandCheckDisks(instance, instance.disks)
8492 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8493 for disk in disks:
8494 result = self.rpc.call_blockdev_shutdown(source_node, disk)
8495 msg = result.fail_msg
8496 if msg:
8497 logging.error("Migration was successful, but couldn't unmap the"
8498 " block device %s on source node %s: %s",
8499 disk.iv_name, source_node, msg)
8500 logging.error("You need to unmap the device %s manually on %s",
8501 disk.iv_name, source_node)
8503 self.feedback_fn("* done")
8505 def _ExecFailover(self):
8506 """Failover an instance.
8508 The failover is done by shutting it down on its present node and
8509 starting it on the secondary.
8512 instance = self.instance
8513 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8515 source_node = instance.primary_node
8516 target_node = self.target_node
8518 if instance.admin_state == constants.ADMINST_UP:
8519 self.feedback_fn("* checking disk consistency between source and target")
8520 for (idx, dev) in enumerate(instance.disks):
8521 # for drbd, these are drbd over lvm
8522 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8523 False):
8524 if primary_node.offline:
8525 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8526 " target node %s" %
8527 (primary_node.name, idx, target_node))
8528 elif not self.ignore_consistency:
8529 raise errors.OpExecError("Disk %s is degraded on target node,"
8530 " aborting failover" % idx)
8531 else:
8532 self.feedback_fn("* not checking disk consistency as instance is not"
8533 " running")
8535 self.feedback_fn("* shutting down instance on source node")
8536 logging.info("Shutting down instance %s on node %s",
8537 instance.name, source_node)
8539 result = self.rpc.call_instance_shutdown(source_node, instance,
8540 self.shutdown_timeout)
8541 msg = result.fail_msg
8542 if msg:
8543 if self.ignore_consistency or primary_node.offline:
8544 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8545 " proceeding anyway; please make sure node"
8546 " %s is down; error details: %s",
8547 instance.name, source_node, source_node, msg)
8548 else:
8549 raise errors.OpExecError("Could not shutdown instance %s on"
8550 " node %s: %s" %
8551 (instance.name, source_node, msg))
8553 self.feedback_fn("* deactivating the instance's disks on source node")
8554 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8555 raise errors.OpExecError("Can't shut down the instance's disks")
8557 instance.primary_node = target_node
8558 # distribute new instance config to the other nodes
8559 self.cfg.Update(instance, self.feedback_fn)
8561 # Only start the instance if it's marked as up
8562 if instance.admin_state == constants.ADMINST_UP:
8563 self.feedback_fn("* activating the instance's disks on target node %s" %
8564 target_node)
8565 logging.info("Starting instance %s on node %s",
8566 instance.name, target_node)
8568 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8569 ignore_secondaries=True)
8570 if not disks_ok:
8571 _ShutdownInstanceDisks(self.lu, instance)
8572 raise errors.OpExecError("Can't activate the instance's disks")
8574 self.feedback_fn("* starting the instance on the target node %s" %
8575 target_node)
8576 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8577 False)
8578 msg = result.fail_msg
8579 if msg:
8580 _ShutdownInstanceDisks(self.lu, instance)
8581 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8582 (instance.name, target_node, msg))
8584 def Exec(self, feedback_fn):
8585 """Perform the migration.
8588 self.feedback_fn = feedback_fn
8589 self.source_node = self.instance.primary_node
8591 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8592 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8593 self.target_node = self.instance.secondary_nodes[0]
8594 # Otherwise self.target_node has been populated either
8595 # directly, or through an iallocator.
8597 self.all_nodes = [self.source_node, self.target_node]
8598 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8599 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8601 if self.failover:
8602 feedback_fn("Failover instance %s" % self.instance.name)
8603 self._ExecFailover()
8604 else:
8605 feedback_fn("Migrating instance %s" % self.instance.name)
8607 if self.cleanup:
8608 return self._ExecCleanup()
8609 else:
8610 return self._ExecMigration()
8613 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8614 force_open):
8615 """Wrapper around L{_CreateBlockDevInner}.
8617 This method annotates the root device first.
8619 """
8620 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8621 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8622 force_open)
8625 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8626 info, force_open):
8627 """Create a tree of block devices on a given node.
8629 If this device type has to be created on secondaries, create it and
8630 all its children.
8632 If not, just recurse to children keeping the same 'force' value.
8634 @attention: The device has to be annotated already.
8636 @param lu: the lu on whose behalf we execute
8637 @param node: the node on which to create the device
8638 @type instance: L{objects.Instance}
8639 @param instance: the instance which owns the device
8640 @type device: L{objects.Disk}
8641 @param device: the device to create
8642 @type force_create: boolean
8643 @param force_create: whether to force creation of this device; this
8644 will be changed to True whenever we find a device which has
8645 CreateOnSecondary() attribute
8646 @param info: the extra 'metadata' we should attach to the device
8647 (this will be represented as a LVM tag)
8648 @type force_open: boolean
8649 @param force_open: this parameter will be passed to the
8650 L{backend.BlockdevCreate} function where it specifies
8651 whether we run on primary or not, and it affects both
8652 the child assembly and the device's own Open() execution
8654 """
8655 if device.CreateOnSecondary():
8656 force_create = True
8658 if device.children:
8659 for child in device.children:
8660 _CreateBlockDevInner(lu, node, instance, child, force_create,
8661 info, force_open)
8663 if not force_create:
8664 return
8666 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8669 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8670 """Create a single block device on a given node.
8672 This will not recurse over children of the device, so they must be
8673 created in advance.
8675 @param lu: the lu on whose behalf we execute
8676 @param node: the node on which to create the device
8677 @type instance: L{objects.Instance}
8678 @param instance: the instance which owns the device
8679 @type device: L{objects.Disk}
8680 @param device: the device to create
8681 @param info: the extra 'metadata' we should attach to the device
8682 (this will be represented as a LVM tag)
8683 @type force_open: boolean
8684 @param force_open: this parameter will be passed to the
8685 L{backend.BlockdevCreate} function where it specifies
8686 whether we run on primary or not, and it affects both
8687 the child assembly and the device's own Open() execution
8689 """
8690 lu.cfg.SetDiskID(device, node)
8691 result = lu.rpc.call_blockdev_create(node, device, device.size,
8692 instance.name, force_open, info)
8693 result.Raise("Can't create block device %s on"
8694 " node %s for instance %s" % (device, node, instance.name))
8695 if device.physical_id is None:
8696 device.physical_id = result.payload
8699 def _GenerateUniqueNames(lu, exts):
8700 """Generate a suitable LV name.
8702 This will generate a logical volume name for the given instance.
8704 """
8705 results = []
8706 for val in exts:
8707 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8708 results.append("%s%s" % (new_id, val))
8710 return results
8712 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8713 iv_name, p_minor, s_minor):
8714 """Generate a drbd8 device complete with its children.
8716 """
8717 assert len(vgnames) == len(names) == 2
8718 port = lu.cfg.AllocatePort()
8719 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8721 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8722 logical_id=(vgnames[0], names[0]),
8723 params={})
8724 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8725 logical_id=(vgnames[1], names[1]),
8726 params={})
8727 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8728 logical_id=(primary, secondary, port,
8729 p_minor, s_minor,
8730 shared_secret),
8731 children=[dev_data, dev_meta],
8732 iv_name=iv_name, params={})
8734 return drbd_dev
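# The resulting device tree for one disk is therefore (illustratively):
#   drbd8 device (size MB, minors p_minor/s_minor, shared secret)
#    +- data LV:  vgnames[0]/names[0], size MB
#    +- meta LV:  vgnames[1]/names[1], DRBD_META_SIZE MB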
8736 _DISK_TEMPLATE_NAME_PREFIX = {
8737 constants.DT_PLAIN: "",
8738 constants.DT_RBD: ".rbd",
8739 }
8742 _DISK_TEMPLATE_DEVICE_TYPE = {
8743 constants.DT_PLAIN: constants.LD_LV,
8744 constants.DT_FILE: constants.LD_FILE,
8745 constants.DT_SHARED_FILE: constants.LD_FILE,
8746 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8747 constants.DT_RBD: constants.LD_RBD,
8748 }
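# DT_DISKLESS and DT_DRBD8 are intentionally absent from this map: the
# diskless and DRBD8 cases are handled by dedicated branches in
# _GenerateDiskTemplate below and never reach the generic lookup.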
8751 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8752 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8753 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8754 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8755 """Generate the entire disk layout for a given template type.
8758 #TODO: compute space requirements
8760 vgname = lu.cfg.GetVGName()
8761 disk_count = len(disk_info)
8763 disks = []
8764 if template_name == constants.DT_DISKLESS:
8765 pass
8766 elif template_name == constants.DT_DRBD8:
8767 if len(secondary_nodes) != 1:
8768 raise errors.ProgrammerError("Wrong template configuration")
8769 remote_node = secondary_nodes[0]
8770 minors = lu.cfg.AllocateDRBDMinor(
8771 [primary_node, remote_node] * len(disk_info), instance_name)
8773 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8774 full_disk_params)
8775 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8777 names = []
8778 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8779 for i in range(disk_count)]):
8780 names.append(lv_prefix + "_data")
8781 names.append(lv_prefix + "_meta")
8782 for idx, disk in enumerate(disk_info):
8783 disk_index = idx + base_index
8784 data_vg = disk.get(constants.IDISK_VG, vgname)
8785 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8786 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8787 disk[constants.IDISK_SIZE],
8788 [data_vg, meta_vg],
8789 names[idx * 2:idx * 2 + 2],
8790 "disk/%d" % disk_index,
8791 minors[idx * 2], minors[idx * 2 + 1])
8792 disk_dev.mode = disk[constants.IDISK_MODE]
8793 disks.append(disk_dev)
8794 else:
8795 if secondary_nodes:
8796 raise errors.ProgrammerError("Wrong template configuration")
8798 if template_name == constants.DT_FILE:
8799 _req_file_storage()
8800 elif template_name == constants.DT_SHARED_FILE:
8801 _req_shr_file_storage()
8803 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8804 if name_prefix is None:
8805 names = None
8806 else:
8807 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8808 (name_prefix, base_index + i)
8809 for i in range(disk_count)])
8811 if template_name == constants.DT_PLAIN:
8812 def logical_id_fn(idx, _, disk):
8813 vg = disk.get(constants.IDISK_VG, vgname)
8814 return (vg, names[idx])
8815 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8816 logical_id_fn = \
8817 lambda _, disk_index, disk: (file_driver,
8818 "%s/disk%d" % (file_storage_dir,
8819 disk_index))
8820 elif template_name == constants.DT_BLOCK:
8821 logical_id_fn = \
8822 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8823 disk[constants.IDISK_ADOPT])
8824 elif template_name == constants.DT_RBD:
8825 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8826 else:
8827 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8829 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8831 for idx, disk in enumerate(disk_info):
8832 disk_index = idx + base_index
8833 size = disk[constants.IDISK_SIZE]
8834 feedback_fn("* disk %s, size %s" %
8835 (disk_index, utils.FormatUnit(size, "h")))
8836 disks.append(objects.Disk(dev_type=dev_type, size=size,
8837 logical_id=logical_id_fn(idx, disk_index, disk),
8838 iv_name="disk/%d" % disk_index,
8839 mode=disk[constants.IDISK_MODE],
8840 params={}))
8842 return disks
8845 def _GetInstanceInfoText(instance):
8846 """Compute the text that should be added to the disk's metadata.
8849 return "originstname+%s" % instance.name
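# Example (hypothetical instance name): for an instance called
# "instance1.example.com" the resulting LVM tag is
# "originstname+instance1.example.com".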
8852 def _CalcEta(time_taken, written, total_size):
8853 """Calculates the ETA based on size written and total size.
8855 @param time_taken: The time taken so far
8856 @param written: amount written so far
8857 @param total_size: The total size of data to be written
8858 @return: The remaining time in seconds
8861 avg_time = time_taken / float(written)
8862 return (total_size - written) * avg_time
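# Worked example (illustrative numbers): 64 MB written out of 1024 MB in
# 30 seconds gives avg_time = 30/64 ~= 0.47 s/MB, so the remaining
# (1024 - 64) MB are estimated at roughly 450 seconds.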
8865 def _WipeDisks(lu, instance):
8866 """Wipes instance disks.
8868 @type lu: L{LogicalUnit}
8869 @param lu: the logical unit on whose behalf we execute
8870 @type instance: L{objects.Instance}
8871 @param instance: the instance whose disks we should create
8872 @return: the success of the wipe
8875 node = instance.primary_node
8877 for device in instance.disks:
8878 lu.cfg.SetDiskID(device, node)
8880 logging.info("Pause sync of instance %s disks", instance.name)
8881 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8882 (instance.disks, instance),
8883 True)
8885 for idx, success in enumerate(result.payload):
8886 if not success:
8887 logging.warn("pause-sync of instance %s for disks %d failed",
8888 instance.name, idx)
8890 try:
8891 for idx, device in enumerate(instance.disks):
8892 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8893 # MAX_WIPE_CHUNK at max
8894 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8895 constants.MIN_WIPE_CHUNK_PERCENT)
8896 # we _must_ make this an int, otherwise rounding errors will
8897 # occur
8898 wipe_chunk_size = int(wipe_chunk_size)
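# Illustrative numbers (the actual MIN_WIPE_CHUNK_PERCENT/MAX_WIPE_CHUNK
# values are defined in constants.py): with a 10% minimum percentage and a
# 1024 MB cap, a 4096 MB disk is wiped in ~409 MB chunks, while a 100 GiB
# disk is capped at 1024 MB per chunk.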
8900 lu.LogInfo("* Wiping disk %d", idx)
8901 logging.info("Wiping disk %d for instance %s, node %s using"
8902 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8904 offset = 0
8905 size = device.size
8906 last_output = 0
8907 start_time = time.time()
8909 while offset < size:
8910 wipe_size = min(wipe_chunk_size, size - offset)
8911 logging.debug("Wiping disk %d, offset %s, chunk %s",
8912 idx, offset, wipe_size)
8913 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
8914 wipe_size)
8915 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8916 (idx, offset, wipe_size))
8917 now = time.time()
8918 offset += wipe_size
8919 if now - last_output >= 60:
8920 eta = _CalcEta(now - start_time, offset, size)
8921 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8922 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8923 last_output = now
8924 finally:
8925 logging.info("Resume sync of instance %s disks", instance.name)
8927 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8928 (instance.disks, instance),
8929 False)
8931 for idx, success in enumerate(result.payload):
8932 if not success:
8933 lu.LogWarning("Resume sync of disk %d failed, please have a"
8934 " look at the status and troubleshoot the issue", idx)
8935 logging.warn("resume-sync of instance %s for disks %d failed",
8936 instance.name, idx)
8939 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8940 """Create all disks for an instance.
8942 This abstracts away some work from AddInstance.
8944 @type lu: L{LogicalUnit}
8945 @param lu: the logical unit on whose behalf we execute
8946 @type instance: L{objects.Instance}
8947 @param instance: the instance whose disks we should create
8948 @type to_skip: list
8949 @param to_skip: list of indices to skip
8950 @type target_node: string
8951 @param target_node: if passed, overrides the target node for creation
8953 @return: the success of the creation
8956 info = _GetInstanceInfoText(instance)
8957 if target_node is None:
8958 pnode = instance.primary_node
8959 all_nodes = instance.all_nodes
8960 else:
8961 pnode = target_node
8962 all_nodes = [pnode]
8964 if instance.disk_template in constants.DTS_FILEBASED:
8965 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8966 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8968 result.Raise("Failed to create directory '%s' on"
8969 " node %s" % (file_storage_dir, pnode))
8971 # Note: this needs to be kept in sync with adding of disks in
8972 # LUInstanceSetParams
8973 for idx, device in enumerate(instance.disks):
8974 if to_skip and idx in to_skip:
8975 continue
8976 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
8977 #HARDCODE
8978 for node in all_nodes:
8979 f_create = node == pnode
8980 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8983 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
8984 """Remove all disks for an instance.
8986 This abstracts away some work from `AddInstance()` and
8987 `RemoveInstance()`. Note that in case some of the devices couldn't
8988 be removed, the removal will continue with the other ones (compare
8989 with `_CreateDisks()`).
8991 @type lu: L{LogicalUnit}
8992 @param lu: the logical unit on whose behalf we execute
8993 @type instance: L{objects.Instance}
8994 @param instance: the instance whose disks we should remove
8995 @type target_node: string
8996 @param target_node: used to override the node on which to remove the disks
8998 @return: the success of the removal
9001 logging.info("Removing block devices for instance %s", instance.name)
9003 all_result = True
9004 ports_to_release = set()
9005 for (idx, device) in enumerate(instance.disks):
9006 if target_node:
9007 edata = [(target_node, device)]
9008 else:
9009 edata = device.ComputeNodeTree(instance.primary_node)
9010 for node, disk in edata:
9011 lu.cfg.SetDiskID(disk, node)
9012 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
9013 if msg:
9014 lu.LogWarning("Could not remove disk %s on node %s,"
9015 " continuing anyway: %s", idx, node, msg)
9016 all_result = False
9018 # if this is a DRBD disk, return its port to the pool
9019 if device.dev_type in constants.LDS_DRBD:
9020 ports_to_release.add(device.logical_id[2])
9022 if all_result or ignore_failures:
9023 for port in ports_to_release:
9024 lu.cfg.AddTcpUdpPort(port)
9026 if instance.disk_template == constants.DT_FILE:
9027 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9028 if target_node:
9029 tgt = target_node
9030 else:
9031 tgt = instance.primary_node
9032 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9033 if result.fail_msg:
9034 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9035 file_storage_dir, instance.primary_node, result.fail_msg)
9036 all_result = False
9038 return all_result
9041 def _ComputeDiskSizePerVG(disk_template, disks):
9042 """Compute disk size requirements in the volume group
9045 def _compute(disks, payload):
9046 """Universal algorithm.
9048 """
9049 vgs = {}
9050 for disk in disks:
9051 vgs[disk[constants.IDISK_VG]] = \
9052 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9054 return vgs
9056 # Required free disk space as a function of disk and swap space
9057 req_size_dict = {
9058 constants.DT_DISKLESS: {},
9059 constants.DT_PLAIN: _compute(disks, 0),
9060 # 128 MB are added for drbd metadata for each disk
9061 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9062 constants.DT_FILE: {},
9063 constants.DT_SHARED_FILE: {},
9064 }
9066 if disk_template not in req_size_dict:
9067 raise errors.ProgrammerError("Disk template '%s' size requirement"
9068 " is unknown" % disk_template)
9070 return req_size_dict[disk_template]
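# Illustrative example (hypothetical VG name): two 1024 MB DRBD disks in
# volume group "xenvg" yield {"xenvg": 2 * (1024 + DRBD_META_SIZE)} MB,
# i.e. 2304 MB with the 128 MB metadata volume per disk.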
9073 def _ComputeDiskSize(disk_template, disks):
9074 """Compute disk size requirements in the volume group
9077 # Required free disk space as a function of disk and swap space
9078 req_size_dict = {
9079 constants.DT_DISKLESS: None,
9080 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9081 # 128 MB are added for drbd metadata for each disk
9082 constants.DT_DRBD8:
9083 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9084 constants.DT_FILE: None,
9085 constants.DT_SHARED_FILE: 0,
9086 constants.DT_BLOCK: 0,
9087 constants.DT_RBD: 0,
9088 }
9090 if disk_template not in req_size_dict:
9091 raise errors.ProgrammerError("Disk template '%s' size requirement"
9092 " is unknown" % disk_template)
9094 return req_size_dict[disk_template]
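# Illustrative example: a DRBD8 instance with two 1024 MB disks needs
# 2 * (1024 + 128) = 2304 MB of volume group space; diskless and file-based
# templates need none (None), while shared-file/block/rbd report 0.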
9097 def _FilterVmNodes(lu, nodenames):
9098 """Filters out non-vm_capable nodes from a list.
9100 @type lu: L{LogicalUnit}
9101 @param lu: the logical unit for which we check
9102 @type nodenames: list
9103 @param nodenames: the list of nodes on which we should check
9105 @return: the list of vm-capable nodes
9108 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9109 return [name for name in nodenames if name not in vm_nodes]
9112 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9113 """Hypervisor parameter validation.
9115 This function abstracts the hypervisor parameter validation to be
9116 used in both instance create and instance modify.
9118 @type lu: L{LogicalUnit}
9119 @param lu: the logical unit for which we check
9120 @type nodenames: list
9121 @param nodenames: the list of nodes on which we should check
9122 @type hvname: string
9123 @param hvname: the name of the hypervisor we should use
9124 @type hvparams: dict
9125 @param hvparams: the parameters which we need to check
9126 @raise errors.OpPrereqError: if the parameters are not valid
9129 nodenames = _FilterVmNodes(lu, nodenames)
9131 cluster = lu.cfg.GetClusterInfo()
9132 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9134 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9135 for node in nodenames:
9136 info = hvinfo[node]
9137 if info.offline:
9138 continue
9139 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9142 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9143 """OS parameters validation.
9145 @type lu: L{LogicalUnit}
9146 @param lu: the logical unit for which we check
9147 @type required: boolean
9148 @param required: whether the validation should fail if the OS is not
9149 found
9150 @type nodenames: list
9151 @param nodenames: the list of nodes on which we should check
9152 @type osname: string
9153 @param osname: the name of the OS we should use
9154 @type osparams: dict
9155 @param osparams: the parameters which we need to check
9156 @raise errors.OpPrereqError: if the parameters are not valid
9159 nodenames = _FilterVmNodes(lu, nodenames)
9160 result = lu.rpc.call_os_validate(nodenames, required, osname,
9161 [constants.OS_VALIDATE_PARAMETERS],
9162 osparams)
9163 for node, nres in result.items():
9164 # we don't check for offline cases since this should be run only
9165 # against the master node and/or an instance's nodes
9166 nres.Raise("OS Parameters validation failed on node %s" % node)
9167 if not nres.payload:
9168 lu.LogInfo("OS %s not found on node %s, validation skipped",
9169 osname, node)
9172 class LUInstanceCreate(LogicalUnit):
9173 """Create an instance.
9176 HPATH = "instance-add"
9177 HTYPE = constants.HTYPE_INSTANCE
9178 REQ_BGL = False
9180 def CheckArguments(self):
9184 # do not require name_check to ease forward/backward compatibility
9186 if self.op.no_install and self.op.start:
9187 self.LogInfo("No-installation mode selected, disabling startup")
9188 self.op.start = False
9189 # validate/normalize the instance name
9190 self.op.instance_name = \
9191 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9193 if self.op.ip_check and not self.op.name_check:
9194 # TODO: make the ip check more flexible and not depend on the name check
9195 raise errors.OpPrereqError("Cannot do IP address check without a name"
9196 " check", errors.ECODE_INVAL)
9198 # check nics' parameter names
9199 for nic in self.op.nics:
9200 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9202 # check disks' parameter names and consistent adopt/no-adopt strategy
9203 has_adopt = has_no_adopt = False
9204 for disk in self.op.disks:
9205 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9206 if constants.IDISK_ADOPT in disk:
9207 has_adopt = True
9208 else:
9209 has_no_adopt = True
9210 if has_adopt and has_no_adopt:
9211 raise errors.OpPrereqError("Either all disks are adopted or none is",
9212 errors.ECODE_INVAL)
9213 if has_adopt:
9214 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9215 raise errors.OpPrereqError("Disk adoption is not supported for the"
9216 " '%s' disk template" %
9217 self.op.disk_template,
9218 errors.ECODE_INVAL)
9219 if self.op.iallocator is not None:
9220 raise errors.OpPrereqError("Disk adoption not allowed with an"
9221 " iallocator script", errors.ECODE_INVAL)
9222 if self.op.mode == constants.INSTANCE_IMPORT:
9223 raise errors.OpPrereqError("Disk adoption not allowed for"
9224 " instance import", errors.ECODE_INVAL)
9225 else:
9226 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9227 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9228 " but no 'adopt' parameter given" %
9229 self.op.disk_template,
9230 errors.ECODE_INVAL)
9232 self.adopt_disks = has_adopt
9234 # instance name verification
9235 if self.op.name_check:
9236 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9237 self.op.instance_name = self.hostname1.name
9238 # used in CheckPrereq for ip ping check
9239 self.check_ip = self.hostname1.ip
9240 else:
9241 self.check_ip = None
9243 # file storage checks
9244 if (self.op.file_driver and
9245 not self.op.file_driver in constants.FILE_DRIVER):
9246 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9247 self.op.file_driver, errors.ECODE_INVAL)
9249 if self.op.disk_template == constants.DT_FILE:
9250 opcodes.RequireFileStorage()
9251 elif self.op.disk_template == constants.DT_SHARED_FILE:
9252 opcodes.RequireSharedFileStorage()
9254 ### Node/iallocator related checks
9255 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9257 if self.op.pnode is not None:
9258 if self.op.disk_template in constants.DTS_INT_MIRROR:
9259 if self.op.snode is None:
9260 raise errors.OpPrereqError("The networked disk templates need"
9261 " a mirror node", errors.ECODE_INVAL)
9262 elif self.op.snode:
9263 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9264 " template")
9265 self.op.snode = None
9267 self._cds = _GetClusterDomainSecret()
9269 if self.op.mode == constants.INSTANCE_IMPORT:
9270 # On import force_variant must be True, because if we forced it at
9271 # initial install, our only chance when importing it back is that it
9272 # works again!
9273 self.op.force_variant = True
9275 if self.op.no_install:
9276 self.LogInfo("No-installation mode has no effect during import")
9278 elif self.op.mode == constants.INSTANCE_CREATE:
9279 if self.op.os_type is None:
9280 raise errors.OpPrereqError("No guest OS specified",
9281 errors.ECODE_INVAL)
9282 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9283 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9284 " installation" % self.op.os_type,
9285 errors.ECODE_INVAL)
9286 if self.op.disk_template is None:
9287 raise errors.OpPrereqError("No disk template specified",
9288 errors.ECODE_INVAL)
9290 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9291 # Check handshake to ensure both clusters have the same domain secret
9292 src_handshake = self.op.source_handshake
9293 if not src_handshake:
9294 raise errors.OpPrereqError("Missing source handshake",
9295 errors.ECODE_INVAL)
9297 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9298 src_handshake)
9299 if errmsg:
9300 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9301 errors.ECODE_INVAL)
9303 # Load and check source CA
9304 self.source_x509_ca_pem = self.op.source_x509_ca
9305 if not self.source_x509_ca_pem:
9306 raise errors.OpPrereqError("Missing source X509 CA",
9307 errors.ECODE_INVAL)
9309 try:
9310 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9311 self._cds)
9312 except OpenSSL.crypto.Error, err:
9313 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9314 (err, ), errors.ECODE_INVAL)
9316 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9317 if errcode is not None:
9318 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9319 errors.ECODE_INVAL)
9321 self.source_x509_ca = cert
9323 src_instance_name = self.op.source_instance_name
9324 if not src_instance_name:
9325 raise errors.OpPrereqError("Missing source instance name",
9326 errors.ECODE_INVAL)
9328 self.source_instance_name = \
9329 netutils.GetHostname(name=src_instance_name).name
9331 else:
9332 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9333 self.op.mode, errors.ECODE_INVAL)
9335 def ExpandNames(self):
9336 """ExpandNames for CreateInstance.
9338 Figure out the right locks for instance creation.
9341 self.needed_locks = {}
9343 instance_name = self.op.instance_name
9344 # this is just a preventive check, but someone might still add this
9345 # instance in the meantime, and creation will fail at lock-add time
9346 if instance_name in self.cfg.GetInstanceList():
9347 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9348 instance_name, errors.ECODE_EXISTS)
9350 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9352 if self.op.iallocator:
9353 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9354 # specifying a group on instance creation and then selecting nodes from
9355 # that group
9356 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9357 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9358 else:
9359 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9360 nodelist = [self.op.pnode]
9361 if self.op.snode is not None:
9362 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9363 nodelist.append(self.op.snode)
9364 self.needed_locks[locking.LEVEL_NODE] = nodelist
9365 # Lock resources of instance's primary and secondary nodes (copy to
9366 # prevent accidential modification)
9367 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9369 # in case of import lock the source node too
9370 if self.op.mode == constants.INSTANCE_IMPORT:
9371 src_node = self.op.src_node
9372 src_path = self.op.src_path
9374 if src_path is None:
9375 self.op.src_path = src_path = self.op.instance_name
9377 if src_node is None:
9378 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9379 self.op.src_node = None
9380 if os.path.isabs(src_path):
9381 raise errors.OpPrereqError("Importing an instance from a path"
9382 " requires a source node option",
9383 errors.ECODE_INVAL)
9384 else:
9385 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9386 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9387 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9388 if not os.path.isabs(src_path):
9389 self.op.src_path = src_path = \
9390 utils.PathJoin(constants.EXPORT_DIR, src_path)
9392 def _RunAllocator(self):
9393 """Run the allocator based on input opcode.
9396 nics = [n.ToDict() for n in self.nics]
9397 ial = IAllocator(self.cfg, self.rpc,
9398 mode=constants.IALLOCATOR_MODE_ALLOC,
9399 name=self.op.instance_name,
9400 disk_template=self.op.disk_template,
9403 vcpus=self.be_full[constants.BE_VCPUS],
9404 memory=self.be_full[constants.BE_MAXMEM],
9405 spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9408 hypervisor=self.op.hypervisor,
9411 ial.Run(self.op.iallocator)
9413 if not ial.success:
9414 raise errors.OpPrereqError("Can't compute nodes using"
9415 " iallocator '%s': %s" %
9416 (self.op.iallocator, ial.info),
9417 errors.ECODE_NORES)
9418 if len(ial.result) != ial.required_nodes:
9419 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9420 " of nodes (%s), required %s" %
9421 (self.op.iallocator, len(ial.result),
9422 ial.required_nodes), errors.ECODE_FAULT)
9423 self.op.pnode = ial.result[0]
9424 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9425 self.op.instance_name, self.op.iallocator,
9426 utils.CommaJoin(ial.result))
9427 if ial.required_nodes == 2:
9428 self.op.snode = ial.result[1]
9430 def BuildHooksEnv(self):
9431 """Build hooks env.
9433 This runs on master, primary and secondary nodes of the instance.
9435 """
9436 env = {
9437 "ADD_MODE": self.op.mode,
9438 }
9439 if self.op.mode == constants.INSTANCE_IMPORT:
9440 env["SRC_NODE"] = self.op.src_node
9441 env["SRC_PATH"] = self.op.src_path
9442 env["SRC_IMAGES"] = self.src_images
9444 env.update(_BuildInstanceHookEnv(
9445 name=self.op.instance_name,
9446 primary_node=self.op.pnode,
9447 secondary_nodes=self.secondaries,
9448 status=self.op.start,
9449 os_type=self.op.os_type,
9450 minmem=self.be_full[constants.BE_MINMEM],
9451 maxmem=self.be_full[constants.BE_MAXMEM],
9452 vcpus=self.be_full[constants.BE_VCPUS],
9453 nics=_NICListToTuple(self, self.nics),
9454 disk_template=self.op.disk_template,
9455 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9456 for d in self.disks],
9459 hypervisor_name=self.op.hypervisor,
9461 ))
9463 return env
9465 def BuildHooksNodes(self):
9466 """Build hooks nodes.
9468 """
9469 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9470 return nl, nl
9472 def _ReadExportInfo(self):
9473 """Reads the export information from disk.
9475 It will override the opcode source node and path with the actual
9476 information, if these two were not specified before.
9478 @return: the export information
9481 assert self.op.mode == constants.INSTANCE_IMPORT
9483 src_node = self.op.src_node
9484 src_path = self.op.src_path
9486 if src_node is None:
9487 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9488 exp_list = self.rpc.call_export_list(locked_nodes)
9489 found = False
9490 for node in exp_list:
9491 if exp_list[node].fail_msg:
9492 continue
9493 if src_path in exp_list[node].payload:
9494 found = True
9495 self.op.src_node = src_node = node
9496 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9497 src_path)
9498 break
9499 if not found:
9500 raise errors.OpPrereqError("No export found for relative path %s" %
9501 src_path, errors.ECODE_INVAL)
9503 _CheckNodeOnline(self, src_node)
9504 result = self.rpc.call_export_info(src_node, src_path)
9505 result.Raise("No export or invalid export found in dir %s" % src_path)
9507 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9508 if not export_info.has_section(constants.INISECT_EXP):
9509 raise errors.ProgrammerError("Corrupted export config",
9510 errors.ECODE_ENVIRON)
9512 ei_version = export_info.get(constants.INISECT_EXP, "version")
9513 if (int(ei_version) != constants.EXPORT_VERSION):
9514 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9515 (ei_version, constants.EXPORT_VERSION),
9516 errors.ECODE_ENVIRON)
9518 return export_info
9519 def _ReadExportParams(self, einfo):
9520 """Use export parameters as defaults.
9522 In case the opcode doesn't specify (as in override) some instance
9523 parameters, then try to use them from the export information, if
9527 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9529 if self.op.disk_template is None:
9530 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9531 self.op.disk_template = einfo.get(constants.INISECT_INS,
9532 "disk_template")
9533 if self.op.disk_template not in constants.DISK_TEMPLATES:
9534 raise errors.OpPrereqError("Disk template specified in configuration"
9535 " file is not one of the allowed values:"
9536 " %s" % " ".join(constants.DISK_TEMPLATES))
9537 else:
9538 raise errors.OpPrereqError("No disk template specified and the export"
9539 " is missing the disk_template information",
9540 errors.ECODE_INVAL)
9542 if not self.op.disks:
9543 disks = []
9544 # TODO: import the disk iv_name too
9545 for idx in range(constants.MAX_DISKS):
9546 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9547 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9548 disks.append({constants.IDISK_SIZE: disk_sz})
9549 self.op.disks = disks
9550 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9551 raise errors.OpPrereqError("No disk info specified and the export"
9552 " is missing the disk information",
9553 errors.ECODE_INVAL)
9555 if not self.op.nics:
9556 nics = []
9557 for idx in range(constants.MAX_NICS):
9558 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9559 ndict = {}
9560 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9561 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9562 ndict[name] = v
9563 nics.append(ndict)
9564 else:
9565 break
9567 self.op.nics = nics
9568 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9569 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9571 if (self.op.hypervisor is None and
9572 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9573 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9575 if einfo.has_section(constants.INISECT_HYP):
9576 # use the export parameters but do not override the ones
9577 # specified by the user
9578 for name, value in einfo.items(constants.INISECT_HYP):
9579 if name not in self.op.hvparams:
9580 self.op.hvparams[name] = value
9582 if einfo.has_section(constants.INISECT_BEP):
9583 # use the parameters, without overriding
9584 for name, value in einfo.items(constants.INISECT_BEP):
9585 if name not in self.op.beparams:
9586 self.op.beparams[name] = value
9587 # Compatibility for the old "memory" be param
9588 if name == constants.BE_MEMORY:
9589 if constants.BE_MAXMEM not in self.op.beparams:
9590 self.op.beparams[constants.BE_MAXMEM] = value
9591 if constants.BE_MINMEM not in self.op.beparams:
9592 self.op.beparams[constants.BE_MINMEM] = value
9593 else:
9594 # try to read the parameters old style, from the main section
9595 for name in constants.BES_PARAMETERS:
9596 if (name not in self.op.beparams and
9597 einfo.has_option(constants.INISECT_INS, name)):
9598 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9600 if einfo.has_section(constants.INISECT_OSP):
9601 # use the parameters, without overriding
9602 for name, value in einfo.items(constants.INISECT_OSP):
9603 if name not in self.op.osparams:
9604 self.op.osparams[name] = value
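# Illustrative example only (not part of the original module): given an
# export config along these lines (section and option names are assumptions
# for illustration),
#
#   [instance]
#   disk_template = plain
#   disk0_size = 10240
#   nic0_mac = aa:00:00:fa:3a:3f
#
#   [backend]
#   memory = 512
#
# _ReadExportParams above would fill in the matching opcode fields that the
# user left unset, and the legacy "memory" backend parameter would be copied
# into both maxmem and minmem unless the opcode already provides them.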
9606 def _RevertToDefaults(self, cluster):
9607 """Revert the instance parameters to the default values.
9611 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9612 for name in self.op.hvparams.keys():
9613 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9614 del self.op.hvparams[name]
9616 be_defs = cluster.SimpleFillBE({})
9617 for name in self.op.beparams.keys():
9618 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9619 del self.op.beparams[name]
9621 nic_defs = cluster.SimpleFillNIC({})
9622 for nic in self.op.nics:
9623 for name in constants.NICS_PARAMETERS:
9624 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9625 del nic[name]
9627 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9628 for name in self.op.osparams.keys():
9629 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9630 del self.op.osparams[name]
9632 def _CalculateFileStorageDir(self):
9633 """Calculate final instance file storage dir.
9636 # file storage dir calculation/check
9637 self.instance_file_storage_dir = None
9638 if self.op.disk_template in constants.DTS_FILEBASED:
9639 # build the full file storage dir path
9640 joinargs = []
9642 if self.op.disk_template == constants.DT_SHARED_FILE:
9643 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9644 else:
9645 get_fsd_fn = self.cfg.GetFileStorageDir
9647 cfg_storagedir = get_fsd_fn()
9648 if not cfg_storagedir:
9649 raise errors.OpPrereqError("Cluster file storage dir not defined")
9650 joinargs.append(cfg_storagedir)
9652 if self.op.file_storage_dir is not None:
9653 joinargs.append(self.op.file_storage_dir)
9655 joinargs.append(self.op.instance_name)
9657 # pylint: disable=W0142
9658 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
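# Illustrative example only (values are assumptions, not taken from this
# module): with a cluster file storage dir of "/srv/ganeti/file-storage",
# an opcode file_storage_dir of "web" and instance name
# "inst1.example.com", the PathJoin call above would produce
#   /srv/ganeti/file-storage/web/inst1.example.com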
9660 def CheckPrereq(self): # pylint: disable=R0914
9661 """Check prerequisites.
9664 self._CalculateFileStorageDir()
9666 if self.op.mode == constants.INSTANCE_IMPORT:
9667 export_info = self._ReadExportInfo()
9668 self._ReadExportParams(export_info)
9669 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9670 else:
9671 self._old_instance_name = None
9673 if (not self.cfg.GetVGName() and
9674 self.op.disk_template not in constants.DTS_NOT_LVM):
9675 raise errors.OpPrereqError("Cluster does not support lvm-based"
9676 " instances", errors.ECODE_STATE)
9678 if (self.op.hypervisor is None or
9679 self.op.hypervisor == constants.VALUE_AUTO):
9680 self.op.hypervisor = self.cfg.GetHypervisorType()
9682 cluster = self.cfg.GetClusterInfo()
9683 enabled_hvs = cluster.enabled_hypervisors
9684 if self.op.hypervisor not in enabled_hvs:
9685 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9686 " cluster (%s)" % (self.op.hypervisor,
9687 ",".join(enabled_hvs)),
9688 errors.ECODE_STATE)
9690 # Check tag validity
9691 for tag in self.op.tags:
9692 objects.TaggableObject.ValidateTag(tag)
9694 # check hypervisor parameter syntax (locally)
9695 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9696 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9697 self.op.hvparams)
9698 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9699 hv_type.CheckParameterSyntax(filled_hvp)
9700 self.hv_full = filled_hvp
9701 # check that we don't specify global parameters on an instance
9702 _CheckGlobalHvParams(self.op.hvparams)
9704 # fill and remember the beparams dict
9705 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9706 for param, value in self.op.beparams.iteritems():
9707 if value == constants.VALUE_AUTO:
9708 self.op.beparams[param] = default_beparams[param]
9709 objects.UpgradeBeParams(self.op.beparams)
9710 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9711 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9713 # build os parameters
9714 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9716 # now that hvp/bep are in final format, let's reset to defaults,
9717 # if told to do so
9718 if self.op.identify_defaults:
9719 self._RevertToDefaults(cluster)
9721 # NIC buildup
9722 self.nics = []
9723 for idx, nic in enumerate(self.op.nics):
9724 nic_mode_req = nic.get(constants.INIC_MODE, None)
9725 nic_mode = nic_mode_req
9726 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9727 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9729 # in routed mode, for the first nic, the default ip is 'auto'
9730 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9731 default_ip_mode = constants.VALUE_AUTO
9733 default_ip_mode = constants.VALUE_NONE
9735 # ip validity checks
9736 ip = nic.get(constants.INIC_IP, default_ip_mode)
9737 if ip is None or ip.lower() == constants.VALUE_NONE:
9738 nic_ip = None
9739 elif ip.lower() == constants.VALUE_AUTO:
9740 if not self.op.name_check:
9741 raise errors.OpPrereqError("IP address set to auto but name checks"
9742 " have been skipped",
9743 errors.ECODE_INVAL)
9744 nic_ip = self.hostname1.ip
9745 else:
9746 if not netutils.IPAddress.IsValid(ip):
9747 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9748 errors.ECODE_INVAL)
9749 nic_ip = ip
9751 # TODO: check the ip address for uniqueness
9752 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9753 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9754 errors.ECODE_INVAL)
9756 # MAC address verification
9757 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9758 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9759 mac = utils.NormalizeAndValidateMac(mac)
9761 try:
9762 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9763 except errors.ReservationError:
9764 raise errors.OpPrereqError("MAC address %s already in use"
9765 " in cluster" % mac,
9766 errors.ECODE_NOTUNIQUE)
9768 # Build nic parameters
9769 link = nic.get(constants.INIC_LINK, None)
9770 if link == constants.VALUE_AUTO:
9771 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9772 nicparams = {}
9773 if nic_mode_req:
9774 nicparams[constants.NIC_MODE] = nic_mode
9775 if link:
9776 nicparams[constants.NIC_LINK] = link
9778 check_params = cluster.SimpleFillNIC(nicparams)
9779 objects.NIC.CheckParameterSyntax(check_params)
9780 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9782 # disk checks/pre-build
9783 default_vg = self.cfg.GetVGName()
9784 self.disks = []
9785 for disk in self.op.disks:
9786 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9787 if mode not in constants.DISK_ACCESS_SET:
9788 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9789 mode, errors.ECODE_INVAL)
9790 size = disk.get(constants.IDISK_SIZE, None)
9791 if size is None:
9792 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9793 try:
9794 size = int(size)
9795 except (TypeError, ValueError):
9796 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9797 errors.ECODE_INVAL)
9799 data_vg = disk.get(constants.IDISK_VG, default_vg)
9800 new_disk = {
9801 constants.IDISK_SIZE: size,
9802 constants.IDISK_MODE: mode,
9803 constants.IDISK_VG: data_vg,
9804 }
9805 if constants.IDISK_METAVG in disk:
9806 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9807 if constants.IDISK_ADOPT in disk:
9808 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9809 self.disks.append(new_disk)
9811 if self.op.mode == constants.INSTANCE_IMPORT:
9812 disk_images = []
9813 for idx in range(len(self.disks)):
9814 option = "disk%d_dump" % idx
9815 if export_info.has_option(constants.INISECT_INS, option):
9816 # FIXME: are the old os-es, disk sizes, etc. useful?
9817 export_name = export_info.get(constants.INISECT_INS, option)
9818 image = utils.PathJoin(self.op.src_path, export_name)
9819 disk_images.append(image)
9820 else:
9821 disk_images.append(False)
9823 self.src_images = disk_images
9825 if self.op.instance_name == self._old_instance_name:
9826 for idx, nic in enumerate(self.nics):
9827 if nic.mac == constants.VALUE_AUTO:
9828 nic_mac_ini = "nic%d_mac" % idx
9829 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9831 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9833 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9834 if self.op.ip_check:
9835 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9836 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9837 (self.check_ip, self.op.instance_name),
9838 errors.ECODE_NOTUNIQUE)
9840 #### mac address generation
9841 # By generating here the mac address both the allocator and the hooks get
9842 # the real final mac address rather than the 'auto' or 'generate' value.
9843 # There is a race condition between the generation and the instance object
9844 # creation, which means that we know the mac is valid now, but we're not
9845 # sure it will be when we actually add the instance. If things go bad
9846 # adding the instance will abort because of a duplicate mac, and the
9847 # creation job will fail.
9848 for nic in self.nics:
9849 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9850 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9854 if self.op.iallocator is not None:
9855 self._RunAllocator()
9857 # Release all unneeded node locks
9858 _ReleaseLocks(self, locking.LEVEL_NODE,
9859 keep=filter(None, [self.op.pnode, self.op.snode,
9860 self.op.src_node]))
9861 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9862 keep=filter(None, [self.op.pnode, self.op.snode,
9863 self.op.src_node]))
9865 #### node related checks
9867 # check primary node
9868 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9869 assert self.pnode is not None, \
9870 "Cannot retrieve locked node %s" % self.op.pnode
9871 if pnode.offline:
9872 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9873 pnode.name, errors.ECODE_STATE)
9874 if pnode.drained:
9875 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9876 pnode.name, errors.ECODE_STATE)
9877 if not pnode.vm_capable:
9878 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9879 " '%s'" % pnode.name, errors.ECODE_STATE)
9881 self.secondaries = []
9883 # mirror node verification
9884 if self.op.disk_template in constants.DTS_INT_MIRROR:
9885 if self.op.snode == pnode.name:
9886 raise errors.OpPrereqError("The secondary node cannot be the"
9887 " primary node", errors.ECODE_INVAL)
9888 _CheckNodeOnline(self, self.op.snode)
9889 _CheckNodeNotDrained(self, self.op.snode)
9890 _CheckNodeVmCapable(self, self.op.snode)
9891 self.secondaries.append(self.op.snode)
9893 snode = self.cfg.GetNodeInfo(self.op.snode)
9894 if pnode.group != snode.group:
9895 self.LogWarning("The primary and secondary nodes are in two"
9896 " different node groups; the disk parameters"
9897 " from the first disk's node group will be"
9898 " used")
9900 nodenames = [pnode.name] + self.secondaries
9902 # Verify instance specs
9903 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
9904 ispec = {
9905 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9906 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9907 constants.ISPEC_DISK_COUNT: len(self.disks),
9908 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9909 constants.ISPEC_NIC_COUNT: len(self.nics),
9910 constants.ISPEC_SPINDLE_USE: spindle_use,
9911 }
9913 group_info = self.cfg.GetNodeGroup(pnode.group)
9914 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9915 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9916 if not self.op.ignore_ipolicy and res:
9917 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9918 " policy: %s") % (pnode.group,
9919 utils.CommaJoin(res)),
9922 if not self.adopt_disks:
9923 if self.op.disk_template == constants.DT_RBD:
9924 # _CheckRADOSFreeSpace() is just a placeholder.
9925 # Any function that checks prerequisites can be placed here.
9926 # Check if there is enough space on the RADOS cluster.
9927 _CheckRADOSFreeSpace()
9928 else:
9929 # Check lv size requirements, if not adopting
9930 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9931 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9933 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9934 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9935 disk[constants.IDISK_ADOPT])
9936 for disk in self.disks])
9937 if len(all_lvs) != len(self.disks):
9938 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9939 errors.ECODE_INVAL)
9940 for lv_name in all_lvs:
9941 try:
9942 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9943 # to ReserveLV uses the same syntax
9944 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9945 except errors.ReservationError:
9946 raise errors.OpPrereqError("LV named %s used by another instance" %
9947 lv_name, errors.ECODE_NOTUNIQUE)
9949 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9950 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9952 node_lvs = self.rpc.call_lv_list([pnode.name],
9953 vg_names.payload.keys())[pnode.name]
9954 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9955 node_lvs = node_lvs.payload
9957 delta = all_lvs.difference(node_lvs.keys())
9958 if delta:
9959 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9960 utils.CommaJoin(delta),
9961 errors.ECODE_INVAL)
9962 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9963 if online_lvs:
9964 raise errors.OpPrereqError("Online logical volumes found, cannot"
9965 " adopt: %s" % utils.CommaJoin(online_lvs),
9966 errors.ECODE_STATE)
9967 # update the size of disk based on what is found
9968 for dsk in self.disks:
9969 dsk[constants.IDISK_SIZE] = \
9970 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9971 dsk[constants.IDISK_ADOPT])][0]))
9973 elif self.op.disk_template == constants.DT_BLOCK:
9974 # Normalize and de-duplicate device paths
9975 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9976 for disk in self.disks])
9977 if len(all_disks) != len(self.disks):
9978 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9979 errors.ECODE_INVAL)
9980 baddisks = [d for d in all_disks
9981 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9982 if baddisks:
9983 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9984 " cannot be adopted" %
9985 (", ".join(baddisks),
9986 constants.ADOPTABLE_BLOCKDEV_ROOT),
9987 errors.ECODE_INVAL)
9989 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9990 list(all_disks))[pnode.name]
9991 node_disks.Raise("Cannot get block device information from node %s" %
9992 pnode.name)
9993 node_disks = node_disks.payload
9994 delta = all_disks.difference(node_disks.keys())
9995 if delta:
9996 raise errors.OpPrereqError("Missing block device(s): %s" %
9997 utils.CommaJoin(delta),
9998 errors.ECODE_INVAL)
9999 for dsk in self.disks:
10000 dsk[constants.IDISK_SIZE] = \
10001 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10003 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10005 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10006 # check OS parameters (remotely)
10007 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10009 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10011 # memory check on primary node
10012 #TODO(dynmem): use MINMEM for checking
10013 if self.op.start:
10014 _CheckNodeFreeMemory(self, self.pnode.name,
10015 "creating instance %s" % self.op.instance_name,
10016 self.be_full[constants.BE_MAXMEM],
10017 self.op.hypervisor)
10019 self.dry_run_result = list(nodenames)
10021 def Exec(self, feedback_fn):
10022 """Create and add the instance to the cluster.
10025 instance = self.op.instance_name
10026 pnode_name = self.pnode.name
10028 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10029 self.owned_locks(locking.LEVEL_NODE)), \
10030 "Node locks differ from node resource locks"
10032 ht_kind = self.op.hypervisor
10033 if ht_kind in constants.HTS_REQ_PORT:
10034 network_port = self.cfg.AllocatePort()
10035 else:
10036 network_port = None
10038 # This is ugly but we got a chicken-egg problem here
10039 # We can only take the group disk parameters, as the instance
10040 # has no disks yet (we are generating them right here).
10041 node = self.cfg.GetNodeInfo(pnode_name)
10042 nodegroup = self.cfg.GetNodeGroup(node.group)
10043 disks = _GenerateDiskTemplate(self,
10044 self.op.disk_template,
10045 instance, pnode_name,
10046 self.secondaries,
10047 self.disks,
10048 self.instance_file_storage_dir,
10049 self.op.file_driver,
10050 0,
10051 feedback_fn,
10052 self.cfg.GetGroupDiskParams(nodegroup))
10054 iobj = objects.Instance(name=instance, os=self.op.os_type,
10055 primary_node=pnode_name,
10056 nics=self.nics, disks=disks,
10057 disk_template=self.op.disk_template,
10058 admin_state=constants.ADMINST_DOWN,
10059 network_port=network_port,
10060 beparams=self.op.beparams,
10061 hvparams=self.op.hvparams,
10062 hypervisor=self.op.hypervisor,
10063 osparams=self.op.osparams,
10064 )
10066 if self.op.tags:
10067 for tag in self.op.tags:
10068 iobj.AddTag(tag)
10070 if self.adopt_disks:
10071 if self.op.disk_template == constants.DT_PLAIN:
10072 # rename LVs to the newly-generated names; we need to construct
10073 # 'fake' LV disks with the old data, plus the new unique_id
10074 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10075 rename_to = []
10076 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10077 rename_to.append(t_dsk.logical_id)
10078 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10079 self.cfg.SetDiskID(t_dsk, pnode_name)
10080 result = self.rpc.call_blockdev_rename(pnode_name,
10081 zip(tmp_disks, rename_to))
10082 result.Raise("Failed to rename adopted LVs")
10083 else:
10084 feedback_fn("* creating instance disks...")
10085 try:
10086 _CreateDisks(self, iobj)
10087 except errors.OpExecError:
10088 self.LogWarning("Device creation failed, reverting...")
10089 try:
10090 _RemoveDisks(self, iobj)
10091 finally:
10092 self.cfg.ReleaseDRBDMinors(instance)
10093 raise
10095 feedback_fn("adding instance %s to cluster config" % instance)
10097 self.cfg.AddInstance(iobj, self.proc.GetECId())
10099 # Declare that we don't want to remove the instance lock anymore, as we've
10100 # added the instance to the config
10101 del self.remove_locks[locking.LEVEL_INSTANCE]
10103 if self.op.mode == constants.INSTANCE_IMPORT:
10104 # Release unused nodes
10105 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10106 else:
10107 # Release all nodes
10108 _ReleaseLocks(self, locking.LEVEL_NODE)
10110 disk_abort = False
10111 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10112 feedback_fn("* wiping instance disks...")
10113 try:
10114 _WipeDisks(self, iobj)
10115 except errors.OpExecError, err:
10116 logging.exception("Wiping disks failed")
10117 self.LogWarning("Wiping instance disks failed (%s)", err)
10118 disk_abort = True
10120 if disk_abort:
10121 # Something is already wrong with the disks, don't do anything else
10122 pass
10123 elif self.op.wait_for_sync:
10124 disk_abort = not _WaitForSync(self, iobj)
10125 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10126 # make sure the disks are not degraded (still sync-ing is ok)
10127 feedback_fn("* checking mirrors status")
10128 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10129 else:
10130 disk_abort = False
10132 if disk_abort:
10133 _RemoveDisks(self, iobj)
10134 self.cfg.RemoveInstance(iobj.name)
10135 # Make sure the instance lock gets removed
10136 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10137 raise errors.OpExecError("There are some degraded disks for"
10138 " this instance")
10140 # Release all node resource locks
10141 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10143 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10144 if self.op.mode == constants.INSTANCE_CREATE:
10145 if not self.op.no_install:
10146 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10147 not self.op.wait_for_sync)
10148 if pause_sync:
10149 feedback_fn("* pausing disk sync to install instance OS")
10150 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10151 (iobj.disks,
10152 iobj), True)
10153 for idx, success in enumerate(result.payload):
10154 if not success:
10155 logging.warn("pause-sync of instance %s for disk %d failed",
10156 instance, idx)
10158 feedback_fn("* running the instance OS create scripts...")
10159 # FIXME: pass debug option from opcode to backend
10160 os_add_result = \
10161 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10162 self.op.debug_level)
10163 if pause_sync:
10164 feedback_fn("* resuming disk sync")
10165 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10166 (iobj.disks,
10167 iobj), False)
10168 for idx, success in enumerate(result.payload):
10169 if not success:
10170 logging.warn("resume-sync of instance %s for disk %d failed",
10171 instance, idx)
10173 os_add_result.Raise("Could not add os for instance %s"
10174 " on node %s" % (instance, pnode_name))
10177 elif self.op.mode == constants.INSTANCE_IMPORT:
10178 feedback_fn("* running the instance OS import scripts...")
10180 transfers = []
10182 for idx, image in enumerate(self.src_images):
10183 if not image:
10184 continue
10186 # FIXME: pass debug option from opcode to backend
10187 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10188 constants.IEIO_FILE, (image, ),
10189 constants.IEIO_SCRIPT,
10190 (iobj.disks[idx], idx),
10191 None)
10192 transfers.append(dt)
10194 import_result = \
10195 masterd.instance.TransferInstanceData(self, feedback_fn,
10196 self.op.src_node, pnode_name,
10197 self.pnode.secondary_ip,
10198 iobj, transfers)
10199 if not compat.all(import_result):
10200 self.LogWarning("Some disks for instance %s on node %s were not"
10201 " imported successfully" % (instance, pnode_name))
10203 rename_from = self._old_instance_name
10205 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10206 feedback_fn("* preparing remote import...")
10207 # The source cluster will stop the instance before attempting to make
10208 # a connection. In some cases stopping an instance can take a long
10209 # time, hence the shutdown timeout is added to the connection
10210 # timeout.
10211 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10212 self.op.source_shutdown_timeout)
10213 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10215 assert iobj.primary_node == self.pnode.name
10216 disk_results = \
10217 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10218 self.source_x509_ca,
10219 self._cds, timeouts)
10220 if not compat.all(disk_results):
10221 # TODO: Should the instance still be started, even if some disks
10222 # failed to import (valid for local imports, too)?
10223 self.LogWarning("Some disks for instance %s on node %s were not"
10224 " imported successfully" % (instance, pnode_name))
10226 rename_from = self.source_instance_name
10228 else:
10229 # also checked in the prereq part
10230 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10231 % self.op.mode)
10233 # Run rename script on newly imported instance
10234 assert iobj.name == instance
10235 feedback_fn("Running rename script for %s" % instance)
10236 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10237 rename_from,
10238 self.op.debug_level)
10239 if result.fail_msg:
10240 self.LogWarning("Failed to run rename script for %s on node"
10241 " %s: %s" % (instance, pnode_name, result.fail_msg))
10243 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10245 if self.op.start:
10246 iobj.admin_state = constants.ADMINST_UP
10247 self.cfg.Update(iobj, feedback_fn)
10248 logging.info("Starting instance %s on node %s", instance, pnode_name)
10249 feedback_fn("* starting instance...")
10250 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10251 False)
10252 result.Raise("Could not start instance")
10254 return list(iobj.all_nodes)
10257 def _CheckRADOSFreeSpace():
10258 """Compute disk size requirements inside the RADOS cluster.
10261 # For the RADOS cluster we assume there is always enough space.
10262 pass
10265 class LUInstanceConsole(NoHooksLU):
10266 """Connect to an instance's console.
10268 This is somewhat special in that it returns the command line that
10269 you need to run on the master node in order to connect to the
10275 def ExpandNames(self):
10276 self.share_locks = _ShareAll()
10277 self._ExpandAndLockInstance()
10279 def CheckPrereq(self):
10280 """Check prerequisites.
10282 This checks that the instance is in the cluster.
10285 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10286 assert self.instance is not None, \
10287 "Cannot retrieve locked instance %s" % self.op.instance_name
10288 _CheckNodeOnline(self, self.instance.primary_node)
10290 def Exec(self, feedback_fn):
10291 """Connect to the console of an instance
10294 instance = self.instance
10295 node = instance.primary_node
10297 node_insts = self.rpc.call_instance_list([node],
10298 [instance.hypervisor])[node]
10299 node_insts.Raise("Can't get node information from %s" % node)
10301 if instance.name not in node_insts.payload:
10302 if instance.admin_state == constants.ADMINST_UP:
10303 state = constants.INSTST_ERRORDOWN
10304 elif instance.admin_state == constants.ADMINST_DOWN:
10305 state = constants.INSTST_ADMINDOWN
10307 state = constants.INSTST_ADMINOFFLINE
10308 raise errors.OpExecError("Instance %s is not running (state %s)" %
10309 (instance.name, state))
10311 logging.debug("Connecting to console of %s on %s", instance.name, node)
10313 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10316 def _GetInstanceConsole(cluster, instance):
10317 """Returns console information for an instance.
10319 @type cluster: L{objects.Cluster}
10320 @type instance: L{objects.Instance}
10324 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10325 # beparams and hvparams are passed separately, to avoid editing the
10326 # instance and then saving the defaults in the instance itself.
10327 hvparams = cluster.FillHV(instance)
10328 beparams = cluster.FillBE(instance)
10329 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10331 assert console.instance == instance.name
10332 assert console.Validate()
10334 return console.ToDict()
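# Illustrative sketch only (not part of the original module): a caller would
# consume the dictionary returned above roughly as follows; the exact keys
# come from objects.InstanceConsole and depend on the hypervisor, so the
# field names below are assumptions.
#
#   console = _GetInstanceConsole(cluster, instance)
#   if console["kind"] == constants.CONS_SSH:
#       # run console["command"] on console["host"] to attach to the console
#       ...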
10337 class LUInstanceReplaceDisks(LogicalUnit):
10338 """Replace the disks of an instance.
10341 HPATH = "mirrors-replace"
10342 HTYPE = constants.HTYPE_INSTANCE
10343 REQ_BGL = False
10345 def CheckArguments(self):
10346 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10347 self.op.iallocator)
10349 def ExpandNames(self):
10350 self._ExpandAndLockInstance()
10352 assert locking.LEVEL_NODE not in self.needed_locks
10353 assert locking.LEVEL_NODE_RES not in self.needed_locks
10354 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10356 assert self.op.iallocator is None or self.op.remote_node is None, \
10357 "Conflicting options"
10359 if self.op.remote_node is not None:
10360 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10362 # Warning: do not remove the locking of the new secondary here
10363 # unless DRBD8.AddChildren is changed to work in parallel;
10364 # currently it doesn't since parallel invocations of
10365 # FindUnusedMinor will conflict
10366 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10367 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10368 else:
10369 self.needed_locks[locking.LEVEL_NODE] = []
10370 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10372 if self.op.iallocator is not None:
10373 # iallocator will select a new node in the same group
10374 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10376 self.needed_locks[locking.LEVEL_NODE_RES] = []
10378 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10379 self.op.iallocator, self.op.remote_node,
10380 self.op.disks, False, self.op.early_release,
10381 self.op.ignore_ipolicy)
10383 self.tasklets = [self.replacer]
10385 def DeclareLocks(self, level):
10386 if level == locking.LEVEL_NODEGROUP:
10387 assert self.op.remote_node is None
10388 assert self.op.iallocator is not None
10389 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10391 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10392 # Lock all groups used by instance optimistically; this requires going
10393 # via the node before it's locked, requiring verification later on
10394 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10395 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10397 elif level == locking.LEVEL_NODE:
10398 if self.op.iallocator is not None:
10399 assert self.op.remote_node is None
10400 assert not self.needed_locks[locking.LEVEL_NODE]
10402 # Lock member nodes of all locked groups
10403 self.needed_locks[locking.LEVEL_NODE] = [node_name
10404 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10405 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10406 else:
10407 self._LockInstancesNodes()
10408 elif level == locking.LEVEL_NODE_RES:
10410 self.needed_locks[locking.LEVEL_NODE_RES] = \
10411 self.needed_locks[locking.LEVEL_NODE]
10413 def BuildHooksEnv(self):
10414 """Build hooks env.
10416 This runs on the master, the primary and all the secondaries.
10419 instance = self.replacer.instance
10420 env = {
10421 "MODE": self.op.mode,
10422 "NEW_SECONDARY": self.op.remote_node,
10423 "OLD_SECONDARY": instance.secondary_nodes[0],
10424 }
10425 env.update(_BuildInstanceHookEnvByObject(self, instance))
10426 return env
10428 def BuildHooksNodes(self):
10429 """Build hooks nodes.
10432 instance = self.replacer.instance
10433 nl = [
10434 self.cfg.GetMasterNode(),
10435 instance.primary_node,
10436 ]
10437 if self.op.remote_node is not None:
10438 nl.append(self.op.remote_node)
10440 return nl, nl
10441 def CheckPrereq(self):
10442 """Check prerequisites.
10445 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10446 self.op.iallocator is None)
10448 # Verify if node group locks are still correct
10449 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10450 if owned_groups:
10451 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10453 return LogicalUnit.CheckPrereq(self)
10456 class TLReplaceDisks(Tasklet):
10457 """Replaces disks for an instance.
10459 Note: Locking is not within the scope of this class.
10462 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10463 disks, delay_iallocator, early_release, ignore_ipolicy):
10464 """Initializes this class.
10467 Tasklet.__init__(self, lu)
10470 self.instance_name = instance_name
10471 self.mode = mode
10472 self.iallocator_name = iallocator_name
10473 self.remote_node = remote_node
10474 self.disks = disks
10475 self.delay_iallocator = delay_iallocator
10476 self.early_release = early_release
10477 self.ignore_ipolicy = ignore_ipolicy
10480 self.instance = None
10481 self.new_node = None
10482 self.target_node = None
10483 self.other_node = None
10484 self.remote_node_info = None
10485 self.node_secondary_ip = None
10488 def CheckArguments(mode, remote_node, iallocator):
10489 """Helper function for users of this class.
10492 # check for valid parameter combination
10493 if mode == constants.REPLACE_DISK_CHG:
10494 if remote_node is None and iallocator is None:
10495 raise errors.OpPrereqError("When changing the secondary either an"
10496 " iallocator script must be used or the"
10497 " new node given", errors.ECODE_INVAL)
10499 if remote_node is not None and iallocator is not None:
10500 raise errors.OpPrereqError("Give either the iallocator or the new"
10501 " secondary, not both", errors.ECODE_INVAL)
10503 elif remote_node is not None or iallocator is not None:
10504 # Not replacing the secondary
10505 raise errors.OpPrereqError("The iallocator and new node options can"
10506 " only be used when changing the"
10507 " secondary node", errors.ECODE_INVAL)
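# Illustrative summary (sketch only, derived from the checks above): the
# accepted argument combinations are
#   mode in (REPLACE_DISK_PRI, REPLACE_DISK_SEC, REPLACE_DISK_AUTO)
#       -> neither remote_node nor iallocator may be given
#   mode == REPLACE_DISK_CHG
#       -> exactly one of remote_node or iallocator must be given
# e.g. CheckArguments(constants.REPLACE_DISK_CHG, "node3.example.com", None)
# passes, while giving both a new node and an iallocator raises
# OpPrereqError.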
10510 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10511 """Compute a new secondary node using an IAllocator.
10514 ial = IAllocator(lu.cfg, lu.rpc,
10515 mode=constants.IALLOCATOR_MODE_RELOC,
10516 name=instance_name,
10517 relocate_from=list(relocate_from))
10519 ial.Run(iallocator_name)
10521 if not ial.success:
10522 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10523 " %s" % (iallocator_name, ial.info),
10524 errors.ECODE_NORES)
10526 if len(ial.result) != ial.required_nodes:
10527 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10528 " of nodes (%s), required %s" %
10529 (iallocator_name,
10530 len(ial.result), ial.required_nodes),
10531 errors.ECODE_FAULT)
10533 remote_node_name = ial.result[0]
10535 lu.LogInfo("Selected new secondary for instance '%s': %s",
10536 instance_name, remote_node_name)
10538 return remote_node_name
10540 def _FindFaultyDisks(self, node_name):
10541 """Wrapper for L{_FindFaultyInstanceDisks}.
10544 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10545 node_name, True)
10547 def _CheckDisksActivated(self, instance):
10548 """Checks if the instance disks are activated.
10550 @param instance: The instance to check disks
10551 @return: True if they are activated, False otherwise
10554 nodes = instance.all_nodes
10556 for idx, dev in enumerate(instance.disks):
10557 for node in nodes:
10558 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10559 self.cfg.SetDiskID(dev, node)
10561 result = _BlockdevFind(self, node, dev, instance)
10563 if result.offline:
10564 continue
10565 elif result.fail_msg or not result.payload:
10566 return False
10568 return True
10570 def CheckPrereq(self):
10571 """Check prerequisites.
10573 This checks that the instance is in the cluster.
10576 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10577 assert instance is not None, \
10578 "Cannot retrieve locked instance %s" % self.instance_name
10580 if instance.disk_template != constants.DT_DRBD8:
10581 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10582 " instances", errors.ECODE_INVAL)
10584 if len(instance.secondary_nodes) != 1:
10585 raise errors.OpPrereqError("The instance has a strange layout,"
10586 " expected one secondary but found %d" %
10587 len(instance.secondary_nodes),
10588 errors.ECODE_FAULT)
10590 if not self.delay_iallocator:
10591 self._CheckPrereq2()
10593 def _CheckPrereq2(self):
10594 """Check prerequisites, second part.
10596 This function should always be part of CheckPrereq. It was separated and is
10597 now called from Exec because during node evacuation iallocator was only
10598 called with an unmodified cluster model, not taking planned changes into
10599 account.
10602 instance = self.instance
10603 secondary_node = instance.secondary_nodes[0]
10605 if self.iallocator_name is None:
10606 remote_node = self.remote_node
10607 else:
10608 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10609 instance.name, instance.secondary_nodes)
10611 if remote_node is None:
10612 self.remote_node_info = None
10613 else:
10614 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10615 "Remote node '%s' is not locked" % remote_node
10617 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10618 assert self.remote_node_info is not None, \
10619 "Cannot retrieve locked node %s" % remote_node
10621 if remote_node == self.instance.primary_node:
10622 raise errors.OpPrereqError("The specified node is the primary node of"
10623 " the instance", errors.ECODE_INVAL)
10625 if remote_node == secondary_node:
10626 raise errors.OpPrereqError("The specified node is already the"
10627 " secondary node of the instance",
10628 errors.ECODE_INVAL)
10630 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10631 constants.REPLACE_DISK_CHG):
10632 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10633 errors.ECODE_INVAL)
10635 if self.mode == constants.REPLACE_DISK_AUTO:
10636 if not self._CheckDisksActivated(instance):
10637 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10638 " first" % self.instance_name,
10639 errors.ECODE_STATE)
10640 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10641 faulty_secondary = self._FindFaultyDisks(secondary_node)
10643 if faulty_primary and faulty_secondary:
10644 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10645 " one node and can not be repaired"
10646 " automatically" % self.instance_name,
10647 errors.ECODE_STATE)
10649 if faulty_primary:
10650 self.disks = faulty_primary
10651 self.target_node = instance.primary_node
10652 self.other_node = secondary_node
10653 check_nodes = [self.target_node, self.other_node]
10654 elif faulty_secondary:
10655 self.disks = faulty_secondary
10656 self.target_node = secondary_node
10657 self.other_node = instance.primary_node
10658 check_nodes = [self.target_node, self.other_node]
10659 else:
10660 self.disks = []
10661 check_nodes = []
10663 else:
10664 # Non-automatic modes
10665 if self.mode == constants.REPLACE_DISK_PRI:
10666 self.target_node = instance.primary_node
10667 self.other_node = secondary_node
10668 check_nodes = [self.target_node, self.other_node]
10670 elif self.mode == constants.REPLACE_DISK_SEC:
10671 self.target_node = secondary_node
10672 self.other_node = instance.primary_node
10673 check_nodes = [self.target_node, self.other_node]
10675 elif self.mode == constants.REPLACE_DISK_CHG:
10676 self.new_node = remote_node
10677 self.other_node = instance.primary_node
10678 self.target_node = secondary_node
10679 check_nodes = [self.new_node, self.other_node]
10681 _CheckNodeNotDrained(self.lu, remote_node)
10682 _CheckNodeVmCapable(self.lu, remote_node)
10684 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10685 assert old_node_info is not None
10686 if old_node_info.offline and not self.early_release:
10687 # doesn't make sense to delay the release
10688 self.early_release = True
10689 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10690 " early-release mode", secondary_node)
10692 else:
10693 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10694 self.mode)
10696 # If not specified all disks should be replaced
10697 if not self.disks:
10698 self.disks = range(len(self.instance.disks))
10700 # TODO: This is ugly, but right now we can't distinguish between internal
10701 # submitted opcode and external one. We should fix that.
10702 if self.remote_node_info:
10703 # We change the node, lets verify it still meets instance policy
10704 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10705 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10706 new_group_info)
10707 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10708 ignore=self.ignore_ipolicy)
10710 for node in check_nodes:
10711 _CheckNodeOnline(self.lu, node)
10713 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10714 self.other_node,
10715 self.target_node]
10716 if node_name is not None)
10718 # Release unneeded node and node resource locks
10719 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10720 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10722 # Release any owned node group
10723 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10724 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10726 # Check whether disks are valid
10727 for disk_idx in self.disks:
10728 instance.FindDisk(disk_idx)
10730 # Get secondary node IP addresses
10731 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10732 in self.cfg.GetMultiNodeInfo(touched_nodes))
10734 def Exec(self, feedback_fn):
10735 """Execute disk replacement.
10737 This dispatches the disk replacement to the appropriate handler.
10740 if self.delay_iallocator:
10741 self._CheckPrereq2()
10744 # Verify owned locks before starting operation
10745 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10746 assert set(owned_nodes) == set(self.node_secondary_ip), \
10747 ("Incorrect node locks, owning %s, expected %s" %
10748 (owned_nodes, self.node_secondary_ip.keys()))
10749 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10750 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10752 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10753 assert list(owned_instances) == [self.instance_name], \
10754 "Instance '%s' not locked" % self.instance_name
10756 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10757 "Should not own any node group lock at this point"
10759 if not self.disks:
10760 feedback_fn("No disks need replacement")
10761 return
10763 feedback_fn("Replacing disk(s) %s for %s" %
10764 (utils.CommaJoin(self.disks), self.instance.name))
10766 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10768 # Activate the instance disks if we're replacing them on a down instance
10769 if activate_disks:
10770 _StartInstanceDisks(self.lu, self.instance, True)
10772 try:
10773 # Should we replace the secondary node?
10774 if self.new_node is not None:
10775 fn = self._ExecDrbd8Secondary
10776 else:
10777 fn = self._ExecDrbd8DiskOnly
10779 result = fn(feedback_fn)
10780 finally:
10781 # Deactivate the instance disks if we're replacing them on a
10782 # down instance
10783 if activate_disks:
10784 _SafeShutdownInstanceDisks(self.lu, self.instance)
10786 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10788 if __debug__:
10789 # Verify owned locks
10790 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10791 nodes = frozenset(self.node_secondary_ip)
10792 assert ((self.early_release and not owned_nodes) or
10793 (not self.early_release and not (set(owned_nodes) - nodes))), \
10794 ("Not owning the correct locks, early_release=%s, owned=%r,"
10795 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10797 return result
10799 def _CheckVolumeGroup(self, nodes):
10800 self.lu.LogInfo("Checking volume groups")
10802 vgname = self.cfg.GetVGName()
10804 # Make sure volume group exists on all involved nodes
10805 results = self.rpc.call_vg_list(nodes)
10806 if not results:
10807 raise errors.OpExecError("Can't list volume groups on the nodes")
10809 for node in nodes:
10810 res = results[node]
10811 res.Raise("Error checking node %s" % node)
10812 if vgname not in res.payload:
10813 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10814 (vgname, node))
10816 def _CheckDisksExistence(self, nodes):
10817 # Check disk existence
10818 for idx, dev in enumerate(self.instance.disks):
10819 if idx not in self.disks:
10820 continue
10822 for node in nodes:
10823 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10824 self.cfg.SetDiskID(dev, node)
10826 result = _BlockdevFind(self, node, dev, self.instance)
10828 msg = result.fail_msg
10829 if msg or not result.payload:
10830 if not msg:
10831 msg = "disk not found"
10832 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10833 (idx, node, msg))
10835 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10836 for idx, dev in enumerate(self.instance.disks):
10837 if idx not in self.disks:
10838 continue
10840 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10841 (idx, node_name))
10843 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10844 on_primary, ldisk=ldisk):
10845 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10846 " replace disks for instance %s" %
10847 (node_name, self.instance.name))
10849 def _CreateNewStorage(self, node_name):
10850 """Create new storage on the primary or secondary node.
10852 This is only used for same-node replaces, not for changing the
10853 secondary node, hence we don't want to modify the existing disk.
10857 iv_names = {}
10858 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10859 for idx, dev in enumerate(disks):
10860 if idx not in self.disks:
10861 continue
10863 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10865 self.cfg.SetDiskID(dev, node_name)
10867 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10868 names = _GenerateUniqueNames(self.lu, lv_names)
10870 (data_disk, meta_disk) = dev.children
10871 vg_data = data_disk.logical_id[0]
10872 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10873 logical_id=(vg_data, names[0]),
10874 params=data_disk.params)
10875 vg_meta = meta_disk.logical_id[0]
10876 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10877 logical_id=(vg_meta, names[1]),
10878 params=meta_disk.params)
10880 new_lvs = [lv_data, lv_meta]
10881 old_lvs = [child.Copy() for child in dev.children]
10882 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10884 # we pass force_create=True to force the LVM creation
10885 for new_lv in new_lvs:
10886 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10887 _GetInstanceInfoText(self.instance), False)
10889 return iv_names
10891 def _CheckDevices(self, node_name, iv_names):
10892 for name, (dev, _, _) in iv_names.iteritems():
10893 self.cfg.SetDiskID(dev, node_name)
10895 result = _BlockdevFind(self, node_name, dev, self.instance)
10897 msg = result.fail_msg
10898 if msg or not result.payload:
10899 if not msg:
10900 msg = "disk not found"
10901 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10902 (name, msg))
10904 if result.payload.is_degraded:
10905 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10907 def _RemoveOldStorage(self, node_name, iv_names):
10908 for name, (_, old_lvs, _) in iv_names.iteritems():
10909 self.lu.LogInfo("Remove logical volumes for %s" % name)
10911 for lv in old_lvs:
10912 self.cfg.SetDiskID(lv, node_name)
10914 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10915 if msg:
10916 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10917 hint="remove unused LVs manually")
10919 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10920 """Replace a disk on the primary or secondary for DRBD 8.
10922 The algorithm for replace is quite complicated:
10924 1. for each disk to be replaced:
10926 1. create new LVs on the target node with unique names
10927 1. detach old LVs from the drbd device
10928 1. rename old LVs to name_replaced.<time_t>
10929 1. rename new LVs to old LVs
10930 1. attach the new LVs (with the old names now) to the drbd device
10932 1. wait for sync across all devices
10934 1. for each modified disk:
10936 1. remove old LVs (which have the name name_replaces.<time_t>)
10938 Failures are not very well handled.
10941 steps_total = 6
10943 # Step: check device activation
10944 self.lu.LogStep(1, steps_total, "Check device existence")
10945 self._CheckDisksExistence([self.other_node, self.target_node])
10946 self._CheckVolumeGroup([self.target_node, self.other_node])
10948 # Step: check other node consistency
10949 self.lu.LogStep(2, steps_total, "Check peer consistency")
10950 self._CheckDisksConsistency(self.other_node,
10951 self.other_node == self.instance.primary_node,
10952 False)
10954 # Step: create new storage
10955 self.lu.LogStep(3, steps_total, "Allocate new storage")
10956 iv_names = self._CreateNewStorage(self.target_node)
10958 # Step: for each lv, detach+rename*2+attach
10959 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10960 for dev, old_lvs, new_lvs in iv_names.itervalues():
10961 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10963 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10964 old_lvs)
10965 result.Raise("Can't detach drbd from local storage on node"
10966 " %s for device %s" % (self.target_node, dev.iv_name))
10968 #cfg.Update(instance)
10970 # ok, we created the new LVs, so now we know we have the needed
10971 # storage; as such, we proceed on the target node to rename
10972 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10973 # using the assumption that logical_id == physical_id (which in
10974 # turn is the unique_id on that node)
10976 # FIXME(iustin): use a better name for the replaced LVs
10977 temp_suffix = int(time.time())
10978 ren_fn = lambda d, suff: (d.physical_id[0],
10979 d.physical_id[1] + "_replaced-%s" % suff)
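# Illustrative example (the volume names are made up): for an old LV with
# physical_id ("xenvg", "4f2b.disk0_data") and temp_suffix 1459858938,
# ren_fn yields ("xenvg", "4f2b.disk0_data_replaced-1459858938"), i.e. the
# old volume is parked under a timestamped name so that the new LV can take
# over its original name.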
10981 # Build the rename list based on what LVs exist on the node
10982 rename_old_to_new = []
10983 for to_ren in old_lvs:
10984 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10985 if not result.fail_msg and result.payload:
10987 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10989 self.lu.LogInfo("Renaming the old LVs on the target node")
10990 result = self.rpc.call_blockdev_rename(self.target_node,
10991 rename_old_to_new)
10992 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10994 # Now we rename the new LVs to the old LVs
10995 self.lu.LogInfo("Renaming the new LVs on the target node")
10996 rename_new_to_old = [(new, old.physical_id)
10997 for old, new in zip(old_lvs, new_lvs)]
10998 result = self.rpc.call_blockdev_rename(self.target_node,
10999 rename_new_to_old)
11000 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11002 # Intermediate steps of in memory modifications
11003 for old, new in zip(old_lvs, new_lvs):
11004 new.logical_id = old.logical_id
11005 self.cfg.SetDiskID(new, self.target_node)
11007 # We need to modify old_lvs so that removal later removes the
11008 # right LVs, not the newly added ones; note that old_lvs is a
11009 # copy here
11010 for disk in old_lvs:
11011 disk.logical_id = ren_fn(disk, temp_suffix)
11012 self.cfg.SetDiskID(disk, self.target_node)
11014 # Now that the new lvs have the old name, we can add them to the device
11015 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11016 result = self.rpc.call_blockdev_addchildren(self.target_node,
11017 (dev, self.instance), new_lvs)
11018 msg = result.fail_msg
11019 if msg:
11020 for new_lv in new_lvs:
11021 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11022 new_lv).fail_msg
11023 if msg2:
11024 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11025 hint=("cleanup manually the unused logical"
11026 " volumes"))
11027 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11029 cstep = itertools.count(5)
11031 if self.early_release:
11032 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11033 self._RemoveOldStorage(self.target_node, iv_names)
11034 # TODO: Check if releasing locks early still makes sense
11035 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11037 # Release all resource locks except those used by the instance
11038 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11039 keep=self.node_secondary_ip.keys())
11041 # Release all node locks while waiting for sync
11042 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11044 # TODO: Can the instance lock be downgraded here? Take the optional disk
11045 # shutdown in the caller into consideration.
11048 # This can fail as the old devices are degraded and _WaitForSync
11049 # does a combined result over all disks, so we don't check its return value
11050 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11051 _WaitForSync(self.lu, self.instance)
11053 # Check all devices manually
11054 self._CheckDevices(self.instance.primary_node, iv_names)
11056 # Step: remove old storage
11057 if not self.early_release:
11058 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11059 self._RemoveOldStorage(self.target_node, iv_names)
11061 def _ExecDrbd8Secondary(self, feedback_fn):
11062 """Replace the secondary node for DRBD 8.
11064 The algorithm for replace is quite complicated:
11065 - for all disks of the instance:
11066 - create new LVs on the new node with same names
11067 - shutdown the drbd device on the old secondary
11068 - disconnect the drbd network on the primary
11069 - create the drbd device on the new secondary
11070 - network attach the drbd on the primary, using an artifice:
11071 the drbd code for Attach() will connect to the network if it
11072 finds a device which is connected to the good local disks but
11073 not network enabled
11074 - wait for sync across all devices
11075 - remove all disks from the old secondary
    Failures are not very well handled.

    """
    steps_total = 6

    pnode = self.instance.primary_node
11084 # Step: check device activation
11085 self.lu.LogStep(1, steps_total, "Check device existence")
11086 self._CheckDisksExistence([self.instance.primary_node])
11087 self._CheckVolumeGroup([self.instance.primary_node])
11089 # Step: check other node consistency
11090 self.lu.LogStep(2, steps_total, "Check peer consistency")
11091 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11093 # Step: create new storage
11094 self.lu.LogStep(3, steps_total, "Allocate new storage")
11095 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11096 for idx, dev in enumerate(disks):
11097 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11098 (self.new_node, idx))
11099 # we pass force_create=True to force LVM creation
11100 for new_lv in dev.children:
11101 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11102 True, _GetInstanceInfoText(self.instance), False)
    # Step 4: drbd minors and drbd setup changes
11105 # after this, we must manually remove the drbd minors on both the
11106 # error and the success paths
11107 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11108 minors = self.cfg.AllocateDRBDMinor([self.new_node
11109 for dev in self.instance.disks],
11110 self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
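    # iv_names maps the disk index to (disk object, old children, new
    # network-enabled logical_id) for use by the later sync/cleanup steps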
11114 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11115 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11116 (self.new_node, idx))
11117 # create new devices on new_node; note that we create two IDs:
11118 # one without port, so the drbd will be activated without
11119 # networking information on the new node at this stage, and one
11120 # with network, for the latter activation in step 4
11121 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
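      # a DRBD8 logical_id is (node_a, node_b, port, minor_a, minor_b, secret);
      # keep the minor that belongs to the primary node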
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2
11128 new_alone_id = (self.instance.primary_node, self.new_node, None,
11129 p_minor, new_minor, o_secret)
11130 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11131 p_minor, new_minor, o_secret)
11133 iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size,
                              params={})
      (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
                                             self.cfg)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
                              anno_new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise
11151 # We have new devices, shutdown the drbd on the old secondary
11152 for idx, dev in enumerate(self.instance.disks):
11153 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11154 self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
11159 hint=("Please cleanup this device manually as"
11160 " soon as possible"))
11162 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11163 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11164 self.instance.disks)[pnode]
    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
11170 raise errors.OpExecError("Can't detach the disks from the network on"
11171 " old node: %s" % (msg,))
11173 # if we managed to detach at least one, we update all the disks of
11174 # the instance to point to the new secondary
11175 self.lu.LogInfo("Updating instance configuration")
11176 for dev, _, new_logical_id in iv_names.itervalues():
11177 dev.logical_id = new_logical_id
11178 self.cfg.SetDiskID(dev, self.instance.primary_node)
11180 self.cfg.Update(self.instance, feedback_fn)
11182 # Release all node locks (the configuration has been updated)
11183 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11185 # and now perform the drbd attach
11186 self.lu.LogInfo("Attaching primary drbds to new secondary"
11187 " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
11190 self.node_secondary_ip,
11191 (self.instance.disks, self.instance),
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
11199 hint=("please do a gnt-instance info to see the"
11200 " status of disks"))
11202 cstep = itertools.count(5)
11204 if self.early_release:
11205 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11206 self._RemoveOldStorage(self.target_node, iv_names)
11207 # TODO: Check if releasing locks early still makes sense
11208 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11210 # Release all resource locks except those used by the instance
11211 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11212 keep=self.node_secondary_ip.keys())
11214 # TODO: Can the instance lock be downgraded here? Take the optional disk
11215 # shutdown in the caller into consideration.
11218 # This can fail as the old devices are degraded and _WaitForSync
11219 # does a combined result over all disks, so we don't check its return value
11220 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11221 _WaitForSync(self.lu, self.instance)
11223 # Check all devices manually
11224 self._CheckDevices(self.instance.primary_node, iv_names)
11226 # Step: remove old storage
11227 if not self.early_release:
11228 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11229 self._RemoveOldStorage(self.target_node, iv_names)
11232 class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
11239 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11241 storage_type = self.op.storage_type
11243 if (constants.SO_FIX_CONSISTENCY not in
11244 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11245 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11246 " repaired" % storage_type,
11247 errors.ECODE_INVAL)
11249 def ExpandNames(self):
11250 self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }
11254 def _CheckFaultyDisks(self, instance, node_name):
11255 """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise
11268 def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if inst.admin_state != constants.ADMINST_UP:
        continue
11276 check_nodes = set(inst.all_nodes)
11277 check_nodes.discard(self.op.node_name)
11278 for inst_node_name in check_nodes:
11279 self._CheckFaultyDisks(inst, inst_node_name)
11281 def Exec(self, feedback_fn):
11282 feedback_fn("Repairing storage unit '%s' on %s ..." %
11283 (self.op.name, self.op.node_name))
11285 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11286 result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
11289 constants.SO_FIX_CONSISTENCY)
11290 result.Raise("Failed to repair storage unit '%s' on %s" %
11291 (self.op.name, self.op.node_name))
11294 class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  REQ_BGL = False
11300 _MODE2IALLOCATOR = {
11301 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11302 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
    }
11305 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11306 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11307 constants.IALLOCATOR_NEVAC_MODES)
11309 def CheckArguments(self):
11310 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11312 def ExpandNames(self):
11313 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11315 if self.op.remote_node is not None:
11316 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11317 assert self.op.remote_node
11319 if self.op.remote_node == self.op.node_name:
11320 raise errors.OpPrereqError("Can not use evacuated node as a new"
11321 " secondary node", errors.ECODE_INVAL)
11323 if self.op.mode != constants.NODE_EVAC_SEC:
11324 raise errors.OpPrereqError("Without the use of an iallocator only"
11325 " secondary instances can be evacuated",
11326 errors.ECODE_INVAL)
11329 self.share_locks = _ShareAll()
11330 self.needed_locks = {
11331 locking.LEVEL_INSTANCE: [],
11332 locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }
11336 # Determine nodes (via group) optimistically, needs verification once locks
11337 # have been acquired
11338 self.lock_nodes = self._DetermineNodes()
11340 def _DetermineNodes(self):
11341 """Gets the list of nodes to operate on.
11344 if self.op.remote_node is None:
11345 # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])
11350 # Determine nodes to be locked
11351 return set([self.op.node_name]) | group_nodes
11353 def _DetermineInstances(self):
11354 """Builds list of instances to operate on.
11357 assert self.op.mode in constants.NODE_EVAC_MODES
11359 if self.op.mode == constants.NODE_EVAC_PRI:
11360 # Primary instances only
11361 inst_fn = _GetNodePrimaryInstances
11362 assert self.op.remote_node is None, \
11363 "Evacuating primary instances requires iallocator"
11364 elif self.op.mode == constants.NODE_EVAC_SEC:
11365 # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
11371 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11373 raise errors.OpPrereqError("Due to an issue with the iallocator"
11374 " interface it is not possible to evacuate"
11375 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 " instances",
                                 errors.ECODE_INVAL)
11380 return inst_fn(self.cfg, self.op.node_name)
11382 def DeclareLocks(self, level):
11383 if level == locking.LEVEL_INSTANCE:
11384 # Lock instances optimistically, needs verification once node and group
11385 # locks have been acquired
11386 self.needed_locks[locking.LEVEL_INSTANCE] = \
11387 set(i.name for i in self._DetermineInstances())
11389 elif level == locking.LEVEL_NODEGROUP:
11390 # Lock node groups for all potential target nodes optimistically, needs
11391 # verification once nodes have been acquired
11392 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11393 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11395 elif level == locking.LEVEL_NODE:
11396 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11398 def CheckPrereq(self):
11400 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11401 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11402 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11404 need_nodes = self._DetermineNodes()
11406 if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " '%s', used to be '%s'; retry the"
                                 " operation" %
                                 (self.op.node_name,
11412 utils.CommaJoin(need_nodes),
11413 utils.CommaJoin(owned_nodes)),
11414 errors.ECODE_STATE)
11416 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11417 if owned_groups != wanted_groups:
11418 raise errors.OpExecError("Node groups changed since locks were acquired,"
11419 " current groups are '%s', used to be '%s';"
11420 " retry the operation" %
11421 (utils.CommaJoin(wanted_groups),
11422 utils.CommaJoin(owned_groups)))
11424 # Determine affected instances
11425 self.instances = self._DetermineInstances()
11426 self.instance_names = [i.name for i in self.instances]
11428 if set(self.instance_names) != owned_instances:
11429 raise errors.OpExecError("Instances on node '%s' changed since locks"
11430 " were acquired, current instances are '%s',"
11431 " used to be '%s'; retry the operation" %
11432 (self.op.node_name,
11433 utils.CommaJoin(self.instance_names),
11434 utils.CommaJoin(owned_instances)))
11436 if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)
11444 if self.op.remote_node is not None:
11445 for i in self.instances:
11446 if i.primary_node == self.op.remote_node:
11447 raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
11451 errors.ECODE_INVAL)
11453 def Exec(self, feedback_fn):
11454 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11456 if not self.instance_names:
      # No instances to evacuate
      jobs = []
11460 elif self.op.iallocator is not None:
11461 # TODO: Implement relocation to other group
11462 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11463 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11464 instances=list(self.instance_names))
11466 ial.Run(self.op.iallocator)
11468 if not ial.success:
11469 raise errors.OpPrereqError("Can't compute node evacuation using"
11470 " iallocator '%s': %s" %
11471 (self.op.iallocator, ial.info),
11472 errors.ECODE_NORES)
11474 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11476 elif self.op.remote_node is not None:
      assert self.op.mode == constants.NODE_EVAC_SEC
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")
11490 return ResultWithJobs(jobs)
11493 def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op
11505 def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group
11515 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11516 """Unpacks the result of change-group and node-evacuate iallocator requests.
11518 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11519 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11521 @type lu: L{LogicalUnit}
11522 @param lu: Logical unit instance
11523 @type alloc_result: tuple/list
11524 @param alloc_result: Result from iallocator
11525 @type early_release: bool
11526 @param early_release: Whether to release locks early if possible
11527 @type use_nodes: bool
11528 @param use_nodes: Whether to display node names instead of groups
  (moved, failed, jobs) = alloc_result

  if failed:
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11535 for (name, reason) in failed)
11536 lu.LogWarning("Unable to evacuate instances %s", failreason)
11537 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
  if moved:
    lu.LogInfo("Instances to be moved: %s",
11541 utils.CommaJoin("%s (to %s)" %
11542 (name, _NodeEvacDest(use_nodes, group, nodes))
11543 for (name, group, nodes) in moved))
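  # Convert the iallocator's job descriptions back into opcode objects,
  # propagating the early_release flag where the opcode supports it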
11545 return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]
11550 class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
11558 def ExpandNames(self):
11559 self._ExpandAndLockInstance()
11560 self.needed_locks[locking.LEVEL_NODE] = []
11561 self.needed_locks[locking.LEVEL_NODE_RES] = []
11562 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11563 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11565 def DeclareLocks(self, level):
11566 if level == locking.LEVEL_NODE:
11567 self._LockInstancesNodes()
11568 elif level == locking.LEVEL_NODE_RES:
11570 self.needed_locks[locking.LEVEL_NODE_RES] = \
11571 self.needed_locks[locking.LEVEL_NODE][:]
11573 def BuildHooksEnv(self):
11574 """Build hooks env.
    This runs on the master, the primary and all the secondaries.

    """
    env = {
11580 "DISK": self.op.disk,
11581 "AMOUNT": self.op.amount,
      "ABSOLUTE": self.op.absolute,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env
11587 def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)
11594 def CheckPrereq(self):
11595 """Check prerequisites.
    This checks that the instance is in the cluster.

    """
11600 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11601 assert instance is not None, \
11602 "Cannot retrieve locked instance %s" % self.op.instance_name
11603 nodenames = list(instance.all_nodes)
11604 for node in nodenames:
11605 _CheckNodeOnline(self, node)
11607 self.instance = instance
11609 if instance.disk_template not in constants.DTS_GROWABLE:
11610 raise errors.OpPrereqError("Instance's disk layout does not support"
11611 " growing", errors.ECODE_INVAL)
11613 self.disk = instance.FindDisk(self.op.disk)
11615 if self.op.absolute:
11616 self.target = self.op.amount
      self.delta = self.target - self.disk.size
      if self.delta < 0:
        raise errors.OpPrereqError("Requested size (%s) is smaller than "
11620 "current disk size (%s)" %
11621 (utils.FormatUnit(self.target, "h"),
11622 utils.FormatUnit(self.disk.size, "h")),
                                   errors.ECODE_STATE)
    else:
      self.delta = self.op.amount
      self.target = self.disk.size + self.delta
      if self.delta < 0:
11628 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11629 utils.FormatUnit(self.delta, "h"),
11630 errors.ECODE_INVAL)
11632 if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE,
                                      constants.DT_RBD):
      # TODO: check the free disk space for file, when that feature will be
      # supported
11637 _CheckNodesFreeDiskPerVG(self, nodenames,
11638 self.disk.ComputeGrowth(self.delta))
11640 def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk
11647 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11648 assert (self.owned_locks(locking.LEVEL_NODE) ==
11649 self.owned_locks(locking.LEVEL_NODE_RES))
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")
11655 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11656 (self.op.disk, instance.name,
11657 utils.FormatUnit(self.delta, "h"),
11658 utils.FormatUnit(self.target, "h")))
11660 # First run all grow ops in dry-run mode
11661 for node in instance.all_nodes:
11662 self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           True)
11665 result.Raise("Grow request failed to node %s" % node)
11667 # We know that (as far as we can test) operations across different
11668 # nodes will succeed, time to run it for real
11669 for node in instance.all_nodes:
11670 self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           False)
11673 result.Raise("Grow request failed to node %s" % node)
11675 # TODO: Rewrite code to work properly
11676 # DRBD goes into sync mode for a short amount of time after executing the
11677 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11678 # calling "resize" in sync mode fails. Sleeping for a short amount of
    # time is a work-around.
    time.sleep(5)
11682 disk.RecordGrow(self.delta)
11683 self.cfg.Update(instance, feedback_fn)
11685 # Changes have been recorded, release node lock
11686 _ReleaseLocks(self, locking.LEVEL_NODE)
11688 # Downgrade lock while waiting for sync
11689 self.glm.downgrade(locking.LEVEL_INSTANCE)
11691 if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
11695 " status; please check the instance")
11696 if instance.admin_state != constants.ADMINST_UP:
11697 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11698 elif instance.admin_state != constants.ADMINST_UP:
11699 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11700 " not supposed to be running because no wait for"
11701 " sync mode was requested")
11703 assert self.owned_locks(locking.LEVEL_NODE_RES)
11704 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11707 class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
11714 self.needed_locks = {}
11716 # Use locking if requested or when non-static information is wanted
11717 if not (self.op.static or self.op.use_locking):
11718 self.LogWarning("Non-static data requested, locks need to be acquired")
11719 self.op.use_locking = True
11721 if self.op.instances or not self.op.use_locking:
11722 # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
11726 self.wanted_names = None
11728 if self.op.use_locking:
11729 self.share_locks = _ShareAll()
11731 if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11736 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11737 self.needed_locks[locking.LEVEL_NODE] = []
11738 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11740 def DeclareLocks(self, level):
11741 if self.op.use_locking:
11742 if level == locking.LEVEL_NODEGROUP:
11743 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11745 # Lock all groups used by instances optimistically; this requires going
11746 # via the node before it's locked, requiring verification later on
11747 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11748 frozenset(group_uuid
                    for instance_name in owned_instances
                    for group_uuid in
                      self.cfg.GetInstanceNodeGroups(instance_name))
11753 elif level == locking.LEVEL_NODE:
11754 self._LockInstancesNodes()
11756 def CheckPrereq(self):
11757 """Check prerequisites.
    This only checks the optional instance list against the existing names.

    """
11762 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11763 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11764 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11766 if self.wanted_names is None:
11767 assert self.op.use_locking, "Locking was not used"
11768 self.wanted_names = owned_instances
11770 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11772 if self.op.use_locking:
      _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
                                None)
    else:
      assert not (owned_instances or owned_groups or owned_nodes)
11778 self.wanted_instances = instances.values()
11780 def _ComputeBlockdevStatus(self, node, instance, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None
11787 self.cfg.SetDiskID(dev, node)
11789 result = self.rpc.call_blockdev_find(node, dev)
11793 result.Raise("Can't compute disk status for %s" % instance.name)
11795 status = result.payload
11799 return (status.dev_path, status.major, status.minor,
11800 status.sync_percent, status.estimated_time,
11801 status.is_degraded, status.ldisk_status)
11803 def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
11807 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11809 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11811 def _ComputeDiskStatusInner(self, instance, snode, dev):
11812 """Compute block device status.
    @attention: The device has to be annotated already.

    """
11817 if dev.dev_type in constants.LDS_DRBD:
11818 # we change the snode then (otherwise we use the one passed in)
11819 if dev.logical_id[0] == instance.primary_node:
11820 snode = dev.logical_id[1]
11822 snode = dev.logical_id[0]
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatusInner,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
11836 "iv_name": dev.iv_name,
11837 "dev_type": dev.dev_type,
11838 "logical_id": dev.logical_id,
11839 "physical_id": dev.physical_id,
11840 "pstatus": dev_pstatus,
11841 "sstatus": dev_sstatus,
      "children": dev_children,
      }
11847 def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}
11851 cluster = self.cfg.GetClusterInfo()
11853 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11854 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
11856 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11857 for node in nodes.values()))
11859 group2name_fn = lambda uuid: groups[uuid].name
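    # map each node group UUID to its human-readable name for the per-instance
    # output below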
11861 for instance in self.wanted_instances:
11862 pnode = nodes[instance.primary_node]
11864 if self.op.static or pnode.offline:
11865 remote_state = None
11867 self.LogWarning("Primary node %s is marked offline, returning static"
11868 " information only for instance %s" %
11869 (pnode.name, instance.name))
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
11873 instance.hypervisor)
11874 remote_info.Raise("Error checking node %s" % instance.primary_node)
11875 remote_info = remote_info.payload
11876 if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
          else:
            remote_state = instance.admin_state
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)
11887 snodes_group_uuids = [nodes[snode_name].group
11888 for snode_name in instance.secondary_nodes]
11890 result[instance.name] = {
11891 "name": instance.name,
11892 "config_state": instance.admin_state,
11893 "run_state": remote_state,
11894 "pnode": instance.primary_node,
11895 "pnode_group_uuid": pnode.group,
11896 "pnode_group_name": group2name_fn(pnode.group),
11897 "snodes": instance.secondary_nodes,
11898 "snodes_group_uuids": snodes_group_uuids,
11899 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
11901 # this happens to be the same format used for hooks
11902 "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
11905 "hypervisor": instance.hypervisor,
11906 "network_port": instance.network_port,
11907 "hv_instance": instance.hvparams,
11908 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11909 "be_instance": instance.beparams,
11910 "be_actual": cluster.FillBE(instance),
11911 "os_instance": instance.osparams,
11912 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11913 "serial_no": instance.serial_no,
11914 "mtime": instance.mtime,
11915 "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result
11922 def PrepareContainerMods(mods, private_fn):
11923 """Prepares a list of container modifications by adding a private data field.
11925 @type mods: list of tuples; (operation, index, parameters)
11926 @param mods: List of modifications
11927 @type private_fn: callable or None
  @param private_fn: Callable for constructing a private data field for a
    modification

  """
  if private_fn is None:
    fn = lambda: None
  else:
    fn = private_fn

  return [(op, idx, params, fn()) for (op, idx, params) in mods]
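
# Illustrative example for PrepareContainerMods (params being any parameter
# dict): PrepareContainerMods([(constants.DDM_ADD, -1, params)], None)
# returns [(constants.DDM_ADD, -1, params, None)]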
#: Type description for changes as returned by L{ApplyContainerMods}'s
#: callbacks
11943 _TApplyContModsCbChanges = \
11944 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
    ht.TNonEmptyString,
    ht.TAny,
    ])))
11950 def ApplyContainerMods(kind, container, chgdesc, mods,
11951 create_fn, modify_fn, remove_fn):
11952 """Applies descriptions in C{mods} to C{container}.
11955 @param kind: One-word item description
11956 @type container: list
11957 @param container: Container to modify
11958 @type chgdesc: None or list
11959 @param chgdesc: List of applied changes
11961 @param mods: Modifications as returned by L{PrepareContainerMods}
11962 @type create_fn: callable
11963 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
11964 receives absolute item index, parameters and private data object as added
11965 by L{PrepareContainerMods}, returns tuple containing new item and changes
11967 @type modify_fn: callable
11968 @param modify_fn: Callback for modifying an existing item
11969 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
11970 and private data object as added by L{PrepareContainerMods}, returns
11972 @type remove_fn: callable
11973 @param remove_fn: Callback on removing item; receives absolute item index,
11974 item and private data object as added by L{PrepareContainerMods}
11977 for (op, idx, params, private) in mods:
    if idx == -1:
      # Append
      absidx = len(container) - 1
    elif idx < 0:
      raise IndexError("Not accepting negative indices other than -1")
    elif idx > len(container):
      raise IndexError("Got %s index %s, but there are only %s" %
                       (kind, idx, len(container)))
    else:
      absidx = idx

    changes = None
    if op == constants.DDM_ADD:
      # Calculate where item will be added
      if idx == -1:
        addidx = len(container)
      else:
        addidx = idx

      if create_fn is None:
        item = params
      else:
        (item, changes) = create_fn(addidx, params, private)

      if idx == -1:
        container.append(item)
      else:
        assert idx <= len(container)
        # list.insert does so before the specified index
        container.insert(idx, item)
    else:
      # Retrieve existing item
      try:
        item = container[absidx]
      except IndexError:
        raise IndexError("Invalid %s index %s" % (kind, idx))
12017 if op == constants.DDM_REMOVE:
12020 if remove_fn is not None:
12021 remove_fn(absidx, item, private)
12023 changes = [("%s/%s" % (kind, absidx), "remove")]
12025 assert container[absidx] == item
12026 del container[absidx]
12027 elif op == constants.DDM_MODIFY:
12028 if modify_fn is not None:
        changes = modify_fn(absidx, item, params, private)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12033 assert _TApplyContModsCbChanges(changes)
12035 if not (chgdesc is None or changes is None):
12036 chgdesc.extend(changes)
12039 def _UpdateIvNames(base_index, disks):
12040 """Updates the C{iv_name} attribute of disks.
  @type disks: list of L{objects.Disk}

  """
12045 for (idx, disk) in enumerate(disks):
12046 disk.iv_name = "disk/%s" % (base_index + idx, )
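
# E.g. after removing disk 0, _UpdateIvNames(0, instance.disks) relabels the
# remaining disks as "disk/0", "disk/1", and so on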
12049 class _InstNicModPrivate:
12050 """Data structure for network interface modifications.
  Used by L{LUInstanceSetParams}.

  """
  def __init__(self):
    self.params = None
    self.filled = None
12060 class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
12064 HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  @staticmethod
12069 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12070 assert ht.TList(mods)
12071 assert not mods or len(mods[0]) in (2, 3)
    if mods and len(mods[0]) == 2:
      result = []
      addremove = 0
      for op, params in mods:
        if op in (constants.DDM_ADD, constants.DDM_REMOVE):
          result.append((op, -1, params))
          addremove += 1

          if addremove > 1:
            raise errors.OpPrereqError("Only one %s add or remove operation is"
                                       " supported at a time" % kind,
                                       errors.ECODE_INVAL)
        else:
          result.append((constants.DDM_MODIFY, op, params))

      assert verify_fn(result)
    else:
      result = mods

    return result

  @staticmethod
12096 def _CheckMods(kind, mods, key_types, item_fn):
    """Ensures requested disk/NIC modifications are valid.

    """
12100 for (op, _, params) in mods:
12101 assert ht.TDict(params)
12103 utils.ForceDictType(params, key_types)
      if op == constants.DDM_REMOVE:
        if params:
          raise errors.OpPrereqError("No settings should be passed when"
12108 " removing a %s" % kind,
12109 errors.ECODE_INVAL)
12110 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12111 item_fn(op, params)
12113 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
  @staticmethod
  def _VerifyDiskModification(op, params):
    """Verifies a disk modification.

    """
12120 if op == constants.DDM_ADD:
12121 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12122 if mode not in constants.DISK_ACCESS_SET:
12123 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12124 errors.ECODE_INVAL)
      size = params.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12129 constants.IDISK_SIZE, errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError), err:
12134 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12135 errors.ECODE_INVAL)
12137 params[constants.IDISK_SIZE] = size
12139 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12140 raise errors.OpPrereqError("Disk size change not possible, use"
12141 " grow-disk", errors.ECODE_INVAL)
  @staticmethod
  def _VerifyNicModification(op, params):
    """Verifies a network interface modification.

    """
12148 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
      ip = params.get(constants.INIC_IP, None)
      if ip is None:
        pass
      elif ip.lower() == constants.VALUE_NONE:
12153 params[constants.INIC_IP] = None
12154 elif not netutils.IPAddress.IsValid(ip):
12155 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12156 errors.ECODE_INVAL)
12158 bridge = params.get("bridge", None)
12159 link = params.get(constants.INIC_LINK, None)
12160 if bridge and link:
12161 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12162 " at the same time", errors.ECODE_INVAL)
12163 elif bridge and bridge.lower() == constants.VALUE_NONE:
12164 params["bridge"] = None
12165 elif link and link.lower() == constants.VALUE_NONE:
12166 params[constants.INIC_LINK] = None
12168 if op == constants.DDM_ADD:
12169 macaddr = params.get(constants.INIC_MAC, None)
12170 if macaddr is None:
12171 params[constants.INIC_MAC] = constants.VALUE_AUTO
12173 if constants.INIC_MAC in params:
12174 macaddr = params[constants.INIC_MAC]
12175 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12176 macaddr = utils.NormalizeAndValidateMac(macaddr)
12178 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12179 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12180 " modifying an existing NIC",
12181 errors.ECODE_INVAL)
12183 def CheckArguments(self):
12184 if not (self.op.nics or self.op.disks or self.op.disk_template or
12185 self.op.hvparams or self.op.beparams or self.op.os_name or
12186 self.op.offline is not None or self.op.runtime_mem):
12187 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12189 if self.op.hvparams:
12190 _CheckGlobalHvParams(self.op.hvparams)
    self.op.disks = \
      self._UpgradeDiskNicMods("disk", self.op.disks,
                               opcodes.OpInstanceSetParams.TestDiskModifications)
    self.op.nics = \
      self._UpgradeDiskNicMods("NIC", self.op.nics,
                               opcodes.OpInstanceSetParams.TestNicModifications)
12199 # Check disk modifications
12200 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12201 self._VerifyDiskModification)
12203 if self.op.disks and self.op.disk_template is not None:
12204 raise errors.OpPrereqError("Disk template conversion and other disk"
12205 " changes not supported at the same time",
12206 errors.ECODE_INVAL)
12208 if (self.op.disk_template and
12209 self.op.disk_template in constants.DTS_INT_MIRROR and
12210 self.op.remote_node is None):
12211 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12212 " one requires specifying a secondary node",
12213 errors.ECODE_INVAL)
12215 # Check NIC modifications
12216 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12217 self._VerifyNicModification)
12219 def ExpandNames(self):
12220 self._ExpandAndLockInstance()
12221 # Can't even acquire node locks in shared mode as upcoming changes in
12222 # Ganeti 2.6 will start to modify the node object on disk conversion
12223 self.needed_locks[locking.LEVEL_NODE] = []
12224 self.needed_locks[locking.LEVEL_NODE_RES] = []
12225 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12227 def DeclareLocks(self, level):
12228 # TODO: Acquire group lock in shared mode (disk parameters)
12229 if level == locking.LEVEL_NODE:
12230 self._LockInstancesNodes()
12231 if self.op.disk_template and self.op.remote_node:
12232 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12233 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12234 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12236 self.needed_locks[locking.LEVEL_NODE_RES] = \
12237 self.needed_locks[locking.LEVEL_NODE][:]
12239 def BuildHooksEnv(self):
12240 """Build hooks env.
    This runs on the master, primary and secondaries.

    """
    args = {}
12246 if constants.BE_MINMEM in self.be_new:
12247 args["minmem"] = self.be_new[constants.BE_MINMEM]
12248 if constants.BE_MAXMEM in self.be_new:
12249 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12250 if constants.BE_VCPUS in self.be_new:
12251 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12252 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12253 # information at all.
    if self._new_nics is not None:
      nics = []
12258 for nic in self._new_nics:
12259 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12260 mode = nicparams[constants.NIC_MODE]
12261 link = nicparams[constants.NIC_LINK]
12262 nics.append((nic.ip, nic.mac, mode, link))
12264 args["nics"] = nics
12266 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12267 if self.op.disk_template:
12268 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12269 if self.op.runtime_mem:
      env["RUNTIME_MEMORY"] = self.op.runtime_mem

    return env
12274 def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)
  def _PrepareNicModification(self, params, private, old_ip, old_params,
                              cluster, pnode):
    update_params_dict = dict([(key, params[key])
                               for key in constants.NICS_PARAMETERS
                               if key in params])
12287 if "bridge" in params:
12288 update_params_dict[constants.NIC_LINK] = params["bridge"]
12290 new_params = _GetUpdatedParams(old_params, update_params_dict)
12291 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12293 new_filled_params = cluster.SimpleFillNIC(new_params)
12294 objects.NIC.CheckParameterSyntax(new_filled_params)
12296 new_mode = new_filled_params[constants.NIC_MODE]
12297 if new_mode == constants.NIC_MODE_BRIDGED:
12298 bridge = new_filled_params[constants.NIC_LINK]
      msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
      if msg:
        msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
        if self.op.force:
          self.warn.append(msg)
        else:
12305 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12307 elif new_mode == constants.NIC_MODE_ROUTED:
      ip = params.get(constants.INIC_IP, old_ip)
      if ip is None:
        raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12311 " on a routed NIC", errors.ECODE_INVAL)
12313 if constants.INIC_MAC in params:
      mac = params[constants.INIC_MAC]
      if mac is None:
        raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12317 errors.ECODE_INVAL)
12318 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12319 # otherwise generate the MAC address
12320 params[constants.INIC_MAC] = \
12321 self.cfg.GenerateMAC(self.proc.GetECId())
      else:
        # or validate/reserve the current one
        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
12326 except errors.ReservationError:
12327 raise errors.OpPrereqError("MAC address '%s' already in use"
12328 " in cluster" % mac,
12329 errors.ECODE_NOTUNIQUE)
12331 private.params = new_params
12332 private.filled = new_filled_params
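    # Nothing is returned to ApplyContainerMods at this stage; the computed
    # parameters are handed over via the private object and applied later by
    # _CreateNewNic/_ApplyNicMods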
12334 return (None, None)
12336 def CheckPrereq(self):
12337 """Check prerequisites.
    This only checks the instance list against the existing names.

    """
12342 # checking the new params on the primary/secondary nodes
12344 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12345 cluster = self.cluster = self.cfg.GetClusterInfo()
12346 assert self.instance is not None, \
12347 "Cannot retrieve locked instance %s" % self.op.instance_name
12348 pnode = instance.primary_node
12349 nodelist = list(instance.all_nodes)
12350 pnode_info = self.cfg.GetNodeInfo(pnode)
12351 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12353 # Prepare disk/NIC modifications
12354 self.diskmod = PrepareContainerMods(self.op.disks, None)
12355 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12358 if self.op.os_name and not self.op.force:
12359 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12360 self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os
12365 assert not (self.op.disk_template and self.op.disks), \
12366 "Can't modify disk template and apply disk changes at the same time"
12368 if self.op.disk_template:
12369 if instance.disk_template == self.op.disk_template:
12370 raise errors.OpPrereqError("Instance already has disk template %s" %
12371 instance.disk_template, errors.ECODE_INVAL)
12373 if (instance.disk_template,
12374 self.op.disk_template) not in self._DISK_CONVERSIONS:
12375 raise errors.OpPrereqError("Unsupported disk template conversion from"
12376 " %s to %s" % (instance.disk_template,
12377 self.op.disk_template),
12378 errors.ECODE_INVAL)
12379 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12380 msg="cannot change disk template")
12381 if self.op.disk_template in constants.DTS_INT_MIRROR:
12382 if self.op.remote_node == pnode:
12383 raise errors.OpPrereqError("Given new secondary node %s is the same"
12384 " as the primary node of the instance" %
12385 self.op.remote_node, errors.ECODE_STATE)
12386 _CheckNodeOnline(self, self.op.remote_node)
12387 _CheckNodeNotDrained(self, self.op.remote_node)
12388 # FIXME: here we assume that the old instance type is DT_PLAIN
12389 assert instance.disk_template == constants.DT_PLAIN
12390 disks = [{constants.IDISK_SIZE: d.size,
12391 constants.IDISK_VG: d.logical_id[0]}
12392 for d in instance.disks]
12393 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12394 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12396 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12397 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12398 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12399 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12400 ignore=self.op.ignore_ipolicy)
12401 if pnode_info.group != snode_info.group:
12402 self.LogWarning("The primary and secondary nodes are in two"
12403 " different node groups; the disk parameters"
12404 " from the first disk's node group will be"
12407 # hvparams processing
12408 if self.op.hvparams:
12409 hv_type = instance.hypervisor
12410 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12411 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12412 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12415 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12416 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12417 self.hv_proposed = self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
                                              instance.hvparams)
12422 self.hv_new = self.hv_inst = {}
12424 # beparams processing
12425 if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
12428 objects.UpgradeBeParams(i_bedict)
12429 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12430 be_new = cluster.SimpleFillBE(i_bedict)
12431 self.be_proposed = self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
12435 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12436 be_old = cluster.FillBE(instance)
12438 # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # changed
    if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
      cpu_list = \
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12445 # Verify mask is consistent with number of vCPUs. Can skip this
12446 # test if only 1 entry in the CPU mask, which means same mask
12447 # is applied to all vCPUs.
12448 if (len(cpu_list) > 1 and
12449 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   " CPU mask [%s]" %
12452 (self.be_proposed[constants.BE_VCPUS],
12453 self.hv_proposed[constants.HV_CPU_MASK]),
12454 errors.ECODE_INVAL)
12456 # Only perform this test if a new CPU mask is given
12457 if constants.HV_CPU_MASK in self.hv_new:
12458 # Calculate the largest CPU number requested
12459 max_requested_cpu = max(map(max, cpu_list))
12460 # Check that all of the instance's nodes have enough physical CPUs to
12461 # satisfy the requested CPU mask
12462 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12463 max_requested_cpu + 1, instance.hypervisor)
12465 # osparams processing
12466 if self.op.osparams:
12467 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12468 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []
12475 #TODO(dynmem): do the appropriate check involving MINMEM
12476 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12477 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12478 mem_check_list = [pnode]
12479 if be_new[constants.BE_AUTO_BALANCE]:
12480 # either we changed auto_balance to yes or it was from before
12481 mem_check_list.extend(instance.secondary_nodes)
12482 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12483 instance.hypervisor)
12484 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12485 [instance.hypervisor])
12486 pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      else:
12493 (_, _, (pnhvinfo, )) = pninfo.payload
12494 if not isinstance(pnhvinfo.get("memory_free", None), int):
12495 self.warn.append("Node data from primary node %s doesn't contain"
12496 " free memory information" % pnode)
12497 elif instance_info.fail_msg:
12498 self.warn.append("Can't get instance runtime information: %s" %
                         instance_info.fail_msg)
        else:
          if instance_info.payload:
            current_mem = int(instance_info.payload["memory"])
          else:
12504 # Assume instance not running
12505 # (there is a slight race condition here, but it's not very
12506 # probable, and we have no other way to check)
            # TODO: Describe race condition
            current_mem = 0
12509 #TODO(dynmem): do the appropriate check involving MINMEM
12510 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
                      pnhvinfo["memory_free"])
          if miss_mem > 0:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from starting, due to %d MB of memory"
                                       " missing on its primary node" %
                                       miss_mem,
12517 errors.ECODE_NORES)
12519 if be_new[constants.BE_AUTO_BALANCE]:
12520 for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
12523 nres.Raise("Can't get info from secondary node %s" % node,
12524 prereq=True, ecode=errors.ECODE_STATE)
12525 (_, _, (nhvinfo, )) = nres.payload
12526 if not isinstance(nhvinfo.get("memory_free", None), int):
12527 raise errors.OpPrereqError("Secondary node %s didn't return free"
12528 " memory information" % node,
12529 errors.ECODE_STATE)
12530 #TODO(dynmem): do the appropriate check involving MINMEM
12531 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12532 raise errors.OpPrereqError("This change will prevent the instance"
12533 " from failover to its secondary node"
12534 " %s, due to not enough memory" % node,
12535 errors.ECODE_STATE)
12537 if self.op.runtime_mem:
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
12540 instance.hypervisor)
12541 remote_info.Raise("Error checking node %s" % instance.primary_node)
12542 if not remote_info.payload: # not running already
12543 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12544 errors.ECODE_STATE)
12546 current_memory = remote_info.payload["memory"]
12547 if (not self.op.force and
12548 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12549 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12550 raise errors.OpPrereqError("Instance %s must have memory between %d"
12551 " and %d MB of memory unless --force is"
12552 " given" % (instance.name,
12553 self.be_proposed[constants.BE_MINMEM],
12554 self.be_proposed[constants.BE_MAXMEM]),
12555 errors.ECODE_INVAL)
12557 if self.op.runtime_mem > current_memory:
12558 _CheckNodeFreeMemory(self, instance.primary_node,
                             "ballooning memory for instance %s" %
                             instance.name,
                             self.op.runtime_mem - current_memory,
12562 instance.hypervisor)
12564 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12565 raise errors.OpPrereqError("Disk operations not supported for"
12566 " diskless instances",
12567 errors.ECODE_INVAL)
12569 def _PrepareNicCreate(_, params, private):
      return self._PrepareNicModification(params, private, None, {},
                                          cluster, pnode)
12573 def _PrepareNicMod(_, nic, params, private):
12574 return self._PrepareNicModification(params, private, nic.ip,
12575 nic.nicparams, cluster, pnode)
12577 # Verify NIC changes (operating on copy)
12578 nics = instance.nics[:]
12579 ApplyContainerMods("NIC", nics, None, self.nicmod,
12580 _PrepareNicCreate, _PrepareNicMod, None)
12581 if len(nics) > constants.MAX_NICS:
12582 raise errors.OpPrereqError("Instance has too many network interfaces"
12583 " (%d), cannot add more" % constants.MAX_NICS,
12584 errors.ECODE_STATE)
12586 # Verify disk changes (operating on a copy)
12587 disks = instance.disks[:]
12588 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12589 if len(disks) > constants.MAX_DISKS:
12590 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12591 " more" % constants.MAX_DISKS,
12592 errors.ECODE_STATE)
12594 if self.op.offline is not None:
12595 if self.op.offline:
        msg = "can't change to offline"
      else:
        msg = "can't change to online"
12599 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12601 # Pre-compute NIC changes (necessary to use result in hooks)
12602 self._nic_chgdesc = []
    if self.nicmod:
      # Operate on copies as this is still in prereq
12605 nics = [nic.Copy() for nic in instance.nics]
12606 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12607 self._CreateNewNic, self._ApplyNicMods, None)
12608 self._new_nics = nics
    else:
      self._new_nics = None
12612 def _ConvertPlainToDrbd(self, feedback_fn):
12613 """Converts an instance from plain to drbd.
12616 feedback_fn("Converting template to drbd")
12617 instance = self.instance
12618 pnode = instance.primary_node
12619 snode = self.op.remote_node
12621 assert instance.disk_template == constants.DT_PLAIN
12623 # create a fake disk info for _GenerateDiskTemplate
12624 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12625 constants.IDISK_VG: d.logical_id[0]}
12626 for d in instance.disks]
12627 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12628 instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn,
                                      self.diskparams)
    anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
                                        self.diskparams)
12633 info = _GetInstanceInfoText(instance)
12634 feedback_fn("Creating additional volumes...")
12635 # first, create the missing data and meta devices
12636 for disk in anno_disks:
12637 # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
12640 for child in disk.children:
12641 _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
12644 feedback_fn("Renaming original volumes...")
12645 rename_list = [(o, n.children[0].logical_id)
12646 for (o, n) in zip(instance.disks, new_disks)]
12647 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12648 result.Raise("Failed to rename original LVs")
12650 feedback_fn("Initializing DRBD devices...")
12651 # all child devices are in place, we can now create the DRBD devices
12652 for disk in anno_disks:
12653 for node in [pnode, snode]:
12654 f_create = node == pnode
12655 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12657 # at this point, the instance has been modified
12658 instance.disk_template = constants.DT_DRBD8
12659 instance.disks = new_disks
12660 self.cfg.Update(instance, feedback_fn)
12662 # Release node locks while waiting for sync
12663 _ReleaseLocks(self, locking.LEVEL_NODE)
12665 # disks are created, waiting for sync
12666 disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
12669 raise errors.OpExecError("There are some degraded disks for"
12670 " this instance, please cleanup manually")
12672 # Node resource locks will be released by caller
12674 def _ConvertDrbdToPlain(self, feedback_fn):
12675 """Converts an instance from drbd to plain.
12678 instance = self.instance
12680 assert len(instance.secondary_nodes) == 1
12681 assert instance.disk_template == constants.DT_DRBD8
12683 pnode = instance.primary_node
12684 snode = instance.secondary_nodes[0]
12685 feedback_fn("Converting template to plain")
12687 old_disks = instance.disks
12688 new_disks = [d.children[0] for d in old_disks]
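    # children[0] of each DRBD8 disk is the data LV (children[1] is the
    # metadata LV), so the data volume simply becomes the plain disk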
12690 # copy over size and mode
12691 for parent, child in zip(old_disks, new_disks):
12692 child.size = parent.size
12693 child.mode = parent.mode
12695 # this is a DRBD disk, return its port to the pool
12696 # NOTE: this must be done right before the call to cfg.Update!
12697 for disk in old_disks:
12698 tcp_port = disk.logical_id[2]
12699 self.cfg.AddTcpUdpPort(tcp_port)
12701 # update instance structure
12702 instance.disks = new_disks
12703 instance.disk_template = constants.DT_PLAIN
12704 self.cfg.Update(instance, feedback_fn)
12706 # Release locks in case removing disks takes a while
12707 _ReleaseLocks(self, locking.LEVEL_NODE)
12709 feedback_fn("Removing volumes on the secondary node...")
12710 for disk in old_disks:
12711 self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
12714 self.LogWarning("Could not remove block device %s on node %s,"
12715 " continuing anyway: %s", disk.iv_name, snode, msg)
12717 feedback_fn("Removing unneeded volumes on the primary node...")
12718 for idx, disk in enumerate(old_disks):
12719 meta = disk.children[1]
12720 self.cfg.SetDiskID(meta, pnode)
12721 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12723 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12724 " continuing anyway: %s", idx, pnode, msg)
12726 def _CreateNewDisk(self, idx, params, _):
12727 """Creates a new disk.
12730 instance = self.instance
12733 if instance.disk_template in constants.DTS_FILEBASED:
12734 (file_driver, file_path) = instance.disks[0].logical_id
12735 file_path = os.path.dirname(file_path)
12737 file_driver = file_path = None
12740 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12741 instance.primary_node, instance.secondary_nodes,
12742 [params], file_path, file_driver, idx,
12743 self.Log, self.diskparams)[0]
12745 info = _GetInstanceInfoText(instance)
12747 logging.info("Creating volume %s for instance %s",
12748 disk.iv_name, instance.name)
12749 # Note: this needs to be kept in sync with _CreateDisks
12751 for node in instance.all_nodes:
12752 f_create = (node == instance.primary_node)
12754 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12755 except errors.OpExecError, err:
12756 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12757 disk.iv_name, disk, node, err)
12760 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12764 def _ModifyDisk(idx, disk, params, _):
12765 """Modifies a disk.
12768 disk.mode = params[constants.IDISK_MODE]
12771 ("disk.mode/%d" % idx, disk.mode),
12774 def _RemoveDisk(self, idx, root, _):
12778 for node, disk in root.ComputeNodeTree(self.instance.primary_node):
12779 self.cfg.SetDiskID(disk, node)
12780 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12782 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12783 " continuing anyway", idx, node, msg)
12785 # if this is a DRBD disk, return its port to the pool
12786 if root.dev_type in constants.LDS_DRBD:
12787 self.cfg.AddTcpUdpPort(root.logical_id[2])
12790 def _CreateNewNic(idx, params, private):
12791 """Creates data structure for a new network interface.
12794 mac = params[constants.INIC_MAC]
12795 ip = params.get(constants.INIC_IP, None)
12796 nicparams = private.params
12798 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12800 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12801 (mac, ip, private.filled[constants.NIC_MODE],
12802 private.filled[constants.NIC_LINK])),
12806 def _ApplyNicMods(idx, nic, params, private):
12807 """Modifies a network interface.
12812 for key in [constants.INIC_MAC, constants.INIC_IP]:
12814 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12815 setattr(nic, key, params[key])
12818 nic.nicparams = private.params
12820 for (key, val) in params.items():
12821 changes.append(("nic.%s/%d" % (key, idx), val))
12825 def Exec(self, feedback_fn):
12826 """Modifies an instance.
12828 All parameters take effect only at the next restart of the instance.
12831 # Process here the warnings from CheckPrereq, as we don't have a
12832 # feedback_fn there.
12833 # TODO: Replace with self.LogWarning
12834 for warn in self.warn:
12835 feedback_fn("WARNING: %s" % warn)
12837 assert ((self.op.disk_template is None) ^
12838 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12839 "Not owning any node resource locks"
12842 instance = self.instance
12845 if self.op.runtime_mem:
12846 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12848 self.op.runtime_mem)
12849 rpcres.Raise("Cannot modify instance runtime memory")
12850 result.append(("runtime_memory", self.op.runtime_mem))
12852 # Apply disk changes
12853 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12854 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12855 _UpdateIvNames(0, instance.disks)
12857 if self.op.disk_template:
12859 check_nodes = set(instance.all_nodes)
12860 if self.op.remote_node:
12861 check_nodes.add(self.op.remote_node)
12862 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12863 owned = self.owned_locks(level)
12864 assert not (check_nodes - owned), \
12865 ("Not owning the correct locks, owning %r, expected at least %r" %
12866 (owned, check_nodes))
12868 r_shut = _ShutdownInstanceDisks(self, instance)
12870 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12871 " proceed with disk template conversion")
12872 mode = (instance.disk_template, self.op.disk_template)
12874 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12876 self.cfg.ReleaseDRBDMinors(instance.name)
12878 result.append(("disk_template", self.op.disk_template))
12880 assert instance.disk_template == self.op.disk_template, \
12881 ("Expected disk template '%s', found '%s'" %
12882 (self.op.disk_template, instance.disk_template))
12884 # Release node and resource locks if there are any (they might already have
12885 # been released during disk conversion)
12886 _ReleaseLocks(self, locking.LEVEL_NODE)
12887 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12889 # Apply NIC changes
12890 if self._new_nics is not None:
12891 instance.nics = self._new_nics
12892 result.extend(self._nic_chgdesc)
12895 if self.op.hvparams:
12896 instance.hvparams = self.hv_inst
12897 for key, val in self.op.hvparams.iteritems():
12898 result.append(("hv/%s" % key, val))
12901 if self.op.beparams:
12902 instance.beparams = self.be_inst
12903 for key, val in self.op.beparams.iteritems():
12904 result.append(("be/%s" % key, val))
12907 if self.op.os_name:
12908 instance.os = self.op.os_name
12911 if self.op.osparams:
12912 instance.osparams = self.os_inst
12913 for key, val in self.op.osparams.iteritems():
12914 result.append(("os/%s" % key, val))
12916 if self.op.offline is None:
12919 elif self.op.offline:
12920 # Mark instance as offline
12921 self.cfg.MarkInstanceOffline(instance.name)
12922 result.append(("admin_state", constants.ADMINST_OFFLINE))
12924 # Mark instance as online, but stopped
12925 self.cfg.MarkInstanceDown(instance.name)
12926 result.append(("admin_state", constants.ADMINST_DOWN))
12928 self.cfg.Update(instance, feedback_fn)
12930 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12931 self.owned_locks(locking.LEVEL_NODE)), \
12932 "All node locks should have been released by now"
12936 _DISK_CONVERSIONS = {
12937 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12938 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
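# Illustrative sketch (not part of the original file): a template conversion
# is requested through OpInstanceSetParams; Exec() above looks up the
# (old, new) template pair in _DISK_CONVERSIONS. Field names are assumed from
# the opcodes module and shown only as an example:
#
#   op = opcodes.OpInstanceSetParams(instance_name="inst1.example.com",
#                                    disk_template=constants.DT_DRBD8,
#                                    remote_node="node2.example.com")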
12942 class LUInstanceChangeGroup(LogicalUnit):
12943 HPATH = "instance-change-group"
12944 HTYPE = constants.HTYPE_INSTANCE
12947 def ExpandNames(self):
12948 self.share_locks = _ShareAll()
12949 self.needed_locks = {
12950 locking.LEVEL_NODEGROUP: [],
12951 locking.LEVEL_NODE: [],
12954 self._ExpandAndLockInstance()
12956 if self.op.target_groups:
12957 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12958 self.op.target_groups)
12960 self.req_target_uuids = None
12962 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12964 def DeclareLocks(self, level):
12965 if level == locking.LEVEL_NODEGROUP:
12966 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12968 if self.req_target_uuids:
12969 lock_groups = set(self.req_target_uuids)
12971 # Lock all groups used by the instance optimistically; this requires
12972 # going via the node before it is locked, so it needs verification later on
12973 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12974 lock_groups.update(instance_groups)
12976 # No target groups, need to lock all of them
12977 lock_groups = locking.ALL_SET
12979 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12981 elif level == locking.LEVEL_NODE:
12982 if self.req_target_uuids:
12983 # Lock all nodes used by instances
12984 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12985 self._LockInstancesNodes()
12987 # Lock all nodes in all potential target groups
12988 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12989 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12990 member_nodes = [node_name
12991 for group in lock_groups
12992 for node_name in self.cfg.GetNodeGroup(group).members]
12993 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12995 # Lock all nodes as all groups are potential targets
12996 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12998 def CheckPrereq(self):
12999 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13000 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13001 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13003 assert (self.req_target_uuids is None or
13004 owned_groups.issuperset(self.req_target_uuids))
13005 assert owned_instances == set([self.op.instance_name])
13007 # Get instance information
13008 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13010 # Check if node groups for locked instance are still correct
13011 assert owned_nodes.issuperset(self.instance.all_nodes), \
13012 ("Instance %s's nodes changed while we kept the lock" %
13013 self.op.instance_name)
13015 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13018 if self.req_target_uuids:
13019 # User requested specific target groups
13020 self.target_uuids = frozenset(self.req_target_uuids)
13022 # All groups except those used by the instance are potential targets
13023 self.target_uuids = owned_groups - inst_groups
13025 conflicting_groups = self.target_uuids & inst_groups
13026 if conflicting_groups:
13027 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13028 " used by the instance '%s'" %
13029 (utils.CommaJoin(conflicting_groups),
13030 self.op.instance_name),
13031 errors.ECODE_INVAL)
13033 if not self.target_uuids:
13034 raise errors.OpPrereqError("There are no possible target groups",
13035 errors.ECODE_INVAL)
13037 def BuildHooksEnv(self):
13038 """Build hooks env.
13041 assert self.target_uuids
13044 "TARGET_GROUPS": " ".join(self.target_uuids),
13047 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13051 def BuildHooksNodes(self):
13052 """Build hooks nodes.
13055 mn = self.cfg.GetMasterNode()
13056 return ([mn], [mn])
13058 def Exec(self, feedback_fn):
13059 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13061 assert instances == [self.op.instance_name], "Instance not locked"
13063 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13064 instances=instances, target_groups=list(self.target_uuids))
13066 ial.Run(self.op.iallocator)
13068 if not ial.success:
13069 raise errors.OpPrereqError("Can't compute solution for changing group of"
13070 " instance '%s' using iallocator '%s': %s" %
13071 (self.op.instance_name, self.op.iallocator,
13073 errors.ECODE_NORES)
13075 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13077 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13078 " instance '%s'", len(jobs), self.op.instance_name)
13080 return ResultWithJobs(jobs)
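# Illustrative sketch (not part of the original file): this LU backs the
# "gnt-instance change-group" command; a roughly equivalent opcode would be
#
#   op = opcodes.OpInstanceChangeGroup(instance_name="inst1.example.com",
#                                      target_groups=["group2"])
#
# The iallocator computes a CHG_GROUP solution and the resulting jobs are
# handed back through ResultWithJobs for the job queue to submit.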
13083 class LUBackupQuery(NoHooksLU):
13084 """Query the exports list
13089 def CheckArguments(self):
13090 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13091 ["node", "export"], self.op.use_locking)
13093 def ExpandNames(self):
13094 self.expq.ExpandNames(self)
13096 def DeclareLocks(self, level):
13097 self.expq.DeclareLocks(self, level)
13099 def Exec(self, feedback_fn):
13102 for (node, expname) in self.expq.OldStyleQuery(self):
13103 if expname is None:
13104 result[node] = False
13106 result.setdefault(node, []).append(expname)
13111 class _ExportQuery(_QueryBase):
13112 FIELDS = query.EXPORT_FIELDS
13114 #: The node name is not a unique key for this query
13115 SORT_FIELD = "node"
13117 def ExpandNames(self, lu):
13118 lu.needed_locks = {}
13120 # The following variables interact with _QueryBase._GetNames
13122 self.wanted = _GetWantedNodes(lu, self.names)
13124 self.wanted = locking.ALL_SET
13126 self.do_locking = self.use_locking
13128 if self.do_locking:
13129 lu.share_locks = _ShareAll()
13130 lu.needed_locks = {
13131 locking.LEVEL_NODE: self.wanted,
13134 def DeclareLocks(self, lu, level):
13137 def _GetQueryData(self, lu):
13138 """Computes the list of nodes and their attributes.
13141 # Locking is not used
13143 assert not (compat.any(lu.glm.is_owned(level)
13144 for level in locking.LEVELS
13145 if level != locking.LEVEL_CLUSTER) or
13146 self.do_locking or self.use_locking)
13148 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13152 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13154 result.append((node, None))
13156 result.extend((node, expname) for expname in nres.payload)
13161 class LUBackupPrepare(NoHooksLU):
13162 """Prepares an instance for an export and returns useful information.
13167 def ExpandNames(self):
13168 self._ExpandAndLockInstance()
13170 def CheckPrereq(self):
13171 """Check prerequisites.
13174 instance_name = self.op.instance_name
13176 self.instance = self.cfg.GetInstanceInfo(instance_name)
13177 assert self.instance is not None, \
13178 "Cannot retrieve locked instance %s" % self.op.instance_name
13179 _CheckNodeOnline(self, self.instance.primary_node)
13181 self._cds = _GetClusterDomainSecret()
13183 def Exec(self, feedback_fn):
13184 """Prepares an instance for an export.
13187 instance = self.instance
13189 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13190 salt = utils.GenerateSecret(8)
13192 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13193 result = self.rpc.call_x509_cert_create(instance.primary_node,
13194 constants.RIE_CERT_VALIDITY)
13195 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13197 (name, cert_pem) = result.payload
13199 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13203 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13204 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13206 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13212 class LUBackupExport(LogicalUnit):
13213 """Export an instance to an image in the cluster.
13216 HPATH = "instance-export"
13217 HTYPE = constants.HTYPE_INSTANCE
13220 def CheckArguments(self):
13221 """Check the arguments.
13224 self.x509_key_name = self.op.x509_key_name
13225 self.dest_x509_ca_pem = self.op.destination_x509_ca
13227 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13228 if not self.x509_key_name:
13229 raise errors.OpPrereqError("Missing X509 key name for encryption",
13230 errors.ECODE_INVAL)
13232 if not self.dest_x509_ca_pem:
13233 raise errors.OpPrereqError("Missing destination X509 CA",
13234 errors.ECODE_INVAL)
13236 def ExpandNames(self):
13237 self._ExpandAndLockInstance()
13239 # Lock all nodes for local exports
13240 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13241 # FIXME: lock only instance primary and destination node
13243 # Sad but true, for now we have to lock all nodes, as we don't know where
13244 # the previous export might be, and in this LU we search for it and
13245 # remove it from its current node. In the future we could fix this by:
13246 # - making a tasklet to search (share-lock all), then create the
13247 # new one, then one to remove, after
13248 # - removing the removal operation altogether
13249 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13251 def DeclareLocks(self, level):
13252 """Last minute lock declaration."""
13253 # All nodes are locked anyway, so nothing to do here.
13255 def BuildHooksEnv(self):
13256 """Build hooks env.
13258 This will run on the master, primary node and target node.
13262 "EXPORT_MODE": self.op.mode,
13263 "EXPORT_NODE": self.op.target_node,
13264 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13265 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13266 # TODO: Generic function for boolean env variables
13267 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13270 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13274 def BuildHooksNodes(self):
13275 """Build hooks nodes.
13278 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13280 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13281 nl.append(self.op.target_node)
13285 def CheckPrereq(self):
13286 """Check prerequisites.
13288 This checks that the instance and node names are valid.
13291 instance_name = self.op.instance_name
13293 self.instance = self.cfg.GetInstanceInfo(instance_name)
13294 assert self.instance is not None, \
13295 "Cannot retrieve locked instance %s" % self.op.instance_name
13296 _CheckNodeOnline(self, self.instance.primary_node)
13298 if (self.op.remove_instance and
13299 self.instance.admin_state == constants.ADMINST_UP and
13300 not self.op.shutdown):
13301 raise errors.OpPrereqError("Cannot remove instance without shutting it"
13304 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13305 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13306 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13307 assert self.dst_node is not None
13309 _CheckNodeOnline(self, self.dst_node.name)
13310 _CheckNodeNotDrained(self, self.dst_node.name)
13313 self.dest_disk_info = None
13314 self.dest_x509_ca = None
13316 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13317 self.dst_node = None
13319 if len(self.op.target_node) != len(self.instance.disks):
13320 raise errors.OpPrereqError(("Received destination information for %s"
13321 " disks, but instance %s has %s disks") %
13322 (len(self.op.target_node), instance_name,
13323 len(self.instance.disks)),
13324 errors.ECODE_INVAL)
13326 cds = _GetClusterDomainSecret()
13328 # Check X509 key name
13330 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13331 except (TypeError, ValueError), err:
13332 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13334 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13335 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13336 errors.ECODE_INVAL)
13338 # Load and verify CA
13340 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13341 except OpenSSL.crypto.Error, err:
13342 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13343 (err, ), errors.ECODE_INVAL)
13345 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13346 if errcode is not None:
13347 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13348 (msg, ), errors.ECODE_INVAL)
13350 self.dest_x509_ca = cert
13352 # Verify target information
13354 for idx, disk_data in enumerate(self.op.target_node):
13356 (host, port, magic) = \
13357 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13358 except errors.GenericError, err:
13359 raise errors.OpPrereqError("Target info for disk %s: %s" %
13360 (idx, err), errors.ECODE_INVAL)
13362 disk_info.append((host, port, magic))
13364 assert len(disk_info) == len(self.op.target_node)
13365 self.dest_disk_info = disk_info
13368 raise errors.ProgrammerError("Unhandled export mode %r" %
13371 # instance disk type verification
13372 # TODO: Implement export support for file-based disks
13373 for disk in self.instance.disks:
13374 if disk.dev_type == constants.LD_FILE:
13375 raise errors.OpPrereqError("Export not supported for instances with"
13376 " file-based disks", errors.ECODE_INVAL)
13378 def _CleanupExports(self, feedback_fn):
13379 """Removes exports of current instance from all other nodes.
13381 If an instance in a cluster with nodes A..D was exported to node C, its
13382 exports will be removed from the nodes A, B and D.
13385 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13387 nodelist = self.cfg.GetNodeList()
13388 nodelist.remove(self.dst_node.name)
13390 # on one-node clusters nodelist will be empty after the removal
13391 # if we proceed the backup would be removed because OpBackupQuery
13392 # substitutes an empty list with the full cluster node list.
13393 iname = self.instance.name
13395 feedback_fn("Removing old exports for instance %s" % iname)
13396 exportlist = self.rpc.call_export_list(nodelist)
13397 for node in exportlist:
13398 if exportlist[node].fail_msg:
13400 if iname in exportlist[node].payload:
13401 msg = self.rpc.call_export_remove(node, iname).fail_msg
13403 self.LogWarning("Could not remove older export for instance %s"
13404 " on node %s: %s", iname, node, msg)
13406 def Exec(self, feedback_fn):
13407 """Export an instance to an image in the cluster.
13410 assert self.op.mode in constants.EXPORT_MODES
13412 instance = self.instance
13413 src_node = instance.primary_node
13415 if self.op.shutdown:
13416 # shutdown the instance, but not the disks
13417 feedback_fn("Shutting down instance %s" % instance.name)
13418 result = self.rpc.call_instance_shutdown(src_node, instance,
13419 self.op.shutdown_timeout)
13420 # TODO: Maybe ignore failures if ignore_remove_failures is set
13421 result.Raise("Could not shutdown instance %s on"
13422 " node %s" % (instance.name, src_node))
13424 # set the disks ID correctly since call_instance_start needs the
13425 # correct drbd minor to create the symlinks
13426 for disk in instance.disks:
13427 self.cfg.SetDiskID(disk, src_node)
13429 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13432 # Activate the instance disks if we're exporting a stopped instance
13433 feedback_fn("Activating disks for %s" % instance.name)
13434 _StartInstanceDisks(self, instance, None)
13437 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13440 helper.CreateSnapshots()
13442 if (self.op.shutdown and
13443 instance.admin_state == constants.ADMINST_UP and
13444 not self.op.remove_instance):
13445 assert not activate_disks
13446 feedback_fn("Starting instance %s" % instance.name)
13447 result = self.rpc.call_instance_start(src_node,
13448 (instance, None, None), False)
13449 msg = result.fail_msg
13451 feedback_fn("Failed to start instance: %s" % msg)
13452 _ShutdownInstanceDisks(self, instance)
13453 raise errors.OpExecError("Could not start instance: %s" % msg)
13455 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13456 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13457 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13458 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13459 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13461 (key_name, _, _) = self.x509_key_name
13464 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13467 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13468 key_name, dest_ca_pem,
13473 # Check for backwards compatibility
13474 assert len(dresults) == len(instance.disks)
13475 assert compat.all(isinstance(i, bool) for i in dresults), \
13476 "Not all results are boolean: %r" % dresults
13480 feedback_fn("Deactivating disks for %s" % instance.name)
13481 _ShutdownInstanceDisks(self, instance)
13483 if not (compat.all(dresults) and fin_resu):
13486 failures.append("export finalization")
13487 if not compat.all(dresults):
13488 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13490 failures.append("disk export: disk(s) %s" % fdsk)
13492 raise errors.OpExecError("Export failed, errors in %s" %
13493 utils.CommaJoin(failures))
13495 # At this point the export was successful; we can clean up and finish
13497 # Remove instance if requested
13498 if self.op.remove_instance:
13499 feedback_fn("Removing instance %s" % instance.name)
13500 _RemoveInstance(self, feedback_fn, instance,
13501 self.op.ignore_remove_failures)
13503 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13504 self._CleanupExports(feedback_fn)
13506 return fin_resu, dresults
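# Illustrative sketch (not part of the original file): a plain local export
# to another node in the same cluster could be submitted as
#
#   op = opcodes.OpBackupExport(instance_name="inst1.example.com",
#                               target_node="backup1.example.com",
#                               shutdown=True)
#
# whereas a remote export uses mode=constants.EXPORT_MODE_REMOTE together
# with per-disk destination info and the X509 data described in CheckPrereq.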
13509 class LUBackupRemove(NoHooksLU):
13510 """Remove exports related to the named instance.
13515 def ExpandNames(self):
13516 self.needed_locks = {}
13517 # We need all nodes to be locked in order for RemoveExport to work, but we
13518 # don't need to lock the instance itself, as nothing will happen to it (and
13519 # we can also remove exports for an already-removed instance)
13520 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13522 def Exec(self, feedback_fn):
13523 """Remove any export.
13526 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13527 # If the instance was not found we'll try with the name that was passed in.
13528 # This will only work if it was an FQDN, though.
13530 if not instance_name:
13532 instance_name = self.op.instance_name
13534 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13535 exportlist = self.rpc.call_export_list(locked_nodes)
13537 for node in exportlist:
13538 msg = exportlist[node].fail_msg
13540 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13542 if instance_name in exportlist[node].payload:
13544 result = self.rpc.call_export_remove(node, instance_name)
13545 msg = result.fail_msg
13547 logging.error("Could not remove export for instance %s"
13548 " on node %s: %s", instance_name, node, msg)
13550 if fqdn_warn and not found:
13551 feedback_fn("Export not found. If trying to remove an export belonging"
13552 " to a deleted instance please use its Fully Qualified"
13556 class LUGroupAdd(LogicalUnit):
13557 """Logical unit for creating node groups.
13560 HPATH = "group-add"
13561 HTYPE = constants.HTYPE_GROUP
13564 def ExpandNames(self):
13565 # We need the new group's UUID here so that we can create and acquire the
13566 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13567 # that it should not check whether the UUID exists in the configuration.
13568 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13569 self.needed_locks = {}
13570 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13572 def CheckPrereq(self):
13573 """Check prerequisites.
13575 This checks that the given group name is not an existing node group
13580 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13581 except errors.OpPrereqError:
13584 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13585 " node group (UUID: %s)" %
13586 (self.op.group_name, existing_uuid),
13587 errors.ECODE_EXISTS)
13589 if self.op.ndparams:
13590 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13592 if self.op.hv_state:
13593 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13595 self.new_hv_state = None
13597 if self.op.disk_state:
13598 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13600 self.new_disk_state = None
13602 if self.op.diskparams:
13603 for templ in constants.DISK_TEMPLATES:
13604 if templ in self.op.diskparams:
13605 utils.ForceDictType(self.op.diskparams[templ],
13606 constants.DISK_DT_TYPES)
13607 self.new_diskparams = self.op.diskparams
13609 self.new_diskparams = {}
13611 if self.op.ipolicy:
13612 cluster = self.cfg.GetClusterInfo()
13613 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13615 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13616 except errors.ConfigurationError, err:
13617 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13618 errors.ECODE_INVAL)
13620 def BuildHooksEnv(self):
13621 """Build hooks env.
13625 "GROUP_NAME": self.op.group_name,
13628 def BuildHooksNodes(self):
13629 """Build hooks nodes.
13632 mn = self.cfg.GetMasterNode()
13633 return ([mn], [mn])
13635 def Exec(self, feedback_fn):
13636 """Add the node group to the cluster.
13639 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13640 uuid=self.group_uuid,
13641 alloc_policy=self.op.alloc_policy,
13642 ndparams=self.op.ndparams,
13643 diskparams=self.new_diskparams,
13644 ipolicy=self.op.ipolicy,
13645 hv_state_static=self.new_hv_state,
13646 disk_state_static=self.new_disk_state)
13648 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13649 del self.remove_locks[locking.LEVEL_NODEGROUP]
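# Illustrative sketch (not part of the original file): adding a group via the
# opcode interface, with field names as defined by OpGroupAdd:
#
#   op = opcodes.OpGroupAdd(group_name="group2",
#                           alloc_policy=constants.ALLOC_POLICY_PREFERRED)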
13652 class LUGroupAssignNodes(NoHooksLU):
13653 """Logical unit for assigning nodes to groups.
13658 def ExpandNames(self):
13659 # These raise errors.OpPrereqError on their own:
13660 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13661 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13663 # We want to lock all the affected nodes and groups. We have readily
13664 # available the list of nodes, and the *destination* group. To gather the
13665 # list of "source" groups, we need to fetch node information later on.
13666 self.needed_locks = {
13667 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13668 locking.LEVEL_NODE: self.op.nodes,
13671 def DeclareLocks(self, level):
13672 if level == locking.LEVEL_NODEGROUP:
13673 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13675 # Try to get all affected nodes' groups without having the group or node
13676 # lock yet. Needs verification later in the code flow.
13677 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13679 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13681 def CheckPrereq(self):
13682 """Check prerequisites.
13685 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13686 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13687 frozenset(self.op.nodes))
13689 expected_locks = (set([self.group_uuid]) |
13690 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13691 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13692 if actual_locks != expected_locks:
13693 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13694 " current groups are '%s', used to be '%s'" %
13695 (utils.CommaJoin(expected_locks),
13696 utils.CommaJoin(actual_locks)))
13698 self.node_data = self.cfg.GetAllNodesInfo()
13699 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13700 instance_data = self.cfg.GetAllInstancesInfo()
13702 if self.group is None:
13703 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13704 (self.op.group_name, self.group_uuid))
13706 (new_splits, previous_splits) = \
13707 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13708 for node in self.op.nodes],
13709 self.node_data, instance_data)
13712 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13714 if not self.op.force:
13715 raise errors.OpExecError("The following instances get split by this"
13716 " change and --force was not given: %s" %
13719 self.LogWarning("This operation will split the following instances: %s",
13722 if previous_splits:
13723 self.LogWarning("In addition, these already-split instances continue"
13724 " to be split across groups: %s",
13725 utils.CommaJoin(utils.NiceSort(previous_splits)))
13727 def Exec(self, feedback_fn):
13728 """Assign nodes to a new group.
13731 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13733 self.cfg.AssignGroupNodes(mods)
13736 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13737 """Check for split instances after a node assignment.
13739 This method considers a series of node assignments as an atomic operation,
13740 and returns information about split instances after applying the set of
13743 In particular, it returns information about newly split instances, and
13744 instances that were already split, and remain so after the change.
13746 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13749 @type changes: list of (node_name, new_group_uuid) pairs.
13750 @param changes: list of node assignments to consider.
13751 @param node_data: a dict with data for all nodes
13752 @param instance_data: a dict with all instances to consider
13753 @rtype: a two-tuple
13754 @return: a list of instances that were previously okay but end up split as a
13755 consequence of this change, and a list of instances that were previously
13756 split and that this change does not fix.
13759 changed_nodes = dict((node, group) for node, group in changes
13760 if node_data[node].group != group)
13762 all_split_instances = set()
13763 previously_split_instances = set()
13765 def InstanceNodes(instance):
13766 return [instance.primary_node] + list(instance.secondary_nodes)
13768 for inst in instance_data.values():
13769 if inst.disk_template not in constants.DTS_INT_MIRROR:
13772 instance_nodes = InstanceNodes(inst)
13774 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13775 previously_split_instances.add(inst.name)
13777 if len(set(changed_nodes.get(node, node_data[node].group)
13778 for node in instance_nodes)) > 1:
13779 all_split_instances.add(inst.name)
13781 return (list(all_split_instances - previously_split_instances),
13782 list(previously_split_instances & all_split_instances))
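# Worked example (added, illustrative only): with nodes n1, n2 in group g1,
# node n3 in group g2 and a DRBD instance running on (n1, n2), the change
# list [("n2", "g2")] leaves the instance's nodes in two different groups,
# so it is reported in the first returned list (newly split). Had the
# instance already been on (n1, n3) before the change, it would show up in
# the second list (previously split and still split) instead.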
13785 class _GroupQuery(_QueryBase):
13786 FIELDS = query.GROUP_FIELDS
13788 def ExpandNames(self, lu):
13789 lu.needed_locks = {}
13791 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13792 self._cluster = lu.cfg.GetClusterInfo()
13793 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13796 self.wanted = [name_to_uuid[name]
13797 for name in utils.NiceSort(name_to_uuid.keys())]
13799 # Accept the given names as either group names or UUIDs.
13802 all_uuid = frozenset(self._all_groups.keys())
13804 for name in self.names:
13805 if name in all_uuid:
13806 self.wanted.append(name)
13807 elif name in name_to_uuid:
13808 self.wanted.append(name_to_uuid[name])
13810 missing.append(name)
13813 raise errors.OpPrereqError("Some groups do not exist: %s" %
13814 utils.CommaJoin(missing),
13815 errors.ECODE_NOENT)
13817 def DeclareLocks(self, lu, level):
13820 def _GetQueryData(self, lu):
13821 """Computes the list of node groups and their attributes.
13824 do_nodes = query.GQ_NODE in self.requested_data
13825 do_instances = query.GQ_INST in self.requested_data
13827 group_to_nodes = None
13828 group_to_instances = None
13830 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13831 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13832 # latter GetAllInstancesInfo() is not enough, for we have to go through
13833 # instance->node. Hence, we will need to process nodes even if we only need
13834 # instance information.
13835 if do_nodes or do_instances:
13836 all_nodes = lu.cfg.GetAllNodesInfo()
13837 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13840 for node in all_nodes.values():
13841 if node.group in group_to_nodes:
13842 group_to_nodes[node.group].append(node.name)
13843 node_to_group[node.name] = node.group
13846 all_instances = lu.cfg.GetAllInstancesInfo()
13847 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13849 for instance in all_instances.values():
13850 node = instance.primary_node
13851 if node in node_to_group:
13852 group_to_instances[node_to_group[node]].append(instance.name)
13855 # Do not pass on node information if it was not requested.
13856 group_to_nodes = None
13858 return query.GroupQueryData(self._cluster,
13859 [self._all_groups[uuid]
13860 for uuid in self.wanted],
13861 group_to_nodes, group_to_instances,
13862 query.GQ_DISKPARAMS in self.requested_data)
13865 class LUGroupQuery(NoHooksLU):
13866 """Logical unit for querying node groups.
13871 def CheckArguments(self):
13872 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13873 self.op.output_fields, False)
13875 def ExpandNames(self):
13876 self.gq.ExpandNames(self)
13878 def DeclareLocks(self, level):
13879 self.gq.DeclareLocks(self, level)
13881 def Exec(self, feedback_fn):
13882 return self.gq.OldStyleQuery(self)
13885 class LUGroupSetParams(LogicalUnit):
13886 """Modifies the parameters of a node group.
13889 HPATH = "group-modify"
13890 HTYPE = constants.HTYPE_GROUP
13893 def CheckArguments(self):
13896 self.op.diskparams,
13897 self.op.alloc_policy,
13899 self.op.disk_state,
13903 if all_changes.count(None) == len(all_changes):
13904 raise errors.OpPrereqError("Please pass at least one modification",
13905 errors.ECODE_INVAL)
13907 def ExpandNames(self):
13908 # This raises errors.OpPrereqError on its own:
13909 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13911 self.needed_locks = {
13912 locking.LEVEL_INSTANCE: [],
13913 locking.LEVEL_NODEGROUP: [self.group_uuid],
13916 self.share_locks[locking.LEVEL_INSTANCE] = 1
13918 def DeclareLocks(self, level):
13919 if level == locking.LEVEL_INSTANCE:
13920 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13922 # Lock instances optimistically, needs verification once group lock has
13924 self.needed_locks[locking.LEVEL_INSTANCE] = \
13925 self.cfg.GetNodeGroupInstances(self.group_uuid)
13928 def _UpdateAndVerifyDiskParams(old, new):
13929 """Updates and verifies disk parameters.
13932 new_params = _GetUpdatedParams(old, new)
13933 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
13934 return new_params
13936 def CheckPrereq(self):
13937 """Check prerequisites.
13940 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13942 # Check if locked instances are still correct
13943 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13945 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13946 cluster = self.cfg.GetClusterInfo()
13948 if self.group is None:
13949 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13950 (self.op.group_name, self.group_uuid))
13952 if self.op.ndparams:
13953 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13954 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13955 self.new_ndparams = new_ndparams
13957 if self.op.diskparams:
13958 diskparams = self.group.diskparams
13959 uavdp = self._UpdateAndVerifyDiskParams
13960 # For each disktemplate subdict update and verify the values
13961 new_diskparams = dict((dt,
13962 uavdp(diskparams.get(dt, {}),
13963 self.op.diskparams[dt]))
13964 for dt in constants.DISK_TEMPLATES
13965 if dt in self.op.diskparams)
13966 # Now that all subdicts of diskparams are ready, merge the actual
13967 # dict with all the updated subdicts
13968 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
13970 if self.op.hv_state:
13971 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13972 self.group.hv_state_static)
13974 if self.op.disk_state:
13975 self.new_disk_state = \
13976 _MergeAndVerifyDiskState(self.op.disk_state,
13977 self.group.disk_state_static)
13979 if self.op.ipolicy:
13980 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13984 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
13985 inst_filter = lambda inst: inst.name in owned_instances
13986 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
13988 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
13990 new_ipolicy, instances)
13993 self.LogWarning("After the ipolicy change the following instances"
13994 " violate them: %s",
13995 utils.CommaJoin(violations))
13997 def BuildHooksEnv(self):
13998 """Build hooks env.
14002 "GROUP_NAME": self.op.group_name,
14003 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14006 def BuildHooksNodes(self):
14007 """Build hooks nodes.
14010 mn = self.cfg.GetMasterNode()
14011 return ([mn], [mn])
14013 def Exec(self, feedback_fn):
14014 """Modifies the node group.
14019 if self.op.ndparams:
14020 self.group.ndparams = self.new_ndparams
14021 result.append(("ndparams", str(self.group.ndparams)))
14023 if self.op.diskparams:
14024 self.group.diskparams = self.new_diskparams
14025 result.append(("diskparams", str(self.group.diskparams)))
14027 if self.op.alloc_policy:
14028 self.group.alloc_policy = self.op.alloc_policy
14030 if self.op.hv_state:
14031 self.group.hv_state_static = self.new_hv_state
14033 if self.op.disk_state:
14034 self.group.disk_state_static = self.new_disk_state
14036 if self.op.ipolicy:
14037 self.group.ipolicy = self.new_ipolicy
14039 self.cfg.Update(self.group, feedback_fn)
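# Illustrative sketch (not part of the original file): modifying a group's
# allocation policy through the opcode interface:
#
#   op = opcodes.OpGroupSetParams(group_name="group1",
#                                 alloc_policy=constants.ALLOC_POLICY_LAST_RESORT)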
14043 class LUGroupRemove(LogicalUnit):
14044 HPATH = "group-remove"
14045 HTYPE = constants.HTYPE_GROUP
14048 def ExpandNames(self):
14049 # This raises errors.OpPrereqError on its own:
14050 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14051 self.needed_locks = {
14052 locking.LEVEL_NODEGROUP: [self.group_uuid],
14055 def CheckPrereq(self):
14056 """Check prerequisites.
14058 This checks that the given group name exists as a node group, that it is
14059 empty (i.e., contains no nodes), and that it is not the last group of the
14063 # Verify that the group is empty.
14064 group_nodes = [node.name
14065 for node in self.cfg.GetAllNodesInfo().values()
14066 if node.group == self.group_uuid]
14069 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14071 (self.op.group_name,
14072 utils.CommaJoin(utils.NiceSort(group_nodes))),
14073 errors.ECODE_STATE)
14075 # Verify the cluster would not be left group-less.
14076 if len(self.cfg.GetNodeGroupList()) == 1:
14077 raise errors.OpPrereqError("Group '%s' is the only group,"
14078 " cannot be removed" %
14079 self.op.group_name,
14080 errors.ECODE_STATE)
14082 def BuildHooksEnv(self):
14083 """Build hooks env.
14087 "GROUP_NAME": self.op.group_name,
14090 def BuildHooksNodes(self):
14091 """Build hooks nodes.
14094 mn = self.cfg.GetMasterNode()
14095 return ([mn], [mn])
14097 def Exec(self, feedback_fn):
14098 """Remove the node group.
14102 self.cfg.RemoveNodeGroup(self.group_uuid)
14103 except errors.ConfigurationError:
14104 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14105 (self.op.group_name, self.group_uuid))
14107 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14110 class LUGroupRename(LogicalUnit):
14111 HPATH = "group-rename"
14112 HTYPE = constants.HTYPE_GROUP
14115 def ExpandNames(self):
14116 # This raises errors.OpPrereqError on its own:
14117 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14119 self.needed_locks = {
14120 locking.LEVEL_NODEGROUP: [self.group_uuid],
14123 def CheckPrereq(self):
14124 """Check prerequisites.
14126 Ensures the requested new name is not yet used.
14130 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14131 except errors.OpPrereqError:
14134 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14135 " node group (UUID: %s)" %
14136 (self.op.new_name, new_name_uuid),
14137 errors.ECODE_EXISTS)
14139 def BuildHooksEnv(self):
14140 """Build hooks env.
14144 "OLD_NAME": self.op.group_name,
14145 "NEW_NAME": self.op.new_name,
14148 def BuildHooksNodes(self):
14149 """Build hooks nodes.
14152 mn = self.cfg.GetMasterNode()
14154 all_nodes = self.cfg.GetAllNodesInfo()
14155 all_nodes.pop(mn, None)
14158 run_nodes.extend(node.name for node in all_nodes.values()
14159 if node.group == self.group_uuid)
14161 return (run_nodes, run_nodes)
14163 def Exec(self, feedback_fn):
14164 """Rename the node group.
14167 group = self.cfg.GetNodeGroup(self.group_uuid)
14170 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14171 (self.op.group_name, self.group_uuid))
14173 group.name = self.op.new_name
14174 self.cfg.Update(group, feedback_fn)
14176 return self.op.new_name
14179 class LUGroupEvacuate(LogicalUnit):
14180 HPATH = "group-evacuate"
14181 HTYPE = constants.HTYPE_GROUP
14184 def ExpandNames(self):
14185 # This raises errors.OpPrereqError on its own:
14186 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14188 if self.op.target_groups:
14189 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14190 self.op.target_groups)
14192 self.req_target_uuids = []
14194 if self.group_uuid in self.req_target_uuids:
14195 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14196 " as a target group (targets are %s)" %
14198 utils.CommaJoin(self.req_target_uuids)),
14199 errors.ECODE_INVAL)
14201 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14203 self.share_locks = _ShareAll()
14204 self.needed_locks = {
14205 locking.LEVEL_INSTANCE: [],
14206 locking.LEVEL_NODEGROUP: [],
14207 locking.LEVEL_NODE: [],
14210 def DeclareLocks(self, level):
14211 if level == locking.LEVEL_INSTANCE:
14212 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14214 # Lock instances optimistically, needs verification once node and group
14215 # locks have been acquired
14216 self.needed_locks[locking.LEVEL_INSTANCE] = \
14217 self.cfg.GetNodeGroupInstances(self.group_uuid)
14219 elif level == locking.LEVEL_NODEGROUP:
14220 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14222 if self.req_target_uuids:
14223 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14225 # Lock all groups used by the instances optimistically; this requires
14226 # going via the node before it is locked, so it needs verification later on
14227 lock_groups.update(group_uuid
14228 for instance_name in
14229 self.owned_locks(locking.LEVEL_INSTANCE)
14231 self.cfg.GetInstanceNodeGroups(instance_name))
14233 # No target groups, need to lock all of them
14234 lock_groups = locking.ALL_SET
14236 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14238 elif level == locking.LEVEL_NODE:
14239 # This will only lock the nodes in the group to be evacuated which
14240 # contain actual instances
14241 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14242 self._LockInstancesNodes()
14244 # Lock all nodes in group to be evacuated and target groups
14245 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14246 assert self.group_uuid in owned_groups
14247 member_nodes = [node_name
14248 for group in owned_groups
14249 for node_name in self.cfg.GetNodeGroup(group).members]
14250 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14252 def CheckPrereq(self):
14253 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14254 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14255 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14257 assert owned_groups.issuperset(self.req_target_uuids)
14258 assert self.group_uuid in owned_groups
14260 # Check if locked instances are still correct
14261 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14263 # Get instance information
14264 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14266 # Check if node groups for locked instances are still correct
14267 _CheckInstancesNodeGroups(self.cfg, self.instances,
14268 owned_groups, owned_nodes, self.group_uuid)
14270 if self.req_target_uuids:
14271 # User requested specific target groups
14272 self.target_uuids = self.req_target_uuids
14274 # All groups except the one to be evacuated are potential targets
14275 self.target_uuids = [group_uuid for group_uuid in owned_groups
14276 if group_uuid != self.group_uuid]
14278 if not self.target_uuids:
14279 raise errors.OpPrereqError("There are no possible target groups",
14280 errors.ECODE_INVAL)
14282 def BuildHooksEnv(self):
14283 """Build hooks env.
14287 "GROUP_NAME": self.op.group_name,
14288 "TARGET_GROUPS": " ".join(self.target_uuids),
14291 def BuildHooksNodes(self):
14292 """Build hooks nodes.
14295 mn = self.cfg.GetMasterNode()
14297 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14299 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14301 return (run_nodes, run_nodes)
14303 def Exec(self, feedback_fn):
14304 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14306 assert self.group_uuid not in self.target_uuids
14308 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14309 instances=instances, target_groups=self.target_uuids)
14311 ial.Run(self.op.iallocator)
14313 if not ial.success:
14314 raise errors.OpPrereqError("Can't compute group evacuation using"
14315 " iallocator '%s': %s" %
14316 (self.op.iallocator, ial.info),
14317 errors.ECODE_NORES)
14319 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14321 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14322 len(jobs), self.op.group_name)
14324 return ResultWithJobs(jobs)
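# Added commentary (not in the original source): this is the LU behind
# "gnt-group evacuate"; like LUInstanceChangeGroup above it delegates the
# placement decision to the iallocator in CHG_GROUP mode and only submits
# the jobs the allocator returns.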
14327 class TagsLU(NoHooksLU): # pylint: disable=W0223
14328 """Generic tags LU.
14330 This is an abstract class which is the parent of all the other tags LUs.
14333 def ExpandNames(self):
14334 self.group_uuid = None
14335 self.needed_locks = {}
14337 if self.op.kind == constants.TAG_NODE:
14338 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14339 lock_level = locking.LEVEL_NODE
14340 lock_name = self.op.name
14341 elif self.op.kind == constants.TAG_INSTANCE:
14342 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14343 lock_level = locking.LEVEL_INSTANCE
14344 lock_name = self.op.name
14345 elif self.op.kind == constants.TAG_NODEGROUP:
14346 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14347 lock_level = locking.LEVEL_NODEGROUP
14348 lock_name = self.group_uuid
14353 if lock_level and getattr(self.op, "use_locking", True):
14354 self.needed_locks[lock_level] = lock_name
14356 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14357 # not possible to acquire the BGL based on opcode parameters)
14359 def CheckPrereq(self):
14360 """Check prerequisites.
14363 if self.op.kind == constants.TAG_CLUSTER:
14364 self.target = self.cfg.GetClusterInfo()
14365 elif self.op.kind == constants.TAG_NODE:
14366 self.target = self.cfg.GetNodeInfo(self.op.name)
14367 elif self.op.kind == constants.TAG_INSTANCE:
14368 self.target = self.cfg.GetInstanceInfo(self.op.name)
14369 elif self.op.kind == constants.TAG_NODEGROUP:
14370 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14372 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14373 str(self.op.kind), errors.ECODE_INVAL)
14376 class LUTagsGet(TagsLU):
14377 """Returns the tags of a given object.
14382 def ExpandNames(self):
14383 TagsLU.ExpandNames(self)
14385 # Share locks as this is only a read operation
14386 self.share_locks = _ShareAll()
14388 def Exec(self, feedback_fn):
14389 """Returns the tag list.
14392 return list(self.target.GetTags())
14395 class LUTagsSearch(NoHooksLU):
14396 """Searches the tags for a given pattern.
14401 def ExpandNames(self):
14402 self.needed_locks = {}
14404 def CheckPrereq(self):
14405 """Check prerequisites.
14407 This checks the pattern passed for validity by compiling it.
14411 self.re = re.compile(self.op.pattern)
14412 except re.error, err:
14413 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14414 (self.op.pattern, err), errors.ECODE_INVAL)
14416 def Exec(self, feedback_fn):
14417 """Returns the tag list.
14421 tgts = [("/cluster", cfg.GetClusterInfo())]
14422 ilist = cfg.GetAllInstancesInfo().values()
14423 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14424 nlist = cfg.GetAllNodesInfo().values()
14425 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14426 tgts.extend(("/nodegroup/%s" % n.name, n)
14427 for n in cfg.GetAllNodeGroupsInfo().values())
14429 for path, target in tgts:
14430 for tag in target.GetTags():
14431 if self.re.search(tag):
14432 results.append((path, tag))
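# Illustrative sketch (not part of the original file): searching for a tag
# pattern across all objects; the pattern is a standard Python regexp:
#
#   op = opcodes.OpTagsSearch(pattern="^staging$")
#
# The result is a list of (path, tag) pairs such as
# ("/instances/inst1.example.com", "staging").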
14436 class LUTagsSet(TagsLU):
14437 """Sets a tag on a given object.
14442 def CheckPrereq(self):
14443 """Check prerequisites.
14445 This checks the type and length of the tag name and value.
14448 TagsLU.CheckPrereq(self)
14449 for tag in self.op.tags:
14450 objects.TaggableObject.ValidateTag(tag)
14452 def Exec(self, feedback_fn):
14457 for tag in self.op.tags:
14458 self.target.AddTag(tag)
14459 except errors.TagError, err:
14460 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14461 self.cfg.Update(self.target, feedback_fn)
14464 class LUTagsDel(TagsLU):
14465 """Delete a list of tags from a given object.
14470 def CheckPrereq(self):
14471 """Check prerequisites.
14473 This checks that we have the given tag.
14476 TagsLU.CheckPrereq(self)
14477 for tag in self.op.tags:
14478 objects.TaggableObject.ValidateTag(tag)
14479 del_tags = frozenset(self.op.tags)
14480 cur_tags = self.target.GetTags()
14482 diff_tags = del_tags - cur_tags
14484 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14485 raise errors.OpPrereqError("Tag(s) %s not found" %
14486 (utils.CommaJoin(diff_names), ),
14487 errors.ECODE_NOENT)
14489 def Exec(self, feedback_fn):
14490 """Remove the tag from the object.
14493 for tag in self.op.tags:
14494 self.target.RemoveTag(tag)
14495 self.cfg.Update(self.target, feedback_fn)
14498 class LUTestDelay(NoHooksLU):
14499 """Sleep for a specified amount of time.
14501 This LU sleeps on the master and/or nodes for a specified amount of
14502 time.
14507 def ExpandNames(self):
14508 """Expand names and set required locks.
14510 This expands the node list, if any.
14513 self.needed_locks = {}
14514 if self.op.on_nodes:
14515 # _GetWantedNodes can be used here, but is not always appropriate to use
14516 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14517 # more information.
14518 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14519 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14521 def _TestDelay(self):
14522 """Do the actual sleep.
14525 if self.op.on_master:
14526 if not utils.TestDelay(self.op.duration):
14527 raise errors.OpExecError("Error during master delay test")
14528 if self.op.on_nodes:
14529 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14530 for node, node_result in result.items():
14531 node_result.Raise("Failure during rpc call to node %s" % node)
14533 def Exec(self, feedback_fn):
14534 """Execute the test delay opcode, with the wanted repetitions.
14537 if self.op.repeat == 0:
14540 top_value = self.op.repeat - 1
14541 for i in range(self.op.repeat):
14542 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14546 class LUTestJqueue(NoHooksLU):
14547 """Utility LU to test some aspects of the job queue.
14552 # Must be lower than default timeout for WaitForJobChange to see whether it
14553 # notices changed jobs
14554 _CLIENT_CONNECT_TIMEOUT = 20.0
14555 _CLIENT_CONFIRM_TIMEOUT = 60.0
14558 def _NotifyUsingSocket(cls, cb, errcls):
14559 """Opens a Unix socket and waits for another program to connect.
14562 @param cb: Callback to send socket name to client
14563 @type errcls: class
14564 @param errcls: Exception class to use for errors
14567 # Using a temporary directory as there's no easy way to create temporary
14568 # sockets without writing a custom loop around tempfile.mktemp and
14570 tmpdir = tempfile.mkdtemp()
14572 tmpsock = utils.PathJoin(tmpdir, "sock")
14574 logging.debug("Creating temporary socket at %s", tmpsock)
14575 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14580 # Send details to client
14583 # Wait for client to connect before continuing
14584 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14586 (conn, _) = sock.accept()
14587 except socket.error, err:
14588 raise errcls("Client didn't connect in time (%s)" % err)
14592 # Remove as soon as client is connected
14593 shutil.rmtree(tmpdir)
14595 # Wait for client to close
14598 # pylint: disable=E1101
14599 # Instance of '_socketobject' has no ... member
14600 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14602 except socket.error, err:
14603 raise errcls("Client failed to confirm notification (%s)" % err)
14607 def _SendNotification(self, test, arg, sockname):
14608 """Sends a notification to the client.
14611 @param test: Test name
14612 @param arg: Test argument (depends on test)
14613 @type sockname: string
14614 @param sockname: Socket path
14617 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14619 def _Notify(self, prereq, test, arg):
14620 """Notifies the client of a test.
14623 @param prereq: Whether this is a prereq-phase test
14625 @param test: Test name
14626 @param arg: Test argument (depends on test)
14630 errcls = errors.OpPrereqError
14632 errcls = errors.OpExecError
14634 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14638 def CheckArguments(self):
14639 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14640 self.expandnames_calls = 0
14642 def ExpandNames(self):
14643 checkargs_calls = getattr(self, "checkargs_calls", 0)
14644 if checkargs_calls < 1:
14645 raise errors.ProgrammerError("CheckArguments was not called")
14647 self.expandnames_calls += 1
14649 if self.op.notify_waitlock:
14650 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14652 self.LogInfo("Expanding names")
14654 # Get lock on master node (just to get a lock, not for a particular reason)
14655 self.needed_locks = {
14656 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14659 def Exec(self, feedback_fn):
14660 if self.expandnames_calls < 1:
14661 raise errors.ProgrammerError("ExpandNames was not called")
14663 if self.op.notify_exec:
14664 self._Notify(False, constants.JQT_EXEC, None)
14666 self.LogInfo("Executing")
14668 if self.op.log_messages:
14669 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14670 for idx, msg in enumerate(self.op.log_messages):
14671 self.LogInfo("Sending log message %s", idx + 1)
14672 feedback_fn(constants.JQT_MSGPREFIX + msg)
14673 # Report how many test messages have been sent
14674 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14677 raise errors.OpExecError("Opcode failure was requested")
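# Hedged sketch of how a test client could recover the messages sent above
# from a job's feedback log: only the prefixing with constants.JQT_MSGPREFIX
# is taken from the code; the helper name and the flat list-of-strings log
# format are assumptions.
def _sketch_extract_test_messages(log_entries, msgprefix):
  """Return the payloads of feedback entries carrying the test prefix."""
  return [entry[len(msgprefix):] for entry in log_entries
          if entry.startswith(msgprefix)]

# _sketch_extract_test_messages(["PFXhello", "unrelated"], "PFX") == ["hello"]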
14682 class IAllocator(object):
14683 """IAllocator framework.
14685 An IAllocator instance has four sets of attributes:
14686 - cfg that is needed to query the cluster
14687 - input data (all members of the _KEYS class attribute are required)
14688 - four buffer attributes (in_text, out_text, in_data, out_data), that represent the
14689 input (to the external script) in text and data structure format,
14690 and the output from it, again in two formats
14690 - the result variables from the script (success, info, result), for easy use
14695 # pylint: disable=R0902
14696 # lots of instance attributes
14698 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14700 self.rpc = rpc_runner
14701 # init buffer variables
14702 self.in_text = self.out_text = self.in_data = self.out_data = None
14703 # init all input fields so that pylint is happy
14705 self.memory = self.disks = self.disk_template = self.spindle_use = None
14706 self.os = self.tags = self.nics = self.vcpus = None
14707 self.hypervisor = None
14708 self.relocate_from = None
14710 self.instances = None
14711 self.evac_mode = None
14712 self.target_groups = []
14714 self.required_nodes = None
14715 # init result fields
14716 self.success = self.info = self.result = None
14719 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14721 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14722 " IAllocator" % self.mode)
14724 keyset = [n for (n, _) in keydata]
14727 if key not in keyset:
14728 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14729 " IAllocator" % key)
14730 setattr(self, key, kwargs[key])
14733 if key not in kwargs:
14734 raise errors.ProgrammerError("Missing input parameter '%s' to"
14735 " IAllocator" % key)
14736 self._BuildInputData(compat.partial(fn, self), keydata)
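# Hedged sketch of the buffer flow described in the class docstring:
# structured input (in_data) is serialized to text (in_text), handed to an
# external allocator script, and the script's textual answer (out_text) is
# parsed back into out_data.  json stands in for ganeti's serializer, and the
# direct subprocess call is purely illustrative -- in this module the script
# is actually run on the master node via the iallocator RPC (see Run below).
import json
import subprocess

def _sketch_iallocator_roundtrip(in_data, script_path):
  in_text = json.dumps(in_data)                     # in_data  -> in_text
  proc = subprocess.Popen([script_path], stdin=subprocess.PIPE,
                          stdout=subprocess.PIPE)
  (out_text, _) = proc.communicate(in_text)         # run the external script
  out_data = json.loads(out_text)                   # out_text -> out_data
  return out_data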
14738 def _ComputeClusterData(self):
14739 """Compute the generic allocator input data.
14741 This is the data that is independent of the actual operation.
14745 cluster_info = cfg.GetClusterInfo()
14748 "version": constants.IALLOCATOR_VERSION,
14749 "cluster_name": cfg.GetClusterName(),
14750 "cluster_tags": list(cluster_info.GetTags()),
14751 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14752 "ipolicy": cluster_info.ipolicy,
14754 ninfo = cfg.GetAllNodesInfo()
14755 iinfo = cfg.GetAllInstancesInfo().values()
14756 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14759 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14761 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14762 hypervisor_name = self.hypervisor
14763 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14764 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14766 hypervisor_name = cluster_info.primary_hypervisor
14768 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14771 self.rpc.call_all_instances_info(node_list,
14772 cluster_info.enabled_hypervisors)
14774 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14776 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14777 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14778 i_list, config_ndata)
14779 assert len(data["nodes"]) == len(ninfo), \
14780 "Incomplete node data computed"
14782 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14784 self.in_data = data
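# Illustrative only: the rough top-level shape of the structure assembled
# above, with made-up values.  Only keys visible in this excerpt are shown;
# the real "nodes"/"instances"/"nodegroups" entries carry the fields built by
# the _Compute*Data helpers below.
_EXAMPLE_IALLOCATOR_INPUT = {
  "version": 2,                          # stands in for IALLOCATOR_VERSION
  "cluster_name": "cluster.example.com",
  "cluster_tags": [],
  "enabled_hypervisors": ["xen-pvm"],
  "ipolicy": {},
  "nodegroups": {},                      # from _ComputeNodeGroupData
  "nodes": {},                           # static + dynamic node data
  "instances": {},                       # from _ComputeInstanceData
}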
14787 def _ComputeNodeGroupData(cfg):
14788 """Compute node groups data.
14791 cluster = cfg.GetClusterInfo()
14792 ng = dict((guuid, {
14793 "name": gdata.name,
14794 "alloc_policy": gdata.alloc_policy,
14795 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14797 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14802 def _ComputeBasicNodeData(cfg, node_cfg):
14803 """Compute static node data from the configuration.
14806 @returns: a dict mapping each node name to its static (config-derived) data dict
14809 # fill in static (config-based) values
14810 node_results = dict((ninfo.name, {
14811 "tags": list(ninfo.GetTags()),
14812 "primary_ip": ninfo.primary_ip,
14813 "secondary_ip": ninfo.secondary_ip,
14814 "offline": ninfo.offline,
14815 "drained": ninfo.drained,
14816 "master_candidate": ninfo.master_candidate,
14817 "group": ninfo.group,
14818 "master_capable": ninfo.master_capable,
14819 "vm_capable": ninfo.vm_capable,
14820 "ndparams": cfg.GetNdParams(ninfo),
14822 for ninfo in node_cfg.values())
14824 return node_results
14827 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14829 """Augment the static node data with runtime (dynamic) information.
14831 @param node_results: the basic node structures as filled from the config
14834 #TODO(dynmem): compute the right data on MAX and MIN memory
14835 # make a copy of the current dict
14836 node_results = dict(node_results)
14837 for nname, nresult in node_data.items():
14838 assert nname in node_results, "Missing basic data for node %s" % nname
14839 ninfo = node_cfg[nname]
14841 if not (ninfo.offline or ninfo.drained):
14842 nresult.Raise("Can't get data for node %s" % nname)
14843 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14845 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14847 for attr in ["memory_total", "memory_free", "memory_dom0",
14848 "vg_size", "vg_free", "cpu_total"]:
14849 if attr not in remote_info:
14850 raise errors.OpExecError("Node '%s' didn't return attribute"
14851 " '%s'" % (nname, attr))
14852 if not isinstance(remote_info[attr], int):
14853 raise errors.OpExecError("Node '%s' returned invalid value"
14855 (nname, attr, remote_info[attr]))
14856 # compute memory used by primary instances
14857 i_p_mem = i_p_up_mem = 0
14858 for iinfo, beinfo in i_list:
14859 if iinfo.primary_node == nname:
14860 i_p_mem += beinfo[constants.BE_MAXMEM]
14861 if iinfo.name not in node_iinfo[nname].payload:
14864 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14865 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14866 remote_info["memory_free"] -= max(0, i_mem_diff)
14868 if iinfo.admin_state == constants.ADMINST_UP:
14869 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14871 # compute memory used by instances
14873 "total_memory": remote_info["memory_total"],
14874 "reserved_memory": remote_info["memory_dom0"],
14875 "free_memory": remote_info["memory_free"],
14876 "total_disk": remote_info["vg_size"],
14877 "free_disk": remote_info["vg_free"],
14878 "total_cpus": remote_info["cpu_total"],
14879 "i_pri_memory": i_p_mem,
14880 "i_pri_up_memory": i_p_up_mem,
14882 pnr_dyn.update(node_results[nname])
14883 node_results[nname] = pnr_dyn
14885 return node_results
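# Hedged sketch of the memory accounting above, isolated from the RPC
# plumbing: the free memory a node reports is reduced by the amount each
# running primary instance could still grow into (its maximum memory minus
# what it currently uses).  The helper name and argument format are
# illustrative.
def _sketch_adjust_free_memory(reported_free, running_primaries):
  """running_primaries: list of (max_memory, used_memory) tuples."""
  free = reported_free
  for (max_mem, used_mem) in running_primaries:
    free -= max(0, max_mem - used_mem)
  return free

# A node reporting 4096 MB free with one running instance using 512 MB of a
# 2048 MB maximum is treated as having 4096 - (2048 - 512) = 2560 MB free.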
14888 def _ComputeInstanceData(cluster_info, i_list):
14889 """Compute global instance data.
14893 for iinfo, beinfo in i_list:
14895 for nic in iinfo.nics:
14896 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14900 "mode": filled_params[constants.NIC_MODE],
14901 "link": filled_params[constants.NIC_LINK],
14903 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14904 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14905 nic_data.append(nic_dict)
14907 "tags": list(iinfo.GetTags()),
14908 "admin_state": iinfo.admin_state,
14909 "vcpus": beinfo[constants.BE_VCPUS],
14910 "memory": beinfo[constants.BE_MAXMEM],
14911 "spindle_use": beinfo[constants.BE_SPINDLE_USE],
14913 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14915 "disks": [{constants.IDISK_SIZE: dsk.size,
14916 constants.IDISK_MODE: dsk.mode}
14917 for dsk in iinfo.disks],
14918 "disk_template": iinfo.disk_template,
14919 "hypervisor": iinfo.hypervisor,
14921 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14923 instance_data[iinfo.name] = pir
14925 return instance_data
14927 def _AddNewInstance(self):
14928 """Add new instance data to allocator structure.
14930 This in combination with _ComputeClusterData will create the
14931 correct structure needed as input for the allocator.
14933 The checks for the completeness of the opcode must have already been done.
14937 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14939 if self.disk_template in constants.DTS_INT_MIRROR:
14940 self.required_nodes = 2
14942 self.required_nodes = 1
14946 "disk_template": self.disk_template,
14949 "vcpus": self.vcpus,
14950 "memory": self.memory,
14951 "spindle_use": self.spindle_use,
14952 "disks": self.disks,
14953 "disk_space_total": disk_space,
14955 "required_nodes": self.required_nodes,
14956 "hypervisor": self.hypervisor,
14961 def _AddRelocateInstance(self):
14962 """Add relocate instance data to allocator structure.
14964 This in combination with _ComputeClusterData will create the
14965 correct structure needed as input for the allocator.
14967 The checks for the completeness of the opcode must have already been done.
14971 instance = self.cfg.GetInstanceInfo(self.name)
14972 if instance is None:
14973 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14974 " IAllocator" % self.name)
14976 if instance.disk_template not in constants.DTS_MIRRORED:
14977 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14978 errors.ECODE_INVAL)
14980 if instance.disk_template in constants.DTS_INT_MIRROR and \
14981 len(instance.secondary_nodes) != 1:
14982 raise errors.OpPrereqError("Instance does not have exactly one secondary node",
14983 errors.ECODE_STATE)
14985 self.required_nodes = 1
14986 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14987 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14991 "disk_space_total": disk_space,
14992 "required_nodes": self.required_nodes,
14993 "relocate_from": self.relocate_from,
14997 def _AddNodeEvacuate(self):
14998 """Get data for node-evacuate requests.
15002 "instances": self.instances,
15003 "evac_mode": self.evac_mode,
15006 def _AddChangeGroup(self):
15007 """Get data for change-group requests.
15011 "instances": self.instances,
15012 "target_groups": self.target_groups,
15015 def _BuildInputData(self, fn, keydata):
15016 """Build input data structures.
15019 self._ComputeClusterData()
15022 request["type"] = self.mode
15023 for keyname, keytype in keydata:
15024 if keyname not in request:
15025 raise errors.ProgrammerError("Request parameter %s is missing" %
15027 val = request[keyname]
15028 if not keytype(val):
15029 raise errors.ProgrammerError("Request parameter %s doesn't pass"
15030 " validation, value %s, expected"
15031 " type %s" % (keyname, val, keytype))
15032 self.in_data["request"] = request
15034 self.in_text = serializer.Dump(self.in_data)
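# Hedged sketch of the keydata-driven validation performed above: each mode in
# _MODE_DATA (below) lists (keyname, type-check) pairs, and the request must
# contain every key with a value accepted by its check.  The lambda checks are
# simplified stand-ins for the ht.T* validators.
def _sketch_validate_request(request, keydata):
  for (keyname, keytype) in keydata:
    if keyname not in request:
      raise ValueError("Request parameter %s is missing" % keyname)
    if not keytype(request[keyname]):
      raise ValueError("Request parameter %s fails validation" % keyname)

# _sketch_validate_request({"name": "inst1", "memory": 128},
#                          [("name", lambda v: isinstance(v, str)),
#                           ("memory", lambda v: isinstance(v, int))])
# passes; omitting "memory" or passing a non-integer raises ValueError.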
15036 _STRING_LIST = ht.TListOf(ht.TString)
15037 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
15038 # pylint: disable=E1101
15039 # Class '...' has no 'OP_ID' member
15040 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
15041 opcodes.OpInstanceMigrate.OP_ID,
15042 opcodes.OpInstanceReplaceDisks.OP_ID])
15046 ht.TListOf(ht.TAnd(ht.TIsLength(3),
15047 ht.TItems([ht.TNonEmptyString,
15048 ht.TNonEmptyString,
15049 ht.TListOf(ht.TNonEmptyString),
15052 ht.TListOf(ht.TAnd(ht.TIsLength(2),
15053 ht.TItems([ht.TNonEmptyString,
15056 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
15057 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
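# Illustrative only, with made-up names: a value that would satisfy the
# node-evacuation result check above -- a three-element list of (moved,
# failed, jobs).  Only the shapes follow from the ht checks; the meaning of
# the per-entry string fields is not shown in this excerpt, and the OP_ID
# string is assumed to match opcodes.OpInstanceMigrate.OP_ID.
_EXAMPLE_NEVAC_RESULT = [
  # moved: (instance name, <string field>, [destination nodes])
  [["instance1.example.com", "some-identifier", ["node2.example.com"]]],
  # failed: (instance name, <string field>)
  [["instance2.example.com", "no suitable node found"]],
  # jobs: lists of opcode dictionaries (remaining opcode parameters omitted)
  [[{"OP_ID": "OP_INSTANCE_MIGRATE"}]],
]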
15060 constants.IALLOCATOR_MODE_ALLOC:
15063 ("name", ht.TString),
15064 ("memory", ht.TInt),
15065 ("spindle_use", ht.TInt),
15066 ("disks", ht.TListOf(ht.TDict)),
15067 ("disk_template", ht.TString),
15068 ("os", ht.TString),
15069 ("tags", _STRING_LIST),
15070 ("nics", ht.TListOf(ht.TDict)),
15071 ("vcpus", ht.TInt),
15072 ("hypervisor", ht.TString),
15074 constants.IALLOCATOR_MODE_RELOC:
15075 (_AddRelocateInstance,
15076 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
15078 constants.IALLOCATOR_MODE_NODE_EVAC:
15079 (_AddNodeEvacuate, [
15080 ("instances", _STRING_LIST),
15081 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
15083 constants.IALLOCATOR_MODE_CHG_GROUP:
15084 (_AddChangeGroup, [
15085 ("instances", _STRING_LIST),
15086 ("target_groups", _STRING_LIST),
15090 def Run(self, name, validate=True, call_fn=None):
15091 """Run an instance allocator and return the results.
15094 if call_fn is None:
15095 call_fn = self.rpc.call_iallocator_runner
15097 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
15098 result.Raise("Failure while running the iallocator script")
15100 self.out_text = result.payload
15102 self._ValidateResult()
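# Hedged usage sketch: after building an IAllocator for some mode, Run hands
# in_text to the named allocator script on the master node and _ValidateResult
# (below) fills in success/info/result.  "hail" is only an example allocator
# name; the surrounding variables are assumptions.
#
#   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_RELOC,
#                    name=instance_name, relocate_from=[old_secondary])
#   ial.Run("hail")
#   if not ial.success:
#     raise errors.OpExecError("iallocator failure: %s" % ial.info)
#   new_nodes = ial.result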
15104 def _ValidateResult(self):
15105 """Process the allocator results.
15107 This will process the allocator results and, if successful, save them in
15108 self.out_data and the individual result attributes.
15112 rdict = serializer.Load(self.out_text)
15113 except Exception, err:
15114 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
15116 if not isinstance(rdict, dict):
15117 raise errors.OpExecError("Can't parse iallocator results: not a dict")
15119 # TODO: remove backwards compatibility in later versions
15120 if "nodes" in rdict and "result" not in rdict:
15121 rdict["result"] = rdict["nodes"]
15124 for key in "success", "info", "result":
15125 if key not in rdict:
15126 raise errors.OpExecError("Can't parse iallocator results:"
15127 " missing key '%s'" % key)
15128 setattr(self, key, rdict[key])
15130 if not self._result_check(self.result):
15131 raise errors.OpExecError("Iallocator returned invalid result,"
15132 " expected %s, got %s" %
15133 (self._result_check, self.result),
15134 errors.ECODE_INVAL)
15136 if self.mode == constants.IALLOCATOR_MODE_RELOC:
15137 assert self.relocate_from is not None
15138 assert self.required_nodes == 1
15140 node2group = dict((name, ndata["group"])
15141 for (name, ndata) in self.in_data["nodes"].items())
15143 fn = compat.partial(self._NodesToGroups, node2group,
15144 self.in_data["nodegroups"])
15146 instance = self.cfg.GetInstanceInfo(self.name)
15147 request_groups = fn(self.relocate_from + [instance.primary_node])
15148 result_groups = fn(rdict["result"] + [instance.primary_node])
15150 if self.success and not set(result_groups).issubset(request_groups):
15151 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15152 " differ from original groups (%s)" %
15153 (utils.CommaJoin(result_groups),
15154 utils.CommaJoin(request_groups)))
15156 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15157 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15159 self.out_data = rdict
15162 def _NodesToGroups(node2group, groups, nodes):
15163 """Returns a list of unique group names for a list of nodes.
15165 @type node2group: dict
15166 @param node2group: Map from node name to group UUID
15168 @param groups: Group information
15170 @param nodes: Node names
15177 group_uuid = node2group[node]
15179 # Ignore unknown node
15183 group = groups[group_uuid]
15185 # Can't find group, let's use UUID
15186 group_name = group_uuid
15188 group_name = group["name"]
15190 result.add(group_name)
15192 return sorted(result)
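# Illustrative example of the helper above, with made-up data: unknown nodes
# are skipped, groups without a known name fall back to their UUID, and the
# result is a sorted list of unique names.
#
#   node2group = {"node1": "uuid-a", "node2": "uuid-a", "node3": "uuid-b"}
#   groups = {"uuid-a": {"name": "default"}}       # "uuid-b" is unknown
#   IAllocator._NodesToGroups(node2group, groups,
#                             ["node1", "node2", "node3", "node9"])
#   # -> ["default", "uuid-b"]        ("node9" is silently ignored)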
15195 class LUTestAllocator(NoHooksLU):
15196 """Run allocator tests.
15198 This LU runs the allocator tests.
15201 def CheckPrereq(self):
15202 """Check prerequisites.
15204 This checks the opcode parameters depending on the direction and mode of the test.
15207 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15208 for attr in ["memory", "disks", "disk_template",
15209 "os", "tags", "nics", "vcpus"]:
15210 if not hasattr(self.op, attr):
15211 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15212 attr, errors.ECODE_INVAL)
15213 iname = self.cfg.ExpandInstanceName(self.op.name)
15214 if iname is not None:
15215 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15216 iname, errors.ECODE_EXISTS)
15217 if not isinstance(self.op.nics, list):
15218 raise errors.OpPrereqError("Invalid parameter 'nics'",
15219 errors.ECODE_INVAL)
15220 if not isinstance(self.op.disks, list):
15221 raise errors.OpPrereqError("Invalid parameter 'disks'",
15222 errors.ECODE_INVAL)
15223 for row in self.op.disks:
15224 if (not isinstance(row, dict) or
15225 constants.IDISK_SIZE not in row or
15226 not isinstance(row[constants.IDISK_SIZE], int) or
15227 constants.IDISK_MODE not in row or
15228 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15229 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15230 " parameter", errors.ECODE_INVAL)
15231 if self.op.hypervisor is None:
15232 self.op.hypervisor = self.cfg.GetHypervisorType()
15233 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15234 fname = _ExpandInstanceName(self.cfg, self.op.name)
15235 self.op.name = fname
15236 self.relocate_from = \
15237 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15238 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15239 constants.IALLOCATOR_MODE_NODE_EVAC):
15240 if not self.op.instances:
15241 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15242 self.op.instances = _GetWantedInstances(self, self.op.instances)
15244 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15245 self.op.mode, errors.ECODE_INVAL)
15247 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15248 if self.op.allocator is None:
15249 raise errors.OpPrereqError("Missing allocator name",
15250 errors.ECODE_INVAL)
15251 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15252 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15253 self.op.direction, errors.ECODE_INVAL)
15255 def Exec(self, feedback_fn):
15256 """Run the allocator test.
15259 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15260 ial = IAllocator(self.cfg, self.rpc,
15263 memory=self.op.memory,
15264 disks=self.op.disks,
15265 disk_template=self.op.disk_template,
15269 vcpus=self.op.vcpus,
15270 hypervisor=self.op.hypervisor,
15272 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15273 ial = IAllocator(self.cfg, self.rpc,
15276 relocate_from=list(self.relocate_from),
15278 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15279 ial = IAllocator(self.cfg, self.rpc,
15281 instances=self.op.instances,
15282 target_groups=self.op.target_groups)
15283 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15284 ial = IAllocator(self.cfg, self.rpc,
15286 instances=self.op.instances,
15287 evac_mode=self.op.evac_mode)
15289 raise errors.ProgrammerError("Unhandled mode %s in"
15290 " LUTestAllocator.Exec" % self.op.mode)
15292 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15293 result = ial.in_text
15295 ial.Run(self.op.allocator, validate=False)
15296 result = ial.out_text
15300 #: Query type implementations
15302 constants.QR_CLUSTER: _ClusterQuery,
15303 constants.QR_INSTANCE: _InstanceQuery,
15304 constants.QR_NODE: _NodeQuery,
15305 constants.QR_GROUP: _GroupQuery,
15306 constants.QR_OS: _OsQuery,
15307 constants.QR_EXPORT: _ExportQuery,
15310 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15313 def _GetQueryImplementation(name):
15314 """Returns the implementation for a query type.
15316 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15320 return _QUERY_IMPL[name]
15322 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15323 errors.ECODE_INVAL)
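# Illustrative usage of the lookup above: a known resource name returns the
# implementation class from _QUERY_IMPL, while an unknown one is reported as
# an OpPrereqError instead of leaking a KeyError.
#
#   _GetQueryImplementation(constants.QR_NODE)    # -> _NodeQuery
#   _GetQueryImplementation("no-such-resource")   # raises errors.OpPrereqError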